o
    *j~X                     @   s   d dl Z d dlZd dlZd dlZd dlm  mZ d dlm	Z	m
Z
mZmZ d dlmZmZ d dlmZ dd Zddd	d
ddddddd
ZdddddddZdd Zdd Zdd Zdd  Z	!	"	#d.d$d%Zd&d' ZG d(d) d)Zd*d+ ZG d,d- d-eZdS )/    N)BeamSearchScorerLogitsProcessorListMinLengthLogitsProcessorNoRepeatNGramLogitsProcessor)mpuprint_rank_0)rouge_scorerc                 C   s$   | D ]}|  s|dks dS qdS )N,FT)isdigit)wch r   r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/mglm/tasks/seq2seq/evaluate.py	_is_digit   s
   r   z-lrb-z-rrb-z-lsb-z-rsb-z-lcb-z-rcb-UNKz&amp;z&lt;z&gt;)
()[]{}[UNK]&<>z-LRB-z-RRB-z-LSB-z-RSB-z-LCB-z-RCB-)r   r   r   r   r   r   c                 C   s  |dkr| S |dkr|  dd} | S |  }g }d}d}d}d}|t|k rx|| }d}	|dkrG|r9|d n|d	 | }|d
7 }n)|dkr}t|dkr}|d dr}|t|d
 k r}||d
  dkr}|d d d |d< |d |d7 }n|dkr|t|d
 k r||d
  dv r|d||d
    |d7 }n|dkr|r|d n|d | }|d
7 }n|dkr|t|d k r||d
  dkr||d  dkr|d |d7 }n|dkrt|dkrt|d r|t|d
 k rt||d
  r|d  d||d
   7  < |d7 }nT|dkrUt|dkrU|d  rU|t|d
 k rU||d
   rU|d  d||d
   7  < |d7 }n|dkrt|dkrt|d d
kr|d  r|t|d k rt||d
  d
kr||d
   r||d  dkr|d }
|
d t|k rt||
d
  d
kr||
d
   r||
d  dkr|
d7 }
nn	|
d t|k s|d  d|||
 7  < |
}n|dkrH|t|d
 k r||d
  dkr|d |d7 }nm|t|d
 ks|dkr|d |d
7 }nU|d t	j
vr>||d
  d t	j
vr>|d  d7  < |d
7 }d}	n2|d |d
7 }n(|rgt|dkrg|d t	j
vrg|d  |7  < |d
7 }n	|| |d
7 }|	}|t|k s'd|S )NZ
cnn_dm_orgZgigawordr   r   Fr   "''``   'ntn't   )sdZll`....   r	    ---T )replacesplitlenappendendswithr   r
   isalphajoinstringpunctuation)textdatasetZinput_tokensoutput_tokensZhas_left_quoteZhas_left_single_quoteiZ	prev_dashtokZflag_prev_dashkr   r   r   fix_tokenization.   s  





8



$








Rr>   c                 C   s8   i }| D ]}||  v r||  d7  < qd||< q|S )Nr   )keys)tokenscounterr"   r   r   r   count_tokens   s   
rB   c           
      C   s   |    }|   }t|dkst|dkr$t|t|kr"dS dS t|}t|}d}| D ]}|| v rE|t|| || 7 }q2|t| }|t| }	d| |	 ||	 d  S )Nr   r   g       @gh㈵>)lowerr0   r1   rB   r?   min)
Ztext_aZtext_bZtokens_aZtokens_bZset_aZset_bmatchtokenprr   r   r   get_f1   s   rI   c                 C   sd   dd | D }g }t  }t|D ]\}}t |}t||@ t| |kr+|| |  ||O }q|S )Nc                 S   s   g | ]}|   qS r   )rC   r0   ).0lr   r   r   
<listcomp>       z$remove_duplicate.<locals>.<listcomp>)set	enumerater1   r2   )Zl_listduplicate_rateZtk_listZr_listZhistory_setr;   Zw_listZw_setr   r   r   remove_duplicate   s   
rQ   rouge-1ffffff?cnn_dmc              	      s  dddddd |D }g }|D ]}|  d} fdd|D }d	|}|| qg }	| D ]E}
g }|
  dD ]$t d
tfdd|D rQq=t }|dkr\q=| q=|rm|dk rmt||}d	|}|	| q2tj	
 dkrdd l}tdd}t||	D ]\}}||||dd	  qW d    n1 sw   Y  tj gddfddt|	|D }fdd|D }t|t| }|S )NZrouge1Zrouge2Z	rougeLsum)rR   zrouge-2zrouge-lc                 S   s   g | ]}|j d  qS )ref)meta)rJ   exampler   r   r   rL      s    z rouge_metric.<locals>.<listcomp>z[SEP]c                    s   g | ]}t | d qS )r9   )r>   )rJ   sentencerX   r   r   rL      rM   
rX   c                 3   s    | ]
}t  |d kV  qdS )g      ?N)rI   )rJ   r%   )rY   r   r   	<genexpr>   s    zrouge_metric.<locals>.<genexpr>   r   r   z./results.jsonr   )rU   predT)Zuse_stemmerc                    s   g | ]
\}}  ||qS r   )score)rJ   r]   rU   )scorerr   r   rL      s    c                    s   g | ]	}|   j qS r   )Zfmeasure)rJ   r^   )metricmetric_dictr   r   rL      s    )stripr0   r5   r2   r>   anyr1   rQ   torchdistributedget_rankjsonopenzipwritedumpsr   ZRougeScorersum)predictionslabelsexamplesr`   rP   r9   refsZref_listrU   Z	pred_list
predictionbufZs_lenlinerg   outputr]   Zscoresr   )r9   r`   ra   r_   rY   r   rouge_metric   sP   



ru   c                 C   s:   | d    }| d    }| d    }|||fS )z/Process batch and produce inputs for the model.r8   attention_maskZposition_id)longcuda)batchargsr@   rv   position_idsr   r   r   process_batch   s   
r|   c                   @   s   e Zd Zdd Zdd ZdS )DecoderEvaluaterc                 C   s   || _ |dj| _|dj| _|jr|djn|dj| _|dj| _t | _	|j
dkr>t|j
| j}| j	| |jdkrPt|j}| j	| d S d S )NZsopZeopZsMASKMASKpadr   )	tokenizerZget_commandZIdstart_token	end_tokenZ	task_mask
mask_token	pad_tokenr   
processorsZmin_tgt_lengthr   r2   Zno_repeat_ngram_sizer   )selfrz   r   	processorr   r   r   __init__   s(   


zDecoderEvaluater.__init__c           #   
      s  |   tj jdtdd t tj	 dkt
jdd}td t  t|D ]\}}t| \}}	}
|dt j j|j jdd}tj jftj|jd	}d
|ddddf< | j f}d}| jk r|dkr|||
|	dd^}}|d|dddf }|dd jd j d} fdd|D }| jdd}
t| D ]\}}|j}||
|dddf< q|
  j dd}
|! j d}|! j g}	n. j"s|d |
dddf< |ddddf }|||
|	g|R ddi^}}|dddf }t#j$|dd}%||}||dddf &| }|j'd }| j| }t#j(|dd} j)r^tj*|d j ddd\}}n
tj+|d j d}t,|d|}tj-|ddd\}}t,|d|}|| }|| }|j.||||j/j0d}|d }|d }|d |d}tj1|ddf |gdd}|rfdd|D ng }|j2rn	|d7 }| jk sv|j3||||j/j0d\}}g }| D ]}fdd|D }j45|}|6| q|d }t7|tj8r|9 :  }t;||D ]\}} |<||  q|d  j= dkr:td|d  d t>|  q.W d   n	1 sFw   Y  |?  tj@  td! g g }}!|A D ]\}}"|6|B|Cd" |!6|" qatj@  |g |!fS )#zgCalculate correct over total answers and return prediction if the
        `output_predictions` is true.I  r   '     secondsDistributed store createdF)
batch_size
max_length	num_beamsdevicelength_penaltyZdo_early_stopping)Zdtyper   g    eNr   Treturn_memoryr    c                    s4   g | ]}| d d  jd d  j dqS )r   r    )	unsqueezerepeatr   viewrJ   Zmem)rz   r   
seq_lengthr   r   rL   *  s    

z-DecoderEvaluater.evaluate.<locals>.<listcomp>r$   r   dim)r=   r   Zlargest)Znum_samples)Z
descendingr   )Zeos_token_idZpad_token_idZnext_beam_scoresZnext_beam_tokensZnext_beam_indicesc                    s   g | ]}|  qS r   r   r   )beam_idxr   r   rL   m  s    c                        g | ]}| j  jfvr|qS r   r   r   rJ   rF   r   r   r   rL   z  
    uid
Iteration  / Evaluation completedutf-8)Devalrd   re   TCPStore	master_iprandomrandintr   get_data_parallel_world_sizerf   datetime	timedeltar   no_gradrO   r|   sizer   Zout_seq_lengthr   r   r   Zzerosfloatr   tgt_seq_lengthr   r   new_onestolistindexr   Zreshape	new_zerosZno_block_positionFlog_softmaxr   Z	expand_asshapeZsoftmaxZselect_topkZtopkZmultinomialZgathersortprocessr   r   catZis_donefinalizer   	DecodeIdsr2   
isinstanceTensorcpunumpyri   rN   log_intervalr1   trainbarrieritemsgetdecode)#r   model
dataloaderexample_dictrz   storeidxdatar@   rv   r{   Zbeam_scorerZbeam_scoresrA   next_token_logitsmemsr;   r8   mask_pos
last_tokennext_token_scoresZ
vocab_sizeZprobs_next_tokensZ_indicesZnext_indicesZbeam_outputsZbeam_next_tokensrm   uid_listr   rq   ro   rW   r   )rz   r   r   r   r   r   evaluate   sD  













S

~



zDecoderEvaluater.evaluateN)__name__
__module____qualname__r   r   r   r   r   r   r}      s    r}   c                 C   s   |  dd} |  dd} |  dd} |  dd} |  d	d
} |  dd} |  dd} |  dd} |  dd} |  dd} |  dd} |  dd} | S )Nz` `r   z' 'r   zn ' tr#   z' sz'sz' mz'mz' rez'rez. . .r)   z . .z ..z- -r-   zu . s .zu.s.zu . k .zu.k.ze . g .ze.g.)r/   )r8   r   r   r   blanklm_fix_tokenization  s   r   c                   @   s   e Zd Zdd ZdS )BlankLMEvaluaterc           $   	      s  |   tj|jdtdd t tj	 dkt
jdd}td t  t|D ]\}}t||\}}	}
|}|d}g }g }| D ]}| fddt|D  |d qJd}d	g| }||jk r\|dkr|||
|	d
d^}}|d d df }||dd}
t| D ]\}}|| ||  }||
|df< q||d}||}	n1|
d d df d |
d d df< |d d dd f }|||
|	g|R dd
i^}}|d d df }tj|dd} ||}|jddd }t| D ]E\}}| jkr6|| d t|| k r2||  d7  <  j||< || ||  |
|df< d|
|df< nd
||< || r@ j||< qt|rGntj|| dgdd}|d7 }||jk smg }t| D ]q\}} fdd|D }g g}|D ]}| jkr|g  qv|d | qvg }d}||  D ](}| j!kr|t|k r||| 7 }|d7 }q| jfvr|| q j"#|d d }t$|}|| qd|d }t%|tj&r|' (  }t)||D ]\} }!|*| |! q|d |j+ dkrtd|d  dt|  q.W d    n	1 sw   Y  |,  tj-  td g g }}"|. D ]\} }#||/| 0d |"|# q9tj-  |g |"fS )Nr   r   r   r   r   r   c                    s   g | ]\}}| j kr|qS r   )r   )rJ   r;   xr   r   r   rL     s    z-BlankLMEvaluater.evaluate.<locals>.<listcomp>FTr   r    r$   r   r   r   c                    r   r   r   r   r   r   r   rL     r   r   r   r   r   r   )1r   rd   re   r   r   r   r   r   r   rf   r   r   r   r   rO   r|   r   r   r2   r   r   r   r   r   r   maxr   r1   r   r   allr   r   r   r   r   r   r   r   r   r   ri   rN   r   r   r   r   r   r   )$r   r   r   r   rz   r   r   r   r@   rv   r{   Z
src_tokensr   Zmask_positionsZcurrent_maskr8   rA   doner   r   r;   r   r   r   r   
next_tokenrm   ZblanksrF   r:   Zcurrent_blankr   r   rq   ro   rW   r   r   r   r     s   





 





1

d



zBlankLMEvaluater.evaluateN)r   r   r   r   r   r   r   r   r     s    r   )rR   rS   rT   )r   r   r6   rd   Ztorch.nn.functionalnnZ
functionalr   Zgeneration_utilsr   r   r   r   Zmegatron_utilr   r   Zrouge_scorer   r   Zgigaword_tok_dictZcnndm_tok_dictr>   rB   rI   rQ   ru   r|   r}   r   r   r   r   r   r   <module>   sP   
b

/ &