o
    *js                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	 ddl
Z
ddlZddlmZ dZeg dZdd ZG d	d
 d
Zd@ddZdAddZdAddZG dd deZdd Zdd ZdBddZdCddZdd Zdd  Zd!d" Zd#d$ Z d%d& Z!d'd( Z"d)d* Z#d+d, Z$d-d. Z%d/d0 Z&d1d2 Z'd3d4 Z(d5d6 Z)e*d7kre+ Z,e,j-d8d9d: e,j-d;d<d: e,j-d=d>d: e,. Z/e/j01 2d?de/_0e)e/ dS dS )Dz?
This module computes evaluation metrics for DuReader dataset.
    N)Counterdefaultdict)Rouge ZYesZNoZDependsc                    s   t | t  k r|   }  fddtdt | d D }tdt  d D ]@}tdt | d D ]4}| |d   |d  krP||d  |d  d || |< q1t||d  | || |d  || |< q1q&|t |  t   S )a  
    Calculates longest common subsequence for a pair of tokenized strings
    :param string : list of str : tokens from a string split using whitespace
    :param sub : list of str : shorter string, also split using whitespace
    :returns: length (list of int): length of the longest common subsequence between the two strings

    Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
    c                    s(   g | ]}d d t dt d D qS )c                 S   s   g | ]}d qS r    ).0ir   r   l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/palm_v2/dureader_eval.py
<listcomp>0       z%my_lcs.<locals>.<listcomp>.<listcomp>r      )rangelen)r	   jsubr   r   r   0   s
    

zmy_lcs.<locals>.<listcomp>r   r   )r   r   max)stringr   lengthsr   r
   r   r   r   my_lcs$   s   	


",r   c                   @   s&   e Zd Zd	ddZdd Zdd ZdS )
Bleu   c                 C   s   || _ i | _i | _d S N)_nZ_hypo_for_imageZref_for_image)selfnr   r   r   __init__A   s   
zBleu.__init__c           
      C   s   t | t | ksJ t | }t| jd}|D ]2}|| }|| }t|t u s.J t|dks6J t|t u s>J t|dksFJ ||d |f7 }q|jddd\}}	||	fS )Nr   r   r   closest)optionverbose)listkeys
BleuScorerr   typer   compute_score)
r   ZgtsresZimgIdsZbleu_scoreridZhyporefZscorescoresr   r   r   r'   G   s   zBleu.compute_scorec                 C   s   dS )Nr   r   r   r   r   r   method[   s   zBleu.methodNr   )__name__
__module____qualname__r   r'   r-   r   r   r   r   r   ?   s    
r   r   Fc                 C   sn   |   }tt}td|d D ]!}tt|| d D ]}t||||  }||  d7  < qqt||fS )zTakes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.r   )splitr   intr   r   tuple)sr   outwordscountskr
   ngramr   r   r   precook_   s   r;   c           
      C   s   g }i }| D ]"}t ||\}}|| | D ]\}}	t||d|	||< qq|dkr5t|}||fS |dkrCtt|t| }||fS )zTakes a list of reference sentences for a single segment
    and returns an object that encapsulates everything that BLEU
    needs to know about them.r   shortestaverage)	r;   appenditemsr   getminfloatsumr   )
refseffr   reflenZ	maxcountsr*   Zrlr8   r:   countr   r   r   	cook_refsl   s   
rH   c           
   	      s   |\}}t | |d\ }i }|dkr"t fdd|D d |d< n||d<  |d<  fdd	td|d D |d
< dg| |d< | D ]\}}	|d t|d   t||d|	7  < qE|S )znTakes a test sentence and returns an object that
    encapsulates everything that BLEU needs to know about it.Tr    c                 3        | ]}t |  |fV  qd S r   absr	   r*   testlenr   r   	<genexpr>       zcook_test.<locals>.<genexpr>r   rF   rN   c                    s   g | ]}t d  | d qS )r   r   )r   r	   r9   rM   r   r   r      s    zcook_test.<locals>.<listcomp>guessr   correct)r;   rA   r   r?   r   r@   )
testZxxx_todo_changemerE   r   rF   Zrefmaxcountsr8   resultr:   rG   r   rM   r   	cook_test   s     
rV   c                   @   s   e Zd ZdZdZdd Zd)ddZd	d
 Zd*ddZd*ddZ	d*ddZ
d*ddZd*ddZdd Zdd Zdd Zdd Zdd Zd+d d!Zd,d"d#Zd-d%d&Zd-d'd(ZdS ).r%   zBleu scorer.
    )r   crefsctest_score_ratio_testlen_reflenspecial_reflenc                 C   s2   t | jd}t| j|_t| j|_d|_|S )z copy the refs.r   N)r%   r   copyrX   rW   rY   )r   newr   r   r   r^      s
   zBleuScorer.copyNr   c                 C   s(   || _ g | _g | _| || || _dS )z singular instance N)r   rW   rX   cook_appendr]   )r   rT   rD   r   r]   r   r   r   r      s
   
zBleuScorer.__init__c                 C   sT   |dur%| j t| |durt|| j d }| j| n| jd d| _dS )zCcalled by constructor and __iadd__ to avoid creating new instances.N)rW   r>   rH   rV   rX   rY   )r   rT   rD   Zcooked_testr   r   r   r`      s   
zBleuScorer.cook_appendc                 C      | j |d | jS Nr!   )r'   rZ   r   r!   r   r   r   ratio      zBleuScorer.ratioc                 C   s   | j |d| j|dfS )zreturn (bleu, len_ratio) pairrd   )Zfscorerf   re   r   r   r   score_ratio   s   zBleuScorer.score_ratioc                 C   s   d|  | S )Nz%.4f (%.2f))rh   re   r   r   r   score_ratio_str   s   zBleuScorer.score_ratio_strc                 C   rb   rc   )r'   r\   re   r   r   r   rF      rg   zBleuScorer.reflenc                 C   rb   rc   )r'   r[   re   r   r   r   rN      rg   zBleuScorer.testlenc                 C   sd   t |tu r	|g}t|t| jksJ |g | _t|| jD ]\}}| jt|| qd | _| S r   )	r&   strr   rW   rX   zipr>   rV   rY   )r   new_testtrsr   r   r   retest   s   zBleuScorer.retestc                 C   s   |  | S )z= replace test(s) with new test(s), and returns the new score.)ro   r'   )r   rl   r   r   r   rescore   s   zBleuScorer.rescorec                 C   s:   t | jt | jksJ dt | jt | jf t | jS )Nzrefs/test mismatch! %d<>%d)r   rW   rX   r,   r   r   r   size   s   

zBleuScorer.sizec                 C   s\   t |tu r| |d |d  | S | |sJ d| j|j | j|j d| _| S )z.add an instance (e.g., from another sentence).r   r   zincompatible BLEUs.N)r&   r4   r`   
compatiblerX   extendrW   rY   r   otherr   r   r   __iadd__   s   zBleuScorer.__iadd__c                 C   s   t |to
| j|jkS r   )
isinstancer%   r   rt   r   r   r   rr         zBleuScorer.compatibler=   c                 C   s   |  | jd d |S Nr   )_single_reflenrW   re   r   r   r   single_reflen  rx   zBleuScorer.single_reflenc                    sf   |dkr
t |}|S |dkrtt|t| }|S |dkr-t  fdd|D d }|S J d| )	Nr<   r=   r    c                 3   rI   r   rJ   rL   rM   r   r   rO     rP   z,BleuScorer._single_reflen.<locals>.<genexpr>r   Fzunsupported reflen option %s)rA   rB   rC   r   )r   Zreflensr!   rN   rF   r   rM   r   rz     s   zBleuScorer._single_reflenr   c                 C   s   d | _ | ||S r   )rY   r'   )r   r!   r"   r   r   r   recompute_score  s   zBleuScorer.recompute_scorec              
   C   s  | j }d}d}dd t|D }| jd ur| jS |d u r't| jdkr%dnd}d| _d| _dddg| dg| d	}| jD ]}|d
 }	|  j|	7  _| jd u rY| 	|d ||	}
n| j}
|  j|
7  _dD ]}t|D ]}|| |  || | 7  < qkqed}t|D ]%}|t
|d | | t
|d | |  9 }|| |d|d    q|	| |
|  }|dk rt|D ]}|| d  tdd|  9  < q|dkrt||
 q=| j|d< | j|d
< g }d}t|D ]!}|t
|d | | |d | |  9 }||d|d    q| j| | j|  }|dk r1t|D ]}||  tdd|  9  < q|dkr?t| td| || _| j|fS )Ng&.>gV瞯<c                 S   s   g | ]}g qS r   r   )r	   _r   r   r   r     r   z,BleuScorer.compute_score.<locals>.<listcomp>r   r=   r    r   )rN   rF   rR   rS   rN   rF   )rR   rS         ?rS   rR   ra   zratio:)r   r   rY   r   rW   r[   r\   rX   r]   rz   rB   r>   mathexpprint)r   r!   r"   r   ZsmallZtinyZ	bleu_listZ
totalcompscompsrN   rF   keyr9   Zbleurf   Zbleusr   r   r   r'     sr   


$



"


zBleuScorer.compute_score)NNr   Nr   )r=   NNry   )r/   r0   r1   __doc__	__slots__r^   r   r`   rf   rh   ri   rF   rN   ro   rp   rq   rv   rr   r{   rz   r|   r'   r   r   r   r   r%      s(    
	







r%   c                 C   s<   | s| S g }| D ]}dd t |D }|d| q|S )z
    Normalize strings to space joined chars.

    Args:
        s: a list of strings.

    Returns:
        A list of normalized strings.
    c                 S   s    g | ]}t | d kr|qS r   )r   strip)r	   cr   r   r   r   o       znormalize.<locals>.<listcomp> )r#   r>   join)r5   
normalizedsstokensr   r   r   	normalizea  s   
r   c                 C   s   d| v sJ dd| v sJ d | d d| v s"J d | d t| d ts2J d | d d| v s?J d	 | d t| d trNt| d d
ksWJ d | d dS )zX
    Check data.

    Raises:
        Raises AssertionError when data is not legal.
    question_idzMissing 'question_id' field.question_typez.Missing 'question_type' field. question_id: {}yesno_answersz.Missing 'yesno_answers' field. question_id: {}z'yesno_answers' field must be a list, if the 'question_type' is not
            'YES_NO', then this field should be an empty list.
            question_id: {}entity_answersz/Missing 'entity_answers' field. question_id: {}r   z'entity_answers' field
            must be a list, and has at least one element, which can be a empty list.
            question_id: {}N)formatrw   r#   r   )objtaskr   r   r   
data_checkt  s*   




r   c              
   C   s   ddd}i }g d}|r|dg7 }|  drt| dnd}|du r&| gn| }|D ]F}||d|dD ]<}	z	t|	 }
W n tyK   td	w t|
| |
d
 }||vs`J d	|i ||< |D ]
}|
| || |< qfq5q,|S )a  
    Read predict answers or reference answers from file.

    Args:
        file_name: the name of the file containing predict result or reference
                   result.

    Returns:
        A dictionary mapping question_id to the result information. The result
        information itself is also a dictionary with has four keys:
        - question_type: type of the query.
        - yesno_answers: A list of yesno answers corresponding to 'answers'.
        - answers: A list of predicted answers.
        - entity_answers: A list, each element is also a list containing the entities
                    tagged out from the corresponding answer string.
    Nc                 S   s   |d ur
| | |S t | |S r   )open)	file_namemodezip_objr   r   r   _open  s   
zread_file.<locals>._open)answersr   r   r   sourcez.zipr)r   z'Every line of data should be legal jsonr   zDuplicate question_id: {}r   )
endswithzipfileZipFilenamelistjsonloadsr   
ValueErrorr   r   )r   r   is_refr   resultsr$   zf	file_listfnliner   qidr9   r   r   r   	read_file  s8   


r   c           	      C   s   t |  t | ksJ dt | t |   i }t||| \}}t|D ]\}}||d|d  < q-t tt	dd | 
 tt	dd |
 }tdd |D t| }||d	< |S )
z(
    Compute bleu and rouge scores.
    zmissing keys: {}zBleu-%dr   c                 S      | d S ry   r   xr   r   r   <lambda>      z$compute_bleu_rouge.<locals>.<lambda>c                 S   r   ry   r   r   r   r   r   r     r   c                 S   s   g | ]}|d  d qS )zrouge-lfr   )r	   dr   r   r   r     s    z&compute_bleu_rouge.<locals>.<listcomp>Rouge-L)setr$   r   r   r'   	enumerater   Z
get_scoresr#   mapvaluesrC   r   )		pred_dictref_dictZ
bleu_orderr+   Zbleu_scoresr}   r
   Z
bleu_scoreZrouge_scorer   r   r   compute_bleu_rouge  s$   r   c                 C   sf   t | t |@ }t| }|dkrdS d| t|  }d| t| }d| | ||  }|||fS )zp
    Compute local precision recall and f1-score,
    given only one prediction list and one reference list
    r   r   r   r   r~      )r   rC   r   r   )	pred_listref_listcommonZnum_samepr   f1r   r   r   	local_prf  s   
r   c                 C   sJ  t | }d\}}}|D ]i}| |g g}t|dks#J d||d }|| }d}	d}
|D ]}t||d }||	krB|}
|}	q1|
du rZt|dkrXt|dd d	d }
ng }
t |
}t |}|t||@ 7 }|t|7 }|t|7 }q|dkrt|| nd}|dkrt|| nd}|dkrd| | ||  nd}|||d
S )z0
    Compute precision recall and f1-score.
    r   r   z6the number of entity list for question_id {} is not 1.r   Nr   c                 S   s   t | S r   )r   r   r   r   r   r     r   zcompute_prf.<locals>.<lambda>)r   )	PrecisionRecallF1)r   r$   r@   r   r   r   sortedrB   )r   r   Zref_question_idsZcorrect_predsZtotal_correctZtotal_predsr   Zpred_entity_listZall_ref_entity_listsZbest_local_f1Zbest_ref_entity_listZref_entity_listZlocal_f1Zgold_entitiesZpred_entitiesr   r   r   r   r   r   compute_prf  sD   
 r   c                 C   s,   dd |   D }dd |  D }||fS )z6
    Prepares data for calculation of prf scores.
    c                 S      i | ]	\}}||d  qS r   r   r	   r9   vr   r   r   
<dictcomp>
      zprepare_prf.<locals>.<dictcomp>c                 S   r   r   r   r   r   r   r   r     r   )r?   )r   r   predsrD   r   r   r   prepare_prf  s   r   c                 C   s,   i }|   D ]\}}||r|||< q|S )zM
    Filter a subset of the result_dict, where keys ends with 'key_tag'.
    )r?   r   )result_dictZkey_tagfilteredr9   r   r   r   r   filter_dict  s   
r   c                    st  i }i }i }|dkr|}| }n|  D ]\}}|d |kr+|||< || v r+| | ||< q|dks8|dks8|dkrGt|||\t}|S |dkrt|||\g d}	fdd	|	D }
fd
d	|	D }t}t|	|
|D ]\ }}t||} fdd	|  D }|| qt|S |dkrt||\t|||\}}t}|t|| |S td|)z
    Computes metrics.
    bothr   mainalldescriptionyesnor   c                       g | ]}t  |qS r   r   rQ   )r   r   r   r   6      zget_metrics.<locals>.<listcomp>c                    r   r   r   rQ   )r   r   r   r   7  r   c                    s    g | ]\}} d  | |fqS )|r   )r	   r   r   )r9   r   r   r   =  r   entityIllegal task name: {})	r?   prepare_bleur   rk   updater   r   r   r   )pred_result
ref_resultr   r   metricsZref_result_filteredZpred_result_filteredr   infor$   r   rD   predr*   mZk_metricZpred_dict_bleuZref_dict_bleur   )r9   r   r   r   get_metrics  sZ   



r   c                 C   sZ  g g }}|  }|D ]V}|dkrt|| |\}}n;|dkr't|| |\}}n.|dkr4t|| |\}}n!|dkrAt|| |\}}n|dkrNt|| |\}}ntd||ra|ra||7 }||7 }qt|}	t|}
|
	 D ]$\}}t
|
| |
|< t
|	|tg|	|< |r|tgkr|
|= |	|= qn|		 D ]\}}t|dksJ d|q|	|
fS )	zA
    Prepares data for calculation of bleu and rouge scores.
    r   r   r   r   r   r   r   z8There should be only one predict answer. question_id: {})r$   get_main_resultget_yesno_resultget_all_resultget_entity_resultget_desc_resultr   r   dictr?   r   r@   EMPTYr   )r   r   r   r   r   Zqidsr   r   r*   r   r   Zansr9   r   r   r   r   r   M  s@   
r   c                 C   sP   ||  d }|st g}|| i dg dd }|st g}| |fg| |fgfS )a-  
    Prepare answers for task 'main'.

    Args:
        qid: question_id.
        pred_result: A dict include all question_id's result information read
                     from args.pred_file.
        ref_result: A dict incluce all question_id's result information read
                    from args.ref_file.
    Returns:
        Two lists, the first one contains predict result, the second
        one contains reference result of the same question_id. Each list has
        elements of tuple (question_id, answers), 'answers' is a list of strings.
    r   Nr   )r   r@   )r   r   r   ref_anspred_ansr   r   r   r   r  s   r   c                 C       ||  d dkr
dS t | ||S )a/  
    Prepare answers for task 'entity'.

    Args:
        qid: question_id.
        pred_result: A dict include all question_id's result information read
                     from args.pred_file.
        ref_result: A dict incluce all question_id's result information read
                    from args.ref_file.
    Returns:
        Two lists, the first one contains predict result, the second
        one contains reference result of the same question_id. Each list has
        elements of tuple (question_id, answers), 'answers' is a list of strings.
    r   ZENTITYr   r   r   r   r   r   r   r   r        r   c                 C   r   )a4  
    Prepare answers for task 'description'.

    Args:
        qid: question_id.
        pred_result: A dict include all question_id's result information read
                     from args.pred_file.
        ref_result: A dict incluce all question_id's result information read
                    from args.ref_file.
    Returns:
        Two lists, the first one contains predict result, the second
        one contains reference result of the same question_id. Each list has
        elements of tuple (question_id, answers), 'answers' is a list of strings.
    r   ZDESCRIPTIONr   r   r   r   r   r   r     r   r   c                    sT   dd dd  d fdd	}||  d d	krd
S || |dd}|| |}||fS )a.  
    Prepare answers for task 'yesno'.

    Args:
        qid: question_id.
        pred_result: A dict include all question_id's result information read
                     from args.pred_file.
        ref_result: A dict incluce all question_id's result information read
                    from args.ref_file.
    Returns:
        Two lists, the first one contains predict result, the second
        one contains reference result of the same question_id. Each list has
        elements of tuple (question_id, answers), 'answers' is a list of strings.
    c                 S   s   g }g }t  }| D ]\}}||vr|||f || q	|||f q	|rDt|}|D ]\}}||  |7  < q.dd | D }|S )Nc                 S   s   g | ]\}}||fqS r   r   r   r   r   r   r     s    z3get_yesno_result.<locals>._uniq.<locals>.<listcomp>)r   r>   addr   r?   )Zlir   uniq_lileftr$   r9   r   Zdict_lir   r   r   _uniq  s   zget_yesno_result.<locals>._uniqc                 S   s@   | d d  }t dd | D }t| D ]
}||tgf q|S )Nc                 S   s   g | ]}|d  qS r   r   )r	   r   r   r   r   r     s    z<get_yesno_result.<locals>._expand_result.<locals>.<listcomp>)r   YESNO_LABELSr>   r   )r   expandedr$   r9   r   r   r   _expand_result  s
   z(get_yesno_result.<locals>._expand_resultFc                    sj    |vr fddg D S |  d }|  d }dd t ||D |} fdd|D }|S )Nc                    $   g | ]\}}t  d  | |fqS r}   rj   r   r   r   r   r        $ z<get_yesno_result.<locals>._get_yesno_ans.<locals>.<listcomp>r   r   c                 S   s   g | ]	\}}||gfqS r   r   r   r   r   r   r     r   c                    r   r   r   r   r  r   r   r     r  )rk   )r   r   r   r   r   Zlbl_ansretr   r   r  r   _get_yesno_ans  s   z(get_yesno_result.<locals>._get_yesno_ansr   YES_NOr   Tr   NFr   )r   r   r   r  r   r   r   r  r   r     s   

r   c                 C   s(   ||  d dkrt | ||S t| ||S )a,  
    Prepare answers for task 'all'.

    Args:
        qid: question_id.
        pred_result: A dict include all question_id's result information read
                     from args.pred_file.
        ref_result: A dict incluce all question_id's result information read
                    from args.ref_file.
    Returns:
        Two lists, the first one contains predict result, the second
        one contains reference result of the same question_id. Each list has
        elements of tuple (question_id, answers), 'answers' is a list of strings.
    r   r  )r   r   r   r   r   r   r     s   r   c                 C   s  i }g d}|durt |dg dS g }|dkr|dkrdg}|dkrNd	d
g}g d}|| D ]}|D ]}	|t| |	 |dd d|	d}
||
 q3q/no|dkrd	d
g}g d}|d }	|D ]7}|t| |	 |dd ddd}
||
 |D ]}|t| |	 |d | dd d|d}
||
 qzq`n$d	d
g}|D ]}|D ]}	|t| |	 |dd d|	d}
||
 qq||d< d|d< d|d< |S )a  
    Format metrics. 'err' field returns any error occurred during evaluation.

    Args:
        metrics: A dict object contains metrics for different tasks.
        task: Task name.
        err_msg: Exception raised during evaluation.
    Returns:
        Formatted result.
    r   searchZzhidaoNr   )errorMsg	errorCodedatar   r   r   r   zBleu-4r   )r   r   r   r   d   r   )namevaluer&   r   r   ZAllr   r  r  successr  )rj   roundr@   r>   )r   r   err_msgrU   sourcesr  Zmetric_namesZmetric_names_prfr  srcr   detailsr   r   r   r   format_metrics  sh   
r  c           	   
   C   s   d}i }z4t | j| j}t | j| jdd}g d}| jtddgvr(|dd }|D ]}t||| j|||< q*W n' tyL } z|}W Y d}~nd}~w ty_ } z|}W Y d}~nd}~ww tt	j
t|| j|dd	d
 dS )z
    Do evaluation.
    NTr  r	  r   r   r   F)ensure_asciiutf8)r   	pred_filer   ref_filer   r   r   AssertionErrorr   r   dumpsr  encode)	argserrr   r   r   r  r   veZaer   r   r   r   E  s6   

r   __main__r  zpredict file)helpr  zreference filer   z-task name: Main|Yes_No|All|Entity|Descriptionr}   )r   F)Nr   r  r.   )3r   argparser^   r   resysr   collectionsr   r   r   numpynpZrouger   r   r   r   r   r   r;   rH   rV   objectr%   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r/   ArgumentParserparseradd_argument
parse_argsr  r   lowerreplacer   r   r   r   <module>   s`   
 

 A

/%	3%;C