o
    *j                  
   @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZmZmZ ddlZddlmZmZmZmZmZmZ ddlmZ eeeeeeef f  eeeeeef f  f ZG dd	 d	eZG d
d deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd de"Z#G dd deZ$G dd deZ%G dd deZ&G dd deZ'G dd deZ(G d d! d!eZ)G d"d# d#eZ*G d$d% d%e*Z+G d&d' d'eZ,G d(d) d)eZ-G d*d+ d+eZ.G d,d- d-eZ/G d.d/ d/eZ0G d0d1 d1eZ1G d2d3 d3eZ2d4ed5ed6ee3ee3 f fd7d8Z4i d9e'd:e)d;e*d<e+d=e(d>e,d?e,d@e,dAe"dBe&dCe#dDedEe$dFedGe%dHe dIe"e"e-e.e/e0e1e2e!dJZ5dS )KzG
This file contains the pattern-verbalizer pairs (PVPs) for all tasks.
    N)ABCabstractmethod)defaultdict)DictListTupleUnion)InputExamplebuild_decoder_inputbuild_decoder_samplebuild_input_from_idsbuild_samplenum_special_tokens_to_add)print_rank_0c                	   @   s(  e Zd ZdZ								dDdeded	ed
efddZedd Z	edd Z
edefddZedefddZedefddZedd Zedeeeeef f fddZedeeeeef f fddZedeeeeef f fdd Zed!d" Zd#d$ Z		dEd%ed&ed'efd(d)Ze	dFd*eeee ef  d+efd,d-Zed*eeee ef  fd.d/Zd0eeee ef  d1eeee ef  d2ee d3efd4d5Zed%edefd6d7Zd%efd8d9Z d:d; Z!edee fd<d=Z"d>ee dee fd?d@Z#edAedefdBdCZ$dS )GPVPz
    This class contains functions to apply patterns and verbalizers as required by PET. Each task requires its own
    custom implementation of a PVP.
    r   N*   Ftrain
pattern_idverbalizer_fileseedfast_decodec                 C   s   || _ || _|| _|| _|| _|| _t|| _d| _	|
| _
|| _d| _|| _|	| _|j| _|j| _|j| _| jrEtd| j d| j  |rQt|| j| _dS dS )aM  
        Create a new PVP.

        :param args: the args
        :param tokenizer: the tokenizer
        :param label_list: the list of labels
        :param max_seq_length: the maximum length of the sequence
        :param pattern_id: the pattern id to use
        :param seed: a seed to be used for generating random numbers if necessary
        :param is_multi_token: if the verbalizers contain multiple tokens
        :param fast_decode: whether to use the fast decode mode for multi-token tasks
        :param continuous_prompt: whether to use continuous prompt optimization
        r      zPrompt tokens in pvp z spell length N)args	tokenizer
label_listmax_seq_lengthr   num_prompt_tokensrandomRandomrngnum_truncatedr   splitmax_dec_seq_length_is_multi_tokenmax_segment_lengthZ	task_maskcontinuous_promptprefix_promptr   spell_lengthr   _load_verbalizer_from_file	verbalize)selfr   r   r   r   r   r   r   is_multi_tokenr$   r   r!   r    r,   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/mglm/tasks/superglue/pvp.py__init__(   s2   
zPVP.__init__c                 C   s   | j S N)r#   r*   r,   r,   r-   r+   [      zPVP.is_multi_tokenc                 C      dS Nr   r,   r0   r,   r,   r-   r'   _      zPVP.spell_lengthreturnc                 C      | j djS z%Return the underlying LM's mask tokenMASKr   get_commandIdr0   r,   r,   r-   maskc      zPVP.maskc                 C   r6   z"Return the underlying LM's mask idr8   r9   r0   r,   r,   r-   mask_idh   r=   zPVP.mask_idc                    s   t  fdd jD S )z:Return the maximum number of verbalizers across all labelsc                 3   s    | ]
}t  |V  qd S r/   )lenr)   .0labelr0   r,   r-   	<genexpr>p   s    z*PVP.max_num_verbalizers.<locals>.<genexpr>)maxr   r0   r,   r0   r-   max_num_verbalizersm   s   zPVP.max_num_verbalizersc                 C   s   | dfS )z?Return an instance of this string that is marked as shortenableTr,   sr,   r,   r-   shortenabler   s   zPVP.shortenablerH   c                 C   s,   t | trt| d | d fS | tjS )z!Remove the final punctuation markr      )
isinstancetupler   remove_final_puncrstripstringpunctuationrG   r,   r,   r-   rM   w   s   
zPVP.remove_final_puncc                 C   8   t | trt| d | d fS | d  | dd  S zLowercase the first characterr   rJ   N)rK   rL   r   lowercase_firstlowerrG   r,   r,   r-   rS   ~      
zPVP.lowercase_firstc                 C   rQ   rR   )rK   rL   r   uppercase_firstupperrG   r,   r,   r-   rV      rU   zPVP.uppercase_firstc                   C   s   dgS r3   r,   r,   r,   r,   r-   available_patterns   r1   zPVP.available_patternsc           
      C   s  | j sdd |D }dd |D }||fS | j}d}||fD ]}|D ]
}|d u r,|d7 }q"qt|| }g g }}	|D ]%}|d u r\|dkr[||krT|| ||8 }q<|| d}q<|| q<|D ]%}|d u r|dkr||kr||	| ||8 }qd|	| d}qd|	| qd||	fS )Nc                 S      g | ]}|d ur|qS r/   r,   rB   partr,   r,   r-   
<listcomp>       z-PVP.replace_prompt_tokens.<locals>.<listcomp>c                 S   rY   r/   r,   rZ   r,   r,   r-   r\      r]   r   rJ   )r%   r   mathceilappend)
r*   parts_aparts_br   num_pospartsr[   Zavg_prompt_tokensZnew_parts_aZnew_parts_br,   r,   r-   replace_prompt_tokens   sF   






zPVP.replace_prompt_tokensexamplepriminglabeledc           0         sL  |s|rJ dj |\}}dd |D }jfdd}||}jdkr7gj dfg| }d}|rFd	d |D }||}jr||}	|jdur\j|j}
nd}
j	sg g g g g g f\}}}}}}g }|r|	|
 }t
|dd
  jj|| jd7  _dd |D }|rdd |D nd}|}|r||7 }|r|j}|d|   ||d d  }|S t|	D ]\}}t|t|}}t
|dd
  djg   jj|| jd7  _dd |D }|rdd |D nd}jdkr.t d j d } fddt|D }||gt| 7 }n g}|D ]}t|||jj jdddjd
}|\}}} }!}"}#}$fddt|D }%fdd|D }||% || ||! ||" ||# ||$ j|v r|j}&|d|& | ||&d  }q3|j}&|d|& | ||&d  }q3q|r|nd}t||||
|||j||d	}'|'S t|t|}} jj||djd7  _dd |D }|rdd |D nd}t||djj jdddd	}|\}}} }!}"}#}$t||!|"|
|jd}'g g g g g f\}}}}}(|	D ]F}t
|dd
  djg   dj  t| jj}|\})}*}*}+}*},}-||) ||+ ||" ||, |(|- q*t|'|||||(}'|'S  jj||g jd7  _dd |D }|rdd |D nd}|r|}|r||7 }|r|j} |j}.t|.dksJ d|.d }.t
|.j dd
}/|/||< |S t||djj jdddd	}|\}}} }!}"}#}$fddt|D }%fd d|D }! }#|jdurj|j}
nd}
t||!|#|"|$|
|j|%d!}'|'S )"a  
        Encode an input example using this pattern-verbalizer pair.

        Args:
            example: the input example to encode
            priming: whether to use this example for priming
            labeled: if ``priming=True``, whether the label should be appended to this example

        Returns:
            A tuple, consisting of a list of input ids and a list of token type ids
        B'labeled' can only be set to true if 'priming' is also set to truec                 S   "   g | ]}t |tr|n|d fqS FrK   rL   rB   xr,   r,   r-   r\          zPVP.encode.<locals>.<listcomp>c                    R   g }| D ]"\}}t |tr|}nt |tr g| }n	 |||f q|S r/   rK   strEncodeAsIdsintr`   Z	raw_partsrd   rn   rH   	prompt_idr   r,   r-   encode_input      

z PVP.encode.<locals>.encode_inputr   FNc                 S   rj   rk   rl   rm   r,   r,   r-   r\      ro   force_single_token
max_lengthc                 S      g | ]\}}|D ]}|qqS r,   r,   rB   r[   _Ztoken_idr,   r,   r-   r\      
    c                 S   r~   r,   r,   r   r,   r,   r-   r\      r   rJ   eopc                 S   r~   r,   r,   r   r,   r,   r-   r\         c                 S   r~   r,   r,   r   r,   r,   r-   r\     r   c                    s(   g | ]} |j  |d  j   qS )rJ   )r$   )rB   index)
answer_idsr*   r,   r-   r\   "  s    T)r   add_clsadd_sep	add_piecer?   c                       g | ]
\}}| kr|qS r,   r,   rB   idxtokenrw   r,   r-   r\   9  s
    c                       g | ]
}| kr
|nd qS r   r,   )rB   r   r   r,   r-   r\   =  s    )	positionsmasksrC   
logit_masktarget	unique_idZsegment_ids
prompt_idsc                 S   r~   r,   r,   r   r,   r,   r-   r\   d  r   c                 S   r~   r,   r,   r   r,   r,   r-   r\   g  r   r   r   r   r   )r   r   rC   r   c                 S   r~   r,   r,   r   r,   r,   r-   r\         c                 S   r~   r,   r,   r   r,   r,   r-   r\     
    
z1priming only supports one verbalization per labelc                    r   r,   r,   r   r   r,   r-   r\         c                    r   r   r,   rB   r   r   r,   r-   r\         )idsr   r   r   r   rC   r   r   )"r   	get_parts
num_tokensr&   r+   get_answersrC   r   r   r   get_verbalization_idsr    truncater   r?   	enumeratecopydeepcopyr:   r;   r$   r@   ranger   r   r`   r<   r   guidr"   r
   r   r)   get_verbalizer_ids)0r*   rf   rg   rh   raw_parts_araw_parts_brx   ra   rb   answersrC   ids_listpositions_listsep_list	mask_listtarget_listZprompt_listZsegment_id_listanswertokens_atokens_b	input_idsZmask_idxr   Zthis_parts_aZthis_parts_bZnum_segmentssegmentssegmentdatar   typespaddingsposition_idssep
target_ids
loss_masks
prompt_posZmask_possampleZlogit_mask_listZdec_idsr   Zdec_position_idsZdec_target_idsZdec_loss_masks
verbalizerverbalizer_idr,   )r   rw   r*   r   r-   encode   s6  

















$













	z
PVP.encoderd   only_shortenablec                    s   | rt  fdd| D S dS )Nc                    s    g | ]\}} r
|rt |qS r,   )r@   )rB   rn   rI   r   r,   r-   r\     s    z#PVP._seq_length.<locals>.<listcomp>r   )sum)rd   r   r,   r   r-   _seq_length  s   zPVP._seq_lengthc                 C   s>   t dd t| D }| | d d d | | d f| |< d S )Nc                 s   s$    | ]\}\}}|r|r|V  qd S r/   r,   )rB   r   seqrI   r,   r,   r-   rD     s    z#PVP._remove_last.<locals>.<genexpr>r   rJ   )rE   r   )rd   Zlast_idxr,   r,   r-   _remove_last  s   (zPVP._remove_lastra   rb   r   r}   c              	   C   s   |  ||  | }|r|t|7 }|t|||dddd7 }|| }|dkr(dS t|D ]}| j |dd| j |ddkrB| | q,| | q,dS )zCTruncate two sequences of text to a predefined total maximum lengthTF)r   r   r   r   r   )r   r@   r   r   r   )r*   ra   rb   r   r}   Z	total_lenZnum_tokens_to_remover   r,   r,   r-   r     s2   zPVP.truncatec                 C   r2   )a  
        Given an input example, apply a pattern to obtain two text sequences (text_a and text_b) containing exactly one
        mask token (or one consecutive sequence of mask tokens for PET with multiple masks). If a task requires only a
        single sequence of text, the second sequence should be an empty list.

        Args:
            example: the input example to process
        Returns:
            Two sequences of text. All text segments can optionally be marked as being shortenable.
        Nr,   r*   rf   r,   r,   r-   r     s   zPVP.get_partsc                    s    fdd j D S )Nc                    s   g | ]	}  |d  qS r   )r)   rA   r0   r,   r-   r\      s    z#PVP.get_answers.<locals>.<listcomp>)r   r   r,   r0   r-   r     s   zPVP.get_answersc                 C   s<   g }| j D ]}| |d }t|| jdd}|| q|S )Nr   Trz   )r   r)   r   r   r`   )r*   r   rC   r   r   r,   r,   r-   r     s   
zPVP.get_verbalizer_idsc                 C   r2   )z
        Return all verbalizations for a given label.

        :param label: the label
        :return: the list of verbalizations
        Nr,   r*   rC   r,   r,   r-   r)     s   zPVP.verbalizer   c                 C   s&   | | j}dgt| }d||< |S )Nr   rJ   )r   r?   r@   )r*   r   Z	label_idxlabelsr,   r,   r-   get_mask_positions  s   zPVP.get_mask_positionspathc                    s   t td }t| ddd(}|  D ]}| rt|}q|r-| ^}}|| |< qW d    n1 s8w   Y  td	   dt
t f fdd}|S )Nrzutf-8)encodingz3Automatically loaded the following verbalizer: 
 {}r5   c                    s     |  S r/   r,   )rC   r   Zverbalizersr,   r-   r)   .  s   z1PVP._load_verbalizer_from_file.<locals>.verbalize)r   dictopenread
splitlinesisdigitrt   r!   r   formatr   rr   )r   r   Zcurrent_pattern_idfhlinerC   Zrealizationsr)   r,   r   r-   r(     s*   
zPVP._load_verbalizer_from_file)r   Nr   Fr   Fr   r   FFrk   )%__name__
__module____qualname____doc__rt   rr   boolr.   propertyr+   r'   r<   r?   rF   staticmethodrI   r   r   rM   rS   rV   rX   re   r	   r   r   r   r   r   r   FilledPatternr   r   r   r)   r   r(   r,   r,   r,   r-   r   "   s    


3


   
'
   
		r   c                       s   e Zd Zedd Zedd Zedd Zedefdd	Z	ede
fd
dZdefddZdedefddZdee fddZ		ddededef fddZ  ZS )CopaPVPc                   C      ddgS Nr   rJ   r,   r,   r,   r,   r-   rX   6     zCopaPVP.available_patternsc                 C   r2   NTr,   r0   r,   r,   r-   r+   :  r4   zCopaPVP.is_multi_tokenc                 C      | j | j S r/   r   r&   r0   r,   r,   r-   r'   >     zCopaPVP.spell_lengthr5   c                 C      d}| j |jS r7   r9   r*   Z
mask_tokenr,   r,   r-   r<   B     zCopaPVP.maskc                 C   r   r>   r9   r   r,   r,   r-   r?   H  r   zCopaPVP.mask_idrf   c                 C   s<   d|  | |jd  }d|  | |jd  }||gS )N choice1choice2)rM   rS   meta)r*   rf   r   r   r,   r,   r-   r   N  s   zCopaPVP.get_answersc           	      C   s2  | j dv sJ | | d|j }| | |jd }| | |jd }|jd }|dv s3J |dkr:d}nd	}| j d
krUd d|d|dd ||d | jgdgg }}n8| j dkrod |dd| dd ||d | jgdgg }}n| j dkrd d|d|dd ||| jgdd gg }}nt| j | ||\}}||fS )Nr   rJ         r   r   r   questioncauseZeffectr    becausez, sor   "" or ""?.rJ   z or?r   )	r   rM   rI   text_arS   r   r<   NotImplementedErrorre   )	r*   rf   premiser   r   r   joinerra   rb   r,   r,   r-   r   U  sH   




zCopaPVP.get_partsc                 C      g S r/   r,   r   r,   r,   r-   r)   x     zCopaPVP.verbalizeFrg   rh   c                    s  | j s| jdk rt j|||dS |s|rJ d| j | | |j}d| | |j	d  }d| | |j	d  }|j	d }|dv sJJ |d	krPd
nd}t
| ddg}	| jrf|	 dj g g g g g f\}
}}}}||fD ]n}d|dd d|dd d|| jg|g}dd |D } fdd|D }|  j| j|d|	| jd7  _dd |D }t|d|	| j| j| jdddd	}|\}}}}}}}|
| || || || || qv|jdur| j|j}nd}t|
||||||jd}|S )a  
        Encode an input example using this pattern-verbalizer pair.

        Args:
            example: the input example to encode
            priming: whether to use this example for priming
            labeled: if ``priming=True``, whether the label should be appended to this example

        Returns:
             A tuple, consisting of a list of input ids and a list of token type ids
        r   rg   rh   ri   r   r   r   r   r   r   r   z soTrz   r   r   rJ   Nr   r   c                 S   rj   rk   rl   rm   r,   r,   r-   r\     s   " z"CopaPVP.encode.<locals>.<listcomp>c                    s2   g | ]\}}|rt |tr |jn||fqS r,   )rK   rr   rs   tokenization)rB   rn   rH   r   r,   r-   r\     s    r|   c                 S   r~   r,   r,   r   r,   r,   r-   r\     r   Fr   r   )r   r   rC   r   r   r   )r%   r   superr   r   rM   rI   r   rS   r   r   r+   r`   r:   r;   r<   r    r   r   r   r   rC   r   r   r   r   )r*   rf   rg   rh   r   r   r   r   r   r   r   r   r   r   r   choicerd   r   r   r   r   r   r   r   r   r   rC   r   	__class__r  r-   r   {  s|   










zCopaPVP.encoder   )r   r   r   r   rX   r   r+   r'   rr   r<   rt   r?   r	   r   r   r   r   r)   r   r   __classcell__r,   r,   r  r-   r   4  s.    


#r   c                       s   e Zd Zedd Zedd Zedd Zdefdd	Z	ded
e
fddZ		ddededef fddZd
ee fddZ  ZS )WscPVPc                   C      g dS N)r   rJ   r   r,   r,   r,   r,   r-   rX     r   zWscPVP.available_patternsc                 C   r2   r   r,   r0   r,   r,   r-   r+     r4   zWscPVP.is_multi_tokenc                 C   r   r/   r   r0   r,   r,   r-   r'     r   zWscPVP.spell_lengthrf   c                 C   s>   d|j d  }|g}d|j v r|j d }|dd |D 7 }|S )Nr   Z
span1_text
candidatesc                 S      g | ]}d | qS r   r,   )rB   candr,   r,   r-   r\         z&WscPVP.get_answers.<locals>.<listcomp>r   )r*   rf   r   r   r  r,   r,   r-   r     s   

zWscPVP.get_answersr5   c                 C   s   |j d }|j d }|j }d||  d ||< d|}| |}| jdkr;d |d d| d d | jgdgg }}n6| jd	krSd |d d
| d d | jgdgg }}n| jdkrld |d d| d d d| jgdgg }}nt| j| ||\}}||fS )NZ
span2_textZspan2_index*r   r   z The pronoun '*z*' refers tor   rJ   z) In the previous sentence, the pronoun '*r   z9 Question: In the passage above, what does the pronoun '*z*' refer to? Answer:)	r   r   r!   joinrI   r   r<   r   re   )r*   rf   ZpronounZpronoun_idxZwords_ar   ra   rb   r,   r,   r-   r     sJ   









zWscPVP.get_partsFrg   rh   c                    s  | j jdv rt j|||d}| jdkrd|d< |S |s"|r"J d| jj | |\}}dd |D } fd	d
}||}| jdkrQ g| j dfg| }d}	|r`dd |D }||}	| 	|d }
t
|
dd}|djg }|  j| j||	|| jd7  _dd |D }|	rdd |	D nd}t|||| j| j| j dddd	}|\}}}}}}} fddt|D } fdd|D }|jdur| j|j}nd}tj|tjdtj|tjdtj|tjdtj|tjdtj|tjdtj|tjd||jdS )a  
        Encode an input example using this pattern-verbalizer pair.
        Args:
            example: the input example to encode
            priming: whether to use this example for priming
            labeled: if ``priming=True``, whether the label should be appended to this example
        Returns:
             A tuple, consisting of a list of input ids and a list of token type ids
        )Z
generativeZmixr  r   r   rC   ri   c                 S   rj   rk   rl   rm   r,   r,   r-   r\     ro   z!WscPVP.encode.<locals>.<listcomp>c                    rp   r/   rq   ru   rv   r,   r-   rx      ry   z#WscPVP.encode.<locals>.encode_inputFNc                 S   rj   rk   rl   rm   r,   r,   r-   r\   1  ro   rz   r   r|   c                 S   r~   r,   r,   r   r,   r,   r-   r\   ;  r   c                 S   r~   r,   r,   r   r,   r,   r-   r\   <  r   Tr   c                    r   r,   r,   r   r   r,   r-   r\   I  r   c                    r   r   r,   r   r   r,   r-   r\   L  r   )Zdtype)textr   Zattention_maskZ	loss_maskZposition_idr   rC   uid)r   Z	loss_funcr  r   r!   r   r   r   r&   r   r   r:   r;   r    r   r   r   r   rC   r   r   nparrayZint64r   )r*   rf   rg   rh   r   r   r   rx   ra   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   rC   r  rv   r-   r     s   







zWscPVP.encodec                 C   r  r/   r,   r   r,   r,   r-   r)   \  r  zWscPVP.verbalizer   )r   r   r   r   rX   r   r+   r'   r	   r   r   r   r   r   r   rr   r)   r
  r,   r,   r  r-   r    s&    


!Zr  c                   @   J   e Zd Zedd ZdefddZdedefddZde	e
 fd	d
ZdS )	RecordPVPc                 C   r2   r   r,   r0   r,   r,   r-   r+   b  r4   zRecordPVP.is_multi_tokenrf   c                 C      |j d }dd |D }|S )Nr  c                 S   r  r  r,   rB   r  r,   r,   r-   r\   h  r  z)RecordPVP.get_answers.<locals>.<listcomp>r  r*   rf   choicesr,   r,   r-   r   f     
zRecordPVP.get_answersr5   c                 C   sT   |  |j}d|jv sJ d|j d|jd\}}|d|  | jg|gg fS )Nz@placeholderz
question "z'" does not contain a @placeholder tokenr   )rI   r   text_br!   rN   r<   )r*   rf   r   Z
question_aZ
question_br,   r,   r-   r   k  s   zRecordPVP.get_partsc                 C   r  r/   r,   r   r,   r,   r-   r)   s  r  zRecordPVP.verbalizeNr   r   r   r   r+   r	   r   r   r   r   rr   r)   r,   r,   r,   r-   r  `  s    
r  c                   @   sV   e Zd Zedd Zedd ZdefddZdede	fd	d
Z
dee fddZdS )RacePVPc                 C   r2   r   r,   r0   r,   r,   r-   r+   y  r4   zRacePVP.is_multi_tokenc                   C   r   r   r,   r,   r,   r,   r-   rX   }  r   zRacePVP.available_patternsrf   c                 C   r  )Nr   c                 S   r  r  r,   r  r,   r,   r-   r\     r  z'RacePVP.get_answers.<locals>.<listcomp>r  r  r,   r,   r-   r     r!  zRacePVP.get_answersr5   c                 C   s   |  |j}d|j }d|v rK|jddd\}}| jdkr/|g|  | | jg|  |gfS | }|r:| |}|gd|  || jg|  |gfS | jdkr^|gd|  |d| jggfS |gd|  || jggfS )	Nr   r   rJ   )maxsplitr    Based on the previous passage,
 Question:r  )rI   r   r"  r!   r   rN   r<   rS   )r*   rf   contextr   leftrightr,   r,   r-   r     s4   



zRacePVP.get_partsc                 C   r  r/   r,   r   r,   r,   r-   r)     r  zRacePVP.verbalizeN)r   r   r   r   r+   r   rX   r	   r   r   r   r   rr   r)   r,   r,   r,   r-   r$  w  s    

 r$  c                   @   V   e Zd ZdgdgdZedd Zedd Zded	e	fd
dZ
d	ee fddZdS )RtePVP No YesZnot_entailment
entailmentc                   C   r  N)r   rJ   r   r      r,   r,   r,   r,   r-   rX     r   zRtePVP.available_patternsc                 C   r   r/   r   r0   r,   r,   r-   r'     r   zRtePVP.spell_lengthrf   r5   c              	   C   s\  |j }|jtj}| jdkr(d d| |dgd | jgdd d| |dg}}nz| jdkrEd | |dgd | jgdd | d| g}}n]| jd	krbd d| |dgd | jgd
d | |dg}}n@| jdkrd | |dgd | jgdd | d| g}}n#| jdkrd | |d d| d| dd d| jgg	g }}nt| j| 	||\}}||fS )Nr   r   " ?,z "rJ   r   r   r   . "r   r   r2  
 question:z True or False? answer:)
r   r"  rN   rO   rP   r   rI   r<   r   re   r*   rf   r   r"  ra   rb   r,   r,   r-   r     sN   











zRtePVP.get_partsc                 C   (   | j dkr|dkrdgS dgS tj| S )Nr2  r0   true false)r   r,  
VERBALIZERr   r,   r,   r-   r)        

zRtePVP.verbalizeNr   r   r   r<  r   rX   r   r'   r	   r   r   r   rr   r)   r,   r,   r,   r-   r,    s    

&r,  c                       sV   e Zd ZdgdgdgdZedd Zdedef fd	d
Zde	e
 fddZ  ZS )CbPVPr-  r.   Maybecontradictionr0  Zneutralc                   C   r  r1  r,   r,   r,   r,   r-   rX     r   zCbPVP.available_patternsrf   r5   c              	      sj   | j dkr/| |j}| d|j }d |d d|dd d| jgg	g }}| ||\}}||fS t |S )Nr2  r   r6  z true, false or neither?r7  )r   rI   r   r"  r<   re   r  r   r8  r  r,   r-   r     s   

zCbPVP.get_partsc                 C   s6   | j dkr|dkrdgS |dkrdgS dgS tj| S )Nr2  r0  r:  rB  r;  z neither)r   r?  r<  r   r,   r,   r-   r)     s   

zCbPVP.verbalize)r   r   r   r<  r   rX   r	   r   r   r   rr   r)   r
  r,   r,   r  r-   r?    s    
r?  c                   @   d   e Zd ZdgdgdZdgdgdZedd Zedd	 Zd
e	de
fddZdee fddZdS )BoolQPVPr-  r.  falsetruer;  r:  c                   C   r  N)r   rJ   r   r   r2     r,   r,   r,   r,   r-   rX     r   zBoolQPVP.available_patternsc                 C   r   r/   r   r0   r,   r,   r-   r'   	  r   zBoolQPVP.spell_lengthrf   r5   c                 C   s   |j }|j}| jdk r$d | |d d| d| dd | jgdg	g }}nC| jdk rBd | |dd | d| dd | jgdg	g }}n%| jd	k rbd
d | d| dd | jgdd | d| g	g }}nt| j| ||\}}||fS )Nr   r'  r   z	? Answer:r   r2  r&  r      zBased on the following passage)r   r"  r   rI   r<   r   re   )r*   rf   passager   ra   rb   r,   r,   r-   r     s:   




zBoolQPVP.get_partsc                 C   s2   | j dks| j dks| j dkrtj| S tj| S )Nr   r   r2  )r   rD  VERBALIZER_AVERBALIZER_Br   r,   r,   r-   r)   +  s   

zBoolQPVP.verbalizeNr   r   r   rL  rM  r   rX   r   r'   r	   r   r   r   rr   r)   r,   r,   r,   r-   rD     s    

rD  c                   @   r+  )
MultiRcPVPr-  r.  )r   rJ   c                   C   r  r1  r,   r,   r,   r,   r-   rX   5  r   zMultiRcPVP.available_patternsc                 C   r   r/   r   r0   r,   r,   r-   r'   9  r   zMultiRcPVP.spell_lengthrf   r5   c                 C   sh  |  | |j }|  |j }|jd }| jdkr7|dd dd| d d dd| dd | jgdgg }}nq| jdkrS|dd dd| dd d	|d
d | jgdgg }}nU| jdkrn|dd d| dd d|dd | jgdgg }}n:| jdkrd |d d| d| jgdd |g	g }}n"| jdkr|dd dd| dd d| dd | jgdgg }}nt| j| 	||\}}||fS )Nr   r   r   r'  r   r   z Is itrJ   z Is the correct answer "r   r   z . Based on the previous passage,z Is "z" a correct answer?r   z- []r2  )
rM   rI   r   rN   r"  r   r   r<   r   re   )r*   rf   rK  r   r   ra   rb   r,   r,   r-   r   =  sP   







zMultiRcPVP.get_partsc                 C   r9  )Nr   r   z Falsez True)r   rO  r<  r   r,   r,   r-   r)   b  r=  zMultiRcPVP.verbalizeNr>  r,   r,   r,   r-   rO  2  s    

%rO  c                   @   rC  )WicPVPr-  r.  rE  2bc                   C   r  r  r,   r,   r,   r,   r-   rX   l  r   zWicPVP.available_patternsc                 C   r   r/   r   r0   r,   r,   r-   r'   p  r   zWicPVP.spell_lengthrf   r5   c                 C   s   |j }|j}|jd }| jdkr.d | d| d | d d d| d d | jgdgg }}nB| jdkrN| |d | d	| d d
| d d | jggg }}n"| jdkrkd |dd d| |dd d| jgd|dgg }}nt| j| ||\}}||fS )Nwordr   r   z" / "z Similar sense of "r   r   rJ   r   z Does z) have the same meaning in both sentences?r   z .z Sense (1) (a) "z (z) ")r   r"  r   r   rI   r<   r   re   )r*   rf   r   r"  rT  ra   rb   r,   r,   r-   r   t  s8   







zWicPVP.get_partsc                 C      | j dkr
tj| S tj| S )Nr   )r   rQ  rM  rL  r   r,   r,   r-   r)     s   


zWicPVP.verbalizeNrN  r,   r,   r,   r-   rQ  h  s    

rQ  c                   @   sR   e Zd ZdgdgdgdgdZedd Zded	efd
dZd	e	e
 fddZdS )	AgnewsPVPz World Sports	 Businessz Tech)1rR  34c                   C   r  rH  r,   r,   r,   r,   r-   rX     r   zAgnewsPVP.available_patternsrf   r5   c                 C      |  |j}|  |j}| jdkr| jgd||gg fS | jdkr*| jgd||gg fS | jdkr:|d| jgd|gg fS | jdkrJ||d| jgdgg fS | jd	krZd
| jgd||gg fS | jdkri| jgd||gg fS td| j)Nr   :rJ   z News:r   ()r   r2  [ Category:rP  rI  - No pattern implemented for id {}rI   r   r"  r   r<   
ValueErrorr   r*   rf   r   r"  r,   r,   r-   r     "   





zAgnewsPVP.get_partsc                 C   
   t j| S r/   )rV  r<  r   r,   r,   r-   r)        
zAgnewsPVP.verbalizeNr   r   r   r<  r   rX   r	   r   r   r   rr   r)   r,   r,   r,   r-   rV    s    
rV  c                   @   sj   e Zd Zdgdgdgdgdgdgdgdgd	gd
gd
Zedd ZdedefddZde	e
 fddZdS )YahooPVPz Societyz Sciencez Healthz
 Educationz	 ComputerrW  rX  z Entertainmentz Relationshipz	 Politics)
rY  rR  rZ  r[  5678910c                   C   r  rH  r,   r,   r,   r,   r-   rX     r   zYahooPVP.available_patternsrf   r5   c                 C   r\  )Nr   r]  rJ   r'  r   r^  r_  r   r2  r`  rP  rI  ra  rb  rc  re  r,   r,   r-   r     rf  zYahooPVP.get_partsc                 C   rg  r/   )rj  r<  r   r,   r,   r-   r)     rh  zYahooPVP.verbalizeNri  r,   r,   r,   r-   rj    s     
rj  c                   @   s`   e Zd ZdgdgdgdZdgdgdgdZedd Zd	ed
efddZ	d
e
e fddZdS )MnliPVPz Wrongz Rightr@  rA  r-  r.  c                   C   r  Nr   r,   r,   r,   r,   r-   rX     r   zMnliPVP.available_patternsrf   r5   c                 C   sz   |  | |j}|  |j}| jdks| jdkr&d|dg| jgd|dgfS | jdks0| jdkr;|dg| jgd	|gfS d S )
Nr   r   r   r3  z, "rJ   r   r   r4  )rI   rM   r   r"  r   r<   re  r,   r,   r-   r     s   zMnliPVP.get_partsc                 C   s(   | j dks
| j dkrtj| S tj| S r   )r   rq  rL  rM  r   r,   r,   r-   r)      s   

zMnliPVP.verbalizeNr   r   r   rL  rM  r   rX   r	   r   r   r   rr   r)   r,   r,   r,   r-   rq    s    
	rq  c                   @   J   e Zd ZdgdgdZedd Zdedefdd	Zde	e
 fd
dZdS )YelpPolarityPVP bad good)rY  rR  c                   C   r  rr  r,   r,   r,   r,   r-   rX   	  r   z"YelpPolarityPVP.available_patternsrf   r5   c                 C   s   |  |j}| jdkrd| jgd|gg fS | jdkr$|d| jgdgg fS | jdkr3d| jgdg|gfS | jd	krB|gd
| jgdgfS td| j)Nr   zIt wasr   rJ   z. All in all, it wasr   ZJust!r   z In summary, the restaurant isrb  rI   r   r   r<   rd  r   r*   rf   r  r,   r,   r-   r     s   



zYelpPolarityPVP.get_partsc                 C   rg  r/   )ru  r<  r   r,   r,   r-   r)     rh  zYelpPolarityPVP.verbalizeNri  r,   r,   r,   r-   ru    s    
ru  c                   @   s8   e Zd ZdgdgdgdgdgdZdee fdd	Zd
S )YelpFullPVP	 terriblerv  z okayrw   great)rY  rR  rZ  r[  rk  r5   c                 C   rg  r/   )r{  r<  r   r,   r,   r-   r)   )  rh  zYelpFullPVP.verbalizeN)r   r   r   r<  r   rr   r)   r,   r,   r,   r-   r{     s    r{  c                   @   sf   e Zd ZdgdgddgdgddgdgddZed	d
 ZdedefddZde	e
 fddZdS )
XStancePVPZYesZNo)ZFAVORZAGAINSTZJaZNeinZOuiZNon)endefrc                   C   r  rH  r,   r,   r,   r,   r-   rX   =  r   zXStancePVP.available_patternsrf   r5   c                 C   s   |  |j}|  |j}| jdks| jdks| jdkr(d|dg| jgd|dgfS | jdks7| jdks7| jdkrA|g| jgd	|gfS d S )
Nr   r   r2  r   r5  rJ   r   rI  r   )rI   r   r"  r   r<   re  r,   r,   r-   r   A  s   zXStancePVP.get_partsc                 C   s.   | j dk rdn| j dk rdnd}tj| | S )Nr   r  r2  r  r  )r   r~  VERBALIZERS)r*   rC   langr,   r,   r-   r)   K  s    zXStancePVP.verbalizeN)r   r   r   r  r   rX   r	   r   r   r   rr   r)   r,   r,   r,   r-   r~  -  s    

r~  c                   @   sX   e Zd ZdgdgdZdgdgdZedd Zded	efd
dZ	d	e
e fddZdS )Sst2PVPr|  r}  0rY  rv  rw  c                   C   r   r   r,   r,   r,   r,   r-   rX   U  r   zSst2PVP.available_patternsrf   r5   c                 C   sD   |  |j}| jdks| jdkr|d| jgdgg fS td| j)Nr   rJ   z It wasr   rb  ry  rz  r,   r,   r-   r   Y  s   zSst2PVP.get_partsc                 C   rU  r3   )r   r  rL  rM  r   r,   r,   r-   r)   a  s   


zSst2PVP.verbalizeNrs  r,   r,   r,   r-   r  P  s    
r  c                   @   s>   e Zd ZdgdgdZdedefddZdee fdd	Z	d
S )ColaPVPz
 incorrectz correctr  rf   r5   c                 C   s>   |  |j}| jdkrd|dd| jgdgg fS td| j)Nr   r   z This isr   rb  ry  rz  r,   r,   r-   r   k  s   
zColaPVP.get_partsc                 C   rg  r/   )r  r<  r   r,   r,   r-   r)   s  rh  zColaPVP.verbalizeN)
r   r   r   r<  r	   r   r   r   rr   r)   r,   r,   r,   r-   r  h  s    r  c                   @   rt  )MrpcPVPr-  r.  r  c                   C   r   r   r,   r,   r,   r,   r-   rX   z  r   zMrpcPVP.available_patternsrf   r5   c                 C   s   |  |j}| jdkr|  | |j}|g| jgd|gfS | jdkr:|  | | |j}|gd|d| jggfS td| j)Nr   , rJ   z Does it mean thatr   rb  )	rI   r   r   rS   r"  r<   rM   rd  r   re  r,   r,   r-   r   ~  s   

zMrpcPVP.get_partsc                 C   rg  r/   )r  r<  r   r,   r,   r-   r)     rh  zMrpcPVP.verbalizeNri  r,   r,   r,   r-   r  w  s    
r  c                   @   rt  )QqpPVPr-  r.  r  c                   C   r   r   r,   r,   r,   r,   r-   rX     r   zQqpPVP.available_patternsrf   r5   c                 C   sl   |  |j}|  | |j}| jdkr|gd|| jgdgfS | jdkr.|g| jgd|gfS td| j)Nr   z Do you mean r   rJ   r  rb  )rI   r   rS   r"  r   r<   rd  r   re  r,   r,   r-   r     s   

zQqpPVP.get_partsc                 C   rg  r/   )r  r<  r   r,   r,   r-   r)     rh  zQqpPVP.verbalizeNri  r,   r,   r,   r-   r    s    
r  c                   @   rt  )QnliPVPr-  r.  r/  c                   C   r  r  r,   r,   r,   r,   r-   rX     r   zQnliPVP.available_patternsrf   r5   c              	   C   s   |  |j}|j}| jdkr"| |d| d| d| jgdgg fS | jdkr;| |d| d| d| jgdgg fS | jd	krVd
| d| d| jgd| d| gg fS td| j)Nr   r'  r   z? Do you know the answer?r   rJ   z6 Based on the previous passage, do you know the answerr   r   z6Based on the following passage, do you know the answerrb  )rM   r   r"  r   rI   r<   rd  r   )r*   rf   r   rK  r,   r,   r-   r     s4   



zQnliPVP.get_partsc                 C   rg  r/   )r  r<  r   r,   r,   r-   r)     rh  zQnliPVP.verbalizeNri  r,   r,   r,   r-   r    s    
r  c                   @   r  )SquadPVPc                 C   r2   r   r,   r0   r,   r,   r-   r+     r4   zSquadPVP.is_multi_tokenrf   c                 C   s   d|j d d  }|g}|S )Nr   r   r  r  )r*   rf   r   r   r,   r,   r-   r     s   zSquadPVP.get_answersr5   c                 C   s*   |  |j}|j}|d| | jgdgg fS )Nr   r   )rI   r   r"  r<   )r*   rf   r(  r   r,   r,   r-   r     s   zSquadPVP.get_partsc                 C   r  r/   r,   r   r,   r,   r-   r)     r  zSquadPVP.verbalizeNr#  r,   r,   r,   r-   r    s    
r  rT  r{   r5   c                 C   sj   | | j}|s
|S t|dksJ d|  d|| |d }||jvs3J d|  d|| |S )a  
    Get the token ids corresponding to a verbalization

    :param word: the verbalization
    :param tokenizer: the tokenizer to use
    :param force_single_token: whether it should be enforced that the verbalization corresponds to a single token.
           If set to true, this method returns a single int instead of a list and throws an error if the word
           corresponds to multiple tokens.
    :return: either the list of token ids or the single token id corresponding to this word
    rJ   zVerbalization "z-" does not correspond to a single token, got r   zVerbalization z is mapped to a special token )rs   r  r@   Z	DecodeIdsZcommand_id_mapZ	IdToToken)rT  r   r{   r   Zverbalization_idr,   r,   r-   r     s   r   ZagnewsZmnlizyelp-polarityz	yelp-fullZyahooZxstancez
xstance-dez
xstance-frZrteZwiccbZwscZboolqZcopaZmultircrecordzax-b)zax-gZsst2ZcolaZmrpcZqqpZqnliZsquadZrace)6r   r   r^   r   rO   abcr   r   collectionsr   typingr   r   r   r   numpyr  Ztasks.data_utilsr	   r
   r   r   r   r   utilsr   rr   r   r   r   r   r  r  r$  r,  r?  rD  rO  rQ  rV  rj  rq  ru  r{  r~  r  r  r  r  r  r  rt   r   ZPVPSr,   r,   r,   r-   <module>   s          3726/%+#$
	
