o
    *j                     @   s@  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlZd dlZd dlm  mZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZ d dlm Z  d dl!m"Z" d d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z0 d dl1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7 dZ8dZ9G dd dej:Z;G dd dej:Z<G dd dej:Z=G dd dej:Z>G dd dZ?G dd  d ej:Z@G d!d" d"ej:ZAG d#d$ d$e(e"ZBG d%d& d&eBZCG d'd( d(ej:ZDG d)d* d*ej:ZEG d+d, d,eFZGe*jHe2jIe$jJd-G d.d/ d/eBZKdS )0    N)	dataclass)AnyDictListOptionalUnion)Tensornn)xavier_uniform_)
BertConfig	BertModelBertTokenizerRobertaConfigRobertaModelRobertaTokenizer)ACT2FN)PreTrainedModel)Models)Model)
TorchModel)MODELS)TextGenerationModelOutputTokenGeneratorOutput)logger)Tasks   )
PalmConfig)compute_bleu_rouge	normalizezconfig.jsonzpytorch_model.binc                       s:   e Zd ZdZ		d
 fdd	Z					ddd	Z  ZS )MultiHeadedAttentiona  
    Multi-Head Attention module from
    "Attention is All You Need"
    :cite:`DBLP:journals/corr/VaswaniSPUJGKP17`.

    Similar to standard `dot` attention but uses
    multiple attention distributions simultaneously
    to select relevant items.

    .. mermaid::

       graph BT
          A[key]
          B[value]
          C[query]
          O[output]
          subgraph Attn
            D[Attn 1]
            E[Attn 2]
            F[Attn N]
          end
          A --> D
          C --> D
          A --> E
          C --> E
          A --> F
          C --> F
          D --> O
          E --> O
          F --> O
          B --> O

    Also includes several additional tricks.

    Args:
       head_count (int): number of parallel heads
       model_dim (int): the dimension of keys/values/queries,
           must be divisible by head_count
       dropout (float): dropout parameter
    皙?Tc                    s   || dksJ || | _ || _t   || _t||| j  | _t||| j  | _t||| j  | _	tj
dd| _t|| _|| _| jrRt||| _d S d S )Nr   dim)dim_per_head	model_dimsuper__init__
head_countr	   Linearlinear_keyslinear_valueslinear_queryZSoftmaxsoftmaxDropoutdropoutuse_final_linearfinal_linear)selfr(   r%   r/   r0   	__class__ n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/palm_v2/text_generation.pyr'   \   s$   

zMultiHeadedAttention.__init__NFc	                    s  | d | j| j fdd}	 fdd}
|dur|dkrp| || || |}}}|	|}|	|}|j}|d durStj|d 	||fd	d
}|d durgtj|d 	||fd	d
}||d< ||d< nL|dkr| |}|d du r| || |}}|	|}|	|}n	|d |d }}||d< ||d< n| |}| |}| |}|	|}|	|}|	|}|t
 }t||d	d}|dur|d|}||d}| |}|dur|dddf | }|t|d	d	d  }t|ddddf |dgd}| |}| jr6|
t||}| |}|r4||fS |S t||}|rC||fS |S )a  
        Compute the context vector and the attention vectors.

        Args:
           key (`FloatTensor`): set of `key_len`
                key vectors `[batch, key_len, dim]`
           value (`FloatTensor`): set of `key_len`
                value vectors `[batch, key_len, dim]`
           query (`FloatTensor`): set of `query_len`
                 query vectors  `[batch, query_len, dim]`
           mask: binary mask indicating which keys have
                 non-zero attention `[batch, query_len, key_len]`
        Returns:
           (`FloatTensor`, `FloatTensor`) :

           * output context vectors `[batch, query_len, dim]`
           * one of the attention vectors `[batch, query_len, key_len]`
        r   c                    s   |   dddS )z  projection r!   r      )view	transposex
batch_sizer$   r(   r5   r6   shape   s   z+MultiHeadedAttention.forward.<locals>.shapec                    s   |  dd  d S )z  compute context r   r7   r!   )r9   
contiguousr8   r:   r<   r5   r6   unshape   s   z-MultiHeadedAttention.forward.<locals>.unshapeNr2   	self_keysr7   r"   self_valuescontextmemory_keysmemory_values   r   g Ngmr!   g&.>)sizer$   r(   r,   r*   r+   devicetorchcattomathsqrtmatmulr9   	unsqueezeZ	expand_asZmasked_fillr-   sumr/   r0   r1   )r2   keyvaluequerymasklayer_cachetypeZpredefined_graph_1return_attnr>   r@   rH   scoresattnZattn_maskedZ	drop_attnrC   outputr5   r<   r6   forwards   s   









&

zMultiHeadedAttention.forward)r    T)NNNNF__name__
__module____qualname____doc__r'   r[   __classcell__r5   r5   r3   r6   r   2   s    ,r   c                       *   e Zd ZdZd fdd	Zdd Z  ZS )PositionwiseFeedForwarda*   A two-layer Feed-Forward-Network with residual layer norm.

    Args:
        d_model (int): the size of input for the first-layer of the FFN.
        d_ff (int): the hidden layer size of the second-layer
            of the FNN.
        dropout (float): dropout probability in :math:`[0, 1)`.
    r    c                    s\   t    tj|dd| _t||| _td | _t	|| _
t||| _t	|| _d S )Nư>epsZgelu_new)r&   r'   r	   	LayerNorm
layer_normr)   w_1r   actvr.   	dropout_1w_2	dropout_2)r2   d_modeld_ffr/   r3   r5   r6   r'      s   

z PositionwiseFeedForward.__init__c              	   C   s4   |  | | | |}| | |}|| S N)rk   rj   ri   rh   rm   rl   )r2   r;   interrZ   r5   r5   r6   r[      s   zPositionwiseFeedForward.forward)r    r\   r5   r5   r3   r6   rc      s    		rc   c                       s<   e Zd ZdZdZ fddZ			d
ddZdd	 Z  ZS )TransformerDecoderLayera  
    Args:
      d_model (int): the dimension of keys/values/queries in
                       MultiHeadedAttention, also the input size of
                       the first-layer of the PositionwiseFeedForward.
      heads (int): the number of heads for MultiHeadedAttention.
      d_ff (int): the second-layer of the PositionwiseFeedForward.
      dropout (float): dropout probability(0-1.0).
      self_attn_type (string): type of self-attention scaled-dot, average
      c                    s   t    t|||d| _t|||d| _t|||| _tj|dd| _	tj|dd| _
t|| _| | j}| d| d S )N)r/   rd   re   rT   )r&   r'   r   	self_attncontext_attnrc   feed_forwardr	   rg   layer_norm_1layer_norm_2r.   drop_get_attn_subsequent_maskMAX_SIZEregister_buffer)r2   rn   headsro   r/   rT   r3   r5   r6   r'     s   
z TransformerDecoderLayer.__init__Nc              
   C   s   t |t j| jddd|dd|df t j d}| |}	|	}
|dur8t j||	fdd}
d}| j|
|
|	||dd}| 	|| }| 
|}| j|||||ddd	\}}| | 	|| }|||
fS )
a#  
        Args:
            inputs (`FloatTensor`): `[batch_size x 1 x model_dim]`
            memory_bank (`FloatTensor`): `[batch_size x src_len x model_dim]`
            src_pad_mask (`LongTensor`): `[batch_size x 1 x src_len]`
            tgt_pad_mask (`LongTensor`): `[batch_size x 1 x 1]`

        Returns:
            (`FloatTensor`, `FloatTensor`, `FloatTensor`):

            * output `[batch_size x 1 x model_dim]`
            * attn `[batch_size x 1 x src_len]`
            * all_input `[batch_size x current_step x model_dim]`

        Nr   r   r"   r2   )rT   rU   rV   rC   T)rT   rU   rV   rW   )rI   gtrV   uint8rT   rG   rw   rJ   rt   ry   rx   ru   rv   )r2   Zinputsmemory_banksrc_pad_masktgt_pad_maskprevious_inputrU   stepZdec_maskZ
input_norm	all_inputrS   Z
query_normmidrY   rZ   r5   r5   r6   r[     sD   
(



zTransformerDecoderLayer.forwardc                 C   s2   d||f}t jt |ddd}t|}|S )z
        Get an attention mask to avoid using the subsequent info.

        Args:
            size: int

        Returns:
            (`LongTensor`):

            * subsequent_mask `[1 x size x size]`
        r   )kr   )npZtriuZonesZastyperI   Z
from_numpy)r2   rG   Z
attn_shapeZsubsequent_maskr5   r5   r6   rz   U  s   

z1TransformerDecoderLayer._get_attn_subsequent_mask)NNN)	r]   r^   r_   r`   r{   r'   r[   rz   ra   r5   r5   r3   r6   rr      s    

8rr   c                       s0   e Zd Zd	 fdd	Zd
ddZdd Z  ZS )PositionalEncodingrs   c                    s   t    t||}td|d}ttjd|dtjdt	d|   }t
| | |d d dd df< t| | |d d dd df< |d}| d| t|| _|| _d S )Nr   r   r7   )dtypeg     @pe)r&   r'   rI   ZzerosarangerO   expfloatrL   logsincosr|   r	   r.   r/   r#   )r2   r/   r#   max_lenr   positionZdiv_termr3   r5   r6   r'   i  s   
$$

zPositionalEncoding.__init__Nc                 C   sl   |t | j }|r|| jd d |f d d d d d f  }n|| jd d d |df  }| |}|S Nr   )rL   rM   r#   r   rG   r/   )r2   embr   r5   r5   r6   r[   v  s   * 
zPositionalEncoding.forwardc                 C   s   | j d d d |df S r   )r   rG   )r2   r   r5   r5   r6   get_emb  s   zPositionalEncoding.get_emb)rs   rp   )r]   r^   r_   r'   r[   r   ra   r5   r5   r3   r6   r   g  s    

r   c                   @   s8   e Zd ZddedefddZdd Zdd	 Zd
d ZdS )TransformerDecoderStater!   srccache_num_layersc                 C   s2   || _ d | _d | _d | _|dkr| | d S d S Nr!   )r   r   previous_layer_inputscache_init_cache)r2   r   r   r5   r5   r6   r'     s   z TransformerDecoderState.__init__c                 C   s   || _ || _d | _d S rp   )r   r   r   )r2   Z	new_inputr   r5   r5   r6   update_state  s   
z$TransformerDecoderState.update_statec                 C   sB   i | _ t|D ]}d d d}d |d< d |d< || j d|< qd S )N)rD   rE   rA   rB   layer_{})r   rangeformat)r2   
num_layersnumrU   r5   r5   r6   r     s   
z#TransformerDecoderState._init_cachec                    s:   d fdd	 | j d| _ | jd ur | j d S d S )Nr   c                    s@   |   D ]\}}|d urt|tr | q||| |< qd S rp   )items
isinstancedict)structZ	batch_dimr   v_recursive_mapfnr5   r6   r     s   

z<TransformerDecoderState.map_batch_fn.<locals>._recursive_mapr   )r   r   )r2   r   r5   r   r6   map_batch_fn  s
   
z$TransformerDecoderState.map_batch_fnN)r!   )	r]   r^   r_   r   intr'   r   r   r   r5   r5   r5   r6   r     s
    r   c                       sH   e Zd ZdZdZ fddZ		ddededed	ed
ef
ddZ	  Z
S )TransformerDecodera  
    The Transformer decoder from "Attention is All You Need".


    .. mermaid::

       graph BT
          A[input]
          B[multi-head self-attn]
          BB[multi-head src-attn]
          C[feed forward]
          O[output]
          A --> B
          B --> BB
          BB --> C
          C --> O


    Args:
       num_layers (int): number of encoder layers.
       d_model (int): size of the model
       heads (int): number of heads
       d_ff (int): size of the inner FF layer
       dropout (float): dropout parameters
       embeddings (:obj:`onmt.modules.Embeddings`):
          embeddings to use, should have positional encodings
       attn_type (str): if using a separate copy attention
    Ztransformerc                    sd   t    || _|| _t| jj| _t fddt	|D | _
tjdd| _d | _d S )Nc                    s   g | ]	}t  qS r5   )rr   .0_ro   rn   r/   r}   r5   r6   
<listcomp>  s    z/TransformerDecoder.__init__.<locals>.<listcomp>rd   re   )r&   r'   r   
embeddingsr   Zembedding_dimpos_embr	   Z
ModuleListr   transformer_layersrg   rh   state)r2   r   rn   r}   ro   r/   r   r3   r   r6   r'     s   


zTransformerDecoder.__init__Nr   tgtr   r   memory_masksc                 C   s  |j }|}| \}}	| \}
}| |}| dksJ | ||}|}| jj}|j|d	|
||}|d urI|d}	|	|||	}n|j|d	|||	}|j
d u r^g }g }t| jD ]@}d }|j
d u rx|jd urx|j| }| j| ||||||j
d ur|j
d| nd |d\}}}|j
d u r|| || qe|j
d u rt|}| |}|j
d u r||| |||fS )NrF   r   r!   r   )r   rU   r   )r   rG   r   r#   r   padding_idxdataeqrO   expandr   r   r   r   r   r   r   appendrI   stackrh   r   )r2   r   r   r   r   r   Z	src_wordsZ	tgt_wordsZ	src_batchZsrc_lenZ	tgt_batchZtgt_lenr   rZ   Zsrc_memory_bankr   r   r   Zsaved_inputsattnsiZprev_layer_inputrY   r   r5   r5   r6   r[     sZ   















zTransformerDecoder.forward)NN)r]   r^   r_   r`   Zdecoder_typer'   r   r   r   r[   ra   r5   r5   r3   r6   r     s"    r   c                       s$   e Zd Z fddZdd Z  ZS )PalmPointerGeneratorc                    s(   t    t||| _td| _d S r   )r&   r'   r	   r)   denseZ
LogSoftmaxgen_func)r2   hidden_size
vocab_sizer3   r5   r6   r'     s   
zPalmPointerGenerator.__init__c                 C   s   |  |}| |}|S rp   )r   r   )r2   r;   r5   r5   r6   r[     s   

zPalmPointerGenerator.forward)r]   r^   r_   r'   r[   ra   r5   r5   r3   r6   r     s    r   c                       sT   e Zd ZdZeZdZ fddZede	e
eejf  fddZedd	 Z  ZS )
PalmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    palmc                    s*   t  j|jfi | t t| | d S rp   )r&   r'   Zname_or_pathr   )r2   configkwargsr3   r5   r6   r'   -  s   zPalmPreTrainedModel.__init__pretrained_model_name_or_pathc                 K   sv   t j|t}t j|rt|nt }t j||j|_t j|t}t j|r0t	
|nd }| ||fi |S rp   )ospathjoinCONFIG_NAMEisfiler   Zfrom_json_fileencoder_pthWEIGHTS_NAMErI   load)clsr   r   config_filer   Zcheckpoint_file
checkpointr5   r5   r6   _from_pretrained1  s$   z$PalmPreTrainedModel._from_pretrainedc                 K   s(   | d}| jdd|i|}||_|S )ah  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.
                    num_labels: An optional arg to tell the model how many classes to initialize.
                                    Method will call utils.parse_label_mapping if num_labels not supplied.
                                    If num_labels is not found, the model will use the default setting (2 classes).

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirr   Nr5   )popr   r   )r   r   r   modelr5   r5   r6   _instantiateA  s   
z PalmPreTrainedModel._instantiate)r]   r^   r_   r`   r   Zconfig_classZbase_model_prefixr'   classmethodr   r   strr   PathLiker   r   ra   r5   r5   r3   r6   r   $  s    r   c                       s8   e Zd Zd	 fdd	ZedefddZdd Z  ZS )
AbsSummarizerNc                    s  t  j|fi | || _|jdks|jdkr!tt|j| _n|jdkr/t	t
|j| _|jdkrnt|j| jjjj}| jjjjj|jjd d< | jjjjjd d d d f |jd d|jjdd < || jjj_| jjj| _tj| j| jjj|jdkrdndd}|jrt| jjjjj|_t|j|j|j|j|j|d	| _ t!|j| j| _"| j jj| j"j#_|d ur| $|}| j%|d
d d S | j & D ];}t'|tj(tjfr|jjj)ddd nt'|tj*r|j+j,  |jj-d t'|tj(r|j+d ur|j+j,  q| j". D ]}|/ dkrt0| q|j,  q|j1rT|jdkr:tj| j| jjjdd}ntj| j| jjjdd}t| jjjj|_|| j _| j jj| j"j#_d S )Nbertzh_bertrobertai   r!   r   r   )r   )r}   ro   r/   r   F)strict        g{Gz?)meanZstd      ?)2r&   r'   r   encoderr   r   from_pretrainedr   r   r   r   Zmax_posr	   Z	Embeddingr   r   r   Zposition_embeddingsweightr   repeatr   Z	share_embcopydeepcopyZword_embeddingsr   Z
dec_layersZdec_hidden_sizeZ	dec_headsZdec_ff_sizeZdec_dropoutdecoderr   	generatorr   _unwrap_checkpointZload_state_dictmodulesr   r)   Znormal_rg   ZbiasZzero_fill_
parametersr#   r
   Zuse_bert_emb)r2   r   r   r   Zmy_pos_embeddingsZtgt_embeddingsmodulepr3   r5   r6   r'   Y  s   






zAbsSummarizer.__init__r   c                    sB   d}|D ]
  | v r|   } q|D ]  fdd|   D } q| S )N)r   r   c                    s4   i | ]\}}|  r|t d  d n||qS )r   N)
startswithlen)r   r   r   namer5   r6   
<dictcomp>  s    (z4AbsSummarizer._unwrap_checkpoint.<locals>.<dictcomp>)r   )r   Z
wrap_namesr5   r   r6   r     s   
z AbsSummarizer._unwrap_checkpointc           	      C   sN   | j ||dd\}}t|}| ||d d d df |\}}}||d |fS )NFZreturn_dictr!   )r   r   r   )	r2   r   r   mask_srcZtop_vecr   r   Zdecoder_outputsr   r5   r5   r6   r[     s   $zAbsSummarizer.forwardrp   )	r]   r^   r_   r'   staticmethodr   r   r[   ra   r5   r5   r3   r6   r   W  s
    H
r   c                       rb   )LabelSmoothingLossz
    With label smoothing,
    KL-divergence between q_{smoothed ground truth prob.}(w)
    and p_{prob. computed by model}(w) is minimized.
    c                    st   d|  k rdksJ  J || _ tt|   ||d  }t|f|}d|| j < | d|d d| | _d S )Nr   r   r7   r   one_hot)	r   r&   r   r'   rI   fullr|   rO   
confidence)r2   label_smoothingZtgt_vocab_sizeignore_indexZsmoothing_valuer   r3   r5   r6   r'     s   
zLabelSmoothingLoss.__init__c                 C   sR   | j |dd}|d|d| j ||| jkdd tj	||ddS )zf
        output (FloatTensor): batch_size x n_classes
        target (LongTensor): batch_size
        r   r   rP   )	reduction)
r   r   rG   Zscatter_rO   r  Zmasked_fill_r   FZkl_div)r2   rZ   targetZ
model_probr5   r5   r6   r[     s   zLabelSmoothingLoss.forward)r   r\   r5   r5   r3   r6   r     s    r   c                       s:   e Zd ZdZd fdd	Zdd Zdd Zd	d
 Z  ZS )NMTLossComputez(
    Standard NMT Loss Computation.
    r   c                    sN   t    || _|d | _|dkrt||| jd| _d S tj| jdd| _d S )NPADr   )r  rP   )r  r  )r&   r'   r   r   r   	criterionr	   ZNLLLoss)r2   r   symbolsr   r  r3   r5   r6   r'     s   

zNMTLossCompute.__init__c                 C   s   | d|dS )Nr!   r7   r8   rG   )r2   _vr5   r5   r6   _bottle  s   zNMTLossCompute._bottlec                 C   s   | d||dS )Nr!   r   r  )r2   r  r=   r5   r5   r6   	_unbottle  s   zNMTLossCompute._unbottlec           	      C   sd   |d d dd f }| | j }| |}| |}| d}| ||}|t	| |S )Nr   r!   )
ner   rP   r  r   r?   r8   r
  divr   )	r2   r   rZ   r  ZnormalizationZbottled_outputrX   Zgtruthlossr5   r5   r6   r[     s   

zNMTLossCompute.forward)r   )	r]   r^   r_   r`   r'   r  r  r[   ra   r5   r5   r3   r6   r    s    r  c                       s   e Zd ZdZeG dd dZd!def fddZdd	 Zd
d Z	d"ddde
fddZd#ddZdded dfddZd$ddZdejdejdeeejf fdd Z  ZS )%
Translatorz9
    Uses a model to translate a batch of sentences.
    c                   @   sh   e Zd ZU eed< ejed< ejed< ejed< dZed ed< dZ	eee
  ed< dZee
 ed< dS )	zTranslator.Batchr=   r   r   r   Nquery_idsrc_strtgt_str)r]   r^   r_   r   __annotations__rI   r   r  r   r  r   r  r5   r5   r5   r6   Batch  s   
 


r  cnndatasetc                    sv   t    t | _|j| _|| j_|j| _	| j	j
| _
|j| _|j| _| jd | _| jd | _| jj| _| jj| _d S )NBOSEOS)r&   r'   loggingZ
get_loggerr   r   argsr  r   r   r   	tokenizervocabr  start_token	end_tokenalpha	beam_size)r2   r   r  r3   r5   r6   r'     s   



zTranslator.__init__c                    st  |d }t |d t |d ksJ |j}|d |d |j|j|jf\}}}}}|j}		 g }
t|D ]} jjdkre jj	dksG jj	dkrS fdd	|| D }n jj	d
krd fdd	|| D }nz jj	d
kr j
dd	 || d D dddd}n\ jj	dkr j
dd	 || d D }d|dd}n= jj	dkr jjdkr fdd	|| D }dd	 |D }n jj	dkr߈ j
dd	 || d D }d|dd}|| } jj	d
kr j
dd	 || D }d|| }n fdd	|| D d d }d|} jjdkr'|||| |	| || f}n||||	| || f}|
| q2|
S )Nbatch
gold_scorepredictionsrX   qg_ranking_testr   r   c                    s0   g | ]}d   jdd |D ddqS ) c                 S      g | ]}t |qS r5   r   r   nr5   r5   r6   r   &      4Translator.from_batch.<locals>.<listcomp>.<listcomp> ## )r   r   convert_ids_to_tokensreplacer   eachr2   r5   r6   r   #  s    z)Translator.from_batch.<locals>.<listcomp>r   c                    s2   g | ]} j d d |D ddddqS )c                 S   r*  r5   r+  r,  r5   r5   r6   r   +  r.  r/  <s>r1  </s>)r   decoder3  r4  r6  r5   r6   r   *  s    c                 S   r*  r5   r+  r,  r5   r5   r6   r   1  s    r   r7  r1  r8  c                 S   r*  r5   r+  r,  r5   r5   r6   r   7  r.  r)  r0  
paraphrasec                    s"   g | ]} j d d |D qS )c                 S   r*  r5   r+  r,  r5   r5   r6   r   ;  r.  r/  )r   r2  r   predr6  r5   r6   r   :  s    c                 S   s   g | ]}d  |dd qS )r1  r0  )r   r3  r;  r5   r5   r6   r   >  s    c                 S   r*  r5   r+  r,  r5   r5   r6   r   C  r.  ##c                 S   r*  r5   r+  r   tr5   r5   r6   r   H  r.  c                    s   g | ]
} j jt| qS r5   )r   ids_to_tokensr   r>  r6  r5   r6   r   K  s    i  faq)r   r=   r  r   r  r  r   r  r  r   r   r9  r3  r2  r   r   )r2   translation_batchr%  r=   preds
pred_scorer  r   r  r  translationsbZ
pred_sentsZ	gold_sentZraw_srctranslationr5   r6  r6   
from_batch  s   






zTranslator.from_batchc           (      C   s  | j jd|  }| j jd|  }t|dd| _t|dd| _t|d dd| _| j jdkrE| j jdkrEd	g d	d
 }| j
| | j jd|  }t|dd| _g g }}d}	i i }
}t|D ]\}}| jd|d  dt|  | |}| |}|D ]S}|\}}}}}|dddd }| j jdkrd	dd |D }nD|dddddddddddddddddd }|dddd ddd!dd"dd#d }|dd  dddddddddddd d!dd"dd#d }| j jrNd}|d D ] }|d  |  }t| t| d$ krJ|} n|}q-| j jd%ksc| j jd&ksc| j jd'kr|d d}|d ur||gd(}||gd(}||g|d    d)}n|	|gd(}|	|gd(}|	|g|d    d)}t|| j | j
d
 t|| j | j
d
 t|| j | j
d
 | j
| d
  n| j jd*kr| j
|d
  | j
|d
  | j
| d
  n| j jd+kr1|d u r	t|	}|t|g |t|g | j
|d
  | j
d	|d |gd
  n| j jdkro|d u rAt|	}| j jdkrK|g}t|d g|
|< t|g||< | j
d	t||||d gd
  ng| j jd,kr|d    d-k rq| j
d	t|||gd
  | j
d	t|||gd
  | j
d	t||||t|d    gd
  n| j jdkr| j
t|d | d
  |	d7 }	q| j  | j  | j  qe| jd.|	  | j  | j  | j  |d/kr| j jd%ks| j jd&ks| j jd'kr-t !d0||f }| j| d S | j jd*kre| jd1 dd2l"m#} d3d t|dd4D } d5d t|dd4D }!| j$| |!d6d7}"t%|" d S | j jd+kss| j jdkrd8d9 }#|#||\}}d:d; t|D }
d<d; t|D }$t&|
|$}%t%|% d S | j jd+ks| j jdkr|#||\}}t'||}&dd2l"m#} | }'|'j$||d6d7}"t%d=(|&|" d S d S d S )>Nz.%d.goldz.%d.candidatewzutf-8z.sampler:  r   	)r  Zsource_queryZtarget_queryZpredict_query
z.%d.raw_srcr   zdata: r   z / <pad>r1  r=  r(  c                 S   st   g | ]6}| d d dd dd dd dd dd dd dd	 d
d dd dd dd qS )	[unused0]r1  [PAD]	[unused1] +r)  [SEP]	[unused2]<mask><q>rL  r7  r8  <unk>)r3  stripr4  r5   r5   r6   r   u  s2    




z(Translator.translate.<locals>.<listcomp>rM  rN  rO  rP  r)  rQ  rR  [CLS]z[UNK]rS  rT  r7  r8  rU  
   ZmarcoZsquadZ
qg_ranking)r  answers)r  rY  rX   r  ZdureaderrA  g      zcnt: %sr!   z./run.sh %s %szCalculating Rouge)Rougec                 S      g | ]}|  qS r5   rV  r   liner5   r5   r6   r         )encodingc                 S   r[  r5   r\  r]  r5   r5   r6   r     r_  T)Zavgc                 S   sF   dd | D } dd |D }d| v r|  d}d| |< d| v s| |fS )Nc                 S   s   g | ]
}|  d dqS ).r1  )rV  r3  r;  r5   r5   r6   r     s    zBTranslator.translate.<locals>.postprocess_text.<locals>.<listcomp>c                 S   r[  r5   r\  )r   labelr5   r5   r6   r     r.  r1  u   。)index)rC  labelsidxr5   r5   r6   postprocess_text   s   
z.Translator.translate.<locals>.postprocess_textc                 S      i | ]	\}}t ||qS r5   r   r   r   tmpr5   r5   r6   r   
      z(Translator.translate.<locals>.<dictcomp>c                 S   rg  r5   rh  ri  r5   r5   r6   r     rk  z'Dev eval result: Bleu-4={}, {}))r  result_pathcodecsopenZgold_out_fileZcan_out_fileZpred_json_score_out_filer  r   r   writeZsrc_out_file	enumerater   infor   translate_batchrH  r3  rV  Zrecall_evalsplitcpunumpytolistjsondumpr   extendr   flushclose
subprocess	getoutputrougerZ  Z
get_scoresprintr   Zcal_bleur   )(r2   Z	data_iterr   Z	gold_pathZcan_pathoutZraw_src_pathZpred_resultsZgold_resultsZcntZ	pred_dictZref_dictr   r%  Z
batch_datarE  Ztransr<  Zgoldr   r  rD  Zpred_strZgold_strZ	_pred_strsentZcan_pred_strZ	pred_jsonZ	gold_jsonZpred_json_scoreZcnn_resultsrZ  
candidatesZ
referencesZrouge_scorerf  Z	gold_dictZ
bleu_rougeZ
bleu_scorer~  r5   r5   r6   	translateY  s  

 









*










*





#zTranslator.translateFr%  fastc                 C   s@   | j   t  | |W  d   S 1 sw   Y  dS )aq  
        Translate a batch of sentences.

        Mostly a wrapper around :obj:`Beam`.

        Args:
           batch (:obj:`Batch`): a batch from a dataset object
           data (:obj:`Dataset`): the dataset object
           fast (bool): enables fast beam search (may not support all features)

        Todo:
           Shouldn't need the original dataset.
        N)r   evalrI   Zno_grad_fast_translate_batch)r2   r%  r  r5   r5   r6   rr    s   

$zTranslator.translate_batchr   c                 C   s   t tt| }|dkr"|| |d |d< ||< || }t | }|d  |9  < |d}||ddd|ddd j| }|dkrW|| }|S )Nr   r!   r   )	listr   r   rG   Zpermuter?   r8   r9   r   )r2   r;   countr#   permZout_sizer%  r5   r5   r6   _tile,  s"   

zTranslator._tilerX  r   ZInfr   c                 C   s   |dkrt t|||d}|t||d d k }|||< |dk rgtj|dd\}}tjtj|dddd}	|	|k}
|dkrHd|
d	d |f< |
d	d df 	 |
d	dd f< d|
d
< |

d||
}|||< |S )Nr   r!   ).r!   Nr   T)Z
descendingr"   r   .).r   )minmaxrG   rI   topksortZcumsumr  r-   cloneZscatter)r2   logitstop_ktop_pZfilter_valuemin_tokens_to_keepZindices_to_removeZsorted_logitsZsorted_indicesZcumulative_probsZsorted_indices_to_remover5   r5   r6   _top_k_top_p_filtering>  s2   

z!Translator._top_k_top_p_filteringc           /         s<  j j}j j}j |j}|j}|j}jj||dd\}}t	|jj
j}	|j}
|	 fdd j| dd}tj|tj|
d}tjd|   tj|
d}tj|  d	gjtj|
d}tjd
gtdg d	   |
d|}dd t|D }i }dd t|D |d< dd t|D |d< dg| |d< ||d< t|D ]u}|d d df d	d}|dd	}jj
|	|||d\}}}	j|dd	d}|d}||k rd|d d jf< d|d	  d j }j j r@j j!}|| }j"|j j#j j$d	d}tj%t&j'|ddd	d}t&j(|d	d}||d)d	7 }|| }t*|d|}|d }|d }n||d)d	7 }|| }|+d | }|j, dd\}}j j-r|d	}|dkrt|dD ]e}d}dd || D j j.dkrj/01 2 nfddD d 34d!d"2 t5dkrqufd#dtd	t5d	 D } t6| d }!|!| d d v rd$}|rd%||< qu|| }|| }"|7|}|"|d |"d )d	 }#|#dt8|9d|dd	gd}|:j}$|d	 |kr|$;j |$d d df :d	}%|$< r|d |d}&t|$dD ]}|| }'|%| rR|$| ;j |$| = d}(|(D ]})||' >|||)f |&||)d	d f f q]|%| rt?||' d&d d$d'}*j j@d(ksj j@d)krj j s|*d   D ]}+|+\},}-|d |' >|, |d |' >|- qq?|*d \},}-|d |' >|, |d |' >|- q?|%:d= d}.t5|.dkr |S |9d|.}|#9d|.}#|9d|.}|&9d|.d|d}|#d|9d}|	fd*d q|S )+NFr   c                    s   j |  |dS )Nr"   )r  r   r#   )r$  r2   r5   r6   <lambda>s      z2Translator._fast_translate_batch.<locals>.<lambda>r   r"   )r   rH   )r   r   rH   r   r   z-inf)rH   c                 S      g | ]}g qS r5   r5   r   r5   r5   r6   r     r  z4Translator._fast_translate_batch.<locals>.<listcomp>c                 S   r  r5   r5   r   r5   r5   r6   r     r  r'  c                 S   r  r5   r5   r   r5   r5   r6   r     r  rX   r&  r%  r!   )r   g@xg      @g      @)r  r  r  )Znum_samplesrF   c                 S   r*  r5   r+  r   rI  r5   r5   r6   r     r.  r   c                    s   g | ]} j j| qS r5   )r   r@  r  r6  r5   r6   r     s    r)  r0  r1  c                    s*   g | ]} |d    |  |d   fqS )r   r5   )r   r   )wordsr5   r6   r     s    "TgPKc                 S   s   | d S )Nr   r5   r:   r5   r5   r6   r    s    )rQ   reverser(  r:  c                    s   |  | S rp   )index_selectr  )select_indicesr5   r6   r    s    )Ar  
max_length
min_lengthr$  r=   r   r   r   r   r   r   r   rH   r   r  rI   r   longr  r!  Ztensorr   r   r   r8   r9   r   r[   ZsqueezerG   r"  r#  Zsample_topktemperaturer  r  r  Zmultinomialr  r-   Zlog_softmaxrO   ZgatherZreshaper  Zblock_trigramr   r   r9  rV  rs  r   r3  r   tuplefmodrJ   r  r   r   anyZnonzeror   sortedr  )/r2   r%  r  r  r=   r   r   Zsrc_featuresr   r   rH   Zbatch_offsetZbeam_offsetZ	alive_seqZtopk_log_probsZ
hypothesesresultsr   Zdecoder_inputZdec_outr   Z	log_probsr   Zlength_penaltyr  Z_scoresZtopk_idsZtopk_scoresZcurr_scorescur_lenr   ZfailZtrigramsZtrigramZtopk_beam_indexZbatch_indexis_finishedZend_conditionr'  rF  Zfinished_hypjZbest_hypr5  Zscorer<  Znon_finishedr5   )r$  r  r2   r  r6   r  a  sV  

















z Translator._fast_translate_batch	input_idsattention_maskreturnc                 K   s4   | j | d |d |d}| |}|d }d|iS )Nr   )r=   r   r   r   r'  )r  rG   rr  )r2   r  r  r   r%  rB  rC  r5   r5   r6   __call__!  s   

zTranslator.__call__)r  )Fr   )r%  r  )r]   r^   r_   r`   r   r  r   r'   rH  r  boolrr  r  r   r  r  rI   r   r   r  ra   r5   r5   r3   r6   r    s(    	J B


# Ar  )module_namec                       s@   e Zd Zd
 fdd	Zdd Zdeeef defdd	Z	  Z
S )PalmForTextGenerationNc                    s   t  j|fi | || _|jdkr&tj|jdd}|j|j|j	|j
d}n%|jdks0|jdkrKtj|jdd}|jd |jd	 |jd
 |jd d}|| _|| _t||| _t| jj|| jj|j| _t| | _d S )Nr   F)Zdo_lower_case)r  r  r	  ZEOQr   r   TrW  rQ  rN  rR  )r&   r'   r   r   r   r   r   Zcls_token_idZsep_token_idZpad_token_idZunk_token_idr   r   r  r  r   r   r  r   r   r  r  r  )r2   r   r   r   r  r  r3   r5   r6   r'   1  s8   

zPalmForTextGeneration.__init__c                 C   s0   | j |||d}| ||d }t||d dS )N)r   r   r   r   )r  r  )r   r  r   )r2   r  r  rd  rZ   r  r5   r5   r6   r[   N  s   zPalmForTextGeneration.forwardinputr  c                 K   sN   |  D ]\}}t| jj|| q| jdi |}|d }tdd |D dS )Nr'  c                 S   s   g | ]}|d  qS r   r5   r;  r5   r5   r6   r   \  r.  z2PalmForTextGeneration.generate.<locals>.<listcomp>)	sequencesr5   )r   setattrr   r  r   )r2   r  r   r   r   ZoutputsrC  r5   r5   r6   generateV  s
   zPalmForTextGeneration.generaterp   )r]   r^   r_   r'   r[   r   r   r   r   r  ra   r5   r5   r3   r6   r  .  s    r  )Lrm  r   rL   r   r|  dataclassesr   typingr   r   r   r   r   rw  ru  r   rI   Ztorch.nn.functionalr	   Z
functionalr  r   Ztorch.nn.initr
   Ztransformersr   r   r   r   r   r   Ztransformers.activationsr   Ztransformers.modeling_utilsr   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.baser   Zmodelscope.models.builderr   Zmodelscope.outputsr   r   Zmodelscope.utilsr   r  Zmodelscope.utils.constantr   configurationr   Zdureader_evalr   r   r   r   Moduler   rc   rr   r   r   r   r   r   r   r   r  objectr  Zregister_moduleZtext_generationr   r  r5   r5   r5   r6   <module>   s\     6g&m3\!    @