o
    *j_H                  	   @   s  d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlZddlZddlmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 e1 Z2dgZ3dd Z4G dd deZ5G dd deZ6e'j7e/j8e#j9dG dd de%Z:			d*d eeej;  d!e<d"e<d#e<fd$d%Z=		d+d eeej;  d!e<d"e<fd&d'Z>		d+d eeej;  d!e<d"e<fd(d)Z?dS ),zPyTorch UniTE model.    N)	dataclass)ceil)DictListOptionalTupleUnion)version)DropoutLinearModule	ParameterParameterList
Sequential)softmax)pad_sequence)XLMRobertaConfigXLMRobertaModelACT2FN)Models)
TorchModel)MODELS)InputFormat)TranslationEvaluationOutput)compatible_position_ids)Tasks)
get_loggerUniTEForTranslationEvaluationc                 C   st   |j dd}| | d }| | }|jg ddd| }|| | d jg ddd| }| | t|d  S )Ndim)r   T)Zkeepdim   g-q=)	unsqueezesumsizetorchsqrt)Ztensor
mask_floatZbroadcast_maskZnum_elements_not_maskedZtensor_maskedmeanZvariance r,   s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/unite/translation_evaluation.py_layer_norm_all#   s   
r.   c                	       sV   e Zd Z	ddedededdf fddZ	ddeej d	ejdejfd
dZ	  Z
S )LayerwiseAttentionN
num_layers	model_dimdropoutreturnc                    s   t t|   || _|| _|| _ttj|fdd| _	tt
dgdd| _| jrJtt| j	}tt| j	d}| d| | d| d S d S )NT)Zrequires_gradg      ?g@xdropout_maskdropout_fill)superr/   __init__r0   r1   r2   r   r(   Zzerosscalar_parametersZFloatTensorgammalenemptyZfill_Zregister_buffer)selfr0   r1   r2   r4   r5   	__class__r,   r-   r7   2   s$   zLayerwiseAttention.__init__tensorsmaskc                 C   s   t jtdd |D dd}| jr(| jr(tt | j | jk| j	| j
dd}nt| j	dd}|dddd}| }|t|| jdd}|d d dd d f }| j| S )Nc                 s   s    | ]	}|j d dV  qdS r   r    N)r%   .0xr,   r,   r-   	<genexpr>M   s    z-LayerwiseAttention.forward.<locals>.<genexpr>r   r    r      )r(   catlistZtrainingr2   r   wherer4   Zuniform_r8   r5   viewfloatr.   r&   r9   )r<   r?   r@   Znormed_weightsr*   Zweighted_sumr,   r,   r-   forwardH   s$   
zLayerwiseAttention.forwardN)__name__
__module____qualname__intrK   r7   r   r(   TensorrL   __classcell__r,   r,   r=   r-   r/   0   s&    r/   c                       sz   e Zd Zdddgdddfdeded	ee d
edee deddf fddZdede	fddZ
dejdejfddZ  ZS )FeedForwardrF      i   ZSigmoidN皙?in_dimout_dimhidden_sizesactivationsfinal_activationr2   r3   c           	         s   t    g }|t||d  || | |t| tdt|D ]}|t||d  ||  || | |t| q'|t|d t| |dur_|| | t	| | _
dS )a  
        Feed Forward Neural Network.

        Args:
            in_dim (:obj:`int`):
                Number of input features.
            out_dim (:obj:`int`, defaults to 1):
                Number of output features. Default is 1 -- a single scalar.
            hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`):
                List with hidden layer sizes.
            activations (:obj:`str`, defaults to `Sigmoid`):
                Name of the activation function to be used in the hidden layers.
            final_activation (:obj:`str`, Optional, defaults to `None`):
                Name of the final activation function if any.
            dropout (:obj:`float`, defaults to 0.1):
                Dropout ratio to be used in the hidden layers.
        r   rF   r   N)r6   r7   appendr   build_activationr
   ranger:   rQ   r   ff)	r<   rW   rX   rY   rZ   r[   r2   modulesir=   r,   r-   r7   c   s   
zFeedForward.__init__
activationc                 C   s   t | S rM   r   )r<   rb   r,   r,   r-   r]      s   zFeedForward.build_activationin_featuresc                 C   s
   |  |S rM   )r_   )r<   rc   r,   r,   r-   rL      s   
zFeedForward.forward)rN   rO   rP   rQ   r   strr   rK   r7   r   r]   r(   rR   rL   rS   r,   r,   r=   r-   rT   a   s.    +rT   )module_namec                )       s   e Zd Zdddddddddd	d
dddddddgdddfdedededededededededededededed ed!ed"ee d#ed$ee d%ef( fd&d'Z			d3d(e
jd)eee  d*ee
j d+efd,d-Zd.ed/e
jd0efd1d2Z  ZS )4r   rV   r   r$   rF   Zgelui   g{Gz?i   gh㈵>         Ti rU   tanhNattention_probs_dropout_probbos_token_ideos_token_idpad_token_id
hidden_acthidden_dropout_probhidden_sizeinitializer_rangeintermediate_sizelayer_norm_epsmax_position_embeddingsnum_attention_headsnum_hidden_layerstype_vocab_size	use_cache
vocab_sizemlp_hidden_sizesmlp_actmlp_final_actmlp_dropoutc                    sd  t  jdi | || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _tdi d| jd| jd| jd| jd| jd| jd| jd| j
d	| jd
| jd| jd| jd| jd| j	d| jd| j| _t| jdd| _t| jd | j| jd| _t| jd| j| j| j| jd| _dS )a  The UniTE Model which outputs the scalar to describe the corresponding
            translation quality of hypothesis. The model architecture includes two
            modules: a pre-trained language model (PLM) to derive representations,
            and a multi-layer perceptron (MLP) to give predicted score.

            Args:
                attention_probs_dropout_prob (:obj:`float`, defaults to 0.1):
                    The dropout ratio for attention weights inside PLM.
                bos_token_id (:obj:`int`, defaults to 0):
                    The numeric id representing beginning-of-sentence symbol.
                eos_token_id (:obj:`int`, defaults to 2):
                    The numeric id representing ending-of-sentence symbol.
                pad_token_id (:obj:`int`, defaults to 1):
                    The numeric id representing padding symbol.
                hidden_act (:obj:`str`, defaults to :obj:`"gelu"`):
                    Activation inside PLM.
                hidden_dropout_prob (:obj:`float`, defaults to 0.1):
                    The dropout ratio for activation states inside PLM.
                hidden_size (:obj:`int`, defaults to 1024):
                    The dimensionality of PLM.
                initializer_range (:obj:`float`, defaults to 0.02):
                    The hyper-parameter for initializing PLM.
                intermediate_size (:obj:`int`, defaults to 4096):
                    The dimensionality of PLM inside feed-forward block.
                layer_norm_eps (:obj:`float`, defaults to 1e-5):
                    The value for setting epsilon to avoid zero-division inside
                        layer normalization.
                max_position_embeddings: (:obj:`int`, defaults to 512):
                    The maximum value for identifying the length of input sequence.
                num_attention_heads (:obj:`int`, defaults to 16):
                    The number of attention heads inside multi-head attention layer.
                num_hidden_layers (:obj:`int`, defaults to 24):
                    The number of layers inside PLM.
                type_vocab_size (:obj:`int`, defaults to 1):
                    The number of type embeddings.
                use_cache (:obj:`bool`, defaults to :obj:`True`):
                    Whether to use cached buffer to initialize PLM.
                vocab_size (:obj:`int`, defaults to 250002):
                    The size of vocabulary.
                mlp_hidden_sizes (:obj:`List[int]`, defaults to `[3072, 1024]`):
                    The size of hidden states inside MLP.
                mlp_act (:obj:`str`, defaults to :obj:`"tanh"`):
                    Activation inside MLP.
                mlp_final_act (:obj:`str`, `optional`, defaults to :obj:`None`):
                    Activation at the end of MLP.
                mlp_dropout (:obj:`float`, defaults to 0.1):
                    The dropout ratio for MLP.
            rk   rl   rm   ry   rp   rv   ru   rr   rn   ro   rj   rt   rw   rq   rs   rx   F)Zadd_pooling_layerrF   )r0   r1   r2   )rW   rX   rY   rZ   r[   r2   Nr,   )r6   r7   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r   Zencoder_configr   encoderr/   layerwise_attentionrT   	estimator)r<   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   kwargsr=   r,   r-   r7      s   F	
z&UniTEForTranslationEvaluation.__init__	input_idsinput_formatscorer3   c                 K   s~   | | j }| j||ddd}| |d |}| |jdd}t| 	 |d}	|d ur=|| 
d }
|
|	d< |	S )	NT)r   attention_maskZoutput_hidden_statesZreturn_dictZhidden_statesr   r    )r   r   r$   loss)nerm   longr~   r   r   Zsqueezer   cputolistpowr+   )r<   r   r   r   r   r   ZoutputsZ
mix_statespredoutputr   r,   r,   r-   rL     s$   
z%UniTEForTranslationEvaluation.forwardpathdeviceplm_onlyc                 C   sV   |r| j ||| _ d | j _ntj||d}t|d | | t	d|  d S )N)Zmap_locationzencoder.embeddings.position_idsz%Loading checkpoint parameters from %s)
r~   Zfrom_pretrainedtoZpoolerr(   loadr   Zload_state_dictloggerinfo)r<   r   r   r   Z
state_dictr,   r,   r-   load_checkpoint0  s   

z-UniTEForTranslationEvaluation.load_checkpoint)NN)rN   rO   rP   rK   rQ   rd   boolr   r   r7   r(   rR   r   r   rL   r   r   rS   r,   r,   r=   r-   r      s    	
 

 rf   rF   r$   all_input_concatmaximum_lengthpad_idxeos_idxc                 C   sF   | dd  D ]
}||d d df< qt | dkrt| ||S t| ||S )NrF   r      )r:   cut_long_sequences3cut_long_sequences2)r   r   r   r   groupr,   r,   r-   combine_input_sentences=  s
   r   c                    sz  t t|  } t  }| D ]}tfdd|D }tdd |D }t||krtt| t dd t  dd ddD }tt 	 | d	 t
||d	 krgt
|krgtfd
d  D  n| |d    |d < t  fddtdt|D }tdd t||D }t||D ]
\}	}
|
d |	d< q|| q|| qt dd |D }t|dd}|S )Nc                 3        | ]}| | V  qd S rM   Zmasked_selectr   rB   r   r,   r-   rE   P      
z&cut_long_sequences2.<locals>.<genexpr>c                 s       | ]}t |V  qd S rM   r:   rB   r,   r,   r-   rE   R      c                 s       | ]}|d  V  qdS r   Nr,   rB   r,   r,   r-   rE   V  r   c                 S      | d S NrF   r,   dr,   r,   r-   <lambda>W      z%cut_long_sequences2.<locals>.<lambda>Tkeyreverser$   c                 3        | ]\}}||  fV  qd S rM   r,   rC   kvoffsetr,   r-   rE   ]      rF   r   c                 3       | ]} | V  qd S rM   r,   rC   r   lengthsr,   r-   rE   b  r   c                 s        | ]\}}|d | V  qd S rM   r,   rC   rD   yr,   r,   r-   rE   c      r   c                 s       | ]
}t j|d dV  qdS rA   r(   rG   rB   r,   r,   r-   rE   k      Zbatch_firstZpadding_valuerH   ziptupler&   dict	enumeratesorteditemsr   valuesminr^   r:   r\   r   r   r   r   Zcollected_tuplesZtensor_tupleZall_lensZlengths_sorted_idxesZnew_lensZnew_tensor_tuplerD   r   Zconcat_tensorZall_input_concat_paddedr,   r   r   r   r-   r   J  sD   
 r   c                    s  t t|  } t  }| D ]6}tfdd|D }tdd |D }t||kr=tt| t dd t  dd ddD }tt 	 | d	 t
||d	 krit
|kritfd
d  D  nt 	 |kr
 |d   |d  kr| |d    |d    |d  kr |d < ng |d   |d < n\ |d   |d    kr |d  krn n0| |d   d  |d  kr݈  |d <  |d < n% |d    |d <  |d < n|d	   |d <   |d <  |d < t 	 |ksrt  fddtdt D }tdd t||D }t||D ]\}	}
|
d |	d< q+|| q|| qt dd |D }t|dd}|S )Nc                 3   r   rM   r   rB   r   r,   r-   rE   w  r   z&cut_long_sequences3.<locals>.<genexpr>c                 s   r   rM   r   rB   r,   r,   r-   rE   y  r   c                 s   r   r   r,   rB   r,   r,   r-   rE   }  r   c                 S   r   r   r,   r   r,   r,   r-   r   ~  r   z%cut_long_sequences3.<locals>.<lambda>Tr   r   c                 3   r   rM   r,   r   r   r,   r-   rE     r   r   rF   r$   c                 3   r   rM   r,   r   r   r,   r-   rE     r   c                 s   r   rM   r,   r   r,   r,   r-   rE     r   r   c                 s   r   rA   r   rB   r,   r,   r-   rE     r   r   r   r   r,   r   r-   r   q  s   





 r   )rf   rF   r$   )rf   rF   )@__doc__warningsdataclassesr   mathr   typingr   r   r   r   r   numpynpr(   Ztorch.utils.checkpoint	packagingr	   Ztorch.nnr
   r   r   r   r   r   Ztorch.nn.functionalr   Ztorch.nn.utils.rnnr   Ztransformersr   r   Ztransformers.activationsr   Zmodelscope.metainfor   Zmodelscope.models.baser   Zmodelscope.models.builderr   Z)modelscope.models.nlp.unite.configurationr   Zmodelscope.outputs.nlp_outputsr   Z-modelscope.utils.compatible_with_transformersr   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   r   __all__r.   r/   rT   Zregister_moduleZtranslation_evaluationZuniter   rR   rQ   r   r   r   r,   r,   r,   r-   <module>   sl    14 )

(