o
    *j#                     @   s4  d dl mZ d dlZd dlm  mZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ejejejdejejejdG dd deZejejejdG dd deZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"dS )    )DictN)nn)CrossEntropyLoss)ACT2FNgelu)Heads)	TorchHead)HEADS)AttentionFillMaskModelOutputModelOutputBase
OutputKeys)Tasks)module_namec                       sP   e Zd Z				d fdd	Z		ddefd	d
ZdejdejfddZ  Z	S )BertFillMaskHead   r   -q=:w  c                    $   t  j||||d t| j| _d S N)hidden_size
hidden_actlayer_norm_eps
vocab_size)super__init__BertOnlyMLMHeadconfigclsselfr   r   r   r   kwargs	__class__ k/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/heads/fill_mask_head.pyr   &      zBertFillMaskHead.__init__Ninputsc                 K   8   |  |j}d }|d ur| ||}t|||j|jdS N)losslogitshidden_states
attentions)r   last_hidden_statecompute_lossr
   r+   r,   r   r&   Zattention_masklabelsr    r*   r)   r#   r#   r$   forward3      zBertFillMaskHead.forwardr*   returnc                 C   &   t  }||d| jj|d}|S Nr   viewr   r   r   r*   r0   Zloss_fctZmasked_lm_lossr#   r#   r$   r.   C   
   zBertFillMaskHead.compute_loss)r   r   r   r   NN)
__name__
__module____qualname__r   r   r1   torchTensorr.   __classcell__r#   r#   r!   r$   r   "   s    
r   c                       s`   e Zd ZddgZ				d fdd	Z				dd
efddZdejdejfddZ	dd Z
  ZS )XlmRobertaMaskHeadzlm_head.decoder.weightzlm_head.decoder.bias   r   h㈵>1 c                    r   r   )r   r   XLMRobertaLMHeadr   lm_headr   r!   r#   r$   r   P   r%   zXlmRobertaMaskHead.__init__Nr&   c                 K   r'   r(   )rG   r-   r.   r
   r+   r,   r/   r#   r#   r$   r1   ]   r2   zXlmRobertaMaskHead.forwardr*   r3   c                 C   r4   r5   r7   r9   r#   r#   r$   r.   m   r:   zXlmRobertaMaskHead.compute_lossc                 C   s   | j jS N)rG   decoderr   r#   r#   r$   get_output_embeddingss   s   z(XlmRobertaMaskHead.get_output_embeddings)rC   r   rD   rE   r;   )r<   r=   r>   Z_keys_to_ignore_on_load_missingr   r   r1   r?   r@   r.   rK   rA   r#   r#   r!   r$   rB   J   s    
rB   c                       $   e Zd Z fddZdd Z  ZS )BertPredictionHeadTransformc                    sV   t    t|j|j| _t|jtrt	|j | _
n|j| _
tj|j|jd| _d S N)eps)r   r   r   Linearr   dense
isinstancer   strr   transform_act_fn	LayerNormr   r   r   r!   r#   r$   r   y   s   
z$BertPredictionHeadTransform.__init__c                 C   s"   |  |}| |}| |}|S rH   )rQ   rT   rU   r   r+   r#   r#   r$   r1      s   


z#BertPredictionHeadTransform.forwardr<   r=   r>   r   r1   rA   r#   r#   r!   r$   rM   w   s    
rM   c                       rL   )BertLMPredictionHeadc                    sL   t    t|| _tj|j|jdd| _t	t
|j| _| j| j_d S )NF)bias)r   r   rM   	transformr   rP   r   r   rI   	Parameterr?   zerosrZ   rV   r!   r#   r$   r      s   


zBertLMPredictionHead.__init__c                 C   s   |  |}| |}|S rH   )r[   rI   rW   r#   r#   r$   r1      s   

zBertLMPredictionHead.forwardrX   r#   r#   r!   r$   rY      s    rY   c                       s2   e Zd Z fddZdejdejfddZ  ZS )r   c                    s   t    t|| _d S rH   )r   r   rY   predictionsrV   r!   r#   r$   r      s   
zBertOnlyMLMHead.__init__sequence_outputr3   c                 C   s   |  |}|S rH   )r^   )r   r_   Zprediction_scoresr#   r#   r$   r1      s   
zBertOnlyMLMHead.forward)r<   r=   r>   r   r?   r@   r1   rA   r#   r#   r!   r$   r      s    r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )rF   z*Roberta Head for masked language modeling.c                    sd   t    t|j|j| _tj|j|jd| _t|j|j	| _
tt|j	| _| j| j
_d S rN   )r   r   r   rP   r   rQ   rU   r   
layer_normr   rI   r\   r?   r]   rZ   rV   r!   r#   r$   r      s   
zXLMRobertaLMHead.__init__c                 K   s*   |  |}t|}| |}| |}|S rH   )rQ   r   r`   rI   )r   featuresr    xr#   r#   r$   r1      s
   


zXLMRobertaLMHead.forwardc                 C   s,   | j jjjdkr| j| j _d S | j j| _d S )Nmeta)rI   rZ   ZdevicetyperJ   r#   r#   r$   _tie_weights   s   zXLMRobertaLMHead._tie_weights)r<   r=   r>   __doc__r   r1   re   rA   r#   r#   r!   r$   rF      s
    

rF   )#typingr   r?   Ztorch.nn.functionalr   Z
functionalFZtorch.nnr   Ztransformers.activationsr   r   Zmodelscope.metainfor   Zmodelscope.models.baser   Zmodelscope.models.builderr	   Zmodelscope.outputsr
   r   r   Zmodelscope.utils.constantr   Zregister_moduleZ	fill_maskZbert_mlmr   Zxlm_roberta_mlmrB   ModulerM   rY   r   rF   r#   r#   r#   r$   <module>   s(   &,