o
    *jR                     @   s  d Z ddlmZmZmZmZ ddlZddlZddlZddl	Z	ddl
mZ ddlZddlZddlmZ ddlmZ eeZdd	 Zd
d Zdd ZeejjjeedZejjZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$G dd  d ejZ%G d!d" d"ejZ&G d#d$ d$ejZ'G d%d& d&ejZ(G d'd( d(ejZ)G d)d* d*ejZ*G d+d, d,ejZ+G d-d. d.e+Z,dS )/zPyTorch BERT model.     )absolute_importdivisionprint_functionunicode_literalsN)open)nn   )
BertConfigc                 C   s    | d dt | td   S )ab   Original Implementation of the gelu activation function in Google Bert repo when initially created.
        For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
        0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
        Also see https://arxiv.org/abs/1606.08415
          ?      ?g       @)torcherfmathsqrtx r   q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/clip/modeling_bert.pygelu"   s    r   c                 C   s6   d|  dt tdtj | dt | d     S )z Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
        Also see https://arxiv.org/abs/1606.08415
    r
   r      gHm?   )r   tanhr   r   pipowr   r   r   r   gelu_new+   s   "r   c                 C   s   | t |  S N)r   Zsigmoidr   r   r   r   swish3   s   r   )r   relur   r   c                       s*   e Zd ZdZ fddZdddZ  ZS )BertEmbeddingszLConstruct the embeddings from word, position and token_type embeddings.
    c                    sl   t t|   tj|j|jdd| _t|j|j| _	t|j
|j| _t|j|jd| _t|j| _d S )Nr   )Zpadding_idxeps)superr   __init__r   	Embedding
vocab_sizehidden_sizeword_embeddingsZmax_position_embeddingsposition_embeddingsZtype_vocab_sizetoken_type_embeddingsBertLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutselfconfig	__class__r   r   r"   E   s   
zBertEmbeddings.__init__Nc           	      C   s   | d}|d u rtj|tj|jd}|d|}|d u r$t|}| |}| 	|}| 
|}|| | }| |}| |}|S )Nr   )dtypedevicer   )sizer   Zarangelongr5   	unsqueezeZ	expand_as
zeros_liker&   r'   r(   r+   r.   )	r0   	input_idstoken_type_idsposition_idsZ
seq_lengthZwords_embeddingsr'   r(   
embeddingsr   r   r   forwardT   s   







zBertEmbeddings.forwardNN__name__
__module____qualname____doc__r"   r>   __classcell__r   r   r2   r   r   A   s    r   c                       s.   e Zd Z fddZdd ZdddZ  ZS )	BertSelfAttentionc                    s   t t|   |j|j dkrtd|j|jf |j| _|j| _t|j|j | _| j| j | _	t
|j| j	| _t
|j| j	| _t
|j| j	| _t
|j| _d S )Nr   zLThe hidden size (%d) is not a multiple of the number of attention heads (%d))r!   rF   r"   r%   num_attention_heads
ValueErroroutput_attentionsintattention_head_sizeall_head_sizer   Linearquerykeyvaluer,   Zattention_probs_dropout_probr.   r/   r2   r   r   r"   i   s"   
zBertSelfAttention.__init__c                 C   s6   |  d d | j| jf }|j| }|ddddS )Nr   r   r   r   )r6   rG   rK   viewpermute)r0   r   Znew_x_shaper   r   r   transpose_for_scores}   s
   
z&BertSelfAttention.transpose_for_scoresNc                 C   s   |  |}| |}| |}| |}| |}| |}	t||dd}
|
t| j	 }
|d ur8|
| }
t
jdd|
}| |}|d urM|| }t||	}|dddd }| d d | jf }|j| }| jrw||f}|S |f}|S )NrQ   )dimr   r   r   r   )rN   rO   rP   rT   r   matmulZ	transposer   r   rK   r   ZSoftmaxr.   rS   
contiguousr6   rL   rR   rI   )r0   hidden_statesattention_mask	head_maskZmixed_query_layerZmixed_key_layerZmixed_value_layerZquery_layerZ	key_layerZvalue_layerZattention_scoresZattention_probsZcontext_layerZnew_context_layer_shapeoutputsr   r   r   r>      sB   








zBertSelfAttention.forwardr?   )rA   rB   rC   r"   rT   r>   rE   r   r   r2   r   rF   g   s    rF   c                       $   e Zd Z fddZdd Z  ZS )BertSelfOutputc                    sD   t t|   t|j|j| _t|j|jd| _	t
|j| _d S Nr   )r!   r^   r"   r   rM   r%   denser)   r*   r+   r,   r-   r.   r/   r2   r   r   r"         zBertSelfOutput.__init__c                 C   &   |  |}| |}| || }|S r   r`   r.   r+   r0   rY   input_tensorr   r   r   r>         

zBertSelfOutput.forwardrA   rB   rC   r"   r>   rE   r   r   r2   r   r^          r^   c                       &   e Zd Z fddZdddZ  ZS )BertAttentionc                    s.   t t|   t|| _t|| _t | _d S r   )	r!   rj   r"   rF   r0   r^   outputsetZpruned_headsr/   r2   r   r   r"      s   

zBertAttention.__init__Nc                 C   s4   |  |||}| |d |}|f|dd   }|S Nr   r   )r0   rk   )r0   re   rZ   r[   Zself_outputsattention_outputr\   r   r   r   r>      s   
zBertAttention.forwardr?   rg   r   r   r2   r   rj          rj   c                       r]   )BertIntermediatec                    sb   t t|   t|j|j| _t|j	t
s#tjd dkr+t|j	tr+t|j	 | _d S |j	| _d S )Nr   r   )r!   rp   r"   r   rM   r%   intermediate_sizer`   
isinstance
hidden_actstrsysversion_infounicodeACT2FNintermediate_act_fnr/   r2   r   r   r"      s   
zBertIntermediate.__init__c                 C   s   |  |}| |}|S r   )r`   ry   r0   rY   r   r   r   r>      s   

zBertIntermediate.forwardrg   r   r   r2   r   rp      s    
rp   c                       r]   )
BertOutputc                    sD   t t|   t|j|j| _t|j|j	d| _
t|j| _d S r_   )r!   r{   r"   r   rM   rq   r%   r`   r)   r*   r+   r,   r-   r.   r/   r2   r   r   r"      ra   zBertOutput.__init__c                 C   rb   r   rc   rd   r   r   r   r>      rf   zBertOutput.forwardrg   r   r   r2   r   r{      rh   r{   c                       ri   )	BertLayerc                    s0   t t|   t|| _t|| _t|| _d S r   )	r!   r|   r"   rj   	attentionrp   intermediater{   rk   r/   r2   r   r   r"      s   

zBertLayer.__init__Nc           	      C   sB   |  |||}|d }| |}| ||}|f|dd   }|S rm   )r}   r~   rk   )	r0   rY   rZ   r[   Zattention_outputsrn   Zintermediate_outputZlayer_outputr\   r   r   r   r>      s   
zBertLayer.forwardr?   rg   r   r   r2   r   r|      ro   r|   c                       ri   )BertEncoderc                    sB   t t|    j| _ j| _t fddt jD | _	d S )Nc                    s   g | ]}t  qS r   )r|   ).0_r1   r   r   
<listcomp>	  s    z(BertEncoder.__init__.<locals>.<listcomp>)
r!   r   r"   rI   output_hidden_statesr   Z
ModuleListrangenum_hidden_layerslayerr/   r2   r   r   r"     s   
zBertEncoder.__init__Nc           
      C   s   d}d}t | jD ]"\}}| jr||f }||||| }|d }| jr+||d f }q	| jr4||f }|f}	| jr?|	|f }	| jrG|	|f }	|	S )Nr   r   r   )	enumerater   r   rI   )
r0   rY   rZ   r[   Zall_hidden_statesZall_attentionsiZlayer_moduleZlayer_outputsr\   r   r   r   r>     s(   



zBertEncoder.forwardr?   rg   r   r   r2   r   r     s    r   c                       r]   )
BertPoolerc                    s.   t t|   t|j|j| _t | _d S r   )	r!   r   r"   r   rM   r%   r`   ZTanh
activationr/   r2   r   r   r"   '  s   zBertPooler.__init__c                 C   s(   |d d df }|  |}| |}|S )Nr   )r`   r   )r0   rY   Zfirst_token_tensorpooled_outputr   r   r   r>   ,  s   

zBertPooler.forwardrg   r   r   r2   r   r   %      r   c                       r]   )BertPredictionHeadTransformc                    sr   t t|   t|j|j| _t|jt	s#t
jd dkr*t|jtr*t|j | _n|j| _t|j|jd| _d S )Nr   r   r   )r!   r   r"   r   rM   r%   r`   rr   rs   rt   ru   rv   rw   rx   transform_act_fnr)   r*   r+   r/   r2   r   r   r"   7  s   
z$BertPredictionHeadTransform.__init__c                 C   s"   |  |}| |}| |}|S r   )r`   r   r+   rz   r   r   r   r>   C  s   


z#BertPredictionHeadTransform.forwardrg   r   r   r2   r   r   5  s    r   c                       r]   )BertLMPredictionHeadc                    sF   t t|   t|| _tj|j|jdd| _	t
t|j| _d S )NF)bias)r!   r   r"   r   	transformr   rM   r%   r$   decoder	Parameterr   Zzerosr   r/   r2   r   r   r"   L  s   

zBertLMPredictionHead.__init__c                 C   s   |  |}| || j }|S r   )r   r   r   rz   r   r   r   r>   W  s   
zBertLMPredictionHead.forwardrg   r   r   r2   r   r   J  s    r   c                       r]   )BertOnlyMLMHeadc                    s   t t|   t|| _d S r   )r!   r   r"   r   predictionsr/   r2   r   r   r"   _  s   zBertOnlyMLMHead.__init__c                 C      |  |}|S r   )r   )r0   sequence_outputprediction_scoresr   r   r   r>   c     
zBertOnlyMLMHead.forwardrg   r   r   r2   r   r   ]      r   c                       r]   )BertOnlyNSPHeadc                    s"   t t|   t|jd| _d S Nr   )r!   r   r"   r   rM   r%   seq_relationshipr/   r2   r   r   r"   j  s   zBertOnlyNSPHead.__init__c                 C   r   r   )r   )r0   r   seq_relationship_scorer   r   r   r>   n  r   zBertOnlyNSPHead.forwardrg   r   r   r2   r   r   h  r   r   c                       r]   )BertPreTrainingHeadsc                    s,   t t|   t|| _t|jd| _d S r   )	r!   r   r"   r   r   r   rM   r%   r   r/   r2   r   r   r"   u  s   
zBertPreTrainingHeads.__init__c                 C   s   |  |}| |}||fS r   )r   r   )r0   r   r   r   r   r   r   r   r>   z  s   

zBertPreTrainingHeads.forwardrg   r   r   r2   r   r   s  r   r   c                       s,   e Zd ZeZdZ fddZdd Z  ZS )BertPreTrainedModelZbertc                    s   t t|   || _d S r   )r!   r   r"   r1   r/   r2   r   r   r"     s   
zBertPreTrainedModel.__init__c                 C   s|   t |tjtjfr|jjjd| jjd nt |t	r'|j
j  |jjd t |tjr:|j
dur<|j
j  dS dS dS )z Initialize the weights g        )meanZstdr   N)rr   r   rM   r#   weightdataZnormal_r1   Zinitializer_ranger)   r   Zzero_Zfill_)r0   moduler   r   r   _init_weights  s   
z!BertPreTrainedModel._init_weights)	rA   rB   rC   r	   Zconfig_classZbase_model_prefixr"   r   rE   r   r   r2   r   r     s
    r   c                       s2   e Zd ZdZ fddZ				dddZ  ZS )	BertModela  
    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``
            Sequence of hidden-states at the output of the last layer of the model.
        **pooler_output**: ``torch.FloatTensor`` of shape ``(batch_size, hidden_size)``
            Last layer hidden-state of the first token of the sequence (classification token)
            further processed by a Linear layer and a Tanh activation function. The Linear
            layer weights are trained from the next sentence prediction (classification)
            objective during Bert pretraining. This output is usually *not* a good summary
            of the semantic content of the input, you're often better with averaging or pooling
            the sequence of hidden-states for the whole input sequence.
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``torch.FloatTensor`` (one for each layer)
            of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax,
            used to compute the weighted average in the self-attention heads.

    Examples:
        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        >>> model = BertModel.from_pretrained('bert-base-uncased')
        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
        >>> outputs = model(input_ids)
        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

    c                    s>   t t| | t|| _t|| _t|| _| 	| j
 d S r   )r!   r   r"   r   r=   r   encoderr   poolerapplyr   r/   r2   r   r   r"     s
   


zBertModel.__init__Nc                 C   s0  |d u r	t |}|d u rt |}|dd}|jt|  jd}d| d }|d url| dkrO|dddd}|	| j
jdddd}n| dkr`|ddd}|jt|  jd}nd g| j
j }| j|||d}| j|||d	}|d }	| |	}
|	|
f|dd   }|S )
Nr   r   )r4   r   g     r   rQ   )r<   r;   )r[   )r   Z	ones_liker9   r8   tonext
parametersr4   rV   expandr1   r   r=   r   r   )r0   r:   rZ   r;   r<   r[   Zextended_attention_maskZembedding_outputZencoder_outputsr   r   r\   r   r   r   r>     sZ   


zBertModel.forward)NNNNr@   r   r   r2   r   r     s    r   )-rD   
__future__r   r   r   r   loggingr   osru   ior   jsonr   r   Zconfiguration_bertr	   	getLoggerrA   loggerr   r   r   Z
functionalr   rx   r+   r)   Moduler   rF   r^   rj   rp   r{   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sJ   
	&F#