o
    *j                     @   s   d dl Z d dlm  mZ d dl mZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dd	lmZmZ G d
d dejZe
jejejdG dd deZdS )    N)nn)Models)Model)MODELS)SentencEmbeddingModelOutput)Tasks   )	BertModelBertPreTrainedModelc                       s(   e Zd ZdZ fddZdd Z  ZS )Poolera  
    Parameter-free poolers to get the sentence embedding
    'cls': [CLS] representation with BERT/RoBERTa's MLP pooler.
    'cls_before_pooler': [CLS] representation without the original MLP pooler.
    'avg': average of the last layers' hidden states at each token.
    'avg_top2': average of the last two layers.
    'avg_first_last': average of the first and the last layers.
    c                    s,   t    || _| jdv sJ d| j d S )N)clsavgavg_top2avg_first_lastzunrecognized pooling type %s)super__init__pooler_type)selfr   	__class__ n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/bert/sentence_embedding.pyr      s
   
zPooler.__init__c                 C   s   |j }|j}| jdv r|d d df S | jdkr*||d d|dd S | jdkrO|d }|d }|| d |d d|dd }|S | jdkrt|d	 }|d }|| d |d d|dd }|S t)
N)r   r   r   r   r   g       @r   )Zlast_hidden_statehidden_statesr   Z	unsqueezesumNotImplementedError)r   outputsattention_maskZlast_hiddenr   Zfirst_hiddenZpooled_resultZsecond_last_hiddenr   r   r   forward    s6   





zPooler.forward)__name__
__module____qualname____doc__r   r   __classcell__r   r   r   r   r      s    	r   )module_namec                       sR   e Zd Z fddZd
ddZ									dddZe fdd	Z  ZS )BertForSentenceEmbeddingc                    sT   t  | || _|dd| _t| j| _|dd| _t| | j	t
|dd d S )Nemb_pooler_typer   	normalizeF)Zadd_pooling_layer)r   r   configgetr   r   poolerr(   setattrZbase_model_prefixr	   )r   r)   kwargsr   r   r   r   >   s   
z!BertForSentenceEmbedding.__init__Nc           
      C   s   d\}}|dur| j di |}|dur| j di |}t||d}|du s*|du r,|S | jjr`t }t||j}|du rXtj	|
d|jtjd}||
d|
d  }|||}	|	|_|S )a  
        Args:
            query (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
                :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
                for details.
            docs (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
                :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
                for details.
        Returns:
            Returns `modelscope.outputs.SentencEmbeddingModelOutput
        Examples:
            >>> from modelscope.models import Model
            >>> from modelscope.preprocessors import Preprocessor
            >>> model = Model.from_pretrained('damo/nlp_corom_sentence-embedding_chinese-base')
            >>> preprocessor = Preprocessor.from_pretrained('damo/nlp_corom_sentence-embedding_chinese-base')
            >>> print(model(**preprocessor('source_sentence':['This is a test'])))
        )NNN)query_embeddingsdoc_embeddingsr   )deviceZdtyper   )encoder   
base_modelZtrainingr   ZCrossEntropyLosstorchmatmulTZarangesizer0   longloss)
r   querydocslabelsr.   r/   r   Zloss_fctZscoresr8   r   r   r   r   G   s.   
z BertForSentenceEmbedding.forwardc
                 C   sD   | j j|||||||||	d	}
| |
|}
| jr tj|
ddd}
|
S )N)r   token_type_idsposition_ids	head_maskinputs_embedsoutput_attentionsoutput_hidden_statesreturn_dict   r   )pdim)r2   r   r+   r(   F)r   Z	input_idsr   r<   r=   r>   r?   r@   rA   rB   r   r   r   r   r1   n   s   
zBertForSentenceEmbedding.encodec                    sH   | d}| dd| ddd}tt| jd	d|i|}||_|S )
a'  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirr'   r   r(   F)r'   r(   Zpretrained_model_name_or_pathNr   )r*   r   r   Zfrom_pretrainedrG   )r   r-   rG   Zmodel_kwargsmodelr   r   r   _instantiate   s   


z%BertForSentenceEmbedding._instantiate)NNN)	NNNNNNNNN)	r    r!   r"   r   r   r1   classmethodrI   r$   r   r   r   r   r&   ;   s    
	)
r&   )r3   Ztorch.nn.functionalr   Z
functionalrF   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.builderr   Zmodelscope.outputsr   Zmodelscope.utils.constantr   Zbackboner	   r
   Moduler   Zregister_moduleZsentence_embeddingZbertr&   r   r   r   r   <module>   s   ,