o
    *j                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ G dd	 d	e jjZejejejd
G dd dee	ZdS )    N)BloomConfig)
BloomModel)Models)MODELS
TorchModel)SentencEmbeddingModelOutput)Tasksc                       s(   e Zd ZdZ fddZdd Z  ZS )DecoderPoolerz
    Parameter-free poolers to get the sentence embedding
    'last': the last token state.
    'weighted_mean': position weighted average of all token states.
    c                    s,   t    || _| jdv sJ d| j d S )N)lastweighted_meanzunrecognized pooling type %s)super__init__pooler_type)selfr   	__class__ o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/bloom/sentence_embedding.pyr      s
   
zDecoderPooler.__init__c                 C   sF  |j }| jdv rD|j\}}}tj|ddd\}}t|dk||d }	tj|	dd}	|	dd|d|}	t	|d|	j
dd}
|
S | jdkr|d	|  }tjd|jd d d
dd	|  |j}|j|j  kr|jksJ  J || }t|| d}|d}tj|dd}|| }
|
S t)N)r
      F)Zkeepdimr   )min)dimr   )startendg&.>)Zlast_hidden_stater   shapetorchr   whereclampZ	unsqueezeexpandZgatherZsqueezesizefloatarangetodevicesumNotImplementedError)r   outputsattention_maskZlast_hiddennlhvaluesindicesZgather_indicesZpooled_outputZinput_mask_expandedweightsZsum_embeddingsZsum_maskr   r   r   forward   sV   


"
zDecoderPooler.forward)__name__
__module____qualname____doc__r   r.   __classcell__r   r   r   r   r	      s    r	   )Z	group_keymodule_namec                       sH   e Zd ZdZ fddZdddZ		dddZe fd	d
Z  Z	S )BloomForSentenceEmbeddingz
    This model represent a text to a dense vector by the last token state or weighted mean of all token states.
    See `Language Models are Universal Embedders
    <https://arxiv.org/pdf/2310.08232.pdf>`_ for details.
    c                    sP   t  | || _|dd| _t| j| _|dd| _t| | j	t
| d S )Nemb_pooler_typer   	normalizeF)r   r   configgetr   r	   poolerr7   setattrZbase_model_prefixBloomModelTransform)r   r8   kwargsr   r   r   r   Q   s   z"BloomForSentenceEmbedding.__init__Nc           
      C   s   d\}}|dur| j di |}|dur| j di |}t||d}|du s*|du r,|S | jjratj }t||j}|du rYtj	|
d|jtjd}||
d|
d  }|||}	|	|_|S )a  
        Args:
            query (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
                :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
                for details.
            docs (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
                :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
                for details.
        Returns:
            Returns `modelscope.outputs.SentencEmbeddingModelOutput
        Examples:
            >>> from modelscope.models import Model
            >>> from modelscope.preprocessors import Preprocessor
            >>> model = Model.from_pretrained('damo/nlp_udever_bloom_560m')
            >>> preprocessor = Preprocessor.from_pretrained('damo/nlp_udever_bloom_560m')
            >>> inputs = preprocessor({'source_sentence': ['This is a test']})
            >>> outputs = model(**inputs)
            >>> print(outputs)
        NNN)query_embeddingsdoc_embeddingsr   )r#   Zdtyper   )encoder   
base_modelZtrainingr   nnZCrossEntropyLossmatmulTr!   r   r#   longloss)
r   querydocslabelsr?   r@   r&   Zloss_fctZscoresrG   r   r   r   r.   Y   s.   

z!BloomForSentenceEmbedding.forwardc                 C   s:   | j j||d}| ||}| jrtjjj|ddd}|S )N)r'      r   )pr   )rB   r.   r:   r7   r   rC   Z
functional)r   Z	input_idsr'   r&   Z
embeddingsr   r   r   rA      s   z BloomForSentenceEmbedding.encodec                    sh   | d}| dd| ddd}|du r"td	i |}| |}ntt| jd	d|i|}||_|S )
a'  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirr6   r   r7   F)r6   r7   NZpretrained_model_name_or_pathr   )r9   r   r   r<   Zfrom_pretrainedrM   )clsr=   rM   Zmodel_kwargsr8   modelr   r   r   _instantiate   s   



z&BloomForSentenceEmbedding._instantiate)NNNr>   )
r/   r0   r1   r2   r   r.   rA   classmethodrP   r3   r   r   r   r   r5   H   s    
+
r5   )r   Ztransformersr   r   r<   Zmodelscope.metainfor   Zmodelscope.modelsr   r   Zmodelscope.outputsr   Zmodelscope.utils.constantr   rC   Moduler	   Zregister_moduleZsentence_embeddingZbloomr5   r   r   r   r   <module>   s   <