o
    *j                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZmZ ddlZddlm  mZ ddlZddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZm Z  dd	l!m"Z"m#Z#m$Z$ dd
l%m&Z& ddl'm(Z(m)Z)m*Z* ddl+m,Z, ddl-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ej<dkrej=>d ej=?d ej=@d ej=Ad e5B Z4dZCdZDdgZEG dd deZFdd ZGG dd dejjHZIejJjKd d! ZLd"d# ZMG d$d% d%ejjHZNd&d' ZOejJjKd(d) ZP			d>d*d+ZQG d,d- d-ejjHZRG d.d/ d/ejjHZSG d0d1 d1ejjHZTG d2d3 d3ejjHZUG d4d5 d5e0e&ZVd6ZWd7ZXe)d8eWG d9d: d:eVZYe.jZe7j[e,j\d;G d<d= d=eVZ]dS )?z PyTorch ChatGLM model.     N)AnyCallableDictListOptionalTupleUnion)nn)CrossEntropyLoss	LayerNorm)	skip_init)LogitsProcessor)GenerationConfigLogitsProcessorListModelOutputStoppingCriteriaList)BaseModelOutputWithPast)BaseModelOutputWithPastAndCrossAttentionsCausalLMOutputWithPast)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)Models)MODELSModel
TorchModel)
OutputKeys)logger)Tasks   )ChatGLMConfig)ChatGLMTokenizerdarwinFTzTHUDM/ChatGLM-6BZChatGLM6BConfigzTHUDM/chatglm-6bc                   @   s(   e Zd ZdejdejdejfddZdS )InvalidScoreLogitsProcessor	input_idsscoresreturnc                 C   s0   t | st | r|  d|d< |S )Ng     j@).   )torchisnananyisinfZzero_)selfr%   r&    r.   n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/chatglm/text_generation.py__call__:   s   z$InvalidScoreLogitsProcessor.__call__N)__name__
__module____qualname__r)   
LongTensorFloatTensorr0   r.   r.   r.   r/   r$   8   s    r$   c                 C   s  zddl }ddl}ddl}W n ty   td  w tj|}t	d|  |j
|}g }g }	|D ] \}
}t	d|
 d|  |j
||
}||
 |	| q6t||	D ]\}
}|
d}
tdd	 |
D rzt	d
d|
  q\| }|
D ]|}|d|r|d|}n|g}|d dks|d dkrt|d}nH|d dks|d dkrt|d}n6|d dkrt|d}n*|d dkrt|d}nz	t||d }W n ty   t	d
d|
  Y q~w t|dkrt|d }|| }q~|dd dkr
t|d}n
|dkr||}z|j|jks)J d|j d|j dW n tyC } z| j|j|jf7  _ d}~ww t	d|
  t||_q\| S )z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape /c                 s   s    | ]}|d v V  qdS ))Zadam_vZadam_mZAdamWeightDecayOptimizerZAdamWeightDecayOptimizer_1Zglobal_stepNr.   ).0nr.   r.   r/   	<genexpr>_   s    z0load_tf_weights_in_chatglm_6b.<locals>.<genexpr>z	Skipping z[A-Za-z]+_\d+z_(\d+)ZkernelgammaweightZoutput_biasbetabiasZoutput_weightsZsquadZ
classifier   r    iZ_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpyZ
tensorflowImportErrorr   errorospathabspathinfotrainZlist_variablesZload_variableappendzipsplitr+   join	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargsr)   Z
from_numpydata)modelconfigZtf_checkpoint_pathr?   nptfZtf_pathZ	init_varsnamesZarraysnamerR   arraypointerZm_nameZscope_namesnumer.   r.   r/   load_tf_weights_in_chatglm_6bB   s   



r`   c                       s0   e Zd ZdZ fddZdejfddZ  ZS )PrefixEncoderz
    The torch.nn model to encode the prefix
    Input shape: (batch-size, prefix-length)
    Output shape: (batch-size, prefix-length, 2*layers*hidden)
    c              	      s   t    |j| _| jr7tj|j|j| _tj	tj
|j|jtj tj
|j|j|j d | _d S tj|j|j|j d | _d S )Nr>   )super__init__prefix_projectionr)   r	   	Embeddingpre_seq_lenhidden_size	embeddingZ
SequentialLinearZTanh
num_layerstransr-   rW   	__class__r.   r/   rc      s    




zPrefixEncoder.__init__prefixc                 C   s,   | j r| |}| |}|S | |}|S N)rd   rh   rk   )r-   ro   prefix_tokenspast_key_valuesr.   r.   r/   forward   s   


zPrefixEncoder.forward)	r1   r2   r3   __doc__rc   r)   Tensorrs   __classcell__r.   r.   rm   r/   ra      s    ra   c                 C   s*   d|  dt d|  dd|  |      S )zOpenAI's gelu implementation.      ?      ?gQ63E?gHm?)r)   tanhxr.   r.   r/   	gelu_impl   s
   r|   c                 C   s   t | S rp   )r|   rz   r.   r.   r/   gelu   s   r}   c                       sD   e Zd Zdejdf fdd	Zdd Zdd	d
Z fddZ  Z	S )RotaryEmbedding'  Fc                    sx   t    d|td|d |   }| }|| _|r(tj|| _	d | _
n| d| d | _
d | _d | _|| _d S )Nrx   r   r>   inv_freq)rb   rc   r)   arangefloathalf	learnabler	   	Parameterr   max_seq_len_cachedZregister_buffer
cos_cached
sin_cached	precision)r-   dimbaser   r   r   rm   r.   r/   rc      s   

zRotaryEmbedding.__init__c                 C   s   d S rp   r.   )r-   Z
state_dictro   Zlocal_metadatastrictZmissing_keysZunexpected_keys
error_msgsr.   r.   r/   _load_from_state_dict      z%RotaryEmbedding._load_from_state_dictr    Nc           	      C   s  |d u r	|j | }| jd u s|| jkr{| jrd n|| _tj||j| jjd}td|| j}tj	||fdd
|j}| jtjkrE| }| d d d d d f }| d d d d d f }| jtjkrm| }| }| jrt||fS ||| _| _| jd |df | jd |df fS )N)devicedtypezi,j->ijr   .)rR   r   r   r)   r   r   r   r   Zeinsumcattor   Zbfloat16r   cossinr   r   )	r-   r{   Zseq_dimseq_lentZfreqsZembr   r   r.   r.   r/   rs      s*   


$zRotaryEmbedding.forwardc                    s8   | j d ur|| j | _ | jd ur|| j| _t |S rp   )r   r   rb   _apply)r-   fnrm   r.   r/   r      s
   

zRotaryEmbedding._apply)r    N)
r1   r2   r3   r)   r   rc   r   rs   r   rv   r.   r.   rm   r/   r~      s
    
r~   c                 C   sP   | dd | j d d f | d| j d d d f }}tj| |f|jd dS )N.r   r>   r    r   )rR   r)   r   ndim)r{   x1x2r.   r.   r/   rotate_half   s
   6r   c                 C   sd   t ||ddt ||dd}}| | t| |  || t||  } }| |fS )Nr    r>   )Frh   squeeze	unsqueezer   )qkr   r   Zposition_idr.   r.   r/   apply_rotary_pos_emb_index   s   
r   c
                 C   sV  |d ur|d |d }
}t j|
|fdd}t j||fdd}|j\}}}}|	r-||f}nd }t|d }|r@|t||  }|d|d|d|df}||d |d |d  d}||d |d |d  d}t jddd|j	|j
d}t j||dd|dddddd	d
}|j| }| jr|| j_| || }n#|dk s||d |j	}| }|| }tj|dd}||}|d|d|d|df}||d|d |d  d}||d |d  |d d}t ||dd}|j| }|dddd }| d d |f }|j| }|||f}|S )Nr   r    r   r>   r      r   r   g        rx   )r<   alpha     )r)   r   rR   r   mathsqrtsizeviewzerosr   r   ZbaddbmmrQ   scale_mask_softmaxscale
contiguousallmasked_fill_r   softmaxtypeZbmmpermute)r-   query_layer	key_layervalue_layerattention_maskhidden_size_per_partitionlayer_id
layer_pastZscaling_attention_score	use_cacheZpast_keyZ
past_valuer   bZnhrg   presentZquery_key_layer_scaling_coeffZoutput_sizeZmatmul_resultattention_scoresattention_probsr   context_layerZnew_context_layer_shapeoutputsr.   r.   r/   attention_fn   s   



	




r   c                       sz   e Zd Zddejdf fdd	Zedd Z	ddd	Z			dd
ej	dej	de
eej	ej	f  dedef
ddZ  ZS )SelfAttentionNTc                    s   t t|   || _|| _|| _|| _|| _|| _t	|r$| j| jd  n| j| j dt
jdd| _d | _|d u r>|| | _n|| _|| j | _tt
jj|d| j ||d| _tt
jj| j|||d| _d S )Nr>   r   F)r   r   r   r   r=   r   )rb   r   rc   r   rg   r   num_attention_heads!num_attention_heads_per_partitionposition_encoding_2dr~   r)   r   
rotary_embr   hidden_size_per_attention_headinner_hidden_sizer   r	   ri   query_key_valuedense)r-   rg   r   r   r   r=   params_dtyper   rm   r.   r/   rc   w  sH   
	zSelfAttention.__init__c                 C   s   |  |d | S )Nr   )r   )r   r   r.   r.   r/   attention_mask_func  s   z!SelfAttention.attention_mask_funcFc                 C   sF   |  d }| | | }tj|||d}|r!tdd |D S |S )a#  Split a tensor along its last dimension.
        Arguments:
            tensor: input tensor.
            num_partitions: number of partitions to split the tensor
            contiguous_split_chunks: If True, make each chunk contiguous
                                    in memory.
        r    r   c                 s   s    | ]}|  V  qd S rp   )r   )r7   chunkr.   r.   r/   r9         z<SelfAttention.split_tensor_along_last_dim.<locals>.<genexpr>)r   r   r)   rJ   tuple)r-   tensorZnum_partitionsZcontiguous_split_chunksZlast_dimZlast_dim_sizeZtensor_listr.   r.   r/   split_tensor_along_last_dim  s   z)SelfAttention.split_tensor_along_last_dimhidden_statesr   r   r   output_attentionsc                 C   s  |  |}| dd | jd| j f }	|j|	 }| |d\}
}}| jr|
jd|
jd d\}}|jd|jd d\}}| j	||
 d d\}}|dddddf dd |dddddf dd }}t|||||\}}t|||||\}}tj||g|jd d}
tj||g|jd d}n|dd}| j	||
 d d\}}t|
||||\}
}t| |
|||| j|||d		\}}}| |}||f}|r||f7 }|S )
q
        hidden_states: [seq_len, batch, hidden_size]
        attention_mask: [(1, 1), seq_len, seq_len]
        Nr   r   r>   r    r   )r   r   )	r-   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   maxrQ   r   r   r)   concatr   r   r   )r-   r   position_idsr   r   r   r   r   Zmixed_raw_layerZnew_tensor_shaper   r   r   q1q2Zk1Zk2r   r   block_position_idsr   r   r   outputr   r.   r.   r/   rs     sZ   


  




zSelfAttention.forwardFNFF)r1   r2   r3   r)   r   rc   staticmethodr   r   ru   r   r   boolrs   rv   r.   r.   rm   r/   r   u  s0    3

r   c                       s$   e Zd Z fddZdd Z  ZS )GEGLUc                    s   t    tj| _d S rp   )rb   rc   r   r}   activation_fnr-   rm   r.   r/   rc     s   
zGEGLU.__init__c                 C   s&   |j d|jd d\}}|| | S )Nr>   r    r   )r   r   r   )r-   r{   r   r   r.   r.   r/   rs     s   zGEGLU.forward)r1   r2   r3   rc   rs   rv   r.   r.   rm   r/   r     s    r   c                       s2   e Zd Zdddeejf fdd	Zdd Z  ZS )GLUNTc                    sr   t t|   || _|| _|| _|d u rd| }|| _ttj	j
| j| j||d| _ttj	j
| j| j||d| _d S )N   r   )rb   r   rc   r   activation_funcrg   r   r   r)   r	   ri   dense_h_to_4hdense_4h_to_h)r-   rg   r   r   r=   r   r   rm   r.   r/   rc     s*   zGLU.__init__c                 C   s"   |  |}| |}| |}|S )z>
        hidden_states: [seq_len, batch, hidden_size]
        )r   r   r   )r-   r   Zintermediate_parallelr   r.   r.   r/   rs   :  s   


zGLU.forward)	r1   r2   r3   r}   r)   r   rc   rs   rv   r.   r.   rm   r/   r     s     r   c                       sh   e Zd Zddedejddf fdd	Z			ddejdejd	ee	ejejf  d
e
de
f
ddZ  ZS )GLMBlockNT   c              	      sn   t t|   || _|||d| _|| _t||||||	| jd| _|||d| _|
| _	t
|||||	d| _d S )Neps)r   r=   r   r   )r   r=   r   r   )rb   r   rc   r   input_layernormr   r   	attentionpost_attention_layernormrj   r   mlp)r-   rg   r   layernorm_epsilonr   r   r   	layernormuse_biasr   rj   r   rm   r.   r/   rc   K  s0   
zGLMBlock.__init__Fr   r   r   r   r   c              	   C   s   |  |}| j|||||||d}	|	d }
|	dd }d| j d }|| |
 }| |}| |}|| | }|rA|f| }|S |f|dd  }|S )r   )r   r   r   r   r   r   r    Nr>   rw   )r   r   rj   r   r   )r-   r   r   r   r   r   r   r   Zattention_inputZattention_outputsZattention_outputr   r   Z	mlp_inputZ
mlp_outputr   r.   r.   r/   rs   z  s,   
	


zGLMBlock.forwardr   )r1   r2   r3   r   r)   r   rc   ru   r   r   r   rs   rv   r.   r.   rm   r/   r   I  s.    5r   c                       sr   e Zd ZdZdZdZeZdZdgZ	 fddZ
dejfd	d
Zdd ZdddZdddZe fddZ  ZS )ChatGLMPreTrainedModelz
    An abstract class to handle weights initialization and
    a simple interface for downloading and loading pretrained models.
    FTtransformerr   c                    s*   t  j|jfi | t t| | d S rp   )rb   rc   name_or_pathr   )r-   rW   kwargsrm   r.   r/   rc     s   zChatGLMPreTrainedModel.__init__modulec                 C   s   dS )zInitialize the weights.Nr.   )r-   r   r.   r.   r/   _init_weights  r   z$ChatGLMPreTrainedModel._init_weightsc           	         sz   |j \}} fdd|D }tj|||f|d}|  t|D ]\}}d||d d d |f< q |d |dk  }|S )Nc                       g | ]}|   jjqS r.   tolistindexrW   bos_token_idr7   seqr   r.   r/   
<listcomp>      z4ChatGLMPreTrainedModel.get_masks.<locals>.<listcomp>r   r    rw   )rR   r)   onesZtril_	enumerateZ
unsqueeze_r   )	r-   r%   r   
batch_size
seq_lengthcontext_lengthsr   icontext_lengthr.   r   r/   	get_masks  s   


z ChatGLMPreTrainedModel.get_masksc                    s   |j \}fdd|D }jrPtjtj dd|d}t|D ]\}}	|| |||	d f< q% fdd|D }
tj|
dd}
tj||
fdd}|S tjtj dd|d}|sst|D ]\}}	|| ||	d < qf|S )Nc                    r   r.   r   r  r   r.   r/   r    r  z;ChatGLMPreTrainedModel.get_position_ids.<locals>.<listcomp>r   r   r    c              
      s>   g | ]}t t j|t j d t j| t j d d fqS )r   r    )r)   r   r   longr   )r7   r  )r   r  r.   r/   r    s"    
r   )	rR   r   r)   r   r  r   repeatr
  stack)r-   r%   mask_positionsr   gmaskr  r  r   r  r  r   r.   )r   r-   r  r/   get_position_ids  s>   




z'ChatGLMPreTrainedModel.get_position_idsc                 C   s   t |tr
||_d S d S rp   )
isinstanceChatGLMModelgradient_checkpointing)r-   r   valuer.   r.   r/   _set_gradient_checkpointing  s   

z2ChatGLMPreTrainedModel._set_gradient_checkpointingc                    s<   | dd}| dd tt| jdd|i|}||_|S )a'  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirNcfgZpretrained_model_name_or_pathr.   )poprb   r   from_pretrainedr  )clsr   r  rV   rm   r.   r/   _instantiate  s   z#ChatGLMPreTrainedModel._instantiater   )r1   r2   r3   rt   Zis_parallelizableZsupports_gradient_checkpointingr!   config_classZbase_model_prefixZ_no_split_modulesrc   r	   Moduler   r  r  r  classmethodr!  rv   r.   r.   rm   r/   r     s    

$r   aM  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general
    usage and behavior.

    Parameters:
        config ([`~ChatGLM6BConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the configuration.
            Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a:
  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`ChatGLM6BTokenizer`].
            See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, 1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings.
            Selected in the range `[0, config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert *input_ids* indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zdThe bare ChatGLM-6B Model transformer outputting raw hidden-states without any specific head on top.c                       s   e Zd ZdZdef fddZdd Zdejfdd	Z	ej
fd
dZeedeeeed									ddeej deej deej deeeejejf df  deej dee dee dee dee deeejdf ef fddZ  ZS )r  a  

    The model can behave as an encoder (with only self-attention) as well
    as a decoder, in which case a layer of cross-attention is added between
    the self-attention layers, following the architecture described in [Attention is
    all you need](https://arxiv.org/abs/1706.03762) by Ashish Vaswani,
    Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the
    `is_decoder` argument of the configuration set to `True`.
    To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder`
    argument and `add_cross_attention` set to `True`; an
    `encoder_hidden_states` is then expected as an input to the forward pass.
    rW   c                    s$  t  | |j_|j_tj_|j_|j_|j	_	|j
_
|j_jj _|j_|j_|j_ttjjjjjd_d_fdd tj fddtj	D _tjj
d_jd ur D ]}d|_qstj _t|_ tj!d_"d S d S )	N)Znum_embeddingsZembedding_dimr   Fc                    s*   t  j j j|  j jtd j jd
S )NT)r   r   r   r   r   r   )	r   rg   r   r   r   r   r   r   r   )r   r   r.   r/   	get_layerw  s   z(ChatGLMModel.__init__.<locals>.get_layerc                    s   g | ]} |qS r.   r.   )r7   r   )r%  r.   r/   r    s    z)ChatGLMModel.__init__.<locals>.<listcomp>r   g?)#rb   rc   max_sequence_lengthrg   r)   r   r   r   
vocab_sizerj   r   r   r   r   rf   rd   r   r	   re   word_embeddingsr  Z
ModuleListrangelayersr   final_layernorm
parametersZrequires_gradr   r  rq   ra   prefix_encoderZDropoutdropout)r-   rW   paramrm   )r%  r-   r/   rc   _  sD   

zChatGLMModel.__init__c                 C      | j S rp   r(  r   r.   r.   r/   get_input_embeddings     z!ChatGLMModel.get_input_embeddingsnew_embeddingsc                 C   
   || _ d S rp   r1  r-   r4  r.   r.   r/   set_input_embeddings     
z!ChatGLMModel.set_input_embeddingsc                 C   sp   | j d|d|}| ||}||| j| jd | j	| j
| j	 }| |}|g dd}|S )Nr   r   r>   )r>   r    r   r   r   )rq   r   expandr   r-  r   r   rf   rj   r   rg   r.  r   rJ   )r-   r  r   r   rq   rr   r.   r.   r/   
get_prompt  s   

zChatGLMModel.get_promptzbatch_size, sequence_length)
checkpointoutput_typer"  Nr%   r   r   rr   .inputs_embedsr   r   output_hidden_statesreturn_dictr'   c
                    s<  |d ur|n| j j}|d ur|n| j j}|d ur|n| j j}|	d ur$|	n| j j}	| jr2| jr2|r2d}|d ur>|d ur>td|d urL|jd d \}
}n|d ur[|jd d \}
}}ntd|d u rh| 	|}|d u r| j
d ur| j|jd |j|jd}n
td gt| j }|d u r| j||jd}|d u r| j j| j j}}||v r|n| ||v rdnd} fd	d
|D }| j|||j|d}| j
d ur|d urt|
d|d| j
|j}|dk  }tj||fdd}|dd}|rdnd }|rdnd }|rdnd }|d u rtjdd|jd }n||j}t| jD ]V\}}|r.||f }|| }| jrL| jrLtjj||||t ||||}n||||t ||||d}|d }|rh||d f }|rw|||rrdnd f }q"| !|}|r||f }|	stdd ||||fD S t"||||dS )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer>   z5You have to specify either input_ids or inputs_embedsr   )r  r   r   r  Tc                    s   g | ]	}|   qS r.   )r  r  r  
mask_tokenr.   r/   r        z(ChatGLMModel.forward.<locals>.<listcomp>)r  r   r  r    r   rw   r   r   r.   )r   r   r   r   r   r   c                 s   s    | ]	}|d ur|V  qd S rp   r.   )r7   vr.   r.   r/   r9   2  s    z'ChatGLMModel.forward.<locals>.<genexpr>)Zlast_hidden_staterr   r   
attentions)#rW   r   r>  r   use_return_dictr  Ztraining
ValueErrorrR   r(  rf   r:  r   r   r   rO   r*  r  mask_token_idgmask_token_idr  r)   r	  r   r   r   r   rQ   r   r
  utilsr;  r   r+  r   )r-   r%   r   r   rr   r=  r   r   r>  r?  r  r  _MASKgMASK	use_gmaskr  Zprefix_attention_maskr   ZpresentsZall_self_attentionsZall_hidden_statesr  layerr   Z	layer_retr.   r@  r/   rs     s   





	


zChatGLMModel.forward)	NNNNNNNNN)r1   r2   r3   rt   r!   rc   r2  r)   ru   r7  r   r:  r   CHATGLM_6B_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCr   r4   r   r   r   r   rs   rv   r.   r.   rm   r/   r  K  s\    9	
r  )module_namec                       sZ  e Zd Zdef fddZdd Zdd Z		d>d	ed
ee	e
f dededee	e
f f
ddZ				d?dejdeej deej deej deej defddZ										d@deej deej deej deeej  deej deej dee dee dee dee fddZedeeejejf d f d!ejdeeejejf d f fd"d#Zd$d% Ze 		&	'	(	)	*	dAd+e	d,eee	e	f  d-efd.d/Ze 		&	(	)	*	dBd+e	d,eee	e	f  d-efd0d1Ze 				d?d2ee d3ee d4ee  d5ee!eejgee f  fd6d7Z"dCd8efd9d:Z#d;edefd<d=Z$  Z%S )DChatGLMForConditionalGenerationrW   c                    sz   t  | |j| _|j| _t|| _ttj|j	|j
dtjd| _|| _d| _| jjr4| j| jjdd t|j| _d S )NFr   T)
empty_init)rb   rc   r&  r   r  r   r   r	   ri   rg   r'  r)   r   lm_headrW   	quantizedquantization_bitquantizer"   r  r   	tokenizerrl   rm   r.   r/   rc   A  s    
z(ChatGLMForConditionalGeneration.__init__c                 C   r0  rp   rV  r   r.   r.   r/   get_output_embeddings^  r3  z5ChatGLMForConditionalGeneration.get_output_embeddingsc                 C   r5  rp   r[  r6  r.   r.   r/   set_output_embeddingsa  r8  z5ChatGLMForConditionalGeneration.set_output_embeddingsFr   model_kwargsis_encoder_decoderstandardize_cache_formatr'   c           	      C   s   | j ||d|d< d|v rO|d }|d urO|jtjkrOtj||g |jd d dR gdd}|d d d d dd f  }d|d	< tj||gd
d|d< d|v r{|d }|ddd f  }|d d dd d f  d7  < tj||gdd|d< |S )N)r`  rr   r   r   r    r   r   F).r   r>   r   .)Z_extract_past_from_model_outputr   r)   r   r   Znew_onesrR   clone)	r-   r   r^  r_  r`  r   Znew_attention_maskr   Znew_position_idr.   r.   r/   #_update_model_kwargs_for_generationd  s2   


zCChatGLMForConditionalGeneration._update_model_kwargs_for_generationNr%   pastrr   r   r   c                    s  |j \}jjjj}}	|	|v r|	n| |	|v rdnd}
| } fdd|D }|d us3|d ur|d d df d}|d urV|jtjkrV|d d d d dd f }nd }|d ure|ddd f }n6fdd|D }j	rtj
fddt||D tj|jd	d}ntj
d
d |D tj|jd	d}|d u r|}||||dS |d ur|jtjkrd }|d u rj||jd}|d u rΈj||j||
d}||||dS )NTFc                    s   g | ]}|  qS r.   )r  r  r@  r.   r/   r    s    zQChatGLMForConditionalGeneration.prepare_inputs_for_generation.<locals>.<listcomp>r   .c                    s   g | ]	}|  jjqS r.   )r  rW   r  r  r   r.   r/   r    rB  c                    s   g | ]
\}}| | gqS r.   r.   )r7   mask_positionr  )r  r.   r/   r    s    r   c                 S   s   g | ]}|qS r.   r.   )r7   rd  r.   r.   r/   r    s    )r%   rr   r   r   r  )r   r  r  )rR   rW   rG  rH  r  r   r   r)   r   r   r   rI   r  r   r  r  )r-   r%   rc  rr   r   r   r   r  rK  rL  rM  Zseqsr  Z
last_tokenr  r.   )rA  r-   r  r/   prepare_inputs_for_generation  sx   


z=ChatGLMForConditionalGeneration.prepare_inputs_for_generationr=  labelsr   r   r>  r?  c                 C   s:  |d ur|n| j j}|
d ur|
n| j j}
| j||||||||	|
d	}|d }| |ddd }d }|d ur{|tj	}|dd dd d f  }|ddd f  }t
dd}||j}||d|d|d}||j}||j}|
s|f|dd   }|d ur|f| S |S t|||j|j|jd	S )
N)	r%   r   r   rr   r=  r   r   r>  r?  r   r    r>   .r   i)Zignore_index)losslogitsrr   r   rD  )rW   r   rE  r   rV  r   r   r   r)   Zfloat32r
   r   r   r   r   r   rr   r   rD  )r-   r%   r   r   rr   r=  rf  r   r   r>  r?  Ztransformer_outputsr   Z	lm_logitsrg  Zshift_logitsZshift_labelsZloss_fctr   r.   r.   r/   rs     sL   
z'ChatGLMForConditionalGeneration.forward.beam_idxc                    s   t  fdd| D S )aL  
        This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
        [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
        beam_idx at every generation step.

        Output shares the same memory storage as `past`.
        c              	   3   sH    | ]}|d   d |d  j|d  d |d jfV  qdS )r   r    N)Zindex_selectr   r   )r7   r   ri  r.   r/   r9     s    zAChatGLMForConditionalGeneration._reorder_cache.<locals>.<genexpr>)r   )rc  ri  r.   rj  r/   _reorder_cache
  s   z.ChatGLMForConditionalGeneration._reorder_cachec                 C   s   |  }|dd}ddgddgddgd	d
gddgg}|D ] }td|d  d|d  |}td|d  d|d  |}q|S )Nu   [[训练时间]]u   2023年,u   ，!u   ！:u   ：;u   ；z\?u   ？z([\u4e00-\u9fff])%sr   z\1%sr    z%s([\u4e00-\u9fff])z%s\1)stripreplacer?   sub)r-   responseZpunktsitemr.   r.   r/   process_response  s    z0ChatGLMForConditionalGeneration.process_response   r    Tffffff?ffffff?queryhistory
max_lengthc
                 K   s   |d u rg }|	d u rt  }	|	t  ||||||	d|
}|s#|}n d}t|D ]\}\}}|d|||7 }q)|dt||7 }||gdd}|| j}| jd	i ||}|	 d t|d d d  }|
|}| |}|||fg }||fS )
N)r{  	num_beams	do_sampletop_ptemperaturelogits_processor    [Round {}]
问：{}
答：{}
   [Round {}]
问：{}
答：ptZreturn_tensorsr   r%   r.   )r   rH   r$   r
  rP  rO   r   r   generater  decoderu  )r-   rZ  ry  rz  r{  r|  r}  r~  r  r  r   
gen_kwargspromptr  	old_queryrs  inputsr   r.   r.   r/   _chat,  s<   	 

z%ChatGLMForConditionalGeneration._chatc	                 k   s
   |d u rg }|d u rt  }|t  |||||d|	}
|s#|}n d}t|D ]\}\}}|d|||7 }q)|dt||7 }||gdd}|| j}| jd	i ||
D ](}|	 d t|d d d  }|
|}| |}|||fg }||fV  qZd S )
N)r{  r}  r~  r  r  r  r  r  r  r  r   r%   r.   )r   rH   r$   r
  rP  rO   r   r   stream_generater  r  ru  )r-   rZ  ry  rz  r{  r}  r~  r  r  r   r  r  r  r  rs  r  r   Znew_historyr.   r.   r/   stream_chatW  s>    

z+ChatGLMForConditionalGeneration.stream_chatgeneration_configr  stopping_criteriaprefix_allowed_tokens_fnc              	   +   s   |j d |j d }}|d u r| j}t|}|jdi |}	|j|j}}
t|
tr/|
g}
|	dd u o:|j
d u}|rN|jd u rNtd|j
 dt n|jd urj|j| |_
|sjtd|j d|j
 dt ||j
kr| jjrud	nd
}td| d| d|j
 d |d ur|nt }|d ur|nt }| j|||||d}| j||d}| |}||j d d}d }	 | j|fi |	}| di |dddd}|jd d dd d f }|||}|||}tjj|dd}|jrt j!|dd"d nt j#|dd t j$| d d d f gdd}| j%||	| jjd}	|&t' fdd|
D ( }|) dks<|||r>d S |V  q)Nr   r   r{  zUsing `max_length`'s default (z) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.zBoth `max_new_tokens` (=z) and `max_length`(=z) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)Zdecoder_input_idsr%   zInput length of z is z, but `max_length` is set to zX. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.)r  input_ids_seq_lengthZencoder_input_idsr  r  )r  r  r    TF)r?  r   r>  r   )Znum_samples)r_  c                 3   s    | ]} |kV  qd S rp   r.   )r7   r  Znext_tokensr.   r/   r9     r   zBChatGLMForConditionalGeneration.stream_generate.<locals>.<genexpr>r.   )*rR   r  copydeepcopyupdater  eos_token_idr  rP   getr{  Zmax_new_tokenswarningswarnUserWarningr   rW   r_  warningr   r   Z_get_logits_processorZ_get_stopping_criteriaZ_get_logits_warpernewZfill_re  rh  r	   
functionalr   r}  r)   Zmultinomialr   Zargmaxr   rb  mulsumr  r   )r-   r%   r  r  r  r  r   rJ  r  r^  r  Zhas_default_max_lengthZinput_ids_stringZlogits_warperZunfinished_sequencesr&   Zmodel_inputsr   Znext_token_logitsZnext_token_scoresZprobsr.   r  r/   r    s   








z/ChatGLMForConditionalGeneration.stream_generatebitsc                 K   sX   |dkrd S ddl m} | jrtd | S d| _|| j_|| j|fd|i|| _| S )Nr   r    )rY  zAlready quantized.TrU  )ZquantizationrY  rW  r   rF   rW   rX  r   )r-   r  rU  r   rY  r.   r.   r/   rY    s    
z(ChatGLMForConditionalGeneration.quantizeinputc           	   	   C   s   |d }|d }d|v r|d }nd}d|v r|d }nd}d|v r'|d }nd}d	|v r2|d	 }nd
}t |tjkr?| }| j| j||||||d\}}td tj	|tj
|iS )Ntextrz  r{  rv  r  rx  r|  r    r}  T)r{  r  r|  r}  zGeneration finished.)r   r)   ru   r  r  rZ  r   rF   r   ZRESPONSEZHISTORY)	r-   r  r  rz  r{  r  r|  r}  rs  r.   r.   r/   chat  s6   





z$ChatGLMForConditionalGeneration.chat)FF)NNNN)
NNNNNNNNNN)Nrv  r    Trw  rx  N)Nrv  Trw  rx  Nr   )&r1   r2   r3   r!   rc   r\  r]  r   r   strr   r   rb  r)   r4   r   ru   dictre  r   r5   rs   r   rk  ru  Zno_gradr   rP   r  r  r   r   r   r   r  rY  r  rv   r.   r.   rm   r/   rT  >  s    


&
H	

=*(nrT  )NTF)^rt   r  r   rC   r?   sysr  typingr   r   r   r   r   r   r   r)   Ztorch.nn.functionalr	   r  r   Ztorch.utils.checkpointZtorch.nnr
   r   Ztorch.nn.utilsr   Z&transformers.generation.logits_processr   Ztransformers.generation.utilsr   r   r   r   Ztransformers.modeling_outputsr   r   r   Ztransformers.modeling_utilsr   Ztransformers.utilsr   r   r   Zmodelscope.metainfor   Zmodelscope.modelsr   r   r   Zmodelscope.outputsr   Zmodelscope.utilsr   loggingZmodelscope.utils.constantr   configurationr!   Ztokenizationr"   platformZ_CZ_jit_set_profiling_modeZ_jit_set_profiling_executorZ_jit_override_can_fuse_on_cpuZ_jit_override_can_fuse_on_gpuZ
get_loggerrQ  rR  Z(CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LISTr$   r`   r#  ra   Zjitscriptr|   r}   r~   r   r   r   r   r   r   r   r   ZCHATGLM_6B_START_DOCSTRINGrO  r  Zregister_moduler  Z
chatglm_6brT  r.   r.   r.   r/   <module>   s    $

J 
6

v 1f_2 p