o
    *j                  
   @   s  d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlZddlm  mZ ddlZddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZmZ dd	lmZm Z  dd
l!m"Z" ddl#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ej3dkrej45d ej46d ej47d ej48d e,9 Z+dZ:dZ;dgZ<dd Z=G dd deZ>G dd dejj?Z@	dCd ejAd!eBd"eCd#e	ejA fd$d%ZDG d&d' d'ej?ZEejFjGd(ejAd)ejAd#ejAfd*d+ZHG d,d- d-ejj?ZIG d.d/ d/ejj?ZJG d0d1 d1ejj?ZKd2d3 ZLG d4d5 d5ejj?ZMG d6d7 d7ejj?ZNG d8d9 d9ejj?ZOG d:d; d;e%e"ZPG d<d= d=ejj?ZQG d>d? d?ePZRe0jSe.jTe'jUd@G dAdB dBePZVdS )Dz PyTorch ChatGLM model.     N)AnyCallableDictListOptionalTuple)nn)CrossEntropyLoss	LayerNorm)	skip_init)LogitsProcessor)GenerationConfigLogitsProcessorListModelOutputStoppingCriteriaList)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)Model
TorchModel)Models)
OutputKeys)logger)Tasks   )MODELS   )ChatGLM2ConfigdarwinFTzTHUDM/ChatGLM2-6BZChatGLM6BConfigzTHUDM/chatglm2-6bc                 O   s   | |i |S N )clsargskwargsr    r    o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/chatglm2/text_generation.pydefault_init2      r%   c                   @   s(   e Zd ZdejdejdejfddZdS )InvalidScoreLogitsProcessor	input_idsscoresreturnc                 C   s0   t | st | r|  d|d< |S )Ng     j@).   )torchisnananyisinfZzero_)selfr(   r)   r    r    r$   __call__8   s   z$InvalidScoreLogitsProcessor.__call__N)__name__
__module____qualname__r,   
LongTensorFloatTensorr1   r    r    r    r$   r'   6   s    r'   c                       s6   e Zd ZdZdef fddZdejfddZ  Z	S )PrefixEncoderz
    The torch.nn model to encode the prefix
    Input shape: (batch-size, prefix-length)
    Output shape: (batch-size, prefix-length, 2*layers*hidden)
    configc                    s   t    |j| _| jr:|j|j |j d }tj|j	|| _
tjtj||jtj tj|j|| _d S tj|j	|j|j |j d | _
d S )N   )super__init__prefix_projection
num_layerskv_channelsmulti_query_group_numr,   r   	Embeddingpre_seq_len	embeddingZ
SequentialLinearhidden_sizeZTanhtrans)r0   r8   Zkv_size	__class__r    r$   r;   G   s    


zPrefixEncoder.__init__prefixc                 C   s,   | j r| |}| |}|S | |}|S r   )r<   rB   rE   )r0   rH   prefix_tokenspast_key_valuesr    r    r$   forwardV   s   


zPrefixEncoder.forward)
r2   r3   r4   __doc__r   r;   r,   TensorrK   __classcell__r    r    rF   r$   r7   @   s    r7   tensornum_partitionscontiguous_split_chunksr*   c                 C   sF   |   d }|  | | }tj| ||d}|r!tdd |D S |S )a5  Split a tensor along its last dimension.

    Arguments:
        tensor: input tensor.
        num_partitions: number of partitions to split the tensor
        contiguous_split_chunks: If True, make each chunk contiguous
                                 in memory.

    Returns:
        A list of Tensors
    r   dimc                 s   s    | ]}|  V  qd S r   )
contiguous).0chunkr    r    r$   	<genexpr>v       z.split_tensor_along_last_dim.<locals>.<genexpr>)rS   sizer,   splittuple)rO   rP   rQ   Zlast_dimZlast_dim_sizeZtensor_listr    r    r$   split_tensor_along_last_dim_   s   r\   c                       sV   e Zd Z				d fdd	Z	ddeded	ejd
ejdef
ddZdddZ	  Z
S )RotaryEmbeddingr   FNc                    sR   t    ddtjd|d|dj|d|   }| d| || _|| _|| _d S )N      ?'  r   r9   device)dtypeinv_freq)	r:   r;   r,   arangetoZregister_bufferrS   original_impl
rope_ratio)r0   rS   rg   rf   ra   rb   rc   rF   r    r$   r;   }   s   

zRotaryEmbedding.__init__r_   seq_lenn_elemrb   ra   basec           
   	   C   s   d|t jd|d||d|   }t j|||d| j }t || }t jt |t |gdd}	|t jt j	t j
fv rI|t j	krE|		 n|	 }	|	S )aM  Enhanced Transformer with Rotary Position Embedding.

        Derived from: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/
        transformers/rope/__init__.py. MIT License:
        https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/license.
        r^   r   r9   rb   ra   rR   )r,   rd   rg   outerfloatstackcossinZfloat16Zbfloat16Zint8half)
r0   rh   ri   rb   ra   rj   thetaZseq_idxZ	idx_thetacacher    r    r$   forward_impl   s(   
zRotaryEmbedding.forward_implr   c                 C   s   | j || j| jj| jjdS Nrk   )ru   rS   rc   rb   ra   )r0   Zmax_seq_lenoffsetr    r    r$   rK      s   zRotaryEmbedding.forward)r   FNN)r_   )r   )r2   r3   r4   r;   intr,   rb   ra   ru   rK   rN   r    r    rF   r$   r]   {   s&    
!r]   x
rope_cachec           	      C   s   |  d|  d|  d|  df\}}}}|jd d }| dd |f | d|d f } }|d | }| |d||d d}||dd| dd}t|d |d  |d	 |d	   |d	 |d  |d |d	   gd}|d}tj||fdd
S )Nr   r   r9   r   .rl   ).r   ).r   rR   )rY   shapereshapeviewr,   ro   flattencat)	ry   rz   sq_npZrot_dimZx_passZxshapedZx_out2r    r    r$   apply_rotary_pos_emb   s$   ,"
	r   c                       s4   e Zd Z			d fdd	ZdejfddZ  ZS )	RMSNormh㈵>Nc                    s.   t    tjtj|||d| _|| _d S )Nra   rb   )r:   r;   r,   r   	Parameteremptyweighteps)r0   Znormalized_shaper   ra   rb   r#   rF   r    r$   r;      s
   

zRMSNorm.__init__hidden_statesc                 C   sF   |j }|tjdjddd}|t|| j  }| j| |S )Nr9   rl   T)Zkeepdim)	rb   re   r,   float32powmeanZrsqrtr   r   )r0   r   Zinput_dtypeZvariancer    r    r$   rK      s   zRMSNorm.forward)r   NN)r2   r3   r4   r;   r,   rM   rK   rN   r    r    rF   r$   r      s    r   c                       s*   e Zd Zdef fddZdd Z  ZS )CoreAttentionr8   c                    s   t t|   |j| _|j| _| jrd| _td|| _|j|j }|| _	||j | _
|j| _d }t| j
| _| jrD| j}|  j|9  _|| _tj|j| _d S )NTr   )r:   r   r;   Zapply_query_key_layer_scalingattention_softmax_in_fp32maxlayer_numberr>   num_attention_headshidden_size_per_partitionhidden_size_per_attention_head!num_attention_heads_per_partitionmathsqrtnorm_factorcoeffr,   r   Dropoutattention_dropout)r0   r8   r   projection_sizer   rF   r    r$   r;      s"   zCoreAttention.__init__c                 C   s  t tjdd }|dkradd |||fD \}}}|d u r5|jd |jd kr5tjjj|||dd}n|d ur<| }tjj||||}|dddd	}|	 d d
 | j
f }|j| }|S |	d|	d|	d|	df}||d |d |d  d}||d	 |d |d  d}tj|d |d  |d |d	 |j|jd}	tj|	|dd|dddddd| j d}
|
j| }| jr| }| jd ur|| j }|d u r|jd |jd	 krtj|d d|d |d	 |jtjd}|  | }|d ur||td}tj|dd}||}| |}|	d|	d|	d|	d	f}||	d|d |d  d}||d |d  |d d}t||dd}|j| }|dddd	 }|	 d d
 | j
f }|j| }|S )N.r   r9   c                 S   s   g | ]
}| d dddqS )r   r9   r   r   )permute)rU   kr    r    r$   
<listcomp>   s    z)CoreAttention.forward.<locals>.<listcomp>T)Z	is_causalr   r   r{   rl   rk   g        r^   )betaalphar   z-infrR   ) rx   r,   __version__rZ   r|   r   
functionalZscaled_dot_product_attentionr   rY   r   r}   r~   r   rb   ra   Zbaddbmm	transposer   r   rn   r   onesbooltril_Zmasked_fillFsoftmaxZtype_asr   ZbmmrT   )r0   query_layer	key_layervalue_layerattention_maskZpytorch_major_versioncontext_layerZnew_context_layer_shapeZoutput_sizeZmatmul_input_bufferZmatmul_resultZattention_scoresZattention_probsr    r    r$   rK      s   
\



	




	


zCoreAttention.forward)r2   r3   r4   r   r;   rK   rN   r    r    rF   r$   r      s    r   c                       sD   e Zd ZdZddef fddZ		dddZ		dd	d
Z  ZS )SelfAttentionzParallel self-attention layer abstract class.

    Self-attention layer takes input with size [s, b, h]
    and returns output of the same size.
    Nr8   c                    s   t t|   td|| _|j|j | _| j|j | _|j| _	|j
| _
d| j | _| j
r<|j| _| jd| j |j  | _tj|j| jf|jpH|j|dt|| _t|| j| _tj| j|jf|j|dt|| _d S )Nr   r   r9   biasra   )r:   r   r;   r   r   r>   r   r   r   r   multi_query_attentionZqkv_hidden_sizer?   $num_multi_query_groups_per_partitionr   rC   rD   add_bias_linearZadd_qkv_bias_config_to_kwargsquery_key_valuer   core_attentiondense)r0   r8   r   ra   rF   r    r$   r;   v  s@   
zSelfAttention.__init__c                 C   s,   | j r| j}n| j}tj|||| j||dS rv   )r   r   r   r,   r   r   )r0   Zinference_max_sequence_len
batch_sizera   rb   r   r    r    r$   _allocate_memory  s   zSelfAttention._allocate_memoryTc                 C   s  |  |}| jrU|j| j| j | j| j | j| j gdd\}}}	|| d d | j| jf }|| d d | j| jf }|	|	 d d | j| jf }	n| d d | jd| j f }
|j|
 }t|d\}}}	|d urt	||}t	||}|d ur|\}}t
j||fdd}t
j||	fdd}	|r||	f}nd }| jr|d}|ddd| j| j d}| | d d | j| jf }|	d}	|	ddd| j| j d}	|	 |	 d d | j| jf }	| |||	|}| |}||fS )Nrl   rR   r   r   r{   r9   )r   r   rZ   r   r   r   r~   rY   r\   r   r,   r   	unsqueezeexpandrT   r   r   )r0   r   r   rotary_pos_embkv_cache	use_cacheZmixed_x_layerr   r   r   Znew_tensor_shapeZcache_kZcache_vr   outputr    r    r$   rK     s   









	
zSelfAttention.forwardr   )NNNT)	r2   r3   r4   rL   r   r;   r   rK   rN   r    r    rF   r$   r   o  s    %
r   c                 C   s   d| j i}|S )Nrb   )torch_dtype)r"   Zcommon_kwargsr    r    r$   r     s   r   c                       0   e Zd ZdZddef fddZdd Z  ZS )	MLPzMLP.

    MLP will take the input with h hidden state, project it to 4*h
    hidden dimension, perform nonlinear transformation, and project the
    state back into h hidden dimension.
    Nr8   c                    sx   t t|   |j| _tj|j|jd f| j|dt	|| _
dd }|| _tj|j|jf| j|dt	|| _d S )Nr9   r   c                 S   s&   t j| ddd} t| d | d  S )Nr9   rl   rR   r   r   )r,   rV   r   Zsilu)ry   r    r    r$   swiglu'  s   zMLP.__init__.<locals>.swiglu)r:   r   r;   r   Zadd_biasr   rC   rD   Zffn_hidden_sizer   dense_h_to_4hactivation_funcdense_4h_to_h)r0   r8   ra   r   rF   r    r$   r;     s,   zMLP.__init__c                 C   s"   |  |}| |}| |}|S r   )r   r   r   )r0   r   Zintermediate_parallelr   r    r    r$   rK   5  s   


zMLP.forwardr   r2   r3   r4   rL   r   r;   rK   rN   r    r    rF   r$   r     s    r   c                       s6   e Zd ZdZd	def fddZ		d
ddZ  ZS )GLMBlockzA single transformer layer.

    Transformer layer takes input with size [s, b, h] and returns an
    output of the same size.
    Nr8   c                    s   t t|   || _|j| _|j| _|jrtnt}||j	|j
||jd| _t|||d| _|j| _||j	|j
||jd| _t||d| _d S )Nr   ra   rb   r`   )r:   r   r;   r   (apply_residual_connection_post_layernormfp32_residual_connectionrmsnormr   r
   rD   layernorm_epsilonr   input_layernormr   self_attentionhidden_dropoutpost_attention_layernormr   mlp)r0   r8   r   ra   LayerNormFuncrF   r    r$   r;   E  s,   zGLMBlock.__init__Tc                 C   s   |  |}| j|||||d\}}| jr|}n|}tjjj|| j| jd}	||	 }	| 	|	}| 
|}
| jr9|}n|	}tjjj|
| j| jd}|| }||fS )Nr   r   )ptraining)r   r   r   r,   r   r   dropoutr   r   r   r   )r0   r   r   r   r   r   Zlayernorm_outputZattention_outputZresidualZlayernorm_inputZ
mlp_outputr   r    r    r$   rK   d  s2   





zGLMBlock.forwardr   r   r   r    r    rF   r$   r   >  s    $r   c                       sR   e Zd ZdZddef fddZdd Z				dd
ee dee fddZ	  Z
S )GLMTransformerzTransformer class.Nr8   c                    s   t t|   j| _j| _j| _fdd tj fddt	| jD | _
| jrAjr3tnt}|jjjd| _d| _d S )Nc                    s   t  | dS )Nr`   )r   )r   )r8   ra   r    r$   build_layer  r&   z,GLMTransformer.__init__.<locals>.build_layerc                    s   g | ]} |d  qS )r   r    rU   i)r   r    r$   r     s    z+GLMTransformer.__init__.<locals>.<listcomp>r   F)r:   r   r;   r   post_layer_normr=   r,   r   Z
ModuleListrangelayersr   r   r
   rD   r   r   final_layernormgradient_checkpointing)r0   r8   ra   r   rF   )r   r8   ra   r$   r;     s"   
zGLMTransformer.__init__c                 C   s
   | j | S r   )r   )r0   r   r    r    r$   
_get_layer  s   
zGLMTransformer._get_layerTFr   output_hidden_statesc              	   C   s   |sdd t | jD }|rdnd }| jr!| jr!|r!td d}d }|r'dnd }	t | jD ]9}
|r7|	|f }	| |
}| jrQ| jrQtjj		||||||
 |}n||||||
 |d}|\}}|rg||f }q.|ro|	|f }	| j
rw| |}|||	|fS )Nc                 S   s   g | ]}d qS r   r    )rU   r   r    r    r$   r     s    z*GLMTransformer.forward.<locals>.<listcomp>r    zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   )r   r=   r   r   r   Zwarning_oncer   r,   utils
checkpointr   r   )r0   r   r   r   	kv_cachesr   r   presentsall_self_attentionsall_hidden_statesindexlayerZ	layer_retr   r    r    r$   rK     sH   	




zGLMTransformer.forwardr   )NTF)r2   r3   r4   rL   r   r;   r   r   r   rK   rN   r    r    rF   r$   r     s    r   c                       sr   e Zd ZdZdZdZeZdZdgZ	 fddZ
dejfd	d
ZdddZdd ZdddZe fddZ  ZS )ChatGLMPreTrainedModelz
    An abstract class to handle weights initialization and
    a simple interface for downloading and loading pretrained models.
    FTtransformerr   c                    s*   t  j|jfi | t t| | d S r   )r:   r;   Zname_or_pathr   )r0   r8   r#   rF   r    r$   r;     s   zChatGLMPreTrainedModel.__init__modulec                 C   s   dS )zInitialize the weights.Nr    )r0   r   r    r    r$   _init_weights  s   z$ChatGLMPreTrainedModel._init_weightsNc                 C   s   |j \}}tj||||jd}|  d}|r |d d j d }|r3tjtj||||jd|fdd}|d ur>||d }|sM|d urM||dd 8 }|dk  }|d |S )Nr`   r   rl   rR   r   g      ?)	r|   r,   r   ra   r   r   r   r   Z
unsqueeze_)r0   r(   rJ   padding_maskr   
seq_lengthfull_attention_maskpast_lengthr    r    r$   	get_masks  s:   

	
z ChatGLMPreTrainedModel.get_masksc                 C   s.   |j \}}tj|tj|dd|d}|S )Nrk   r   r   )r|   r,   rd   longr   repeat)r0   r(   ra   r   r   position_idsr    r    r$   get_position_ids  s   
z'ChatGLMPreTrainedModel.get_position_idsc                 C   s   t |tr
||_d S d S r   )
isinstancer   r   )r0   r   valuer    r    r$   _set_gradient_checkpointing  s   

z2ChatGLMPreTrainedModel._set_gradient_checkpointingc                    s<   | dd}| dd tt| jdd|i|}||_|S )a'  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirNcfgZpretrained_model_name_or_pathr    )popr:   r   Zfrom_pretrainedr   )r!   r#   r   modelrF   r    r$   _instantiate#  s   z#ChatGLMPreTrainedModel._instantiater   F)r2   r3   r4   rL   Zis_parallelizableZsupports_gradient_checkpointingr   Zconfig_classZbase_model_prefixZ_no_split_modulesr;   r   Moduler   r   r   r   classmethodr  rN   r    r    rF   r$   r     s    

r   c                       r   )	r@   zLanguage model embeddings.Nr8   c                    s<   t t|   |j| _tj|j| j|j|d| _|j| _d S rv   )	r:   r@   r;   rD   r   padded_vocab_sizer   word_embeddingsr   )r0   r8   ra   rF   r    r$   r;   :  s   zEmbedding.__init__c                 C   s0   |  |}|}|dd }| jr| }|S )Nr   r   )r	  r   rT   r   rn   )r0   r(   Zwords_embeddingsZ
embeddingsr    r    r$   rK   F  s   
zEmbedding.forwardr   r   r    r    rF   r$   r@   7  s    r@   c                       s   e Zd Zddef fddZdd Zejfdd	Z								dd
e	ej
 de	ej de	ej de	eeej
ej
f df  de	ej
 de	e de	e de	e fddZdefddZ  ZS )ChatGLMModelNTr8   c                    s4  t  | |rt}nt}i }|d ur||d< |t|fi || _|j| _|j| _|j| _|j	| _	|jd u r<|j
|j n|j}t|d |j|j||jd| _|t|fi || _|tj|j
|jfd|jd|| _|j| _|j| _| jd ur|  D ]}d|_q{t| j | _t|| _ tj!d| _"d S d S )Nra   r9   )rg   rf   ra   rb   F)r   rb   g?)#r:   r;   r   r%   r@   rB   r=   r?   r>   r   rD   r   r]   rg   Zoriginal_roper   r   r   encoderr   rC   r  output_layerrA   r<   
parametersZrequires_gradr,   rd   r   rI   r7   prefix_encoderr   r   )r0   r8   ra   
empty_initZinit_methodZinit_kwargsZ
rotary_dimparamrF   r    r$   r;   T  sV   


zChatGLMModel.__init__c                 C   s   | j jS r   )rB   r	  )r0   r    r    r$   get_input_embeddings  s   z!ChatGLMModel.get_input_embeddingsc                 C   sj   | j d|d|}| ||}||| j| jd | j	| j
}| |}|g dd}|S )Nr   rl   r9   )r9   r   r   r      )rI   r   r   re   r  typer~   rA   r=   r?   r>   r   r   rZ   )r0   r   ra   rb   rI   rJ   r    r    r$   
get_prompt  s   

zChatGLMModel.get_promptr   r   r   rJ   .inputs_embedsr   r   return_dictc
                 C   sb  |d ur|n| j j}|d ur|n| j j}|	d ur|	n| j j}	|j\}
}|d u r,| |}| jd urR|d u r?| j|
|j|j	d}|d urRt
j||
| jf|gdd}|d u rl|d ur^| rd|rl|dkrl| j|||d}| | j}|d ur{|| }n|d d |f }|dd }| j||||||d\}}}}|	stdd	 ||||fD S t||||d
S )N)r   ra   rb   rl   rR   r   )r   r   )r   r   r   r   c                 s   s    | ]	}|d ur|V  qd S r   r    )rU   vr    r    r$   rW     s    z'ChatGLMModel.forward.<locals>.<genexpr>)Zlast_hidden_staterJ   r   
attentions)r8   r   r   use_return_dictr|   rB   rA   r  ra   rb   r,   r   new_onesallr   r   r   r   rT   r  r[   r   )r0   r(   r   r   r   rJ   r  r   r   r  r   r   r   r   r   r   r   r    r    r$   rK     sr   




zChatGLMModel.forwardweight_bit_widthc                 C   s   ddl m} || j| | S )Nr   quantize)quantizationr  r  )r0   r  r  r    r    r$   r    s   zChatGLMModel.quantizer   )NNNNNNNN)r2   r3   r4   r   r;   r  r,   rr   r  r   rM   Z
BoolTensorr   r   rK   rx   r  rN   r    r    rF   r$   r
  R  s@    +	

Kr
  )module_namec                       s  e Zd Zd>def fddZ		d?dedeeef d	e	d
e	deeef f
ddZ
				d@dejdeej deej deej de	defddZ											dAdeej deej deej deeej  deej deej dee	 dee	 dee	 dee	 dee	 fddZedeeejejf df d ejdeeejejf df fd!d"Zd#d$ Z	dBd%ed&eeeef  fd'd(Z	dBd%ed&eeeef  fd)d*Ze 			+		,	,	dCd%ed&eeeef  d-efd.d/Ze 					,	,		dDd%ed&eeeef  d-efd0d1Ze 					dEd2ee d3ee d4ee  d5ee!eejgee f  fd6d7Z"dFd8efd9d:Z#d;edefd<d=Z$  Z%S )G ChatGLM2ForConditionalGenerationTNr8   c                    sR   t  | |j| _t|||d| _|| _d| _| jjr'| j	| jjdd d S d S )Nr  ra   FT)r  )
r:   r;   
max_lengthZmax_sequence_lengthr
  r   r8   	quantizedquantization_bitr  )r0   r8   r  ra   rF   r    r$   r;     s   z)ChatGLM2ForConditionalGeneration.__init__Foutputsmodel_kwargsis_encoder_decoderstandardize_cache_formatr*   c                 C   s   | j ||d|d< d|v r$|d }tj|||jd dfgdd|d< d|v rE|d }|d	dd f  }|d7 }tj||gdd|d< d
|d< |S )N)r)  rJ   r   r   r   rl   rR   r   .Fis_first_forward)Z_extract_past_from_model_outputr,   r   r  r|   clone)r0   r&  r'  r(  r)  r   r   Znew_position_idr    r    r$   #_update_model_kwargs_for_generation  s,   

	
zDChatGLM2ForConditionalGeneration._update_model_kwargs_for_generationr(   rJ   r   r   r*  c                 K   sP   |d u r| j ||jd}|s |ddd f }|d d dd f }||||ddS )Nr`   .rl   T)r(   rJ   r   r   return_last_logit)r   ra   )r0   r(   rJ   r   r   r*  r#   r    r    r$   prepare_inputs_for_generation  s   	z>ChatGLM2ForConditionalGeneration.prepare_inputs_for_generationr  labelsr   output_attentionsr   r  r-  c              
   C   sL  |d ur|n| j j}|
d ur|
n| j j}
| j|||||||	|
d}|d }|r-|dd  }| j|}|dd }d }|d ur|tj	}|dd dd d f  }|ddd f  }t
dd}||j}||d|d|d}||j}||j}|
s|f|dd   }|d ur|f| S |S t|||j|j|jdS )	N)r(   r   r   rJ   r  r   r   r  r   rl   r   .i)Zignore_index)losslogitsrJ   r   r  )r8   r   r  r   r  r   rT   re   r,   r   r	   ra   r~   rY   rb   r   rJ   r   r  )r0   r(   r   r   rJ   r  r/  r   r0  r   r  r-  Ztransformer_outputsr   Z	lm_logitsr1  Zshift_logitsZshift_labelsZloss_fctr   r    r    r$   rK   '  sP   
z(ChatGLM2ForConditionalGeneration.forwardpast.beam_idxc                    s   t  fdd| D S )aL  
        This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
        [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
        beam_idx at every generation step.

        Output shares the same memory storage as `past`.
        c              	   3   sH    | ]}|d   d |d  j|d  d |d jfV  qdS )r   r   N)Zindex_selectre   ra   )rU   Z
layer_pastr4  r    r$   rW   r  s    zBChatGLM2ForConditionalGeneration._reorder_cache.<locals>.<genexpr>)r[   )r3  r4  r    r5  r$   _reorder_cachef  s   z/ChatGLM2ForConditionalGeneration._reorder_cachec                 C   s   |  }|dd}|S )Nu   [[训练时间]]u   2023年)stripreplace)r0   responser    r    r$   process_responsew  s   z1ChatGLM2ForConditionalGeneration.process_responsequeryhistoryc                 C   s,   |j ||d}||gdd}|| j}|S )Nr<  ptreturn_tensors)Zbuild_promptre   ra   )r0   	tokenizerr;  r<  promptinputsr    r    r$   build_inputs|  s   z-ChatGLM2ForConditionalGeneration.build_inputsc                 C   s|   |r%d t|d |}|j|dd}|dd  }|j|d fgddd}nd t|d |}||gdd}|| j}|S )	Nu   

[Round {}]

问：{}

答：r   F)add_special_tokensr>  )r@  rE  u   [Round {}]

问：{}

答：r?  )formatlenencodeZbatch_encode_plusre   ra   )r0   rA  r;  r<  rB  r(   rC  r    r    r$   build_stream_inputs  s   z4ChatGLM2ForConditionalGeneration.build_stream_inputsr   皙?r#  c
                 K   s   |d u rg }|	d u rt  }	|	t  |d u r| j}||||||	d|
}| j|||d}| jdi ||}| d t|d d d  }||}| 	|}|||fg }||fS )N)r#  	num_beams	do_sampletop_ptemperaturelogits_processorr=  r   r(   r    )
r   appendr'   r   rD  generatetolistrG  decoder:  )r0   rA  r;  r<  r#  rK  rL  rM  rN  rO  r#   
gen_kwargsrC  r&  r9  r    r    r$   _chat  s.   	 

z&ChatGLM2ForConditionalGeneration._chatc                 k   s   |d u rg }|	d u rt  }	|	t  |d u r| j}|||||	d|}|d u r4|
s4| j|||d}n| j|||d}|d urp|d d jd }| jjd urU|| jj8 }| j	|7  _	|j
}tj|d||fdd}||d< | jdi |||
d|D ]?}|
r|\}}| d t|d d d  }||}|r|d	 d
kr| |}|||fg }|
r|||fV  q||fV  qd S )N)r#  rL  rM  rN  rO  r=  r   r   rR   r   )rJ   return_past_key_valuesr(   rl   u   �r    )r   rP  r'   r   rD  rI  r|   r   rA   r   r   r,   r   r  stream_generaterR  rG  rS  r:  )r0   rA  r;  r<  rJ   r#  rL  rM  rN  rO  rV  r#   rT  rC  r   r   r&  r9  Znew_historyr    r    r$   stream_chat  sh   
 


z,ChatGLM2ForConditionalGeneration.stream_chatgeneration_configrO  stopping_criteriaprefix_allowed_tokens_fnc              	   +   s   |j d |j d }}	|d u r| j}t|}|jdi |}
|j|j}}t|tr/|g}|	dd u o:|j
d u}|rN|jd u rNtd|j
 dt n|jd urj|j|	 |_
|sjtd|j d|j
 dt |	|j
kr| jjrud	nd
}td| d|	 d|j
 d |d ur|nt }|d ur|nt }| j||	|||d}| j||d}| |}||j d d}d }	 | j|fi |
}| di |dddd}|jd d dd d f }|||}|||}tjj|dd}|jrt j!|dd"d nt j#|dd t j$| d d d f gdd}| j%||
| jjd}
|&t' fdd|D ( }|r9||j)fV  n|V  |* dksI|||rKd S q)Nr   rl   r#  zUsing `max_length`'s default (z) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.zBoth `max_new_tokens` (=z) and `max_length`(=z) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)Zdecoder_input_idsr(   zInput length of z is z, but `max_length` is set to zX. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.)rY  input_ids_seq_lengthZencoder_input_idsr[  rO  )rY  rZ  r   TF)r  r0  r   rR   )Znum_samples)r(  c                 3   s    | ]} |kV  qd S r   r    r   Znext_tokensr    r$   rW   ^  rX   zCChatGLM2ForConditionalGeneration.stream_generate.<locals>.<genexpr>r    )+r|   rY  copydeepcopyupdateZbos_token_ideos_token_idr   rx   getr#  Zmax_new_tokenswarningswarnUserWarningr   r8   r(  warningr   r   Z_get_logits_processorZ_get_stopping_criteriaZ_get_logits_warpernewZfill_r.  r2  r   r   r   rL  r,   ZmultinomialZsqueezeZargmaxr   r,  mulsumr   rJ   r   )r0   r(   rY  rO  rZ  r[  rV  r#   r   r\  r'  ra  Zhas_default_max_lengthZinput_ids_stringZlogits_warperZunfinished_sequencesr)   Zmodel_inputsr&  Znext_token_logitsZnext_token_scoresZprobsr    r]  r$   rW    s   








z0ChatGLM2ForConditionalGeneration.stream_generatebitsc                 K   s^   |dkrd S ddl m} | jrtd | S d| _|| j_|| jj|f||d|| j_| S )Nr   r   r  zAlready quantized.Tr"  )	r  r  r$  r   infor8   r%  r   r  )r0   rj  r  ra   r#   r  r    r    r$   r  h  s$   

z)ChatGLM2ForConditionalGeneration.quantizeinputc           
   	   C   s   |d }|d }d|v r|d }nd}d|v r|d }nd}d|v r'|d }nd}d	|v r2|d	 }nd
}t |tjkr?| }| j|||||||d\}	}tj|	tj|iS )Ntextr<  r#  i   rN  gffffff?rK  r   rL  T)r#  rN  rK  rL  )r  r,   rM   rR  rU  r   ZRESPONSEZHISTORY)
r0   rl  rA  rm  r<  r#  rN  rK  rL  r9  r    r    r$   chat~  s4   




z%ChatGLM2ForConditionalGeneration.chat)TN)FF)NNNT)NNNNNNNNNNFr   )NNr   TrJ  rJ  N)NNNTrJ  rJ  NF)NNNNF)FN)&r2   r3   r4   r   r;   r   r   strr   r   r,  r,   r5   r   rM   dictr.  r   r6   rK   staticmethodr6  r:  r   rD  rI  Zno_gradrx   rU  rX  r   r   r   r   rW  r  rn  rN   r    r    rF   r$   r!    s   


$
	

?

#;pr!  r  )WrL   r^  r   sysrc  typingr   r   r   r   r   r   r,   Ztorch.nn.functionalr   r   r   Ztorch.utils.checkpointZtorch.nnr	   r
   Ztorch.nn.utilsr   Z&transformers.generation.logits_processr   Ztransformers.generation.utilsr   r   r   r   Ztransformers.modeling_outputsr   r   Ztransformers.modeling_utilsr   Z
modelscoper   r   Zmodelscope.metainfor   Zmodelscope.outputsr   Zmodelscope.utilsr   loggingZmodelscope.utils.constantr    r   configurationr   platformZ_CZ_jit_set_profiling_modeZ_jit_set_profiling_executorZ_jit_override_can_fuse_on_cpuZ_jit_override_can_fuse_on_gpuZ
get_loggerZ_CHECKPOINT_FOR_DOCZ_CONFIG_FOR_DOCZ(CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LISTr%   r'   r  r7   rM   rx   r   r\   r]   Zjitscriptr   r   r   r   r   r   r   r   r   r@   r
  Zregister_modulern  Zchatglm2_6br!  r    r    r    r$   <module>   s     

"
9  ,WTN 