o
    *jz                     @   s
  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
m  mZ d dlZ	d dl	mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z:m;Z;m<Z<m=Z= erd dl>m?Z? zd dl@mAZA W n eBy   dZAY nw zd dlCmDZD d dl@mAZA dZEW n eBy   dZEeFd Y nw zd dlGmHZH W n eBy   dZHeFd Y nw e3 ZIdZJd ZKdgZLzd d!lMmNZN W n eBy&   dZNeFd" Y nw G d#d$ d$e	jjOZPG d%d& d&ejOZQG d'd( d(ejOZRG d)d* d*ejOZSG d+d, d,e-eZTe5jUe1jVe/jWd-G d.d/ d/eTZXG d0d1 d1e	jjOZYd2d3 ZZd8d4d5Z[G d6d7 d7e	jjOZ\dS )9    N)TYPE_CHECKINGCallableListOptionalTupleUnion)nn)autocast)CrossEntropyLoss)GenerationConfigPreTrainedTokenizerStoppingCriteriaList)LogitsProcessorList)GenerateOutput)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)set_seed)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging)assert_device_mapget_device_map)Model
TorchModel)Models)Tasks)
get_logger   )MODELS   )
QWenConfig)HistoryTypeStopWordsLogitsProcessordecode_tokensget_stop_words_idsmake_context)BaseStreamer	rearrange)apply_rotary_emb_funcTFzWarning: import flash_attn rotary fail, please install FlashAttention rotary to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary)rms_normzWarning: import flash_attn rms_norm fail, please install FlashAttention layer_norm to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_normzqwen-7br#   )flash_attn_unpadded_funczkWarning: import flash_attn fail, please install FlashAttention https://github.com/Dao-AILab/flash-attentionc                       s,   e Zd Z			d fdd	Zdd Z  ZS )	FlashSelfAttentionFN        c                    s@   t    td usJ dtd usJ d|| _|| _|| _d S )NzFPlease install FlashAttention first, e.g., with pip install flash-attnz:Please install einops first, e.g., with pip install einops)super__init__r.   r+   causalsoftmax_scale	dropout_p)selfr3   r4   attention_dropout	__class__ d/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/qwen/backbone.pyr2   U   s   



zFlashSelfAttention.__init__c                 C   s  t dd |||fD sJ t dd |||fD sJ |jd |jd }}|jd }dd |||fD \}}}tjd|d | |tj|jd}| jrX||ksRJ | j}|}	n||k}tjd|d | |tj|jd}	d| _t	|||||	||| j| j
|d	
}
t|
d
|d}
|
S )Nc                 s   s"    | ]}|j tjtjfv V  qd S N)dtypetorchZfloat16Zbfloat16.0ir:   r:   r;   	<genexpr>g   s     z-FlashSelfAttention.forward.<locals>.<genexpr>c                 s   s    | ]}|j V  qd S r<   )is_cudar?   r:   r:   r;   rB   h   s    r   r"   c                 S   s   g | ]}t |d qS )zb s ... -> (b s) ...r*   )r@   xr:   r:   r;   
<listcomp>k   s    z.FlashSelfAttention.forward.<locals>.<listcomp>)stepr=   device)r4   r3   z(b s) ... -> b s ...)b)allshaper>   arangeZint32rG   trainingr3   r5   r.   r4   r+   )r6   qkv
batch_sizeZseqlen_qZseqlen_kZcu_seqlens_qZ	is_causalZcu_seqlens_koutputr:   r:   r;   forwarde   sR   


zFlashSelfAttention.forward)FNr0   __name__
__module____qualname__r2   rR   __classcell__r:   r:   r8   r;   r/   S   s    r/   c                       s   e Zd Zd fdd	ZdddZ		dddZdd	 Zd
d Z							ddee	e
j  dee	e
j  dee
j dee
j dee
j dee
j dee dee fddZ  ZS )QWenAttentionNc                    s  t    |j} jdttj||ftjddd||dd  jdt	ddd t
d| _|j _|j _|j _|j _|j _ j j  _|j _d _d  _|j|j  _ j|j d	kshJ  j|j  _t|jd
 j  _tj|j j|j d _|jp|j  _  jrt!d ur j st"d|j#d _$|j _|j%dkrd  _&n|j%dk sJ t' j|j%  _& j&d urƈ j&n j}t(||j)d _*|j+ _+|j, _, fddt-ddD }t.|d d d d d f  _/d _0t1|j# _2d S )Nbiasr=   r"   F)
persistentZmasked_biasg     Tr   r    rY   )r3   r7         ?)basec                    s(   g | ]}| j krt| j nd qS )r"   )
seq_lengthmathlogr?   r6   r:   r;   rE      s    z*QWenAttention.__init__.<locals>.<listcomp>i   )3r1   r2   max_position_embeddingsZregister_bufferr>   Ztrilonesboolviewtensormaxlayer_numberZparams_dtyper_   hidden_size
split_sizeZnum_attention_heads	num_headshead_dimuse_flash_attnscale_attn_weights	layer_idxZkv_channelsZprojection_sizeZhidden_size_per_attention_headr   Linearc_attnno_biasc_projbf16Zfp16is_fp32r.   r/   Z
attn_pdropcore_attention_flashZ
rotary_pctZrotary_ndimsintRotaryEmbeddingZrotary_emb_base
rotary_embuse_dynamic_ntkuse_logn_attnrangeTensorlogn_tensor_ntk_cachedDropoutattn_dropout)r6   configri   Zmax_positionsdimZ	logn_listr8   rb   r;   r2      sv   




zQWenAttention.__init__c                 C   s  t ||dd}| jr|t jg |dd |j|jd }|d|d}}| jd d d d || |d |f }	t 	|jj
}
t jg |
|jd|j}
t |	||j|
}tjj|dd}||j}| |}|d urw|| }t ||}|dd}||fS )	N      ?r=   rG   rZ   r   r"      )r>   matmul	transposero   fullsizer=   rG   rY   finfomintowherer   
functionalsoftmaxtyper   )r6   querykeyvalueattention_mask	head_maskattn_weightsquery_length
key_lengthcausal_mask
mask_valueattn_outputr:   r:   r;   _attn   s@   

zQWenAttention._attnc                 C   s  |  \}}}}	|  \}
}
}}
tj|| ||tj|jd}d}| jr.|t| dd  }tdd1 |d||	|	ddd|	|}}tj
|| | d|d	}|||||}W d    n1 sgw   Y  | d| d}}| jd d d d || |d |f }t|jj}tj||jd
|j}t|||}|d ur|| }tjj|dd}|jtjkrtd||j}| |}|d ur|| }t||}||fS )Nr   r]   r   r   F)enabledr   r   )betaalpharZ   r   zDError with upcasting, attn_weights does not have dtype torch.float32)r   r>   emptyZfloat32rG   ro   floatr	   Zreshaper   ZbaddbmmrY   r   r=   r   rg   r   r   r   r   r   RuntimeErrorr   r   r   )r6   r   r   r   r   r   Zbszrl   Z	q_seq_lenZdk_Z	k_seq_lenr   Zscale_factorrM   rN   r   r   r   r   r   r:   r:   r;   _upcast_and_reordered_attn   sn   
	
z(QWenAttention._upcast_and_reordered_attnc                 C   s&   |  d d ||f }||}|S )Nr   )r   rf   r6   rg   rl   Zattn_head_sizeZ	new_shaper:   r:   r;   _split_heads6  s   
zQWenAttention._split_headsc                 C   s,   |  }| d d || f }||S )Nr   )
contiguousr   rf   r   r:   r:   r;   _merge_heads;  s   
zQWenAttention._merge_headsFhidden_states
layer_pastr   r   encoder_hidden_statesencoder_attention_maskoutput_attentions	use_cachec	           !      C   sB  |  |}	|	j| jdd\}
}}| |
| j| j}
| || j| j}| || j| j}| d }|r<||d jd 7 }| jrg|| d krg| j	sgt
|| j dd }dt
| d }t|d}|| _n| j}| j||d|j}|d urt|tr|}n|fd }|d ur|\}}|
jd }|d d | d d d d d f }|d d | d d d d d f }t|
|}
t||}|d ur|d |d }}tj||fdd}tj||fdd}|r||f}nd }| jr)| j	s)| jj|
jkr| j|
j|
| _|d|
d }|d}| jd d ||d d d d f }|
||
 }
| jrQtd urQ| jsQ|
jrQ|
||}}}|  |||}t!|d" }n,|
#dddd}
|#dddd}|#dddd}| $|
||||\}}| %|| j| j}| &|}||f} |r| jrtd ur| jst'd| |f7 } | S )	Nr   r   r"   r   )	ntk_alphazb s h d -> b s (h d)r    z/Cannot output attentions while using flash-attn)(rr   splitrk   r   rl   rm   r   rJ   r{   rL   r`   ra   r_   ceilrh   r   rz   r   rG   
isinstancetupleapply_rotary_pos_embr>   catr|   r   type_asZ	expand_asrn   r.   rv   rC   rw   r+   r   Zpermuter   r   rt   
ValueError)!r6   r   r   r   r   r   r   r   r   Zmixed_x_layerr   r   r   Z
kv_seq_lenZcontext_valuer   Zrotary_pos_embZ	q_pos_embZ	k_pos_embcur_lenZpast_keyZ
past_valueZpresentZ	seq_startZseq_endr   rM   rN   rO   Zcontext_layerr   Zattn_weightoutputsr:   r:   r;   rR   @  s   




""



""



zQWenAttention.forwardr<   )NNNNNNNFF)rT   rU   rV   r2   r   r   r   r   r   r   r>   FloatTensorr~   re   rR   rW   r:   r:   r8   r;   rX      s@    
E&
9	rX   c                       s$   e Zd Z fddZdd Z  ZS )QWenMLPc                    sl   t    tj|j|jd |j d| _tj|j|jd |j d| _|jd }tj||j|j d| _	d S )Nr   r\   )
r1   r2   r   rq   rj   Zffn_hidden_sizers   w1w2rt   )r6   r   Z	ff_dim_inr8   r:   r;   r2     s   

zQWenMLP.__init__c                 C   s0   |  |}| |}|t| }| |}|S r<   )r   r   FZsilurt   )r6   r   Za1Za2Zintermediate_parallelrQ   r:   r:   r;   rR     s
   


zQWenMLP.forwardrS   r:   r:   r8   r;   r     s    r   c                       s   e Zd Zd fdd	Z							ddeeej  deeej  deej d	eej d
eej deej dee	 dee	 fddZ
  ZS )	QWenBlockNr"   c                    sp   t    || _|| _|j| _|j}|j| _|j| _t||jd| _	t
||d| _t||jd| _t|| _d S )Neps)ri   )r1   r2   
num_expertri   (apply_residual_connection_post_layernormrj   ru   RMSNormlayer_norm_epsilonln_1rX   attnln_2r   mlp)r6   r   rp   r   rj   r8   r:   r;   r2     s&   
zQWenBlock.__init__Fr   r   r   r   r   r   r   r   c	                 C   s   |  |}	| j|	|||||d}
|
d }|
dd  }| jr |	}n|}|| }| |}	| jr1|	}n|}| |	}|| }|rE|f| }|S |f|dd   }|S )N)r   r   r   r   r   r   r"   )r   r   r   r   r   )r6   r   r   r   r   r   r   r   r   Zlayernorm_outputZattn_outputsr   r   ZresidualZlayernorm_inputZ
mlp_outputr:   r:   r;   rR     s4   



zQWenBlock.forward)Nr"   r   )rT   rU   rV   r2   r   r   r>   r   r~   re   rR   rW   r:   r:   r8   r;   r     s4    	r   c                       sT   e Zd ZeZdZdZdZdgZ fddZ	dd Z
dd	d
Ze fddZ  ZS )QWenPreTrainedModelZtransformerFTr   c                    s*   t  j|jfi | t t| | d S r<   )r1   r2   Zname_or_pathr   )r6   r   kwargsr8   r:   r;   r2     s   zQWenPreTrainedModel.__init__c                 C   s   t |tjr|jjjd| jjd |jdur|jj	  n,t |tj
r=|jjjd| jjd |jdur<|jj|j 	  nt |trI|jjd | D ]\}}|dkrh|jjd| jjtd| jj  d qMdS )zInitialize the weights.r0   )meanZstdNr]   zc_proj.weightr   )r   r   rq   weightdataZnormal_r   Zinitializer_rangerY   Zzero_	EmbeddingZpadding_idxr   Zfill_Znamed_parametersr`   sqrtn_layer)r6   modulenamepr:   r:   r;   _init_weights
  s4   


z!QWenPreTrainedModel._init_weightsc                 C   s   t |tr
||_d S d S r<   )r   	QWenModelgradient_checkpointing)r6   r   r   r:   r:   r;   _set_gradient_checkpointing!  s   

z/QWenPreTrainedModel._set_gradient_checkpointingc                    sP   | dd }|d u rtdi |}| |}ntt| jdd|i|}||_|S )N	model_dirZpretrained_model_name_or_pathr:   )popr#   r1   r   Zfrom_pretrainedr   )clsr   r   r   modelr8   r:   r;   _instantiate%  s   
z QWenPreTrainedModel._instantiateF)rT   rU   rV   r#   Zconfig_classZbase_model_prefixZis_parallelizableZsupports_gradient_checkpointingZ_no_split_modulesr2   r   r   classmethodr   rW   r:   r:   r8   r;   r     s    
r   )module_namec                       s   e Zd ZdgZ fddZdd Zdd Z													dd	eej	 d
ee
e
ej   deej deej	 deej	 deej deej deej deej dee dee dee dee fddZ  ZS )r   zattn.masked_biasc                    s   t     j| _ j| _ j| _ j} j| _	d| _
| j	dkr;t|| j| _| | jj d| _| | jj nd | _d| _t| j| j| _t j| _t fddt jD | _t| j jd| _|   d S )NFZlearnedposition_embeddings c                    s   g | ]}t  |d qS ))rp   )r   r?   r   r:   r;   rE   L  s    z&QWenModel.__init__.<locals>.<listcomp>r   )r1   r2   Zpadded_vocab_sizeZ
vocab_sizeZnum_hidden_layersrj   Z	embed_dimrc   Zpos_embZposition_embedding_typer   r   r   wpeZinit_methodr   r   Z_position_embeddings_keywter   Z
embd_pdropdropZ
ModuleListr}   hr   r   ln_fZ	post_init)r6   r   Zmax_sequence_lengthr8   r   r;   r2   6  s0   

zQWenModel.__init__c                 C   s   | j S r<   r   rb   r:   r:   r;   get_input_embeddingsY  s   zQWenModel.get_input_embeddingsc                 C   s
   || _ d S r<   r   )r6   Znew_embeddingsr:   r:   r;   set_input_embeddings\  s   
zQWenModel.set_input_embeddingsN	input_idspast_key_valuesr   token_type_idsposition_idsr   inputs_embedsr   r   r   r   output_hidden_statesreturn_dictc                    s   d ur n| j j |d ur|n| j j}d urn| j j|d ur$|n| j j}|d ur4|d ur4td|d urJ| }|d|d }|jd }n|d ur\| d d }|jd }ntd|d urg|j	n|j	}|d urv|d|d }|d ur|d|d }|d u rd}t
d gt| j }n	|d d d}|d u rtj||d | tj|d}|dd|d }|d ur|dkrtd||d}|d d d d d d f }|j| jd}d	| t| jj }d }	| || j j}|d u r| |}|}| jd ur| |}|| }| |}||df }| jr,| jr,r,td
 dr1dnd } r8dnd }|r?dnd }tt| j|D ]]\}\}}|rW||f }| jrx| jrx fdd}tj j!!|||d ||| ||	}n|||||| ||	 d}|d }du r|| rdnd f } r||d f }qI| "|}||}|st
dd |||fD S t#||||dS )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   r   z5You have to specify either input_ids or inputs_embedsr   r   z$batch_size has to be defined and > 0rZ   r]   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr:   c                    s    fdd}|S )Nc                     s    g | R  S r<   r:   )Zinputs)r   r   r   r:   r;   custom_forward  s   zHQWenModel.forward.<locals>.create_custom_forward.<locals>.custom_forwardr:   )r   r   r   r   )r   r;   create_custom_forward  s   z0QWenModel.forward.<locals>.create_custom_forward)r   r   r   r   r   r   r   Tr   r"   c                 s   s    | ]	}|d ur|V  qd S r<   r:   )r@   rO   r:   r:   r;   rB     s    z$QWenModel.forward.<locals>.<genexpr>)Zlast_hidden_stater   r   Z
attentions)$r   r   r   r   Zuse_return_dictr   r   rf   rJ   rG   r   lenr   r>   rK   longZ	unsqueezer   r=   r   r   Zget_head_maskr   r   r   r   r   rL   loggerZwarning_once	enumerateziputils
checkpointr   r   )r6   r   r   r   r   r   r   r   r   r   r   r   r   r   Zinput_shaperP   rG   Zpast_lengthr   Zposition_embedsZoutput_shapeZpresentsZall_self_attentionsZall_hidden_statesrA   blockr   r   r   r:   r   r;   rR   _  s   









zQWenModel.forward)NNNNNNNNNNNNN)rT   rU   rV   Z_keys_to_ignore_on_load_missingr2   r   r   r   r>   Z
LongTensorr   r~   r   re   rR   rW   r:   r:   r8   r;   r   2  sZ    #	
r   c                       s6   e Zd Zd
 fdd	Z		dddZddd	Z  ZS )ry   '  c                    sd   t    || _|| _d|td|d |   | _tj	
dd u r'tdd | _d| _d| _d S )Nr]   r   r   einopsz'einops is required for Rotary Embedding)r1   r2   r   r^   r>   rK   r   inv_freq	importlibutil	find_specr   _rotary_pos_emb_cache_seq_len_cached_ntk_alpha_cached)r6   r   r^   r8   r:   r;   r2     s   
 
zRotaryEmbedding.__init__r   r]   c           
      C   s   || }|| j ks|| jkrg| j|| j| jd    }	 tjd| jd| jjd | j | _d|| j  | _|| _ || _tj|| jjd}t	|
| j| j}tj||fdd}ddlm}	 |	|d| _d S d S )	Nr   r   )rG   r]   r   r   r*   zn d -> 1 n 1 d)r  r  r^   r   r>   rK   r  rG   r   outerr   r   r  r+   r
  )
r6   max_seq_lenoffsetr   Zseqlenr^   seqfreqsZembr+   r:   r:   r;   update_rotary_pos_emb_cache  s$   z+RotaryEmbedding.update_rotary_pos_emb_cachec                 C   s(   |  ||| | jd d ||| f S r<   )r  r
  )r6   r  r  r   r:   r:   r;   rR   !  s   zRotaryEmbedding.forward)r  )r   r]   )rT   rU   rV   r2   r  rR   rW   r:   r:   r8   r;   ry     s    
ry   c                 C   s>   ddl m} || ddd} | jdd\}}tj| |fddS )	Nr   r*   z... (j d) -> ... j dr   )jr   r   r   )r  r+   Zunbindr>   r   )rD   r+   x1Zx2r:   r:   r;   _rotate_half&  s   r  c           	      C   s   |r;|   }|dd}|d d d |jd d f  }|d d d |jd d f  }t|||| }|S |jd }| dd |f | d|d f }}|  }|  }||  t||   }tj	||fdd| S )Nr   r"   r   r   .r   )
r   ZsqueezerJ   cossinr,   r   r  r>   r   )	tr  use_flash_rotaryZt_r  r  rQ   Zrot_dimZt_pass_r:   r:   r;   r   .  s   ""
"r   c                       s8   e Zd Zd
dedef fddZdd Zdd	 Z  ZS )r   ư>r   r   c                    s&   t    || _tt|| _d S r<   )r1   r2   r   r   	Parameterr>   rd   r   )r6   r   r   r8   r:   r;   r2   A  s   
zRMSNorm.__init__c                 C   s$   |t |djddd| j  S )Nr   r   T)Zkeepdim)r>   Zrsqrtpowr   r   )r6   rD   r:   r:   r;   _normF  s   $zRMSNorm._normc                 C   s<   t d ur|jrt || j| jS | | |}|| j S r<   )r-   rC   r   r   r  r   r   )r6   rD   rQ   r:   r:   r;   rR   I  s   
zRMSNorm.forward)r  )	rT   rU   rV   rx   r   r2   r  rR   rW   r:   r:   r8   r;   r   ?  s    r   r   )]r  r`   typingr   r   r   r   r   r   r>   Ztorch.nn.functionalr   r   r   Ztorch.utils.checkpointZtorch.cuda.ampr	   Ztorch.nnr
   Ztransformersr   r   r   Z&transformers.generation.logits_processr   Ztransformers.generation.utilsr   Ztransformers.modeling_outputsr   r   Ztransformers.modeling_utilsr   Ztransformers.trainer_utilsr   Ztransformers.utilsr   r   r   r   r   Z'transformers.utils.model_parallel_utilsr   r   Z
modelscoper   r   Zmodelscope.metainfor   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   r   r!   configurationr#   Zqwen_generation_utilsr$   r%   r&   r'   r(   Z!transformers.generation.streamersr)   r  r+   ImportErrorZflash_attn.layers.rotaryr,   r  printZflash_attn.ops.rms_normr-   r   Z_CHECKPOINT_FOR_DOCZ_CONFIG_FOR_DOCZ"QWen_PRETRAINED_MODEL_ARCHIVE_LISTZflash_attn.flash_attn_interfacer.   Moduler/   rX   r   r   r   Zregister_moduleZbackboneZqwen_7br   ry   r  r   r   r:   r:   r:   r;   <module>   s    A  J3 H,
