o
    *j8                     @   s  d Z ddlmZmZmZmZ ddlZddlZddl	m
  mZ ddlmZ ddlmZ ddlmZ e Zeeeef  Zee Zeee  Zdeded	ed
efddZdd ZdejdefddZdd Z				d8dededeeeef  dededefddZd d d!d"ee d#ee d$ee ded%ed&ed'efd(d)Z d d d!d"ee d#ee d*ee ded%ed+ed&ed'efd,d-Z!	 	 d9d"eejef ded%ed+eded&ed'ed
efd.d/Z"G d0d1 d1eZ#dd2e$d3 fd4d5Z%d6d7 Z&dS ):zGeneration support.    )IterableListTupleUnionN)PreTrainedTokenizer)LogitsProcessor)
get_loggerbatchpad_id
seq_lengthreturnc                 C   s2   | D ]}t |}||k r||g||   q| S N)lenextend)r	   r
   r   tokenscontext_length r   q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/qwen/qwen_generation_utils.py	pad_batch   s   r   c              
   C   sX  |   \}}|r|}nd}ttj|||f| jd|d||}tj|   tj| jd}	|r5d|	| |k< tj|tj| jd}
|
	d
| }
|rM|
 }
|sQ|rt|D ]M}|
|| | |kf }|rg| }d}t|  d D ]0}|| }|rd||d|d dd|d f< |r|
||d df  |d | 8  < |d }qqqU|dk }||	|
fS )z4Build masks and position id for left to right model.   )device)Zdtyper           r   Ng      ?)sizetorchZtrilZonesr   viewfloatZarangelongZ	unsqueezeZ	expand_asclonerange)dataZ	eod_tokenreset_position_idsreset_attention_maskeod_mask_lossZmicro_batch_sizer   Zatt_mask_batchattention_maskZ	loss_maskposition_idsbZ	eod_indexZ
prev_indexjir   r   r   get_ltor_masks_and_position_ids!   sN   

 $
r(   context_tokenseod_idc                 C   s2   |   | j}t||dddd\}}}|||fS )z#Generate batch from context tokens.F)r    r!   r"   )
contiguoustor   r(   )r)   r*   r   r#   _r$   r   r   r   	get_batch`   s   
r.   c                 C   sH   | dkr| d|jgg}|S | dkr|jg|jgg}|S td| )NrawzHuman:chatmlUnknown chat format )encoder*   	im_end_idim_start_idNotImplementedError)chat_format	tokenizerstop_words_idsr   r   r   get_stop_words_idso   s   r9       r0   r7   queryhistorysystemmax_window_sizer6   c              	      s  |d u rg }|dkrd\}}j g}jg}	d  fdd}
|
d|\}}|| |	 }d}g }t|D ]O\}}|
d|\}}|| |	 }|
d	|\}}|| |	 } |   | }d| | | d| | | }t|t| t| }||k r|| }|| }q7 || }| | | | }| | |
d|d
  |	   | d	   7 }|d| d| | d| d7 }||fS |dkr|}|}||fS td|)Nr0   )z<|im_start|>z
<|im_end|>
c                    s&   |  d|  |    | fS )Nr@   )r2   )ZrolecontentZ	nl_tokensr7   r   r   _tokenize_str   s   z#make_context.<locals>._tokenize_strr>   r:   userZ	assistantr   zuser
z
assistant
r/   r1   )r4   r3   r2   reversedr   r5   )r7   r<   r=   r>   r?   r6   Zim_startZim_endZim_start_tokensZim_end_tokensrC   Zsystem_textZsystem_tokens_partZsystem_tokensZraw_textr)   Z
turn_queryZturn_responseZ
query_textZquery_tokens_partZquery_tokensZresponse_textZresponse_tokens_partZresponse_tokensZnext_context_tokensZ	prev_chatZcurrent_context_sizer   rB   r   make_contexty   sh   

 
rF   F)verbosereturn_end_reasonr   
stop_words	eod_wordsraw_text_lenrG   rH   c                C   s   | | |d  }|rtd| dt|  }|D ]
}	||	d }q|D ]}
|
|v r1d|
}||
d }q&| }|rItd| td| |rO||fS |S )Nz
Raw Generate: Gen length r:   Gen r   
End Reason:z
Generate: )decodeprintr   replacestripsplit)r   rI   rJ   r7   rK   rG   rH   trim_decode_tokens
end_reason	stop_wordZeod_wordr   r   r   _decode_default   s"   




rW   eod_token_idsr   c                C   s   dt |  }|}	t|t | D ]}	| |	 |v r%d|| |	 g} nq|| d |	 |d  }
|rKtd|| |d   td|
 td| |D ]
}|
|d }
qM|
 }
|rctd|
 |ri|
|fS |
S )NrL   rM   z
Raw Generate w/o EOD:z
Raw Generate:rN   r:   z

Generate:)r   r   rO   rP   rQ   rR   )r   rI   rX   r7   rK   r   rG   rH   rU   Zeod_token_idxrT   rV   r   r   r   _decode_chatml   s4   



rY   c              
   C   sr   t | r|    } |dkr!t| g |j|jg|||||dS |dkr2t| dgdg||||dS t	d|)Nr0   )rI   rX   r7   rK   r   rG   rH   r/   z<|endoftext|>)rI   rJ   r7   rK   rG   rH   r1   )
r   Z	is_tensorcpunumpytolistrY   r4   r3   rW   r5   )r   r7   rK   r   r6   rG   rH   r   r   r   decode_tokens   s0   
	


r]   c                   @   s|   e Zd ZdZdeee  defddZdejdej	dej	fd	d
Z
dejdee defddZdee dee fddZdS )StopWordsLogitsProcessora  
    :class:`transformers.LogitsProcessor` that enforces that when specified sequences appear, stop geration.

    Args:
        stop_words_ids (:obj:`List[List[int]]`):
            List of list of token ids of stop ids. In order to get the tokens of the words
            that should not appear in the generated text, use :obj:`tokenizer(bad_word,
            add_prefix_space=True).input_ids`.
        eos_token_id (:obj:`int`):
            The id of the `end-of-sequence` token.
    r8   eos_token_idc                    s   t |trt|dkrtd| dtdd |D r$td| dtdd |D r5td| dtt fd	d
|| _ | _| jD ]}t|dksVJ d	|qGd S )Nr   z4`stop_words_ids` has to be a non-emtpy list, but is .c                 s   s    | ]	}t |t V  qd S r   )
isinstancelist).0Zbad_word_idsr   r   r   	<genexpr>8  s    z4StopWordsLogitsProcessor.__init__.<locals>.<genexpr>z3`stop_words_ids` has to be a list of lists, but is c                 s   s"    | ]}t d d |D V  qdS )c                 s   s*    | ]}t |ttjf p|d k V  qdS )r   N)ra   intnpinteger)rc   Ztoken_idr   r   r   rd   >  s    
z>StopWordsLogitsProcessor.__init__.<locals>.<genexpr>.<genexpr>N)any)rc   Zstop_word_idsr   r   r   rd   =  s    
zLEach list in `stop_words_ids` has to be a list of positive integers, but is c                    s
   |  gkS r   r   )Zbad_token_seqr_   r   r   <lambda>G  s   
 z3StopWordsLogitsProcessor.__init__.<locals>.<lambda>z7Stop words token sequences {} cannot have an empty list)
ra   r   r   
ValueErrorrh   rb   filterr8   r_   format)selfr8   r_   stop_token_seqr   ri   r   __init__1  s:   



z!StopWordsLogitsProcessor.__init__	input_idsscoresr   c                 C   s6   |  |}t|D ]\}}|rtd||| jf< q	|S )Ni   )_calc_stopped_samples	enumerater   r_   )rn   rq   rr   stopped_samplesr'   Zshould_stopr   r   r   __call__P  s   
z!StopWordsLogitsProcessor.__call__prev_tokensr   c                 C   sF   t |dkrdS t |t |krdS |t | d   |kr!dS dS )Nr   TF)r   r\   )rn   rw   r   r   r   r   _tokens_matchX  s   z&StopWordsLogitsProcessor._tokens_matchprev_input_idsc                 C   s@   g }|D ]}d}| j D ]}| ||rd} nq|| q|S )NFT)r8   rx   append)rn   ry   ru   Zprev_input_ids_slicematchro   r   r   r   rs   f  s   
z.StopWordsLogitsProcessor._calc_stopped_samplesN)__name__
__module____qualname____doc__r   re   rp   r   
LongTensorZFloatTensorrv   r   boolrx   rs   r   r   r   r   r^   $  s(    


r^   r   ZInfc           
      C   s   |dkr| t | |d d k }|| |< |dkr^t j| ddd\}}t jtj|dddd}||k}|dd	df  |dd
d	f< d|d< t|dD ]}	||	 ||	  }|| |	 |< qM| S )zThis function has been mostly taken from huggingface conversational
    ai code at
        https://medium.com/huggingface/how-to-build-a-state-of-the-art-
             conversational-ai-with-transfer-learning-2d818ac26313r   ).Nr   Tr   )Z
descendingdim)r   .Nr   ).r   )	r   ZtopksortZcumsumFZsoftmaxr   r   r   )
ZlogitsZtop_kZtop_pZfilter_valueZindices_to_removeZsorted_logitsZsorted_indicesZcumulative_probsZsorted_indices_to_remover'   r   r   r   top_k_logitsu  s*   

r   c                 C   s   | | }d| |  ||  S )Nr   )Ztype_as)Zval1Zval2booleanr   r   r   switch  s   
r   )Nr:   r;   r0   )FF)'r   typingr   r   r   r   r[   rf   r   Ztorch.nn.functionalnnZ
functionalr   Ztransformersr   Ztransformers.generationr   Zmodelscope.utils.loggerr   loggerstrZHistoryTypere   Z
TokensTypeZBatchTokensTyper   r(   r   r.   r9   rF   r   rW   rY   r]   r^   r   r   r   r   r   r   r   <module>   s   

	?
H
(	
+
%Q"