o
    *j                     @   sN   d dl Z d dlZd dlZd dlZd
ddZd
ddZdd ZG dd	 d	ZdS )    Nc                 C   sP   i }t  |d< tj |d< t |d< | d ur&| jdkr&t j| |d< |S NZrng_state_torchZrng_state_npZrng_state_rndcudaZrng_state_torch_cuda)torchZget_rng_statenprandomZ	get_stategetstatetyper   )devicerandom_states r   e/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/peer/sas_utils.pyget_random_states   s   r   c                 C   sZ   t | d  tj| d  t| d  |d ur)|jdkr+t j| d  d S d S d S r   )r   Zset_rng_stater   r   Z	set_statesetstater   r   )r
   r	   r   r   r   set_random_states   s   r   c                 C   s  | d u rd S ddg}t | r&t |  rd|d< t |  r%d|d< nTt| tu rztt| D ]+}t | | r]t | |  rL|d  d7  < t | |  r]|d  d7  < q2|d dkrl|d  d7  < |d dkrz|d  d7  < t	|dkr|S d S )Nr      
   )
r   Z	is_tensorisnananyisinfr   tuplerangelensum)dataresultir   r   r   check_nan_inf%   s,   
r   c                   @   s&   e Zd ZdddZdd Zdd ZdS )	SequenceSideInfoNc                    s   |d ur| _ nddlm} |d _ tjj  _ fddt	 j j
D tfddt	tD  _dd D }tj|tjd _d S )	Nr   )ElectraTokenizerzgoogle/electra-small-generatorc                    s   g | ]	} j |gqS r   )	tokenizerdecode.0r   )selfr   r   
<listcomp>L   s    z-SequenceSideInfo.__init__.<locals>.<listcomp>c                    s$   g | ]} | d d dkr|qS )r      ##r   r!   )tokensr   r   r$   Q   s   $ c                 S   s\   g | ]*}|d  dkr|d dkrd n|d d dkr%dt dt|d  nt dt|qS )r   []r%   r&   r      )minr   )r"   tr   r   r   r$   R   s    
Zdtype)r   Ztransformersr   Zfrom_pretrainednltktokenizeZpunktZPunktSentenceTokenizersen_tokenizerr   Z
vocab_sizesetr   ind_subtokensr   tensorint8
len_tokens)r#   r   r   tmpr   )r#   r'   r   __init__B   s"   

zSequenceSideInfo.__init__c                    s   | j |}tdd | j|d D  | dd     d< t fddtt	 D }t fddtt	 D }t||fS )Nc                 S   s   g | ]}t |d  qS )r%   )r   )r"   xr   r   r   r$   ]   s    
z1SequenceSideInfo.getSenTokIdx.<locals>.<listcomp>Z	input_idsr   r   c                    s$   g | ]}|t j | t jd  qS r.   )r   Zonesr5   r!   Zsen_lengthsr   r   r$   d   s    c                    s    g | ]}t j | t jd qS r:   )r   Zaranger5   r!   r;   r   r   r$   h   s    )
r1   r0   r   arrayr   Zbatch_encode_plusr   Zconcatenater   r   )r#   sentence_position_embedding
inputs_strZseq_len_totalZ	sentencesZidx_senZidx_tokr   r;   r   getSenTokIdxZ   s   


zSequenceSideInfo.getSenTokIdxc                    s  d}t  d ttjfrd}t  tjdr4j }tjt	 fdd|D  j
d}ntjt	 fdd  D  j
d}t } jd	 }|d d d|f |d
< |d d d	| d| f |d< dkrtj dd\}}	jt|}
t|
dkrt fdd|
D jdd }n	tj jtjd}d|d d df< |d d d	d f }td	dD ]"}t||k|d d ddf |k}t|dkr n|d	 ||< q||d< j   }||d< |r| D ]
}||  ||< q|S )NFr   Tbatch_decodec                    s    g | ]} | jd  qS r   )r?   shape)r"   Z	input_str	inputs_idr#   r=   r   r   r$   x   s    z;SequenceSideInfo.generate_seq_side_info.<locals>.<listcomp>)r	   c                    s(   g | ]} j| jd  qS rA   )r?   r   r    rB   )r"   Z	input_orirC   r   r   r$      s    
r   Z ss_sentence_position_in_sequencer%   Zss_token_position_in_sentence)Zreturn_inversec                    s   g | ]} |kqS r   r   )r"   st)rD   r   r   r$      s    )Zaxisr.      r)   Zss_token_position_in_whole_wordZss_token_string_length)
isinstancelistr   Zndarrayr   r4   hasattrr   r@   r<   r	   numpydictrB   uniquer3   intersectionr2   r   stackr   charZzerosr5   r   logical_andr6   longkeys)r#   r=   rD   Zis_np_arrayr>   Zsen_tok_idxZside_info_dictZ
seq_lengthrL   _r3   Z
idx_tok_wwZidx_tok_ww_1r   posZinputs_str_lenkeyr   rC   r   generate_seq_side_infoo   st   
	

z'SequenceSideInfo.generate_seq_side_infoN)__name__
__module____qualname__r8   r?   rV   r   r   r   r   r   @   s    
r   rW   )	r   r/   rJ   r   r   r   r   r   r   r   r   r   r   <module>   s    

