o
    *ÎjcV  ã                   @   sV  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlm  mZ d dlmZ d dlmZ d dlmZ eƒ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	d%dejdejdejdejdef
dd„Z dejdejdejdejfdd„Z!			d&dejdejde"de#de#d e	eee#  ejf fd!d"„Z$G d#d$„ d$ƒZ%dS )'é    N)Údefaultdict)ÚListÚOptionalÚTuple)ÚReduceOp)Úclip_grad_norm_)Ú
get_loggerc                 C   s¶  |   ¡  | dd¡}| dd¡}| dd¡}| dd¡}	| dd¡}
| d	d
¡}| dd
¡}t d¡ |¡}t|ƒD ]Œ\}}|d
krIt |tj	¡ |dkrP dS |\}}}}}| |¡}| |¡}| |¡}|duro| |¡}| 
d¡}|dkryq:| |ƒ\}}t||||ƒ\}}|| }|| }| ¡  |d
 | dkr°t|  ¡ |ƒ}t |¡r¬| ¡  | ¡  || dkrÆt d ||	|
|||| ¡ ¡¡ q:| d
¡ |d
krÙt |tj	¡ dS dS )z Train one epoch
    Z	grad_clipg      I@Úlog_intervalé
   Úepochr   ÚrankÚ
local_rankÚ
world_sizeé   Z
grad_accumNz3RANK {}/{}/{} TRAIN Batch {}/{} size {} loss {:.6f})ÚtrainÚgetÚtorchÚtensorÚtoÚ	enumerateÚdistÚ
all_reducer   ÚSUMÚsizeÚctc_lossZbackwardr   Ú
parametersÚisfiniteÚstepZ	zero_gradÚloggerÚinfoÚformatÚitemÚfill_)ÚmodelZ	optimizerÚdata_loaderÚdeviceÚwriterÚargsZclipr	   r   r   r   r   Zaccum_batchsÚiterator_stopÚ	batch_idxÚbatchÚkeyÚfeatsÚtargetÚfeats_lengthsÚtarget_lengthsÚnum_uttsÚlogitsÚ_ÚlossÚaccZ	grad_norm© r5   úp/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/audio/kws_utils/batch_utils.pyÚexecutor_train$   sZ   





þÿ€
ÿr7   c                 C   sr  |   ¡  | dd¡}| dd¡}d}d}d}t d¡ |¡}	tjd|d}
| d	d¡}| d
d¡}| dd¡}t ¡ Â t|ƒD ]¥\}}|dkrOt 	|	t
j¡ |	dkrU n¡|\}}}}}| |¡}| |¡}| |¡}|durt| |¡}| d¡}|dkr~q@| |ƒ\}}t||||dƒ\}}t |¡rÉ||7 }|| ¡ 7 }|| ¡ 7 }|
d  | ¡ 7  < |
d  || 7  < |
d  |7  < |
d  | ¡ 7  < || dkråt d ||||||| ¡ | ||| ¡	¡ q@|	 d¡ |dkröt 	|	t
j¡ W d  ƒ n	1 sw   Y  |dkrt 	|
t
j¡ t d |
d ¡¡ |
 d¡}
|
d  ¡ |
d  ¡  |
d  ¡ |
d  ¡  fS )z Cross validation on
    r	   r
   r   r   r   ç        )é   )r%   r   r   r   NTé   é   zORANK {}/{}/{} CV Batch {}/{} size {} loss {:.6f} acc {:.2f} history loss {:.6f}zTotal utts number is {}Úcpu)Úevalr   r   r   r   ZzerosÚno_gradr   r   r   r   r   r   r   r   Úsumr!   r   r   r    r"   )r#   r$   r%   r'   r	   r   Znum_seen_uttsZnum_seen_tokensZ
total_lossr(   Zcounterr   r   r   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r5   r6   Úexecutor_cv\   sx   






ÿ
ýþ€
€Ø
*
 
ÿr@   c           %      C   sÆ  |  dd¡dusJ dƒ‚tj |d d¡}|  dd¡}|  ¡  d}d}	t ¡ 0 t|dd	d
}
t|ƒD ]ô\}}t	j	 
¡ }|\}}}}}| |¡}| |¡}|durY| |¡}| d¡}|dkrcq6| |ƒ\}}| d¡}| ¡ }t	j	 
¡ }tt|ƒƒD ]ƒ}|| }|| d|| … }t||| |ƒ}d}d}|D ]M}|d }|d }t|ƒt|ƒks¯J ‚| ¡ D ])} ||  d }!t||!ƒ}"|"dkrÜ| }t|"|"t|!ƒ ƒD ]
}#|||# d 9 }qÏ nq³|durèt |¡} nq›|durø|
 d |||¡¡ q}|
 d |¡¡ q}t	j	 
¡ }$|||  ¡ 7 }|	|$|  ¡ 7 }	|| dkr*t d |¡¡ tj ¡  q6t d |d |	d ¡¡ W d  ƒ n1 sCw   Y  W d  ƒ |S W d  ƒ |S 1 s\w   Y  |S )z Test model with decoder
    Ztest_dirNz2Please config param: test_dir, to store score filez	score.txtr	   r
   r8   ÚwÚutf8)Úencodingr   r:   ç      ð?Ztoken_idéÿÿÿÿÚprobz{} detected {} {:.3f}
z{} rejected
zProgress batch {}z5Total infer cost {:.2f} mins, decode cost {:.2f} minsg      N@)r   ÚosÚpathÚjoinr=   r   r>   Úopenr   ÚdatetimeÚnowr   r   Úsoftmaxr<   ÚrangeÚlenÚctc_prefix_beam_searchÚkeysÚ
is_sublistÚmathÚsqrtÚwriter    Útotal_secondsr   r   ÚsysÚstdoutÚflush)%r#   r$   r%   Zkeywords_tokenZkeywords_idxsetr'   Zscore_abs_pathr	   Zinfer_secondsZdecode_secondsZfoutr)   r*   Zbatch_start_timerQ   r,   r-   r.   r/   r0   r1   r2   Zinfer_end_timeÚir+   ÚscoreÚhypsZhit_keywordZ	hit_scoreZone_hypZ
prefix_idsZprefix_nodesÚwordÚlabÚoffsetÚidxZdecode_end_timer5   r5   r6   Úexecutor_test¢   s    ÿ







ÿ
ú
þÿ
ÿÿÿÿ
€þÿ*ÂD¼D¼Dra   c                 C   s”   t | ƒt |ƒk r
dS t | ƒt |ƒkr| |krdS dS tt | ƒt |ƒ ƒD ]#}| | |d krGtt |ƒƒD ]}| ||  || krB nq4|  S q$dS )NrE   r   )rO   rN   )Z	main_listZ
check_listrZ   Újr5   r5   r6   rR   õ   s   ÿ€rR   Fr1   r-   Úlogits_lengthsr/   Úneed_accc                 C   sH   d}|rt | |||ƒ}|  dd¡} |  d¡} tj| |||dd}||fS )zá CTC Loss
    Args:
        logits: (B, D), D is the number of keywords plus 1 (non-keyword)
        target: (B)
        logits_lengths: (B)
        target_lengths: (B)
    Returns:
        (float): loss of current batch
    r8   r   r   r:   r?   )Z	reduction)Úacc_utteranceZ	transposeZlog_softmaxÚFr   )r1   r-   rc   r/   rd   r4   r3   r5   r5   r6   r     s   

ÿr   Úlogits_lengthÚtarget_lengthc                 C   s$  | d u rdS |   d¡} |  ¡ } | ¡ }d}d}d}d}tƒ }t|  d¡ƒD ]^}	| |	 d ||	 … }
t|
||	 d ddƒ}dd„ ||	 d ||	 …  ¡ D ƒ}g }t|ƒdkr_dd„ |d d D ƒ}| ||¡}|d dkrƒ||d 7 }||d	 7 }||d
 7 }||d 7 }q%t	|| | | ƒd | S )Nr   r:   r;   é   c                 S   ó   g | ]}t |ƒ‘qS r5   ©Ústr©Ú.0r!   r5   r5   r6   Ú
<listcomp>5  ó    z!acc_utterance.<locals>.<listcomp>c                 S   rj   r5   rk   rm   r5   r5   r6   ro   8  rp   ÚallÚinsÚsubÚdelg      Y@)
rM   r<   Ú
CalculatorrN   r   rP   ÚtolistrO   Ú	calculateÚfloat)r1   r-   rg   rh   Z
total_wordZ	total_insZ	total_subZ	total_delZ
calculatorrZ   r[   r\   r^   ÚrecÚresultr5   r5   r6   re   $  s>   
"€ÿÿÿre   r;   é   Úkeywords_tokensetÚscore_beam_sizeÚpath_beam_sizeÚreturnc              
   C   sä  |   d¡}| }tƒ ddg ffg}td|ƒD ]R}|| }	tdd„ ƒ}
|	 |¡\}}g }g }t| ¡ | ¡ ƒD ])\}}|durQ|dkrP||v rP| |¡ | |¡ q6|dkr_| |¡ | |¡ q6t|ƒdkrgq|D ]í}|	|  	¡ }|D ]â\}\}}}t|ƒdkr„|d nd}|dkr§|
| \}}}|||  ||  }| 
¡ }|||f|
|< qs||krtj|dd	d
sà|
| \}}}|||  }| 
¡ }||d d krÙ||d d< ||d d< |||f|
|< tj|dd	d
s||f }|
| \}}}|||  }| 
¡ }| t|||d¡ |||f|
|< qs||f }|
| \}}}|r6||d d kr5||d d< ||d d< n| 
¡ }| t|||d¡ |||  ||  }|||f|
|< qsqit|
 ¡ dd„ dd}
|
d|… }qdd„ |D ƒ}|S )ay   CTC prefix beam search inner implementation

    Args:
        logits (torch.Tensor): (1, max_len, vocab_size)
        logits_lengths (torch.Tensor): (1, )
        keywords_tokenset (set): token set for filtering score
        score_beam_size (int): beam size for score
        path_beam_size (int): beam size for path

    Returns:
        List[List[int]]: nbest results
    r   rD   r8   c                   S   s
   ddg fS )Nr8   r5   r5   r5   r5   r6   Ú<lambda>b  s   
 z(ctc_prefix_beam_search.<locals>.<lambda>Ngš™™™™™©?rE   gíµ ÷Æ°>)Zabs_tolrF   Úframe)Útokenr   rF   c                 S   s   | d d | d d  S )Nr   r   r5   )Úxr5   r5   r6   r€   ¦  s    T)r+   Úreversec                 S   s6   g | ]}|d  |d d  |d d  |d d f‘qS )r   r   r:   r5   )rn   Úyr5   r5   r6   ro   ª  s   6 z*ctc_prefix_beam_search.<locals>.<listcomp>)r   ÚtuplerN   r   ZtopkÚziprv   ÚappendrO   r!   ÚcopyrS   ÚiscloseÚdictÚsortedÚitems)r1   rc   r|   r}   r~   ÚmaxlenZ	ctc_probsZcur_hypsÚtZprobsZ	next_hypsZtop_k_probsZtop_k_indexZfilter_probsZfilter_indexrF   r`   ÚsZpsÚprefixZpbZpnbZ	cur_nodesÚlastZn_pbZn_pnbÚnodesZn_prefixr\   r5   r5   r6   rP   E  sŽ   
ÿ

€

€


ÿ€
€
ÿÙ*ÿrP   c                   @   s4   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ ZdS )ru   c                 C   s>   i | _ g | _i | _d| jd< d| jd< d| jd< d| jd< d S )Nr   Úcorr   rs   rt   rr   )ÚdataÚspaceÚcost©Úselfr5   r5   r6   Ú__init__°  s   


zCalculator.__init__c                 C   s  |  dd¡ |  dd¡ t| jƒt|ƒk r$| j g ¡ t| jƒt|ƒk s| jD ]'}|D ]
}d|d< d|d< q+t|ƒt|ƒk rN| dddœ¡ t|ƒt|ƒk s>q'tt|ƒƒD ]}|| j| d d< d| j| d d< qUtt|ƒƒD ]}|| jd | d< d| jd | d< qpd| jd d d< |D ]}|| jvr¨t|ƒdkr¨dddddd	œ| j|< q|D ]}|| jvrÃt|ƒdkrÃdddddd	œ| j|< q«t|ƒD ]™\}}t|ƒD ]\}}	|dksÜ|dkrÝqÐtj}
d
}| j|d  | d | j	d  }d}||
k rü|}
|}| j| |d  d | j	d  }d}||
k r|}
|}||	kr1| j|d  |d  d | j	d  }d}n| j|d  |d  d | j	d  }d}||
k rN|}
|}|
| j| | d< || j| | d< qÐqÈg g ddddddœ}t|ƒd }t|ƒd }	 | j| | d dkràt|| ƒdkrÂ| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d   d|| ¡ |d   d|| ¡ |d }|d }n,| j| | d dkrFt|| ƒdkr)| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d   d|| ¡ |d   d|| ¡ |d }|d }nÆ| j| | d dkr¦t|| ƒdkr| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d   d|| ¡ |d   dd¡ |d }nf| j| | d dkrìt|| ƒdkrÕ| j||  d d | j||  d< |d d |d< |d   dd¡ |d   d|| ¡ |d }n | j| | d dkrû	 |S t
dj||| j| | d dƒ qy)Nr   Ú r   ZnonÚerror)r   rœ   rt   rr   ©rq   r”   rs   rr   rt   Únoner   r”   rs   )r^   ry   rq   r”   rs   rr   rt   Trq   r^   ry   z<this should not happen , i = {i} , j = {j} , error = {error})rZ   rb   rœ   )ÚinsertrO   r–   rˆ   rN   r•   r   rW   Úmaxsizer—   Úprintr    )r™   r^   ry   ÚrowÚelementrZ   rb   r‚   Z	lab_tokenZ	rec_tokenZmin_distZ	min_errorr   rœ   rz   r5   r5   r6   rw   ¹  sò   ÿ

ÿ€û€û€  

$$
çù	$$$$
$$
$
ýÿÿØzCalculator.calculatec                 C   s¢   ddddddœ}| j D ]C}|d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< q|S ©Nr   r   rq   r”   rs   rr   rt   ©r•   )r™   rz   r‚   r5   r5   r6   Úoverall3  s   
zCalculator.overallc                 C   sª   ddddddœ}|D ]H}|| j v rR|d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< q
|S r¤   r¥   )r™   r•   rz   r‚   r5   r5   r6   Úcluster=  s   
€zCalculator.clusterc                 C   s   t | j ¡ ƒS )N)Úlistr•   rQ   r˜   r5   r5   r6   rQ   H  s   zCalculator.keysN)Ú__name__Ú
__module__Ú__qualname__rš   rw   r¦   r§   rQ   r5   r5   r5   r6   ru   ®  s    	z
ru   )F)Nr;   r{   )&rK   rS   rG   rW   Úcollectionsr   Útypingr   r   r   ÚnumpyÚnpr   Ztorch.distributedÚdistributedr   Ztorch.nn.functionalÚnnZ
functionalrf   r   Ztorch.nn.utilsr   Zmodelscope.utils.loggerr   r   r7   r@   ra   rR   ZTensorÚboolr   re   ÚsetÚintrP   ru   r5   r5   r5   r6   Ú<module>   sd   8FS
üÿþý
üÿ
ÿ$ûÿþýüû
úi