o
    )j$                     @   s   d dl Z d dlZd dlZd dlZd dlm  mZ d dlm	Z	 dd Z
dd Zdd ZG d	d
 d
ejjZdddZG dd dZdS )    N)pad_sequencec                 C   s0  t | ddd}| }W d    n1 sw   Y  g }g }tt|D ]O}||  }|d dkrQ||d   }|d dkrP|dt|d  }t|}q%q%|d d	krt||d   }|d dkrt|dt|d  }t|}q%q%t|tj	}	t|tj	}
t|	|
g}t
j|t
j	d
}|S )Nrzutf-8)encodingr   z
<AddShift>   z<LearnRateCoef>   z	<Rescale>Zdtype)open	readlinesrangelensplitlistnparrayastypefloat32torch	as_tensor)	cmvn_fileflinesZ
means_listZ	vars_listiZ	line_itemZadd_shift_lineZrescale_linemeansvarscmvn r   g/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/vc/src/encoder.py	load_cmvn	   s4   
r   c                 C   s`   | j }| j\}}|ddd|f }|ddd|f }| ||7 } | ||9 } | tjS )z"
    Apply CMVN with mvn data
    r   r   N   )deviceshapetotyper   r   )inputsr   r   framedimr   r   r   r   r   
apply_cmvn#   s   
r&   c                 C   s  g }| j d }tt|| }| d |d d d}t|| f} ||d d  }t|D ]F}||||  krM|| || || |  	dd q/||||   }| || d  	d}	t|D ]}
t
|	| d f}	qd||	 q/t|}|tjS )Nr   r   r   )r    intr   ceilrepeatr   Zvstackr
   appendviewZhstackr"   r   )r#   lfr_mlfr_nZ
LFR_inputsTZT_lfrZleft_paddingr   Znum_paddingr$   _ZLFR_outputsr   r   r   	apply_lfr4   s$   

r1   c                       s   e Zd Z													
	d&dededededededededededededef fddZdefddZde	j
fdd Zde	j
d!e	j
fd"d#Zde	j
d!e	j
fd$d%Z  ZS )'WavFrontendN>  hammingP      
   r'   r         ?TFr   fswindown_melsframe_lengthframe_shiftfilter_length_minfilter_length_maxr-   r.   dither
snip_edgesupsacle_samplesc                    s|   t    || _|| _|| _|| _|| _|| _|| _|	| _	|
| _
|| _|| _|| _|| _| jd u r6d | _d S t| j| _d S N)super__init__r9   r:   r;   r<   r=   r>   r?   r-   r.   r   r@   rA   rB   r   r   )selfr   r9   r:   r;   r<   r=   r>   r?   r-   r.   r@   rA   rB   kwargs	__class__r   r   rE   K   s"   

zWavFrontend.__init__returnc                 C   s   | j | j S rC   )r;   r-   )rF   r   r   r   output_sizem   s   zWavFrontend.output_sizeinputc                 K   s  | d}g }g }t|D ][}|| }|| d | }	| jr"|	d }	|	d}	tj|	| j| j| j| j	d| j
| j| jd	}
| jdksF| jdkrNt|
| j| j}
| jd urYt|
| j}
|
 d}||
 || qt|}|dkr|d d d d d d f }||fS t|ddd}||fS )Nr              )num_mel_binsr<   r=   r@   energy_floorwindow_typesample_frequencyrA   r   TZbatch_firstZpadding_value)sizer
   rB   	unsqueezekaldifbankr;   r<   r=   r@   r:   r9   rA   r-   r.   r1   r   r&   r+   r   r   r   )rF   rL   input_lengthsrG   
batch_sizefeats
feats_lensr   waveform_lengthwaveformmatfeat_length	feats_padr   r   r   forwardp   sF   





zWavFrontend.forwardrX   c                 C   s   | d}g }g }t|D ]<}|| }|| d | }| jr"|d }|d}tj|| j| j| j| j	d| j
| jd}	|	 d}
||	 ||
 qt|}t|ddd}||fS )Nr   rM   rN   )rO   r<   r=   r@   rP   rQ   rR   TrS   )rT   r
   rB   rU   rV   rW   r;   r<   r=   r@   r:   r9   r+   r   r   r   )rF   rL   rX   rY   rZ   r[   r   r\   r]   r^   r_   r`   r   r   r   forward_fbank   s2   




zWavFrontend.forward_fbankc           
      C   s   | d}g }g }t|D ];}||d || d d f }| jdks&| jdkr.t|| j| j}| jd ur9t|| j}| d}|| || qt	|}t
|ddd}	|	|fS )Nr   r   TrN   rS   )rT   r
   r-   r.   r1   r   r&   r+   r   r   r   )
rF   rL   rX   rY   rZ   r[   r   r^   r_   r`   r   r   r   forward_lfr_cmvn   s   




zWavFrontend.forward_lfr_cmvn)Nr3   r4   r5   r6   r7   r'   r'   r   r   r8   TF)__name__
__module____qualname__strr(   floatboolrE   rK   r   ZTensorra   rb   rc   __classcell__r   r   rH   r   r2   I   sb    	
"
/r2   r'   c           
         s.   dkrt d t| ts|  } tt| }|d u r/|d u r)tt| }n| }n|d u s5J |tt| ks?J t	j
d|t	jd}|d||}|| d}||k}|d ur|d|ksqJ |d|f dk r{|    t fddt| D }	||	 ||j}|S )Nr   zlength_dim cannot be 0: {}r   r'   c                 3   s(    | ]}|d  fv rt dndV  qdS )r   N)slice).0r   
length_dimr   r   	<genexpr>   s
    
z make_pad_mask.<locals>.<genexpr>)
ValueErrorformat
isinstancer   tolistr(   r   maxrT   r   ZarangeZint64rU   expandnewr%   tupler
   Z	expand_asr!   r   )
lengthsZxsrn   maxlenbsZ	seq_rangeZseq_range_expandZseq_length_expandmaskindr   rm   r   make_pad_mask   s0   
 
r}   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )Encoderc                 C   s*   t |dddd| _tj|t d| _d S )N      rN   )r-   r.   r@   )Zprovider_options)r2   frontonnxruntimeZInferenceSessionZget_available_providersasr_session)rF   Zencoder_front_pathZencoder_onnx_pathr   r   r   rE      s   zEncoder.__init__c                 C   s   t j|ddd }t|}|ddg}t|}tt|g}| 	||\}}|
   }t|d d d d d f  }| jjddg|| 
  dd	d
}t|d S )Nr3   srr   r   r'   Zys_padZolensr   )Zxs_padmasks)Z
input_feed)librosaloadr   reshaper   FloatTensor	IntTensorr   r   r   detachcpunumpyr}   r   runr   )rF   wav_pathwavwav_lenrZ   	feats_lenr   Zoutsr   r   r   	inference   s   
zEncoder.inferencec                 C   sX   t j|ddd }t|}|ddg}t|}tt|g}| 	||\}}|S )Nr3   r   r   r   r'   )
r   r   r   r   r   r   r   r   r   r   )rF   r   r   r   rZ   r   r   r   r   	get_feats  s   
zEncoder.get_featsN)rd   re   rf   rE   r   r   r   r   r   r   r~      s    r~   )Nr'   N)r   r   r   r   r   Ztorchaudio.compliance.kaldiZ
compliancerV   Ztorch.nn.utils.rnnr   r   r&   r1   nnModuler2   r}   r~   r   r   r   r   <module>   s     
#