o
    *j                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlZ	d dl
Zd dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZ d	Zd
ZdZd
ZdZd
Z ej!ej"ej#dG dd deZ$dS )    N)AnyDict)File)	Pipelines)
OutputKeys)InputPipeline)	PIPELINES)	ModelFileTasksi  i  Zhammingi   )module_namec                       s   e Zd ZdZdZ fddZdedeee	f fddZ
d	d
 Zdeee	f deee	f fddZdd Zdeee	f deee	f fddZ  ZS )ANSDFSMNPipelineap  ANS (Acoustic Noise Suppression) inference pipeline based on DFSMN model.

    Args:
        stream_mode: set its work mode, default False
        In stream model, it accepts bytes as pipeline input that should be the audio data in PCM format.
        In normal model, it accepts str and treat it as the path of local wav file or the http link of remote wav file.
    i  c           	         s  t  jdd|i| tj| jjtj}tj	|r*t
j|| jdd}| j| | j  |dd| _| jrntt| jjd   d }tj|d| _ttd D ]}| jd	 qRtd t d | _d| _tt d | _t
jtd| jd
  fdd}dd }|| _|| _d S )NmodelT)Zmap_locationZweights_onlystream_modeF      )maxlen    )Zperiodicdevicec              	      s   t j| tttd ddS )NF)centerwindowZreturn_complex)torchstftN_FFT
HOP_LENGTHSTFT_WIN_LEN)xr    n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/audio/ans_dfsmn_pipeline.pyr   C   s   z'ANSDFSMNPipeline.__init__.<locals>.stftc                 S   s   t j| tttd|dS )NF)Z
hop_lengthZ
win_lengthr   r   length)librosaistftr   r   WINDOW_NAME_HAM)r   slenr   r   r   r"   M   s   z(ANSDFSMNPipeline.__init__.<locals>.istftr   ) super__init__ospathjoinr   Z	model_dirr
   ZTORCH_MODEL_BIN_FILEexistsr   loadr   Zload_state_dictevalgetr   WINLENSTRIDEZlordercollectionsdequebufferrangeappendbyte_length_remainfirst_forwardtensor_give_up_lengthZhamming_windowr   r   r"   )	selfr   kwargsZmodel_bin_file
checkpointZbyte_buffer_lengthir   r"   	__class__r   r   r&   )   s6   

	
zANSDFSMNPipeline.__init__inputsreturnc                 K   s  | j rt|tstdt|| jjkr"tdt| d| jj g }d}| jt| | t	d krt	d | j }t
||| D ]}| j|| jdtjdd qAt }| jD ]}|| qZtj| tjd	}	t|	tj}
||
 d| _||7 }| jt| | t	d ks3t
|t|D ]}| j|| jdtjdd |  jd7  _qd
|iS t|trt|}nt|tr|}n
tdt| d| |}
d
|
iS )Nz"Only support bytes in stream mode.zinputs length too large: z > r   r   r   F)	byteordersignedZdtypeaudiozUnsupported type .)r   
isinstancebytes	TypeErrorlenr2   r   
ValueErrorr5   r/   r3   r4   to_bytessysr@   ioBytesIOwritenp
frombuffer	getbufferint16r   
from_numpytypeFloatTensorstrr   readbytes2tensor)r8   r>   Zpreprocess_paramsZtensor_listZcurrent_indexZbyte_length_to_addr;   bytes_iobdatadata_tensorZ
data_bytesr   r   r   
preprocessY   sf   






zANSDFSMNPipeline.preprocessc                 C   sx   t t|\}}|tj}t|jdkr|d d df }|| j	kr-t
j||| j	d}|d }t|tj}|S )Nr   r   )Zorig_srZ	target_sri   )sfrW   rL   rM   astyperO   Zfloat32rH   shapeSAMPLE_RATEr!   Zresampler   rS   rT   rU   )r8   Z
file_bytesZdata1fsr[   r\   r   r   r   rX      s   
zANSDFSMNPipeline.bytes2tensorc                 K   s   | j rBt }|d D ]1}| |}| jr!|d | j  }d| _n|t d  }|| j| j  }||t	j
  q| }n|d }| |}|t	j
 }tj|iS )NrC   F)r   rL   rM   _forwardr6   r7   r.   rN   r_   rO   rR   tobytesgetvaluer   
OUTPUT_PCM)r8   r>   Zforward_paramsrY   origin_audio
masked_sigZoutputsr   r   r   forward   s"   



zANSDFSMNPipeline.forwardc           
   
   C   s   t  < |d}dd l}|jjj|dddd| jtd}|d}| 	|}| 
|}|ddd}||  }W d    n1 sCw   Y  |  }|d d d d df d	|d d d d df   }| |t|}	|	S )
Nr   g      ?g      D@g      4@x   )ZditherZframe_lengthZframe_shiftZnum_mel_binsZsample_frequencyZwindow_typer   r   y              ?)r   Zno_gradZ	unsqueeze
torchaudioZ
complianceZkaldiZfbankra   r#   r   r   Zpermutecpudetachnumpyr"   rH   )
r8   rg   Zaudio_inrk   ZfbanksZmasksZspectrumZmasked_specZmasked_spec_complexrh   r   r   r   rc      s,   




0zANSDFSMNPipeline._forwardc                 K   s<   | j sd| v rt|d tj|tj tjd| j	 |S )NZoutput_pathrB   )
r   keysr^   rN   rO   rP   r   rf   rR   ra   )r8   r>   r9   r   r   r   postprocess   s   zANSDFSMNPipeline.postprocess)__name__
__module____qualname____doc__ra   r&   r   r   rV   r   r]   rX   ri   rc   rp   __classcell__r   r   r<   r   r      s    0(

*r   )%r0   rL   r'   rK   typingr   r   r!   rn   rO   Z	soundfiler^   r   Zmodelscope.fileior   Zmodelscope.metainfor   Zmodelscope.outputsr   Zmodelscope.pipelines.baser   r   Zmodelscope.pipelines.builderr	   Zmodelscope.utils.constantr
   r   r   r   r#   r   r.   r/   Zregister_moduleZacoustic_noise_suppressionZspeech_dfsmn_ans_psm_48k_causalr   r   r   r   r   <module>   s4   