o
    )j                     @   s   d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	d
lmZmZ d	dlmZ ejejejdG dd deZG dd de	jZ dS )    N)DictOptionalTuple)Models)
TorchModel)Tensor)MODELS)update_conf)Tasks   )
GlobalCMVNload_kaldi_cmvn)FSMN)module_namec                       sp   e Zd ZdZ					ddededed	ed
edee f fddZ	dd Z
deeef fddZdd Z  ZS )FSMNDecoratorz? A decorator of FSMN for integrating into modelscope framework N  '
  F	model_dir	cmvn_filebackbone	input_dim
output_dimtrainingc           	         sZ   t  j|g|R i | d| _d| _|r | ||||| _dS |tj|dd| _dS )a  initialize the fsmn model from the `model_dir` path.

        Args:
            model_dir (str): the model path.
            cmvn_file (str): cmvn file
            backbone (dict): params related to backbone
            input_dim (int): input dimension of network
            output_dim (int): output dimension of network
            training (bool): training or inference mode
        Nzconfig.yaml)Zmodel_workspaceZconfig_path)super__init__model	model_cfg
init_modelospathjoin)	selfr   r   r   r   r   r   argskwargs	__class__ l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/kws/nearfield/model.pyr      s   

zFSMNDecorator.__init__c                 C   s   t | dr| j  d S d S )Ntmp_dir)hasattrr(   cleanupr!   r&   r&   r'   __del__<   s   
zFSMNDecorator.__del__returnc                 C   s$   | j dur|dur| j |S | jS )zP
        Args:
            input (torch.Tensor): Input tensor (B, T, D)
        N)r   forwardr   )r!   inputr&   r&   r'   r.   @   s   zFSMNDecorator.forwardc                 C   s   |d urt |\}}tt| t| }nd }d}d }	|d }
|d }|d }|d }|d }|d }|d }|d	 }|d
 }t||
|||||||||}d }d }t|||||	|||}|S )N   input_affine_dim
num_layers
linear_dimproj_dim
left_orderright_orderleft_strideright_strideoutput_affine_dim)r   r   torchZ
from_numpyfloatr   KWSModel)r!   r   r   r   r   meanZistdglobal_cmvnZ
hidden_dimpreprocessingr1   r2   r3   r4   r5   r6   r7   r8   r9   
classifier
activationZ	kws_modelr&   r&   r'   r   J   s8   

zFSMNDecorator.init_model)NNr   r   F)__name__
__module____qualname____doc__strdictintr   boolr   r,   r   r   r.   r   __classcell__r&   r&   r$   r'   r      s.    !
r   c                       s   e Zd ZdZdedededeej deej dejdejd	ejf fd
dZdd Z	dd Z
ejdddejdfdejdejdeejejf fddZdd Z  ZS )r<   a  Our model consists of four parts:
    1. global_cmvn: Optional, (idim, idim)
    2. preprocessing: feature dimension projection, (idim, hdim)
    3. backbone: backbone or feature extractor of the whole network, (hdim, hdim)
    4. classifier: output layer or classifier of KWS model, (hdim, odim)
    5. activation:
        nn.Sigmoid for wakeup word
        nn.Identity for speech command dataset
    idimodimhdimr>   r?   r   r@   rA   c	           	         s>   t    || _|| _|| _|| _|| _|| _|| _|| _	dS )ab  
        Args:
            idim (int): input dimension of network
            odim (int): output dimension of network
            hdim (int): hidden dimension of network
            global_cmvn (nn.Module): cmvn for input feature, (idim, idim)
            preprocessing (nn.Module): feature dimension projection, (idim, hdim)
            backbone (nn.Module): backbone or feature extractor of the whole network, (hdim, hdim)
            classifier (nn.Module): output layer or classifier of KWS model, (hdim, odim)
            activation (nn.Module): nn.Identity for training, nn.Sigmoid for inference
        N)
r   r   rK   rL   rM   r>   r?   r   r@   rA   )	r!   rK   rL   rM   r>   r?   r   r@   rA   r$   r&   r'   r   w   s   

zKWSModel.__init__c                 C   s
   | j  S N)r   to_kaldi_netr+   r&   r&   r'   rO      s   
zKWSModel.to_kaldi_netc                 C   s   | j |S rN   )r   to_pytorch_net)r!   Z
kaldi_filer&   r&   r'   rP      s   zKWSModel.to_pytorch_netr   )Zdtypexin_cacher-   c                 C   sh   | j d ur
|  |}| jd ur| |}| ||\}}| jd ur&| |}| jd ur0| |}||fS rN   )r>   r?   r   r@   rA   )r!   rQ   rR   Z	out_cacher&   r&   r'   r.      s   







zKWSModel.forwardc                 C   s"   | j d ur
| j   | j  d S rN   )r?   fuse_modulesr   r+   r&   r&   r'   rS      s   

zKWSModel.fuse_modules)rB   rC   rD   rE   rH   r   nnModuler   rO   rP   r:   Zzerosr;   r   r   r.   rS   rJ   r&   r&   r$   r'   r<   l   s<    
	 
r<   )!r   systempfiletypingr   r   r   r:   Ztorch.nnrT   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.baser   Zmodelscope.models.builderr   Z"modelscope.utils.audio.audio_utilsr	   Zmodelscope.utils.constantr
   Zcmvnr   r   Zfsmnr   Zregister_moduleZkeyword_spottingZ"speech_kws_fsmn_char_ctc_nearfieldr   rU   r<   r&   r&   r&   r'   <module>   s(   T