o
    )jj                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlm  mZ	 d dlm
Z
mZ ddlmZ dZd0ddZdd	 ZG d
d dejZG dd de
ZG dd dejZG dd dejjZG dd dejZd1ddZd2ddZG dd dejjZG dd dejjZG dd dejZ	 G d d! d!ejjZG d"d# d#ejjZG d$d% d%ejjZG d&d' d'ejjZG d(d) d)ejjZ G d*d+ d+ejZ!G d,d- d-ejZ"G d.d/ d/ejZ#dS )3    N)Conv1dConvTranspose1d   )	Generator皙?c                    s   fdd  fddt fddt| D }t |ddddd	f |ddddd	f< t |ddd
dd	f |ddd
dd	f< |durQd||< t|S )z Sinusoid position encoding tablec                    s   | t dd|d     S )N'     )nppower)positionZhid_idx)d_hid g/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/vc/src/vocoder.py	cal_angle   s   z.get_sinusoid_encoding_table.<locals>.cal_anglec                    s    fddt D S )Nc                    s   g | ]} |qS r   r   ).0Zhid_j)r   r   r   r   
<listcomp>   s    zKget_sinusoid_encoding_table.<locals>.get_posi_angle_vec.<locals>.<listcomp>)ranger   )r   r   r   r   get_posi_angle_vec   s   z7get_sinusoid_encoding_table.<locals>.get_posi_angle_vecc                    s   g | ]} |qS r   r   )r   Zpos_i)r   r   r   r      s    z/get_sinusoid_encoding_table.<locals>.<listcomp>Nr   r   r           )r	   arrayr   sincostorchZFloatTensor)Z
n_positionr   Zpadding_idxZsinusoid_tabler   )r   r   r   r   get_sinusoid_encoding_table   s   ..
r   c                 C   s   |   d d }|   dd  \}}t||}|| }|| }||d  | }|| }	| jg |d|R  }
td|	d||}| | }|	 d}| j
g ||	|R  }|j}|d|||
 |jg |dR  }|S )Nr   r   )sizemathgcdviewr   ZarangeZunfoldZ
new_tensorlong
contiguousZ	new_zerosdeviceZ
index_add_to)signalZ
frame_stepZouter_dimensionsframesZframe_lengthZsubframe_lengthZsubframe_stepZsubframes_per_frameZoutput_sizeZoutput_subframesZsubframe_signalframeresultZdevice_of_resultr   r   r   overlap_and_add'   s(   r)   c                       s$   e Zd Z fddZdd Z  ZS )	LastLayerc	           	         sb   t t|   ttj|di || _ttj||d d fi || _tjj||||d| _	d S )Nr   r   biasr   )
superr*   __init__getattrr   nn
activationpadr   conv)	selfin_channelsout_channelsnonlinear_activationnonlinear_activation_paramsr2   kernel_size
pad_paramsr,   	__class__r   r   r.   D   s   "zLastLayer.__init__c                 C   s"   |  |}| |}| |}|S N)r1   r2   r3   r4   xr   r   r   forwardN   s   


zLastLayer.forward__name__
__module____qualname__r.   r@   __classcell__r   r   r;   r   r*   B   s    
r*   c                       s    e Zd ZdZ fddZ  ZS )	Conv1d1x1z*1x1 Conv1d with customized initialization.c                    s    t t| j||ddd|d dS )zInitialize 1x1 Conv1d module.r   r   )r9   paddingdilationr,   N)r-   rF   r.   )r4   r5   r6   r,   r;   r   r   r.   X   s   

zConv1d1x1.__init__)rB   rC   rD   __doc__r.   rE   r   r   r;   r   rF   U   s    rF   c                       s&   e Zd Zd fdd	Zdd Z  ZS )
LastLinearTc                    sX   t t|   tjdd| _t|| _t|||d| _	t|| _
t|||d| _d S )N皙?)negative_sloper+   )r-   rJ   r.   r0   	LeakyReLUr1   ZBatchNorm1dbn_1rF   linear_1bn_2linear_2)r4   Zhidden_channelout_channelr,   r;   r   r   r.   e   s   zLastLinear.__init__c                 C   s@   |  |}| |}| |}|  |}| |}| |}|S r=   )r1   rN   rO   rP   rQ   r>   r   r   r   r@   m   s   





zLastLinear.forwardTrA   r   r   r;   r   rJ   c   s    rJ   c                       *   e Zd ZdZd fdd	Zdd Z  ZS )	Stretch2dzStretch2d module.nearestc                    s$   t t|   || _|| _|| _dS )zInitialize Stretch2d module.
        Args:
            x_scale (int): X scaling factor (Time axis in spectrogram).
            y_scale (int): Y scaling factor (Frequency axis in spectrogram).
            mode (str): Interpolation mode.
        N)r-   rU   r.   x_scaley_scalemode)r4   rW   rX   rY   r;   r   r   r.   z   s   
zStretch2d.__init__c                 C   s   t j|| j| jf| jdS )zCalculate forward propagation.
        Args:
            x (Tensor): Input tensor (B, C, F, T).
        Returns:
            Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale),
        )Zscale_factorrY   )FZinterpolaterX   rW   rY   r>   r   r   r   r@      s   zStretch2d.forward)rV   rB   rC   rD   rI   r.   r@   rE   r   r   r;   r   rU   w   s    rU   c                       s*   e Zd Z		d fdd	Zdd Z  ZS )UpsampleLayerr   Tc	           	   	      s<   t t|   t|ddd| _tj|||||||d| _d S )Nr   rV   )rY   rH   r,   )r-   r\   r.   rU   upsampler0   r   r3   )	r4   Z
in_channelrR   upsample_rater9   striderG   rH   r,   r;   r   r   r.      s   	zUpsampleLayer.__init__c                 C   s$   |  |d}| |d}|S )Nr   )r^   	unsqueezer3   squeezer>   r   r   r   r@      s   zUpsampleLayer.forward)r   TrA   r   r   r;   r   r\      s
    	r\   r   {Gz?c                 C   s.   | j j}|ddkr| jj|| d S d S )NZConvr   )r<   rB   findweightdatanormal_)mmeanZstd	classnamer   r   r   init_weights   s   rk   c                 C   s   t | | | d S Nr   )int)r9   rH   r   r   r   get_padding   s   rn   c                       &   e Zd Zd fdd	Zdd Z  ZS )		ResBlock1   r   rq      Tc                    s   t t|   tt|||d|d t||d |dt|||d|d t||d |dt|||d|d t||d |dg| _tt|||ddt|d|dt|||ddt|d|dt|||ddt|d|dg| _d S )Nr   r   rH   rG   r,   r   )	r-   rp   r.   r0   
ModuleListr   rn   convs1convs2r4   channelsr9   rH   r,   r;   r   r   r.      sv   zResBlock1.__init__c                 C   sL   t | j| jD ]\}}t|t}||}t|t}||}|| }q|S r=   )ziprv   rw   rZ   
leaky_reluLRELU_SLOPE)r4   r?   c1c2xtr   r   r   r@      s   
zResBlock1.forward)rq   rr   TrA   r   r   r;   r   rp      s    8rp   c                       ro   )		ResBlock2rq   r   rq   Tc                    sb   t t|   tt|||d|d t||d |dt|||d|d t||d |dg| _d S )Nr   r   rt   )r-   r   r.   r0   ru   r   rn   convsrx   r;   r   r   r.      s*   zResBlock2.__init__c                 C   s,   | j D ]}t|t}||}|| }q|S r=   )r   rZ   r{   r|   )r4   r?   cr   r   r   r   r@     s
   

zResBlock2.forward)rq   r   TrA   r   r   r;   r   r      s    r   c                       rT   )BasisSignalLayerzBasis Signal@   c                    sD   t t|   tj|d|ddd| _t|| j_|| _	d S )Nr   r   Fr+   )
r-   r   r.   r0   Linearr   layer	Parameterre   L)r4   Zbasis_signal_weightr   r;   r   r   r.     s   
zBasisSignalLayer.__init__c                 C   s   |  |}t|| jd }|S rl   )r   r)   r   )r4   re   sourcer   r   r   r@   &  s   
zBasisSignalLayer.forward)r   r[   r   r   r;   r   r     s    	r   c                       s6   e Zd ZdZdddddif fdd	Zd	d
 Z  ZS )CausalConv1dz3CausalConv1d module with customized initialization.r   TZConstantPad1dvaluer   c                    sL   t t|   ttj||d | fi || _tjj|||||d| _dS )zInitialize CausalConv1d module.r   r]   N)	r-   r   r.   r/   r   r0   r2   r   r3   )r4   r5   r6   r9   rH   r,   r2   r:   r;   r   r   r.   2  s   	zCausalConv1d.__init__c                 C   s,   |  | |ddddd|df S )zCalculate forward propagation.
        Args:
            x (Tensor): Input tensor (B, in_channels, T).
        Returns:
            Tensor: Output tensor (B, out_channels, T).
        Nr   )r3   r2   r   r>   r   r   r   r@   E  s   ,zCausalConv1d.forwardr[   r   r   r;   r   r   /  s    r   c                       s,   e Zd ZdZ	d fdd	Zdd Z  ZS )CausalConvTranspose1dz<CausalConvTranspose1d module with customized initialization.Tc                    s0   t t|   tjj|||||d| _|| _dS )z(Initialize CausalConvTranspose1d module.r+   N)r-   r   r.   r   r0   r   deconvr`   )r4   r5   r6   r9   r`   r,   r;   r   r   r.   R  s
   

zCausalConvTranspose1d.__init__c                 C   s$   |  |ddddd| j f S )zCalculate forward propagation.
        Args:
            x (Tensor): Input tensor (B, in_channels, T_in).
        Returns:
            Tensor: Output tensor (B, out_channels, T_out).
        N)r   r`   r>   r   r   r   r@   ^  s   $zCausalConvTranspose1d.forwardrS   r[   r   r   r;   r   r   O  s
    r   c                	       s@   e Zd ZdZdddddddid	i d
f	 fdd	Zdd Z  ZS )ResidualStackz+Residual stack module introduced in MelGAN.rq       r   TrM   rL   rK   ZReflectionPad1dFc
           
         s  t t|   |	sT|d d dksJ dtjttj|d	i |ttj||d d | fi |tjj|||||dttj|d	i |tjj||d|d| _n-tjttj|d	i |t	|||||||dttj|d	i |tjj||d|d| _tjj||d|d| _
dS )
a  Initialize ResidualStack module.
        Args:
            kernel_size (int): Kernel size of dilation convolution layer.
            channels (int): Number of channels of convolution layers.
            dilation (int): Dilation factor.
            bias (bool): Whether to add bias parameter in convolution layers.
            nonlinear_activation (str): Activation function module name.
            nonlinear_activation_params (dict): Hyperparameters for activation function.
            pad (str): Padding function module name before dilated convolution layer.
            pad_params (dict): Hyperparameters for padding function.
            use_causal_conv (bool): Whether to use causal convolution.
        r   r   r   z$Not support even number kernel size.r]   r+   )rH   r,   r2   r:   Nr   )r-   r   r.   r   r0   Z
Sequentialr/   r   stackr   
skip_layer)
r4   r9   ry   rH   r,   r7   r8   r2   r:   Zuse_causal_convr;   r   r   r.   k  sr   zResidualStack.__init__c                 C   s   |  || | S )zCalculate forward propagation.
        Args:
            c (Tensor): Input tensor (B, channels, T).
        Returns:
            Tensor: Output tensor (B, chennels, T).
        )r   r   )r4   r   r   r   r   r@     s   zResidualStack.forwardr[   r   r   r;   r   r   h  s    Ar   c                
       sv   e Zd Zdg dg dddg dg dg dg dgddf	 fd	d
	Zdd Zdd Zdd Zdd Zdd Z  Z	S )HiFiGANGeneratorP   rq         )rs      r   r      1)
      r   r   rr   Tc
                    sr  t t|   t|| _t|| _t||ddd|	d| _|dkr"tnt	}
t
 | _tt||D ]B\}\}}| j|du rTt|d|  |d|d   ||d|d |	dnt|d|  |d|d   |||d |d  |d |	d	 q0t
 | _tt| jD ]$}|d|d   }tt||D ]\}\}}| j|
||||	d
 qqt|dddd|	d| _|   |   d S )Nr   r   rq   rG   r,   r   Fr   r_   r9   r`   rG   r,   rG   Zoutput_paddingr,   r+   )r-   r   r.   lennum_kernelsnum_upsamplesr   conv_prerp   r   r0   ru   ups	enumeraterz   appendr\   r   	resblocksr   	conv_postapply_weight_normreset_parameters)r4   input_channelsresblock_kernel_sizesupsample_ratesupsample_initial_channelresblock_typeupsample_kernel_sizesresblock_dilation_sizestransposedconvr,   resblockiukchjdr;   r   r   r.     s\   





zHiFiGANGenerator.__init__c                 C      dd }|  | dS )z:Remove weight normalization module from all of the layers.c                 S   s<   zt d|  d tjj|  W d S  ty   Y d S w )NzWeight norm is removed from .)loggingdebugr   r0   utilsremove_weight_norm
ValueErrorrh   r   r   r   _remove_weight_norm  s   z@HiFiGANGenerator.remove_weight_norm.<locals>._remove_weight_normNapply)r4   r   r   r   r   r     s   z#HiFiGANGenerator.remove_weight_normc                 C   r   )z9Apply weight normalization module from all of the layers.c                 S   sD   t | tjjst | tjjr tjj|  td|  d d S d S )NzWeight norm is applied to r   )	
isinstancer   r0   r   r   r   Zweight_normr   r   r   r   r   r   _apply_weight_norm  s   z>HiFiGANGenerator.apply_weight_norm.<locals>._apply_weight_normNr   )r4   r   r   r   r   r     s   z"HiFiGANGenerator.apply_weight_normc                 C   r   )zReset parameters.
        This initialization follows official implementation manner.
        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
        c                 S   sF   t | tjjst | tjjr!| jjdd t	d|  d d S d S )Nr   rc   zReset parameters in r   )
r   r   r0   r   r   re   rf   rg   r   r   r   r   r   r   _reset_parameters  s   z<HiFiGANGenerator.reset_parameters.<locals>._reset_parametersNr   )r4   r   r   r   r   r     s   z!HiFiGANGenerator.reset_parametersc                 C   s   |  |}t| jD ]=}t|t}| j| |}d }t| jD ]!}|d u r3| j|| j |  |}q || j|| j |  |7 }q || j }q
t|}| 	|}|S r=   )
r   r   r   rZ   r{   r|   r   r   r   r   r4   r?   r   xsr   r   r   r   r@     s   


zHiFiGANGenerator.forwardc                 C      t |tjstj|tjdt|  j}|	dd
d}| |}t| jD ]=}t|t}| j| |}d }t| jD ]!}|d u rR| j|| j |  |}q?|| j|| j |  |7 }q?|| j }q)t|}| |}|S N)Zdtyper   r   r   r   ZTensorZtensorfloatr$   next
parametersr#   	transposera   r   r   r   rZ   r{   r|   r   r   r   r   r   r   r   r   	inference'  (   


zHiFiGANGenerator.inference)
rB   rC   rD   r.   r   r   r   r@   r   rE   r   r   r;   r   r     s     8r   c                       sd   e Zd Zdg dddgddddgg dg dg dgd	d
d
d	f fdd	ZdddZdd Z  ZS )ConditionGeneratori   r   rq   r   r      r   rr   TFc                    s  t t|   t|| _t|| _t||ddd|d| _td|dd| _|dkr*t	nt
}tjtg d| _t | _tt||D ]B\}\}}| j|du rht|d	|  |d	|d   ||d|d	 |d
nt|d	|  |d	|d   |||d	 |d	  |d	 |d qDt | _tt| jD ]$}|d	|d   }tt||D ]\}\}}| j|||||d qqt|dddd|d| _|	rtdddd| _nd | _|
rt | _d S d | _d S )Nr   r   rq   r      r   )r   r   r   Fr   r   r   r+   r   r   r   )Zdim_inZ	style_dimZmax_conv_dim)r-   r   r.   r   r   r   r   r   spk_fcrp   r   r   r0   r   Zrandnspk_inforu   r   r   rz   r   r\   r   r   r   r   r   unetFsmnEncoderV2extra_layer)r4   r   r   r   r   r   r   r   r   r   
extra_infor,   r   r   r   r   r   r   r   r;   r   r   r.   @  sh   






zConditionGenerator.__init__N      ?c                 C   sf  | g d}tj|d| j ddd}tj|dd}|| j }tj|dd}|| ||  }|d ur=| |}	||	7 }| || |d }
t	| j
D ]=}t|
t}
| j| |
}
d }t	| jD ]!}|d u rx| j|| j |  |
}qe|| j|| j |  |
7 }qe|| j }
qOt|
}
| |
}
| jd ur| |
d|}
|
d}
|
 g d}
|
S )N)r   r   r   r   r   T)dimZkeepdimr   )Zpermuter   sumra   r   Zsoftmaxr   r   r   r   r   rZ   r{   r|   r   r   r   r   r   rb   )r4   inpsZextra_mcabZscorer   Zspk_inpr   r?   r   r   r   r   r   r   r@     s4   





zConditionGenerator.forwardc                 C   r   r   r   r   r   r   r   r     r   zConditionGenerator.inference)Nr   r   )rB   rC   rD   r.   r@   r   rE   r   r   r;   r   r   >  s    
?#r   c                       s2   e Zd ZdZddgdf fdd	Zdd Z  ZS )FeedForwardNetzA two-feed-forward-layer moduler   r   c                    sd   t    tj|||d |d d d d| _tj|||d |d d d dd| _t|| _d S )Nr   r   r   )r9   rG   F)r9   rG   r,   )r-   r.   r0   r   w_1w_2Dropoutdropout)r4   Zd_inr   Zd_outr9   r   r;   r   r   r.     s   
zFeedForwardNet.__init__c                 C   s@   | dd}t| |}| |}| |}| dd}|S )Nr   r   )r   rZ   Zrelur   r   r   )r4   r?   outputr   r   r   r@     s   

zFeedForwardNet.forwardr[   r   r   r;   r   r     s    r   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	MemoryBlockV2r   c              	      s   t t|   tt|d d }t|d d }|dkr%||7 }||8 }||| _| _tj|||dd|dd| _	t
|| _d S )Nr   r   r   F)groupsr,   )r-   r   r.   rm   roundlprpr0   r   conv_dwr   r   )r4   r   filter_sizeshiftr   Zleft_paddingZright_paddingr;   r   r   r.     s   zMemoryBlockV2.__init__Nc                 C   s   |d ur| |dd}tj|dd| j| jddfddd}| | dd dd}||7 }| 	|}|d urE| |dd}|S )Nr   r   Zconstantr   )rY   r   r   r   )
Zmasked_fillra   rZ   r2   r   r   r   r"   r   r   )r4   inputmaskr?   r   r   r   r   r@     s   
zMemoryBlockV2.forward)r   r=   rA   r   r   r;   r   r     s    r   c                       s8   e Zd Z								d fd	d
	ZdddZ  ZS )r   r   r   0  r      r   r   r   c	           
   	      s  t t|   || _|| _|| _|| _|| _ | _t	 t
s+ fddt| jD | _t | _t | _t||| _| jt||||d td|D ]}	| jt||||d qMt | _t|D ]}	| jt||| j|	 | qetjj||dd| _d S )Nc                    s   g | ]} qS r   r   )r   _r   r   r   r     s    z*FsmnEncoderV2.__init__.<locals>.<listcomp>)r   r   Fr+   )r-   r   r.   r   fsmn_num_layersnum_memory_unitsffn_inner_dimr   r   r   listr   r0   ru   adapterffn_lstr   projr   r   memory_block_lstr   r   fc)
r4   r   r   Z	input_dimr   r   r   Zspk_dimr   r   r;   r   r   r.     sJ   



zFsmnEncoderV2.__init__Nc                 C   s   t || j| j}| |}t| j| jD ]$\}}||}|||}t || j| j}|d|dkr9||7 }q| |}t	j
|dd}|S )Nr   r   r   )rZ   r   Ztrainingr   rz   r   r   r   r   r   ri   )r4   r   r   r?   ZffnZmemory_blockcontextZmemoryr   r   r   r@   /  s   


zFsmnEncoderV2.forward)r   r   r   r   r   r   r   r   r=   rA   r   r   r;   r   r     s    0r   r=   )r   rc   )r   )$r   r   numpyr	   r   Ztorch.nnr0   Ztorch.nn.functionalZ
functionalrZ   r   r   Z	Starganv3r   r|   r   r)   Moduler*   rF   rJ   rU   r\   rk   rn   rp   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s>   


D N 	{#!