o
    )j                     @   s  d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z	 d dlm
Z
 d dlmZmZ de
de
de
fdd	Zde
de
de
fd
dZG dd deZG dd dej	jZeeef ZG dd dejjZdd Zde
defddZG dd dejjZG dd dej	jZdddede	jfddZG d d! d!ej	jZ 	dVde
d"ed#ed$e!de
f
d%d&Z"G d'd( d(ejjZ#d)d* Z$G d+d, d,ejjZ%	-	.dWde
d/ed0ed1ed2e&f
d3d4Z'de
de
fd5d6Z(G d7d8 d8ej	jZ)G d9d: d:e	jZ*G d;d< d<ejjZ+G d=d> d>ej	jZ,G d?d@ d@ej	jZ-G dAdB dBejjZ.G dCdD dDej	jZ/G dEdF dFej	jZ0de
fdGdHZ1de
fdIdJZ2de
fdKdLZ3de
fdMdNZ4G dOdP dPej	jZ5de
dQede
fdRdSZ6e7dTkr{e 8 9e j: e;dU e<dU dS dS )X    N)OptionalTupleUnion)Tensor)
custom_bwd
custom_fwdxyreturnc                 C   s0   t | |}t | | }|t t |  S N)torchmaxabslog1pexp)r   r	   Z	max_valuediff r   w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/ans/zipenhancer_layers/scaling.pylogaddexp_onnx   s   r   c                 C   s6   t j rt | |S t j rt| |S t | |S r   )r   jitis_scripting	logaddexpZonnxZis_in_onnx_exportr   )r   r	   r   r   r   r   *   s
   


r   c                   @   sf   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Z	ddd defddZdS )PiecewiseLinearz
    Piecewise linear function, from float to float, specified as nonempty list of (x,y) pairs with
    the x values in order.  x values <[initial x] or >[final x] are map to [initial y], [final y]
    respectively.
    c                 G   s   t |dksJ t |t |dkr"t|d tr"t|d j| _ndd |D | _| jD ]\}}t|ttfs>J t|t|ttfsKJ t|q-tt | jd D ]!}| j|d  d | j| d ksvJ || j| | j|d  fqUd S )N   r   c                 S   s    g | ]\}}t |t |fqS r   )float.0r   r	   r   r   r   
<listcomp>I        z,PiecewiseLinear.__init__.<locals>.<listcomp>)	len
isinstancer   listpairsr   inttyperange)selfargsr   r	   ir   r   r   __init__D   s   "zPiecewiseLinear.__init__c                 C   s   dt | jdd  dS )NzPiecewiseLinear(r   ))strr"   r&   r   r   r   __str__U   s   zPiecewiseLinear.__str__c                 C   s   || j d d kr| j d d S || j d d kr | j d d S | j d \}}tdt| j D ]&}| j | \}}||krP||krP||| ||  ||     S ||}}q/J )Nr   r   r*   )r"   r%   r   )r&   r   Zcur_xZcur_yr(   Znext_xZnext_yr   r   r   __call__Y   s   
zPiecewiseLinear.__call__c                    s   t  fdd| jD  S )Nc                    s   g | ]
\}}||  fqS r   r   r   alphar   r   r   i   s    z+PiecewiseLinear.__mul__.<locals>.<listcomp>)r   r"   )r&   r1   r   r0   r   __mul__h   s   zPiecewiseLinear.__mul__c                    sP   t  ttfrt fdd| jD  S |  \} tdd t|j jD  S )Nc                    s    g | ]}|d  |d   fqS r   r   r   )r   pr   r   r   r   m   r   z+PiecewiseLinear.__add__.<locals>.<listcomp>c                 S   s(   g | ]\}}|d  |d |d  fqS r3   r   r   spZxpr   r   r   r   o   s    )r    r   r#   r   r"   get_common_basiszipr&   r   sr   r5   r   __add__k   s   zPiecewiseLinear.__add__c                 C   sH   t |ttfrtd|f}| j|dd\}}tdd t|j|jD  S )Nr   Tinclude_crossingsc                 S   *   g | ]\}}|d  t |d |d fqS r3   )r   r6   r   r   r   r   v       z'PiecewiseLinear.max.<locals>.<listcomp>r    r   r#   r   r8   r9   r"   r:   r   r   r   r   r   s   zPiecewiseLinear.maxc                 C   sN   t |ts
t |trtd|f}| j|dd\}}tdd t|j|jD  S )Nr   Tr=   c                 S   r?   r3   )minr6   r   r   r   r   }   r@   z'PiecewiseLinear.min.<locals>.<listcomp>rA   r:   r   r   r   rB   y   s   zPiecewiseLinear.minc                 C   s   | j |j kS r   )r"   )r&   otherr   r   r   __eq__   s   zPiecewiseLinear.__eq__Fr4   r>   c                    sr  t  tsJ t ttdd jD dd  jD  }fdd|D } fdd|D }|rg }tt|d D ]M}|| || k}||d  ||d  k}	||	krt|| ||  }
t||d  ||d   }|
|
|  }|| |||d  ||    }|	| q=t|dkrtt|| }fdd|D } fdd|D }tt
|| tt
|| fS )	aH  
        Returns (self_mod, p_mod) which are equivalent piecewise linear
        functions to self and p, but with the same x values.

          p: the other piecewise linear function
          include_crossings: if true, include in the x values positions
              where the functions indicate by this and p cross.
        c                 S   s   g | ]\}}|qS r   r   )r   r   _r   r   r   r          z4PiecewiseLinear.get_common_basis.<locals>.<listcomp>c                       g | ]} |qS r   r   r   r   r-   r   r   r      rF   c                    rG   r   r   rH   r4   r   r   r      rF   r   r   c                    rG   r   r   rH   r-   r   r   r      rF   c                    rG   r   r   rH   rI   r   r   r      rF   )r    r   r$   sortedsetr"   r%   r   r   appendr9   )r&   r4   r>   Zx_valsZy_vals1Zy_vals2Zextra_x_valsr(   Z_compare_results1Z_compare_results2Zdiff_curZ	diff_nextposZextra_x_valr   )r4   r&   r   r8      s2   " 
z PiecewiseLinear.get_common_basisN)F)__name__
__module____qualname____doc__r)   r.   r/   r2   r<   r   rB   rD   boolr8   r   r   r   r   r   =   s     r   c                       sR   e Zd ZdZdddef fddZdefdd	Zd
d Zdd Z	dd Z
  ZS )ScheduledFloataH  
    This object is a torch.nn.Module only because we want it to show up in [top_level module].modules();
    it does not have a working forward() function.  You are supposed to cast it to float, as
    in, float(parent_module.whatever), and use it as something like a dropout prob.

    It is a floating point value whose value changes depending on the batch count of the
    training loop.  It is a piecewise linear function where you specify the (x,y) pairs
    in sorted order on x; x corresponds to the batch index.  For batch-index values before the
    first x or after the last x, we just use the first or last y value.

    Example:
       self.dropout = ScheduledFloat((0.0, 0.2), (4000.0, 0.0), default=0.0)

    `default` is used when self.batch_count is not set or not in training mode or in
     torch.jit scripting mode.
            defaultrV   c                   s*   t    d | _d | _|| _t| | _d S r   )superr)   batch_countnamerV   r   schedule)r&   rV   r'   	__class__r   r   r)      s
   
zScheduledFloat.__init__r
   c                 C   s"   d| j  dt| jjdd  S )Nzbatch_count=z, schedule=r   r*   )rX   r,   rZ   r"   r-   r   r   r   
extra_repr   s    zScheduledFloat.extra_reprc                 C   sn   | j }|d u s| jrtj stj rt| jS | | j }t		 dk r5t
d| j d| j  d|  |S )Ng-C6*?zScheduledFloat: name=z, batch_count=z, ans=)rX   trainingr   r   r   
is_tracingr   rV   rZ   randomlogginginforY   )r&   rX   ansr   r   r   	__float__   s   
zScheduledFloat.__float__c                 C   sD   t |ts
t |trt| j| | jdS t| j|j | j|j dS NrU   )r    r   r#   rS   rZ   rV   r&   r   r   r   r   r<      s
   zScheduledFloat.__add__c                 C   sJ   t |ts
t |trt| j|| jdS t| j|jt| j|jdS re   )r    r   r#   rS   rZ   r   rV   rf   r   r   r   r      s   zScheduledFloat.max)rN   rO   rP   rQ   r   r)   r,   r]   rd   r<   r   __classcell__r   r   r[   r   rS      s    rS   c                   @   s8   e Zd ZdZededefddZedefddZd	S )
SoftmaxFunctionz
    Tries to handle half-precision derivatives in a randomized way that should
    be more accurate for training than the default behavior.
    r   dimc                 C   s<   |j |d}t r|tj}| | |j| _|| _|S )Nri   )	softmaxr   is_autocast_enabledtofloat16save_for_backwarddtypeZx_dtyperi   )ctxr   ri   rc   r   r   r   forward   s   
zSoftmaxFunction.forwardans_gradc                 C   s|   | j \}tjjjdd( |tj}|tj}|| }|||j| jdd  }|d fW  d    S 1 s7w   Y  d S )NFenabledTri   Zkeepdim)	saved_tensorsr   cudaampautocastrm   float32sumri   )rq   rs   rc   x_gradr   r   r   backward  s   $zSoftmaxFunction.backwardN)	rN   rO   rP   rQ   staticmethodr   r#   rr   r~   r   r   r   r   rh      s    rh   c                 C   s@   | j |dd\}}| | |   | j|dd}| | | S )NTrv   )r   Zsub_Zexp_r|   Zdiv_)tensorri   Zmax_valsrE   Zsum_expr   r   r   inplace_softmax  s   

r   ri   c                 C   s0   | j rtj stj rt| |S t| |S r   )requires_gradr   r   r   r_   r   rh   apply)r   ri   r   r   r   rk   (  s   
rk   c                   @   sH   e Zd ZededededededefddZed	edefd
dZdS )BiasNormFunctionr   bias	log_scalechannel_dimstore_output_for_backpropr
   c           
      C   s   |j dksJ |dk r||j  }|| _|| _t|d |j D ]}|d}qtj|| d |dd}|d |  }|| }	| |rF|		 n||	 |	 |	  |	S )Nr   r   r*      Trv         )
ndimr   r   r%   	unsqueezer   meanr   ro   detach)
rq   r   r   r   r   r   rE   _x_bias_squarescalesrc   r   r   r   rr   9  s&   	
zBiasNormFunction.forwardrs   c           	      C   s   | j \}}}}| jr|| }n|}| }d|_d|_d|_t ' tj|| d | jdd}|d |  }|| }|j	|d W d    n1 sLw   Y  |j
|j
 |j
d d fS )NTr   rv   r   Zgradient)rw   r   r   r   r   enable_gradr   r   r   r~   gradflatten)	rq   rs   Zans_or_xr   r   r   r   r   rc   r   r   r   r~   U  s"   

zBiasNormFunction.backwardN)	rN   rO   rP   r   r   r#   rR   rr   r~   r   r   r   r   r   2  s"    r   c                       s\   e Zd ZdZ					ddeded	ed
edededdf fddZdedefddZ	  Z
S )BiasNorma  
    This is intended to be a simpler, and hopefully cheaper, replacement for
    LayerNorm.  The observation this is based on, is that Transformer-type
    networks, especially with pre-norm, sometimes seem to set one of the
    feature dimensions to a large constant value (e.g. 50), which "defeats"
    the LayerNorm because the output magnitude is then not strongly dependent
    on the other (useful) features.  Presumably the weight and bias of the
    LayerNorm are required to allow it to do this.

    Instead, we give the BiasNorm a trainable bias that it can use when
    computing the scale for normalization.  We also give it a (scalar)
    trainable scale on the output.


    Args:
       num_channels: the number of channels, e.g. 512.
       channel_dim: the axis/dimension corresponding to the channel,
         interpreted as an offset from the input's ndim if negative.
         This is NOT the num_channels; it should typically be one of
         {-2, -1, 0, 1, 2, 3}.
      log_scale: the initial log-scale that we multiply the output by; this
         is learnable.
      log_scale_min: FloatLike, minimum allowed value of log_scale
      log_scale_max: FloatLike, maximum allowed value of log_scale
      store_output_for_backprop: only possibly affects memory use; recommend
         to set to True if you think the output of this module is more likely
         than the input of this module to be required to be stored for the
         backprop.
    r*         ?            ?Fnum_channelsr   r   log_scale_minlog_scale_maxr   r
   Nc                    s^   t t|   || _|| _tt|| _	tt
|jddd| _|| _|| _|| _d S )Nr   g-C6?)r   Zstd)rW   r   r)   r   r   nn	Parameterr   r   r   emptyZnormal_r   r   r   r   )r&   r   r   r   r   r   r   r[   r   r   r)     s   	
zBiasNorm.__init__r   c                 C   s   |j | j | jksJ tj stj rM| j}|dk r!||j7 }| j}t	|d |jD ]}|
d}q,tj|| d |dd}|d | j  }|| S t| jt| jt| j| jd}t|| j|| j| jS )	Nr   r   r*   r   Trv   r   )rB   r   r^   )shaper   r   r   r   r   r_   r   r   r%   r   r   r   r   limit_param_valuer   r   r   r^   r   r   r   )r&   r   r   r   rE   r   r   r   r   r   r   rr     s.   
zBiasNorm.forward)r*   r   r   r   F)rN   rO   rP   rQ   r#   r   rR   r)   r   rr   rg   r   r   r[   r   r   j  s.    !r   r   )initial_scaler   c                 O   s   t j|i |}t / |jdd  | 9  < |jdur3tj j|jd|  d|   W d   |S W d   |S 1 s>w   Y  |S )aT  
    Behaves like a constructor of a modified version of nn.Linear
    that gives an easy way to set the default initial parameter scale.

    Args:
        Accepts the standard args and kwargs that nn.Linear accepts
        e.g. in_features, out_features, bias=False.

        initial_scale: you can override this if you want to increase
           or decrease the initial magnitude of the module's output
           (affects the initialization of weight_scale and bias_scale).
           Another option, if you want to do something like this, is
           to re-initialize the parameters.
    N皙皙?)r   Linearr   no_gradweightr   inituniform_)r   r'   kwargsrc   r   r   r   ScaledLinear  s   



r   c                	       s|   e Zd ZdZ		ddedededef fdd	ZddededefddZ	defddZ
dededeeef fddZ  ZS )ChunkCausalDepthwiseConv1da?  
    Behaves like a depthwise 1d convolution, except that it is causal in
    a chunkwise way, as if we had a block-triangular attention mask.
    The chunk size is provided at test time (it should probably be
    kept in sync with the attention mask).

    This has a little more than twice the parameters of a conventional
    depthwise conv1d module: we implement it by having one
    depthwise convolution, of half the width, that is causal (via
    right-padding); and one depthwise convolution that is applied only
    within chunks, that we multiply by a scaling factor which depends
    on the position within the chunk.

    Args:
        Accepts the standard args and kwargs that nn.Linear accepts
        e.g. in_features, out_features, bias=False.

        initial_scale: you can override this if you want to increase
           or decrease the initial magnitude of the module's output
           (affects the initialization of weight_scale and bias_scale).
           Another option, if you want to do something like this, is
           to re-initialize the parameters.
    r   Tchannelskernel_sizer   r   c                    s  t    |d dksJ |d d }tj||||ddd| _tj|||||d |d| _ttd||| _	|| _
t : | jjd d   |9  < | jjd d   |9  < |rqtjj| jjd| d|  W d    d S W d    d S 1 s|w   Y  d S )Nr   r   r   T)in_channelsout_channelsgroupsr   paddingr   r   r   )rW   r)   r   ZConv1dcausal_convchunkwise_convr   r   zeroschunkwise_conv_scaler   r   r   r   r   r   )r&   r   r   r   r   Zhalf_kernel_sizer[   r   r   r)     sD   
	
"z#ChunkCausalDepthwiseConv1d.__init__r*   r   
chunk_sizer
   c                 C   s   |j \}}}| jd }|dk s||kr|}| | }tjj|||f}| |dd|| f }|j |||fks;J |d|df }	|	j d | }
|	|||
|}	|	dddd||
 ||}	| 	|	}	| 
|}|	| }	|	||
||dddd}	|	|||
| dd|f }	|	| S )zForward function.

        Args:
               x: a Tensor of shape (batch_size, channels, seq_len)
        chunk_size: the chunk size, in frames; does not have to divide seq_len exactly.
        r   r   .Nr      )r   r   r   r   
functionalpadr   ZreshapeZpermuter   _get_chunk_scale)r&   r   r   
batch_sizer   seq_lenleft_padZ	right_padx_causalx_chunkZ
num_chunkschunk_scaler   r   r   rr     s<   






z"ChunkCausalDepthwiseConv1d.forwardc                 C   s   | j d }| j d }|| jk r%|ddd|f }|dd| df }n'|| j }|jd }tj|||j|jd}tj||fdd}tj||fdd}d||  S )zxReturns tensor of shape (num_channels, chunk_size) that will be used to
        scale the output of self.chunkwise_conv.r   r   N)devicerp   r*   rj   r   )r   r   r   r   r   r   rp   cat)r&   r   Z	left_edgeZ
right_edgetr   r   r   r   r   r   A  s   




z+ChunkCausalDepthwiseConv1d._get_chunk_scalecachec           
      C   s   |j \}}}| jd }|j d |ksJ |j d |ftj||gdd}|d| df }| |}|j |||fks<J |d|df }| |}| j|d}	||	 }|| |fS )zStreaming Forward function.

        Args:
            x: a Tensor of shape (batch_size, channels, seq_len)
            cache: cached left context of shape (batch_size, channels, left_pad)
        r   r*   rj   .N)r   )r   r   r   r   r   r   r   )
r&   r   r   r   r   r   r   r   r   r   r   r   r   streaming_forwardR  s   
 

z,ChunkCausalDepthwiseConv1d.streaming_forward)r   T)r*   )rN   rO   rP   rQ   r#   r   rR   r)   r   rr   r   r   r   rg   r   r   r[   r   r     s,    -*
r   limitpenaltyrY   c                 C   s@   |   }|  | dk}||| tj|   }t| ||} | S )aH  
    Returns x unmodified, but in backprop will put a penalty for the excess of
    the absolute values of elements of x over the limit "limit".  E.g. if
    limit == 10.0, then if x has any values over 10 it will get a penalty.

    Caution: the value of this penalty will be affected by grad scaling used
    in automatic mixed precision training.  For this reasons we use this,
    it shouldn't really matter, or may even be helpful; we just use this
    to disallow really implausible values of scores to be given to softmax.

    The name is for randomly printed debug info.
    r   )signr   rm   r   Zint8	with_loss)r   r   r   rY   Zx_signZ
over_limitZaux_lossr   r   r   penalize_abs_values_gtv  s
   r   c                   @   s8   e Zd ZedededefddZedefddZd	S )
WithLossr   r	   rY   c                 C   sD   |j | _t dk r |d ur |  }td| d|d |S )NgMb`?zWithLoss: name=z, loss-sum=z.3e)r   y_shaper`   r|   itemra   rb   )rq   r   r	   rY   Zloss_sumr   r   r   rr     s
   zWithLoss.forwardrs   c                 C   s   |t j| j|j|jdd fS )Nrp   r   )r   Zonesr   rp   r   )rq   rs   r   r   r   r~     s   zWithLoss.backwardN)rN   rO   rP   r   r   r,   rr   r~   r   r   r   r   r     
    r   c                 C   s   t | ||S r   )r   r   )r   r	   rY   r   r   r   r     s   r   c                   @   s8   e Zd ZedededefddZedefddZd	S )
LimitParamValuer   rB   r   c                 C   s&   |  | ||ksJ || _|| _|S r   )ro   rB   r   )rq   r   rB   r   r   r   r   rr     s
   
zLimitParamValue.forwardr}   c                 C   sZ   | j \}|tt|dk|| jk dd }|tt|dk || jkdd9 }|d d fS )Nr   g      r   )rw   r   wherelogical_andrB   r   )rq   r}   r   r   r   r   r~     s   
zLimitParamValue.backwardN)rN   rO   rP   r   r   r   rr   r~   r   r   r   r   r     r   r   333333?TrB   r   probr^   c                 C   s"   |rt   |k rt| ||S | S r   )r`   r   r   )r   rB   r   r   r^   r   r   r   r     s   
r   c                 C   s*   t j s
t j r| S | jdddd S )Nr   r*   rj   r   )r   r   r   r_   chunkr5   r   r   r   _no_op  s   r   c                       s$   e Zd Z fddZdd Z  ZS )Identityc                    s   t t|   d S r   )rW   r   r)   r-   r[   r   r   r)     s   zIdentity.__init__c                 C   s   t |S r   )r   rf   r   r   r   rr     s   zIdentity.forward)rN   rO   rP   r)   rr   rg   r   r   r[   r   r     s    r   c                       s4   e Zd Zdef fddZdedefddZ  ZS )Dropout2r4   c                    s   t    || _d S r   )rW   r)   r4   )r&   r4   r[   r   r   r)     s   

zDropout2.__init__r   r
   c                 C   s   t jjj|t| j| jdS )N)r4   r^   )r   r   r   Zdropoutr   r4   r^   rf   r   r   r   rr     s   zDropout2.forward)rN   rO   rP   	FloatLiker)   r   rr   rg   r   r   r[   r   r     s    r   c                   @   <   e Zd ZdZededefddZededefddZd	S )
SwooshLFunctionz;
    swoosh_l(x) =  log(1 + exp(x-4)) - 0.08*x - 0.035
    r   r
   c              	   C   s  |j }|jtjkr|tj}tjd|j|jd}d}tjj	j
dd t  | }d|_ t||d ||  d }|sP|W  d    W  d    S |jt|d	 |j}|}d
| d }|| d||   }	|	t| }
tdkr|
 dksJ |
 dk sJ |
tj}| | |jtjkst r|tj}|W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )NrT   r   {GzFrt   T      @Q?r   r   {Gzt?     o@__main__      p@r   rp   r   rn   rm   r{   r   r   rx   ry   rz   r   r   r   r~   Z	ones_liker   Z	rand_likerN   rB   r   Zuint8ro   rl   )rq   r   r   zerocoeffr	   r   floorceil_diffd_scaledd_intr   r   r   rr     s@   
	
"zSwooshLFunction.forwardy_gradc                 C   s8   | j \}d}|}d| d }||| d  | }|| S )Nr   r   r   r   rw   )rq   r   dr   r   r   r   r   r   r~      s   zSwooshLFunction.backwardNrN   rO   rP   rQ   r   r   rr   r~   r   r   r   r   r     s    $r   c                   @      e Zd ZdedefddZdS )SwooshLr   r
   c                 C   L   t j s
t j r!t jd|j|jd}t||d d|  d S t	|S zReturn Swoosh-L activation.rT   r   r   {Gz?r   )
r   r   r   r_   r   rp   r   r   r   r   r&   r   r   r   r   r   rr   .     
zSwooshL.forwardNrN   rO   rP   r   rr   r   r   r   r   r   ,      r   c                   @   r   )SwooshLOnnxr   r
   c                 C   .   t jd|j|jd}t||d d|  d S r   r   r   rp   r   r   r   r   r   r   rr   <     zSwooshLOnnx.forwardNr   r   r   r   r   r   :  r   r   c                   @   r   )
SwooshRFunctionzo
     swoosh_r(x) =  log(1 + exp(x-1)) - 0.08*x - 0.313261687

    derivatives are between -0.08 and 0.92.
    r   r
   c              	   C   s  |j }|jtjkr|tj}tjd|j|jd}tjj	j
dd t  | }d|_ t||d d|  d }|sN|W  d    W  d    S |jt|d	 |j}d
}d}|| d||   }|t| }	tdkr|	 dks{J |	 dk sJ |	tj}
| |
 |jtjkst r|tj}|W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )NrT   r   Frt   Tr   r   tN0z?r   r   皙?r   r   r   r   )rq   r   r   r   r	   r   r   r   r   r   r   r   r   r   rr   I  s>   

"zSwooshRFunction.forwardr   c                 C   s,   | j \}d}d}||| d  | }|| S )Nr   r   r   r   )rq   r   r   r   r   r   r   r   r~   m  s
   zSwooshRFunction.backwardNr   r   r   r   r   r   B  s    #r   c                   @   r   )SwooshRr   r
   c                 C   r   zReturn Swoosh-R activation.rT   r   r   r   r   )
r   r   r   r_   r   rp   r   r   r   r   r   r   r   r   rr   y  r   zSwooshR.forwardNr   r   r   r   r   r   w  r   r   c                   @   r   )SwooshROnnxr   r
   c                 C   r   r   r   r   r   r   r   rr     r   zSwooshROnnx.forwardNr   r   r   r   r   r    r   r  c                 C   sF   | d }d|    | j}t|tdk||}|d|   d S )Nr   r   infr   r   r   logrm   rp   r   r   r   r   x_offsetlog_sumr   r   r   SwooshLForward     r  c                 C   s^   | d }d|    | j}t|tdk||}ddd|     }|d|   d |fS )zo
    https://k2-fsa.github.io/k2/python_api/api.html#swoosh-l-forward-and-deriv
    :param x:
    :return:
    r   r   r  q=
ףp?r   r   r   r  r   r  r  Zderivr   r   r   SwooshLForwardAndDeriv  
   r  c                 C   sF   | d }d|    | j}t|tdk||}|d|   d S )Nr   r  r   r   r  r  r   r   r   SwooshRForward  r	  r  c                 C   s^   | d }d|    | j}t|tdk||}ddd|     }|d|   d |fS )zo
    https://k2-fsa.github.io/k2/python_api/api.html#swoosh-r-forward-and-deriv
    :param x:
    :return:
    r   r  r
  r   r   r   r  r  r   r   r   SwooshRForwardAndDeriv  r  r  c                       s\   e Zd ZdZ					ddeded	ed
ededee de	f fddZ
defddZ  ZS )ActivationDropoutAndLineara2  
     This merges an activation function followed by dropout and then a nn.Linear module;
     it does so in a memory efficient way so that it only stores the input to the whole
     module.  If activation == SwooshL and dropout_shared_dim != None, this will be
     equivalent to:
       nn.Sequential(SwooshL(),
                     Dropout3(dropout_p, shared_dim=dropout_shared_dim),
                     ScaledLinear(in_channels, out_channels, bias=bias,
                                  initial_scale=initial_scale))
    If dropout_shared_dim is None, the dropout would be equivalent to
    Dropout2(dropout_p).  Note: Dropout3 will be more memory efficient as the dropout
    mask is smaller.

     Args:
        in_channels: number of input channels, e.g. 256
        out_channels: number of output channels, e.g. 256
        bias: if true, have a bias
        activation: the activation function, for now just support SwooshL.
        dropout_p: the dropout probability or schedule (happens after nonlinearity).
        dropout_shared_dim: the dimension, if any, across which the dropout mask is
             shared (e.g. the time dimension).  If None, this may be less memory
             efficient if there are modules before this one that cache the input
             for their backprop (e.g. Balancer or Whiten).
    Tr   rT   r*   r   r   r   r   
activation	dropout_pdropout_shared_dimr   c           	         sF   t    t||||d}|j| _| d|j || _|| _|| _d S )N)r   r   r   )	rW   r)   r   r   Zregister_parameterr   r  r  r  )	r&   r   r   r   r  r  r  r   Zlinear_moduler[   r   r   r)     s   


z#ActivationDropoutAndLinear.__init__r   c                 C   sf   t j st j s| js1| jdkrt|}n| jdkr!t|}nJ | jt jj	
|| j| jS d S )Nr   r   )r   r   r   r_   r^   r  r  r  r   r   Zlinearr   r   rf   r   r   r   rr     s   




z"ActivationDropoutAndLinear.forward)Tr   rT   r*   r   )rN   rO   rP   rQ   r#   rR   r,   r   r   r   r)   r   rr   rg   r   r   r[   r   r    s.    r  r   c                 C   sn   || j d kr| ddd|ddddf S t| j }||d  |d< tj|| j| jd}tj| |fddS )zb

    :param x: (b, c, t, f)
    :param num_channels:
    :return: x: (b, num_channels, t, f)
    r   Nr   rj   )r   r!   r   r   rp   r   r   )r   r   r   r   r   r   r   convert_num_channels  s    
r  r   r   r   )r   T)=ra   mathr`   typingr   r   r   r   Ztorch.nnr   r   Ztorch.cuda.ampr   r   r   r   objectr   ModulerS   r   r   ZautogradFunctionrh   r   r#   rk   r   r   r   r   r   r,   r   r   r   r   rR   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  rN   	getLoggersetLevelINFOZset_num_threadsZset_num_interop_threadsr   r   r   r   <module>   s   s<
8O )
!
	
65
O

