o
    )j.                     @   sd   d Z ddlZddlmZ G dd dejZG dd dejZG dd dejZG d	d
 d
ejZdS )zM This implementation is adapted from https://github.com/wenet-e2e/wespeaker.
    Nc                       (   e Zd ZdZ fddZdd Z  ZS )TAPzG
    Temporal average pooling, only first-order mean is considered
    c                       t t|   d S N)superr   __init__selfkwargs	__class__ j/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/sv/pooling_layers.pyr         zTAP.__init__c                 C   s   |j dd}|jdd}|S )Ndim   Z	start_dim)meanflatten)r	   xpooling_meanr   r   r   forward   s   zTAP.forward__name__
__module____qualname____doc__r   r   __classcell__r   r   r   r   r          r   c                       r   )TSDPzR
    Temporal standard deviation pooling, only second-order std is considered
    c                    r   r   )r   r!   r   r   r   r   r   r      r   zTSDP.__init__c                 C   s(   t t j|ddd }|jdd}|S Nr   r   g:0yE>r   r   )torchsqrtvarr   )r	   r   pooling_stdr   r   r   r      s   zTSDP.forwardr   r   r   r   r   r!      r    r!   c                       r   )TSTPz
    Temporal statistics pooling, concatenate mean and std, which is used in
    x-vector
    Comment: simple concatenation can not make full use of both statistics
    c                    r   r   )r   r'   r   r   r   r   r   r   -   r   zTSTP.__init__c                 C   sP   |j dd}ttj|ddd }|jdd}|jdd}t||fd}|S r"   )r   r#   r$   r%   r   cat)r	   r   r   r&   statsr   r   r   r   0   s   zTSTP.forwardr   r   r   r   r   r'   &   s    r'   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	ASTPzx Attentive statistics pooling: Channel- and context-dependent
        statistics pooling, first used in ECAPA_TDNN.
       Fc                    sX   t t|   || _|rtj|d |dd| _n	tj||dd| _tj||dd| _d S )N   r   )Zkernel_size)r   r*   r   global_context_attnnZConv1dlinear1linear2)r	   Zin_dimZbottleneck_dimr-   r   r   r   r   @   s   
zASTP.__init__c           	      C   s  t |jdkr||jd |jd |jd  |jd }t |jdks%J | jrNtj|ddd|}ttj|dddd	 |}tj	|||fdd
}n|}t
| |}tj| |dd
}tj|| dd
}tj||d  dd
|d  }t|jd	d}tj	||gdd
S )z
        x: a 3-dimensional tensor in tdnn-based architecture (B,F,T)
            or a 4-dimensional tensor in resnet architecture (B,C,F,T)
            0-dim: batch-dimension, last-dim: time-dimension (frame-dimension)
           r   r      r,   r   T)r   Zkeepdimg|=r   )min)lenshapeZreshaper-   r#   r   Z	expand_asr$   r%   r(   tanhr/   Zsoftmaxr0   sumclamp)	r	   r   Zcontext_meanZcontext_stdZx_inalphar   r%   Zstdr   r   r   r   R   s(   *zASTP.forward)r+   Fr   r   r   r   r   r*   ;   s    r*   )	r   r#   Ztorch.nnr.   Moduler   r!   r'   r*   r   r   r   r   <module>   s   