o
    0j6                     @   s  d dl Z d dlmZ d dlZd dlm  mZ d dlmZmZ G dd dej	Z
G dd dej	ZG dd	 d	ej	ZG d
d dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd deZi dededddfdede
de
dedd ifd!ed"ed#ed$ejd%ejd&ejd'ejd(ejd)ed*ed+ejZeeZd,d- ZedZedZedZedZed#Z ed)Z!ed"Z"ed!Z#dS ).    N)OrderedDict)Tensornnc                   @   "   e Zd ZdZdedefddZdS )NewGELUActivationz
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    inputreturnc                 C   s6   d| dt tdtj |dt |d     S )N      ?      ?       @Hm?g      @)paddletanhmathsqrtpipowselfr    r   y/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/models/common/transformers/activations.pyforward   s   zNewGELUActivation.forwardN__name__
__module____qualname____doc__r   r   r   r   r   r   r          r   c                       sL   e Zd ZdZddef fddZdedefdd	Zdedefd
dZ  Z	S )GELUActivationa  
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) This is now written in C in nn.functional
    Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    Fuse_gelu_pythonc                    s(   t    |r| j| _d S tjj| _d S N)super__init___gelu_pythonactr   
functionalgelu)r   r   	__class__r   r   r"   3   s   
zGELUActivation.__init__r   r   c                 C   s    |d dt |td   S )Nr	   r
   r   )r   erfr   r   r   r   r   r   r#   :   s    zGELUActivation._gelu_pythonc                 C   s
   |  |S r    )r$   r   r   r   r   r   =      
zGELUActivation.forward)F)
r   r   r   r   boolr"   r   r#   r   __classcell__r   r   r'   r   r   +   s
    r   c                   @   r   )FastGELUActivationz}
    Applies GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
    r   r   c                 C   s*   d| dt |d dd| |     S )Nr	   r
   g3E?r   )r   r   r   r   r   r   r   F   s   zFastGELUActivation.forwardNr   r   r   r   r   r-   A       r-   c                   @   r   )QuickGELUActivationzr
    Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
    r   r   c                 C   s   |t d|  S )NgZd;?)Fsigmoidr   r   r   r   r   V   s   zQuickGELUActivation.forwardNr   r   r   r   r   r/   Q   r.   r/   c                       s<   e Zd ZdZdedef fddZdedefdd	Z  ZS )
ClippedGELUActivationa  
    Clip the range of possible GeLU outputs between [min, max]. This is especially useful for quantization purpose, as
    it allows mapping negatives values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602.

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). See https://arxiv.org/abs/1606.08415
    minmaxc                    s8   ||krt d| d| dt   || _|| _d S )Nzmin should be < max (got min: z, max: ))
ValueErrorr!   r"   r3   r4   )r   r3   r4   r'   r   r   r"   g   s
   

zClippedGELUActivation.__init__xr   c                 C   s   t t|| j| jS r    )r   Zclipr&   r3   r4   )r   r7   r   r   r   r   o   s   zClippedGELUActivation.forward)	r   r   r   r   floatr"   r   r   r,   r   r   r'   r   r2   Z   s    r2   c                   @   r   )SiLUActivationa  
    See Gaussian Error Linear Units (Hendrycks et al., https://arxiv.org/abs/1606.08415) where the SiLU (Sigmoid Linear
    Unit) was originally introduced and coined, and see Sigmoid-Weighted Linear Units for Neural Network Function
    Approximation in Reinforcement Learning (Elfwing et al., https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated
    Activation Function (Ramachandran et al., https://arxiv.org/abs/1710.05941v1) where the SiLU was experimented with
    later.
    r   r   c                 C   
   t |S r    )r0   silur   r   r   r   r   |   r*   zSiLUActivation.forwardNr   r   r   r   r   r9   s   s    r9   c                   @   r   )MishActivationz
    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://arxiv.org/abs/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    r   r   c                 C   r:   r    )r0   mishr   r   r   r   r      r*   zMishActivation.forwardNr   r   r   r   r   r<      r   r<   c                   @   r   )LinearActivationz[
    Applies the linear activation function, i.e. forwarding input directly to output.
    r   r   c                 C   s   |S r    r   r   r   r   r   r      s   zLinearActivation.forwardNr   r   r   r   r   r>      r.   r>   c                       s   e Zd Z fddZ  ZS )ClassInstantierc                    s4   t  |}t|tr|n|i f\}}|di |S )Nr   )r!   __getitem__
isinstancetuple)r   keycontentclskwargsr'   r   r   r@      s   zClassInstantier.__getitem__)r   r   r   r@   r,   r   r   r'   r   r?      s    r?   r&   Zgelu_10i
   )r3   r4   	gelu_fastgelu_newZgelu_pytorch_tanhgelu_pythonr   TZlinearr=   
quick_geluZ	hardswishZpreluZreluZrelu6r1   r;   Zswishr   c                 C   s,   | t v rt |  S td|  dtt   )Nz	function z not found in ACT2FN mapping )ACT2FNKeyErrorlistkeys)Zactivation_stringr   r   r   get_activation   s
   rP   )$r   collectionsr   r   Zpaddle.nn.functionalr   r%   r0   r   ZLayerr   r   r-   r/   r2   r9   r<   r>   r?   Z	HardswishZPReLUZReLUZReLU6ZSigmoidZTanhZACT2CLSrL   rP   rJ   rI   r&   rH   rK   r;   r=   Z
linear_actr   r   r   r   <module>   sv   	
		
	