o
    "j                     @   s,   d dl mZ d dlmZ G dd deZdS )    )
functional)Layerc                       s.   e Zd ZdZ		d fdd	Zdd Z  ZS )
FusedEcMoea  A FusedEcMoe Layer.

    Parameters:
        hidden_size (int): The dim size of input units.
        inter_size (int): The dim size of feed forward network.
        num_expert (int): The number of experts.
        act_type (string): The activation type. Currently only support `gelu`, `relu`.
        weight_attr (ParamAttr, optional): The attribute for the learnable
            weight of this layer. The default value is None and the weight will be
            initialized to zero. For detailed information, please refer to
            paddle.ParamAttr.
        bias_attr (ParamAttr|bool, optional): The attribute for the learnable bias
            of this layer. If it is set to False, no bias will be added to the output.
            If it is set to None or one kind of ParamAttr, a bias parameter will
            be created according to ParamAttr. For detailed information, please refer
            to paddle.ParamAttr. The default value is None and the bias will be
            initialized to zero.

    Attribute:
        **weight** (Parameter): the learnable weight of this layer.
        **bias** (Parameter): the learnable bias of this layer.

    Shape:
        - input: Multi-dimentional tensor with shape :math:`[batch\_size, seq\_len, d\_model]` .
        - output: Multi-dimentional tensor with shape :math:`[batch\_size, seq\_len, d\_model]` .

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')
            >>> from paddle.incubate.nn.layer.fused_ec_moe import FusedEcMoe

            >>> x = paddle.randn([10, 128, 1024]) # [bsz, seq_len, d_model]
            >>> gate = paddle.randn([10, 128, 8]) # [bsz, seq_len, num_experts]
            >>> moe = FusedEcMoe(1024, 4096, 8, act_type="gelu")
            >>> y = moe(x, gate)
            >>> print(y.shape)
            [10, 128, 1024]
    Nc                    s   t    |||g}|d|g}|||g}	|d|g}
| j }| j|||dd| _| j|||dd| _| j|	||dd| _| j|
||dd| _|| _	| j	dvrRt
dd S )N   F)shapeattrdtypeZis_biasT)ZgeluZreluz'Currently only support `gelu`, `relu`. )super__init__Z_helperZget_default_dtypeZcreate_parameterbmm_weight0	bmm_bias0bmm_weight1	bmm_bias1act_typeNotImplementedError)selfZhidden_sizeZ
inter_sizeZnum_expertsr   Zweight_attrZ	bias_attrZweight0_shapeZbias0_shapeZweight1_shapeZbias1_shaper   	__class__ f/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/nn/layer/fused_ec_moe.pyr
   >   s,   
	





zFusedEcMoe.__init__c              	   C   s    t ||| j| j| j| j| jS )N)FZfused_ec_moer   r   r   r   r   )r   xZgater   r   r   forward^   s   zFusedEcMoe.forward)NN)__name__
__module____qualname____doc__r
   r   __classcell__r   r   r   r   r      s    0 r   N)Zpaddle.incubate.nnr   r   Z	paddle.nnr   r   r   r   r   r   <module>   s   