o
    #j                     @   s|  d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ ddlmZ g dZe	ee jddZG dd deZG dd deZG dd deZG dd deZ e Z!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G d d! d!eZ&G d"d# d#eZ'G d$d% d%eZ(G d&d' d'eZ)d(d) Z*dS )*    N)_legacy_C_opsin_dynamic_mode)check_variable_and_dtype)_create_tensor)
get_logger)	ParamAttrcore)
functional)Constant)FakeQuantActLSQPlusFakeQuantWeightLSQPlus)unique_name   )Layer)FakeQuantAbsMaxFakeQuantMovingAverageAbsMaxFakeQuantChannelWiseAbsMaxQuantizedConv2DQuantizedConv2DTransposeQuantizedLinearMovingAverageAbsMaxScaleMAOutputScaleLayerFakeQuantMAOutputScaleLayer	QuantStubQuantizedRowParallelLinearQuantizedColumnParallelLinearQuantizedMatmulz&%(asctime)s-%(levelname)s: %(message)s)fmtc                       4   e Zd ZdZ					d
 fdd	Zdd	 Z  ZS )r   a  
    FakeQuantAbsMax layer does the abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = max(abs(X))`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    N   float32Fc                    s~   t    || _|| _|| _|r| dnd}t|| _|r:t| jt	ddd}| j
dg|| jd| _d| j_d S d | _d S )	N.scalequant_dequant.scaleMbP?FnameZinitializerZ	trainable   shapeattrdtypeT)super__init___quant_bits_name_reduce_typer   generate_scale_namer   r
   create_parameter_dtype_scalestop_gradient)selfr%   
quant_bitsr*   quant_on_weightreduce_typescale_prefix
scale_attr	__class__ ]/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/nn/quant/quant_layers.pyr,   =   s"   


zFakeQuantAbsMax.__init__c           	      C   s6  t  rNd| jf}t|j|j d|j|jdd}| j}| jdkr+t	j
j|t	j
jjd |s?ttjjj| jdg| jdd}d|_tj|||g|R  \}}|S t|d	d
gd d| ji}d|gi}| jj|j d|jtjjjddd}| j}|s| jj| j| jtjjjddd}|g|gd}| jjd|||d |S )N
bit_length.quantized.dequantizedFtyper%   r(   r*   persistablemaxopr&   Tinputr    r   Xr%   r*   rC   rD   r5   ZOutZOutScale fake_quantize_dequantize_abs_maxrC   inputsoutputsattrs)r   r-   r   rC   r%   r(   r*   r4   r/   paddledistributed
all_reduceReduceOpMAXr   VarDescVarType
LOD_TENSORr1   r3   r5   r   rL   r   _helpercreate_variable	append_op	r6   rH   rP   	quant_outZ	out_scaleout_rN   rO   r>   r>   r?   forwardX   sv   






zFakeQuantAbsMax.forward)Nr   r    FN__name__
__module____qualname____doc__r,   r`   __classcell__r>   r>   r<   r?   r   3   s    r   c                       r   )r   aM  
    FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    N?r   r    c                    s   t    || _|| _|| _|r| dnd}tt|tddd}| j	dg||d| _
d| j
_|r8| d	nd
}tt|tddd}	| j	dg|	|d| _d| j_|r[| dnd}
tt|
tddd}| j	dg||d| _d| j_d S )Nr!   r"   r#   Fr$   r&   r'   T.statezquant_dequant.state.accumzquant_dequant.accum)r+   r,   _moving_rater-   r/   r   r   r0   r
   r2   r4   r5   _state_accum)r6   r%   moving_rater7   r*   r9   r:   r;   state_prefix
state_attraccum_prefix
accum_attrr<   r>   r?   r,      sD   
z%FakeQuantMovingAverageAbsMax.__init__c           
   	   C   sh  t  rXd| jd| jd| j f}t|j|j d|j|jdd}| j	dkr0t
jj| jt
jjjd | jr6| jnd }| jr>| jnd }tj|| j|||| j||g|R  \}}}}|S t|d	d
gd | j| j| j d}|g| jgd}| jj|j d|jtjjjddd}|g| jgd}	| jr| jg|d< | jg|d< | jg|	d< | jg|	d< | jjd||	|d |S )Nrm   r@   is_testrA   FrB   rE   rF   rH   r    r   )rm   r@   rr   )rI   ZInScalerJ   rK   InStateInAccumOutStateOutAccum/fake_quantize_dequantize_moving_average_abs_maxrM   )r   rj   r-   trainingr   rC   r%   r(   r*   r/   rQ   rR   rS   r4   rT   rU   rk   rl   r   rw   r   rY   rZ   r   rV   rW   rX   r[   
r6   rH   rP   r]   stateaccumr^   r_   rN   rO   r>   r>   r?   r`      s   

	

z$FakeQuantMovingAverageAbsMax.forward)Nrg   r   r    Nra   r>   r>   r<   r?   r      s    -r   c                       s4   e Zd Z							d
 fdd	Zdd	 Z  ZS )r   Nr   r   r    Fc           
         s   |sJ dt    || _|| _|| _|| _|| _|| _|r$| dnd}t	|| _
|rJt| j
tddd}	| j| jg|	| jd| _d| j_d S d | _d S )	Nz5Channel_wise only can be used on weight quantization.r!   r"   g        Fr$   r'   T)r+   r,   r-   _quant_axisr3   r.   _channel_numr/   r   r0   r1   r   r
   r2   r4   r5   )
r6   r%   channel_numr7   
quant_axisr*   r8   r9   r:   r;   r<   r>   r?   r,     s0   

z#FakeQuantChannelWiseAbsMax.__init__c           	      C   sF  t  rTd| jd| jf}t|j|j d|j|jdd}| j}| j	dkr.t
jj|t
jjjd |d u rEttjjj| j| jg| jdd}d|_tj|||g|R  \}}|S t|d	d
gd | j| jd}d|gi}| jj|j d|jtjjjddd}| j}|s| jj| j| jtjjjddd}|g|gd}| jjd|||d |S )Nr@   r   rA   FrB   rE   rF   TrH   r    r   )r@   r   rI   rJ   rK   -fake_channel_wise_quantize_dequantize_abs_maxrM   )r   r-   r|   r   rC   r%   r(   r*   r4   r/   rQ   rR   rS   rT   rU   r   rV   rW   rX   r1   r}   r3   r5   r   r   r   rY   rZ   r[   r\   r>   r>   r?   r`   A  s   





z"FakeQuantChannelWiseAbsMax.forward)NNr   r   r    FNrb   rc   rd   r,   r`   rf   r>   r>   r<   r?   r     s    #r   c                       s(   e Zd Z	d fdd	Zdd Z  ZS )	r   Nrg   r    c                    s   t    || _|| _|r| dnd}t|}t|tddd}| jdg||d| _	d| j	_
|r7| d	nd
}tt|tddd}	| jdg|	|d| _d| j_
|rZ| dnd}
tt|
tddd}| jdg||d| _d| j_
dS )a  
        MovingAverageMaxScale layer is used to calculating the output quantization
        scale of Layer. Its computational formula is described as below:

        :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
        :math:`Out = X`
        r!   zoutscale.scaler   Fr$   r&   r'   Trh   zoutscale.stateri   zoutscale.accumN)r+   r,   rj   r/   r   r0   r   r
   r2   r4   r5   rk   rl   )r6   r%   rm   r*   r9   r:   Z
scale_namer;   rn   ro   rp   rq   r<   r>   r?   r,     s@   



z!MovingAverageAbsMaxScale.__init__c           
      C   sV  t  rSd| jd| j f}t|j|j d|j|jdd}| jdkr-t	j
j| jt	j
jjd | jr3| jnd }| jr;| jnd }tj||||| j||g|R  \}}}}|S t|dd	d
gd | j| j d}d|gi}| jj|j d|jtjjjddd}|g| jgd}	| jr| jg|d< | jg|d< | jg|	d< | jg|	d< | jjd||	|d |S )Nrm   rr   z.tmpFrB   rE   rF   rH   r    Zfloat64r   )rm   rr   rI   rJ   rK   rs   rt   ru   rv   moving_average_abs_max_scalerM   )r   rj   rx   r   rC   r%   r(   r*   r/   rQ   rR   rS   r4   rT   rU   rk   rl   r   r   r   rY   rZ   r   rV   rW   rX   r[   ry   r>   r>   r?   r`     sp   




z MovingAverageAbsMaxScale.forward)Nrg   r    Nr   r>   r>   r<   r?   r     s    -r   c                       <   e Zd ZdZ									d
 fdd	Zdd	 Z  ZS )r   z
    The computational logic of QuantizedConv2D is the same with Conv2D.
    The only difference is that its inputs are all fake quantized.
    r   rg   abs_maxNc              
      s   t    |j| _|j| _|j| _|j| _| jdkr|j| _|j| _|j| _|j	| _	|j
| _
d| _|	d ur:|	 | _nt|| j	j||| jd| j	j| j | jd| _|
d urY|
 | _nt|| ||| jdd| _|d urn| nd | _|d ur{| | _d S d | _d S )Nzerosr   Tr%   rm   r7   r*   r8   r~   r   Fr%   rm   r7   r*   r8   )r+   r,   _groups_stride_padding_padding_mode _reversed_padding_repeated_twice	_dilation_data_formatweightbiasZ_conv2d_quant_axis_fake_quant_weight_get_fake_quant_typer%   r3   r(   _fake_quant_input	full_name_act_preprocess_weight_preprocessr6   layerweight_bitsactivation_bitsrm   Zweight_quantize_typeZactivation_quantize_typeZweight_pre_layerZact_pre_layerZweight_quant_layerZact_quant_layerr<   r>   r?   r,     sR   





zQuantizedConv2D.__init__c              
   C   s   | j d ur
|  |}| |}| j}| jd ur| | j}| |}| jdkr6tj|| j| j| j	d}d| _
tj||| j| j
| j| j| j| j	dS )Nr   )modedata_formatr   )r   paddingstridedilationgroupsr   )r   r   r   r   r   r   Fpadr   r   r   Zconv2dr   r   r   r   )r6   rH   quant_inputr   quant_weightr>   r>   r?   r`   A  s2   





zQuantizedConv2D.forward	r   r   rg   r   r   NNNNra   r>   r>   r<   r?   r     s    >r   c                       s>   e Zd ZdZ									d
 fdd	Zddd	Z  ZS )r   a  

    The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose.
    The only difference is that its inputs are all fake quantized.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn as nn
            >>> from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose

            >>> x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.)
            >>> conv = nn.Conv2DTranspose(4, 6, (3, 3))
            >>> conv_quantized = QuantizedConv2DTranspose(conv)
            >>> y_quantized = conv_quantized(x_var)
            >>> y_var = conv(x_var)
            >>> print(y_var.shape)
            [2, 6, 10, 10]
            >>> print(y_quantized.shape)
            [2, 6, 10, 10]

    r   rg   r   Nc              
      s   t    |j| _|j| _|j| _|j| _|j| _|j| _|j	| _	|j
| _
d| _|	dur1|	 | _nt|| j	j||| jd| j	j| j | jd| _|
durP|
 | _nt|| ||| jdd| _|dure| nd| _|durr| | _dS d| _dS )z[
        Constructor.

        The arguments are the same as ImperativeQuantAware.
        r&   NTr   Fr   )r+   r,   r   r   r   output_padding_output_paddingr   r   r   r   Z_conv2d_transpose_quant_axisr   r   r%   r3   r(   r   r   r   r   r   r<   r>   r?   r,   y  sP   



z!QuantizedConv2DTranspose.__init__c                 C   s   | j d ur
|  |}| |}| j}| jd ur| | j}| |}|d u r*| j}nd}tj||| j| j	|| j
| j| j|| jd
S )Nr   )r   r   r   r   r   r   output_sizer   )r   r   r   r   r   r   r   Zconv2d_transposer   r   r   r   r   r   )r6   rH   r   r   r   r   r   r>   r>   r?   r`     s,   




z QuantizedConv2DTranspose.forwardr   Nra   r>   r>   r<   r?   r   `  s    @r   c                       r   )r   z
    The computational logic of QuantizedLinear is the same with Linear.
    The only difference is that its inputs are all fake quantized.
    r   rg   r   Nc                    s   t    |j| _|j| _|j| _d| _|	d ur|	 | _nt|| jj||| jd| jj	| j | jdd	| _|
d ur=|
 | _
nt|| ||| jdd| _
|d urR| nd | _|d ur_| | _d S d | _d S )Nr&   T)r%   rm   r7   r*   r8   r~   r   quant_linearFr   )r+   r,   r   r   r%   _linear_quant_axisr   r   r3   r(   r   r   r   r   r   r<   r>   r?   r,     sD   



zQuantizedLinear.__init__c                 C   s^   | j d ur
|  |}| |}| j}| jd ur| | j}| |}tj||| j| jd}|S )Nxr   r   r%   )	r   r   r   r   r   r   linearr   r%   )r6   rH   r   r   r   r^   r>   r>   r?   r`     s   




zQuantizedLinear.forwardr   ra   r>   r>   r<   r?   r     s    7r   c                       8   e Zd Z									d	 fdd	Zdd Z  ZS )
r   r   rg   r   Nc                    s   t    	 |	d u sJ d|
d u sJ d|j| _|j| _|j| _d| _|j| _|j| _|j	| _	t
|| jj||| jd| jj| j | jtj dkrLdnd d	| _t
|| ||| jdd d| _|d urg| nd | _|d urt| | _d S d | _d S )	NzHWhen quantizing ColumnParallelLinear, weight_quant_layer should be None.zEWhen quantizing ColumnParallelLinear, act_quant_layer should be None.r&   TrE   r%   rm   r7   r*   r8   r~   r   r9   Fr%   rm   r7   r*   r8   r9   )r+   r,   r   r   r.   r%   r   is_mpmodel_parallel_groupgather_outputr   r3   r(   rQ   rR   get_world_sizer   r   r   r   r   r   r<   r>   r?   r,   $  sV   


z&QuantizedColumnParallelLinear.__init__c                 C   s   | j rtjjj|| jd}n|}| jd ur| |}| |}| j}| j	d ur-| 	| j}| 
|}tj||| j| jd}| jrO| j rOtjjj|| jd}|S |}|S )Ngroupr   )r   rQ   rR   
collectiveZ_c_identityr   r   r   r   r   r   r   r   r   r%   r   Z	_c_concat)r6   rH   input_parallelr   r   r   output_paralleloutputr>   r>   r?   r`   e  s,   




z%QuantizedColumnParallelLinear.forwardr   r   r>   r>   r<   r?   r   #      Ar   c                       r   )
r   r   rg   r   Nc                    s  t    |	d u sJ d|
d u sJ d|j| _|j| _|j| _d| _|j| _|j| _|j	| _	t
|| jj||| jd| jj| j | jtj dkrKdnd d	| _t
|| ||| jdtj dkrcdnd d| _|d uro| nd | _|d ur|| | _d S d | _d S )	NzQWhen quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself.zNWhen quantizing RowParallelLinear, act_quant_layer cannot defined by yourself.r&   TrE   r   Fr   )r+   r,   r   r   r.   r%   r   input_is_parallelr   r   r   r3   r(   rQ   rR   r   r   r   r   r   r   r   r<   r>   r?   r,     sX   


z#QuantizedRowParallelLinear.__init__c           	      C   s   | j s| js	|}n
tjjj|| jd}| jd ur| |}| |}| j	}| j
d ur0| 
| j	}| |}tj||| jd}| jrNtjjj|| jddd}n|}| jd ur\|| j }|S |}|S )Nr   )r   r   r%   T)r   Zuse_calc_streamZuse_model_parallel)r   r   rQ   rR   r   Z_c_splitr   r   r   r   r   r   r   r   r%   Z_mp_allreducer   )	r6   rH   r   r   r   r   r   Zoutput_r   r>   r>   r?   r`     s6   




z"QuantizedRowParallelLinear.forwardr   r   r>   r>   r<   r?   r     r   r   c                       s@   e Zd ZdZ										d fdd	Zdd	d
Z  ZS )r   z
    The computational logic of QuantizedMatmul is the same with Matmul.
    The only difference is that its inputs are all fake quantized.
    Nr   rg   r   c                    sz   t    |
d ur|
 | _|
 | _nt|||dd| _t|||dd| _|d ur+| nd | _|d ur8| | _d S d | _d S )NF)rm   r7   r8   )r+   r,   _fake_quant_x_fake_quant_yr   _act_preprocess_x_act_preprocess_yr   r<   r>   r?   r,     s,   

zQuantizedMatmul.__init__Fc           	      C   sR   | j d ur
|  |}| |}| jd ur| |}| |}t|||||}|S r   )r   r   r   r   rQ   matmul)	r6   r   yZtranspose_xZtranspose_yr%   Zquant_xZquant_yr^   r>   r>   r?   r`     s   





zQuantizedMatmul.forward)
Nr   r   rg   r   r   NNNN)FFNra   r>   r>   r<   r?   r     s    (r   c                       s4   e Zd ZdZ					d	 fdd	Zdd Z  ZS )
r   z
    Add MovingAverageMaxScale layer to the behind of the input layer.
    Calculate the scale (moving average abs max) for the output of the input layer.
    Nrg   r    c                    s4   t    || _|du r| }t||||| _dS )z
        Construct
        N)r+   r,   _layerr   r   _ma_output_scale)r6   r   rm   r%   r*   r9   r<   r>   r?   r,   (  s   

zMAOutputScaleLayer.__init__c                 O   s.   | j |i |}t|tttfr|S | |S r   )r   
isinstancelisttupledictr   r6   rN   kwargsr^   r>   r>   r?   r`   ;  s   
zMAOutputScaleLayer.forward)Nrg   Nr    Nra   r>   r>   r<   r?   r   "  s    r   c                       s4   e Zd ZdZ					d	 fdd	Zdd Z  ZS )
r   zR
    Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer.
    r   rg   Nc           	   	      s>   t    || _td|d u r| n|||| jd|d| _d S )Nmoving_average_abs_maxFr   )r+   r,   r   r   r   r3   _fake_quant_output)	r6   r   r   r   rm   r%   r9   argsr   r<   r>   r?   r,   I  s   
z$FakeQuantMAOutputScaleLayer.__init__c                 O   s8   | j |i |}t|ttfrt|dkr|S | |S )Nr&   )r   r   r   r   lenr   r   r>   r>   r?   r`   `  s   
z#FakeQuantMAOutputScaleLayer.forward)r   r   rg   NNra   r>   r>   r<   r?   r   D  s    r   c                 K   s  | dd | dd| dd| dd d}| dkr$| d	d
|d	< n| dkr1| dd|d< n| dkrX| d	d
|d	< | dd |d< | dd|d< |d d usWJ dn^| dkru| dd
|d< d
|d< d|d< | dd
|d< nA| dkrd} | dd
|d< d|d< | dd |d< | dd
|d< |d d usJ dn| dkr| dd
|d< | dd|d< tttttd}||  di |S )Nr%   r7   r   r*   r    r9   )r%   r7   r*   r9   r   r8   Fr   rm   rg   channel_wise_abs_maxr~   r   r   zHYou need to input channel_numwhen you use channel_wise_abs_max strategy.
lsq_weightZall_postiveZper_channelr&   r   Zchannel_wise_lsq_weightTlsq_actZ	symmetric)r   r   r   r   r   r>   )getr   r   r   r   r   )Z
quant_typer   Z	call_argsZfake_quant_mapr>   r>   r?   r   i  sP   



r   )+loggingrQ   r   r   Zpaddle.base.data_feederr   Zpaddle.base.frameworkr   Zpaddle.base.log_helperr   Zpaddle.frameworkr   r   Z	paddle.nnr	   r   Zpaddle.nn.initializerr
   Zpaddle.nn.quant.lsqr   r   Zpaddle.utilsr   Zlayer.layersr   __all__rb   INFO_loggerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r>   r>   r?   <module>   s>   e lqcvM`d;"%