o
    #j-                     @   s2   d dl ZddlmZ ddlmZ G dd dZdS )    N   )IrGraph)_get_paddle_placec                   @   sb   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )QuantInt8MkldnnPassal  
    Convert QuantizationFreezePass generated IrGraph to MKL-DNN supported INT8
    IrGraph. Following transformations did in this pass:
        1. Convert int8 range weights with float32 data type, which are generated by
           the QuantizationFreezePass, to float32 range weights with float32 data type
           by using the corresponding scales. This conversion is because MKL-DNN INT8
           conv2d kernel and mul kernel now only support float32 weights input, hence
           weights quantization will happen inside the conv2d and mul INT8 kernel.
        2. Create the new conv2d or mul op with the converted weights and link its output
           to fake_dequantize_abs_max op's output and set conv2d's attribute "force_fp32
           _output" as true
        3. Transform fake_quantize_xx op to quantize op
        4. Remove fake_dequantize_abs_max op
    Nc                 C   sb   || _ t|| _ddg| _dg| _dg| _g d| _ddg| _dg| _i | _	i | _
i | _d	| _d
S )a|  
        Args:
            scope(static.Scope): scope is used to initialize the new parameters.
            place(static.CPUPlace|str): place is used to initialize the new parameters.
            When it is string, it can be only 'cpu'.


        Examples:
            .. code-block:: python

                >>> # The original graph will be rewrite.
                >>> import paddle
                >>> from paddle import static
                >>> from paddle.static.quantization import QuantInt8MkldnnPass
                >>> from paddle.framework import IrGraph
                >>> from paddle.framework import core

                >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
                >>> place = paddle.CPUPlace()
                >>> mkldnn_pass = QuantInt8MkldnnPass(static.global_scope(), place)
                >>> mkldnn_pass.apply(graph)
        Z$fake_quantize_moving_average_abs_maxZfake_quantize_range_abs_maxZfake_dequantize_max_absZ/fake_quantize_dequantize_moving_average_abs_max)conv2ddepthwise_conv2dmulr   r   Zpool2d   N)_scoper   _place_quantize_type_dequantize_type_quantize_dequantize_type_quantizable_ops	_conv_ops	_pool_ops	_in_scale
_max_range_new_output_s8_max)selfr
   r    r   r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/static/quantization/quant_int8_mkldnn_pass.py__init__%   s   



zQuantInt8MkldnnPass.__init__c           	      C   s  t |ts	J d| }dd | D }|D ]n}| | jv rO|dd }|dd }| | j|d | j	|< |
 d| j|< |dd | j|< | | jv r|
  }|
  }|dd }|d	d }| | j|d | j	|< |dd | j|< q|D ]L}| | jv r| | jv r| || n0| | jv r| || n"| || n| | jv r| || n| | jv r| || | | q|S )
a   
        Quantize the graph for running MKL-DNN INT8 inference. According
        to activation quantization type, the graph will transform fake
        quantize ops to quantize ops and remove the fake dequantize ops.

        Args:
            graph(IrGraph): the applied graph.
        z&graph must be the instance of IrGraph.c                 S   s   g | ]}|  qS r   )name).0pr   r   r   
<listcomp>a   s    z-QuantInt8MkldnnPass.apply.<locals>.<listcomp>Xr   ScaleZ	max_rangeOutInScale)
isinstancer   all_op_nodesZall_persistable_nodesr   r   input_load_paramr
   r   opattrr   outputr   r   Zinput_names
attr_namesr   r   _transform_to_conv_mkldnnr   _transform_to_pool_mkldnn_transform_to_mul_mkldnnr   _transform_to_quantize_mkldnn_remove_fake_dequantize_op_remove_unused_var_nodes)	r   graphopsZpersistable_varsop_node
input_nameZ
scale_nameinputsattrsr   r   r   applyR   sV   


zQuantInt8MkldnnPass.applyc                 C   s    | dd }|dd }d S )Nr    r   r   )r(   r$   )r   r0   r&   output_namer3   r   r   r   r+      s   z-QuantInt8MkldnnPass._transform_to_pool_mkldnnc                    d    dd } dd }| | j|}tt|| j| j| }|	|j
}| || | j  dd }| j|}|| | j| }	 fdd   D }
|jd|
||dd|	id	}| j| j|  }g }| j| | j g}|d
| |d| |dd |dd |dd ||| ||| |||	 |  d S )NFilterr   OutputInputc                       i | ]
}|   |qS r   r&   r'   r   r   r2   r   r   
<dictcomp>       zAQuantInt8MkldnnPass._transform_to_conv_mkldnn.<locals>.<dictcomp>Zfused_conv2d)r;   r9   Zop_typer5   r4   outputsZScale_weightsZScale_inZ	Scale_out      ?
use_mkldnn   force_fp32_outputr$   r(   r%   r
   npdividemultiplyr   r   Zreshapeshape_restore_var_find_node_by_namer4   all_var_nodesr   r&   r)   create_op_noder   Zset_attrlink_tosafe_remove_nodes)r   r0   r2   weight_namer7   weightw_fp32input_var_nodeweight_var_nodeoutput_var_noder5   Zconv_op_nodescale_inscale_wr   r?   r   r*      sH   

z-QuantInt8MkldnnPass._transform_to_conv_mkldnnc                    r8   )NYr   r    r   c                    r<   r   r=   r>   r?   r   r   r@      rA   z@QuantInt8MkldnnPass._transform_to_mul_mkldnn.<locals>.<dictcomp>r   )r   r[   rB   Zscale_yZscale_xZ	scale_outrD   rE   rF   rG   rH   )r   r0   r2   rS   r7   rT   rU   rV   rW   rX   r5   Zmul_op_noderY   rZ   r   r?   r   r,      sH   

z,QuantInt8MkldnnPass._transform_to_mul_mkldnnc                 C   s   | |j|dd }| |j|dd }| j| | j|dd d  }|jddd|ddd	|id
|id}|	|| |	|| |
| dS )zS
        Transform fake_quantize_xx op to quantize mkldnn op in the graph.
        r   r   r    r!   quantizeZMKLDNNLAYOUTrF   )Zdata_formatrE   r   Zis_negative_inputr;   r:   rB   N)rN   r4   r$   rC   r(   r   r%   r
   rP   rQ   rR   )r   r0   r2   rV   rX   rY   Zquant_op_noder   r   r   r-      s.   z1QuantInt8MkldnnPass._transform_to_quantize_mkldnnc                 C   s&   | |j|dd }|| d S )Nr   r   )rN   r4   r$   rR   )r   r0   r2   rV   r   r   r   r.     s   z.QuantInt8MkldnnPass._remove_fake_dequantize_opc                 C   s   t || S N)rI   arrayfind_var
get_tensor)r   scope
param_namer   r   r   r%   
  s   zQuantInt8MkldnnPass._load_paramc                 C   s"   | j | }||| j d S r]   )r
   r_   r`   setr   )r   r   r^   Ztensorr   r   r   rM     s   z QuantInt8MkldnnPass._restore_varc                    sz   t   | }|D ]}|jD ]} | q|jD ]} | qq	dd  D  t t fdd| }|| d S )Nc                 S   s   h | ]}|j qS r   node)r   nr   r   r   	<setcomp>  s    z?QuantInt8MkldnnPass._remove_unused_var_nodes.<locals>.<setcomp>c                    s
   | j  vS r]   rd   rd   Zall_used_varsr   r   <lambda>  s   
 z>QuantInt8MkldnnPass._remove_unused_var_nodes.<locals>.<lambda>)rc   r#   r4   addrC   filterrO   rR   )r   r0   r1   r2   Z
input_nodeZoutput_nodeZall_unused_varsr   rh   r   r/     s    


z,QuantInt8MkldnnPass._remove_unused_var_nodes)NN)__name__
__module____qualname____doc__r   r6   r+   r*   r,   r-   r.   r%   rM   r/   r   r   r   r   r      s    
-6-.r   )numpyrI   Zbase.frameworkr   Z	frameworkr   r   r   r   r   r   <module>   s   