o
    #Õjpx  ã                   @   s>   d dl ZddlmZ ddlmZmZ ejjZG dd„ dƒZ	dS )é    Né   )ÚIrGraph)Ú_get_paddle_placeÚcorec                   @   s6  e Zd ZdZ					dJdd„Zdd„ Zdd	„ Zd
d„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2d3„ Zd4d5„ Zd6d7„ Zd8d9„ ZdKd:d;„Zd<d=„ Z d>d?„ Z!d@dA„ Z"dBdC„ Z#dDdE„ Z$dFdG„ Z%dHdI„ Z&dS )LÚQuant2Int8MkldnnPassa•  
    Transform a quant model IrGraph into MKL-DNN supported INT8 IrGraph.
    The pass consists of the following transformations:
        1. gather scale values from fake quantize/dequantize operators,
        2. extract FP32 inference model graph from the quant graph, i.e.
            a.  remove fake quantize/dequantize operators,
            b.  dequantize conv2d and mul's weights,
        3. optimize the FP32 graph using standard FP32 optimization fuses
            (e.g. `conv2d`+`bn` -> `conv2d`),
        4. quantize the optimized FP32 graph using standard INT8v2 quantization
            passes (`cpu_quantize_pass`, `cpu_quantize_squash_pass`).
    NFc                 C   sÖ   || _ t|ƒ| _|| _|| _ddg| _ddg| _g d¢| _|| _|d ur&|ndh| _	g d¢| _
dg| _d	d
g| _dg| _dg| _dg| _ddg| _ddg| _ddg| _dg| _i | _i | _i | _d| _d| _d| _d S )NZ$fake_quantize_moving_average_abs_maxZfake_quantize_range_abs_maxZfake_dequantize_max_absZ$fake_channel_wise_dequantize_max_abs)Z fake_quantize_dequantize_abs_maxÚ/fake_quantize_dequantize_moving_average_abs_maxZ-fake_channel_wise_quantize_dequantize_abs_maxéÿÿÿÿ)Z
transpose2Zreshape2Úpool2dÚsliceÚshapeZnearest_interpZnearest_interp_v2ÚsplitÚscaleZconv2dZdepthwise_conv2dr	   ÚmulÚfcÚreluZrelu6ÚmatmulZ	matmul_v2Z
fusion_gruZ	multi_gruZfusion_lstmé   r   Úint8)Ú_scoper   Ú_placeÚ_coreÚ_debugÚ_fake_quantize_typesÚ_fake_dequantize_typesÚ_fake_quantize_dequantize_typesÚ_ops_to_quantizeÚ_op_ids_to_skipÚ_scale_immutable_opsÚ
_scale_opsÚ	_conv_opsZ	_pool_opsÚ_mul_opsÚ_fc_opsÚ	_relu_opsÚ_matmul_opsÚ_gru_opsÚ	_lstm_opsÚ_weight_thresholdsÚ_var_quant_scalesÚ
_max_rangeÚ_s8_maxÚ	_pass_idxÚ_pass_group)Úselfr   r   r   r   r   r   © r-   ús/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/static/quantization/quant2_int8_mkldnn_pass.pyÚ__init__%   s<   	
þþ
ÿ






zQuant2Int8MkldnnPass.__init__c                 C   sŽ   t |tƒs	J dƒ‚|  d¡ |  |¡}|  |¡}|  |¡}|  |¡}|  |¡}|  |¡}|  	|¡}|  
|¡}|  |¡}|  |¡}|  |¡}|S )Nú&graph must be the instance of IrGraph.r   )Ú
isinstancer   Ú_reset_pass_idx_and_groupÚ_label_skip_quantized_opÚ#_gather_weight_thresholds_from_fakeÚ_gather_input_scales_from_fakeÚ_gather_output_scales_from_attrÚ_remove_fake_opsÚ_dequantize_weightsÚ_optimize_fp32_graphÚ_compute_weight_scalesÚ_propagate_scalesÚ_quantize_fp32_graphÚ_cleanup©r,   Úgraphr-   r-   r.   Úapply^   s$   ÿþ











zQuant2Int8MkldnnPass.applyc                 C   s4   t |tƒs	J dƒ‚|  d¡ |  |¡}|  |¡}|S )Nr0   Zfp32)r1   r   r2   r9   r=   r>   r-   r-   r.   Úprepare_and_optimize_fp32s   s   ÿþ


z.Quant2Int8MkldnnPass.prepare_and_optimize_fp32c                 C   s   d| _ || _d S ©Nr   )r*   r+   )r,   Úgroupr-   r-   r.   r2   }   s   
z.Quant2Int8MkldnnPass._reset_pass_idx_and_groupc                 C   s   t  ¡ }| |t  ¡ ¡ |S ©N)r   Z	LoDTensorÚsetZCPUPlace)r,   r   Útensorr-   r-   r.   Ú_convert_scale2tensor   s   z*Quant2Int8MkldnnPass._convert_scale2tensorc                 C   s   t | jƒdkS rB   )Úlenr   ©r,   r-   r-   r.   Ú_is_quantizing_all_ops†   ó   z+Quant2Int8MkldnnPass._is_quantizing_all_opsc                    s   t ‡ fdd„| ¡ D ƒƒS )Nc                 3   s    | ]	}|  ¡ ˆ v V  qd S rD   ©Úname)Ú.0Úop©Úop_typesr-   r.   Ú	<genexpr>Š   s   € zDQuant2Int8MkldnnPass._is_any_of_op_types_in_graph.<locals>.<genexpr>)ÚanyÚall_op_nodes©r,   rQ   r?   r-   rP   r.   Ú_is_any_of_op_types_in_graph‰   s   z1Quant2Int8MkldnnPass._is_any_of_op_types_in_graphc                    s*   ˆ   ||¡oˆ  ¡ pt‡ fdd„|D ƒƒS )Nc                 3   s    | ]}|ˆ j v V  qd S rD   )r   )rN   Zop_typerI   r-   r.   rR      s   € zEQuant2Int8MkldnnPass._is_any_of_op_types_quantized.<locals>.<genexpr>)rV   rJ   rS   rU   r-   rI   r.   Ú_is_any_of_op_types_quantizedŒ   s   þz2Quant2Int8MkldnnPass._is_any_of_op_types_quantizedc                 C   ó   |   | j|¡S rD   )rW   r   r>   r-   r-   r.   Ú_is_conv_quantized’   rK   z'Quant2Int8MkldnnPass._is_conv_quantizedc                 C   rX   rD   )rW   r!   r>   r-   r-   r.   Ú_is_fc_quantized•   rK   z%Quant2Int8MkldnnPass._is_fc_quantizedc                 C   s~   | j | j | j }| ¡ D ]/}| ¡ |v r<| ¡  d¡s<d}|jD ]}|jD ]
}d| ¡ vr0d}q&q!|s<| ¡  dd¡ q|S )a³  
        For some ops(conv2d, depthwise_conv2d, mul, matml), find and label
        the skip quantized ops. cpu_quantize_placement_pass will use the
        label to identify it.
        For static models, the skip quantized ops have `skip_quant` attr.
        Therefore, it only needs to find and label the skip quantized ops for
        dygraph models, in which the quantized ops don't have `quantization_type`
        attr.
        Zquantization_typeTÚquantizeFZ
skip_quant)	r   r    r#   rT   rM   rO   Úhas_attrÚinputsZ	_set_attr)r,   r?   Z
target_opsÚop_nodeZis_quantized_opZvar_nodeZfront_op_noder-   r-   r.   r3   ˜   s    
ÿ

€þ€z-Quant2Int8MkldnnPass._label_skip_quantized_opc                 C   s(   | j }|D ]}||vr||f||< qdS )zK
        Save quantization scales for variables. Do not overwrite.
        N©r'   )r,   Z	var_namesÚuse_unsigned_intÚ
lod_tensorÚscalesÚvar_namer-   r-   r.   Ú_add_scale_for_vars°   s   €þz(Quant2Int8MkldnnPass._add_scale_for_varsc                 C   sÐ   dg}|  | j¡ | ¡ D ]X}| ¡ |v re| ¡  d¡}|dks'J d |¡ƒ‚| d¡d }| d¡d }| d¡d }t	 
d	|  | j|¡d  ¡ t	j¡}d
||t	jk< |  |¡}	d}
|  ||g|
|	¡ q|S )Nr   Ú
bit_lengthé   zCUnsupported number quantization bits ({}). Only 8 is supported now.ÚXr   ÚInScaleÚOutç      ð?ç        F)Úextendr   rT   rM   rO   ÚattrÚformatÚinputÚoutputÚnpÚarrayÚ_load_paramr   ÚastypeÚfloat64ÚInfrG   rd   )r,   r?   Zfake_opsrO   re   Ú
input_nameÚ
scale_nameÚoutput_namer   ra   r`   r-   r-   r.   r5   ¹   s4   
ÿÿÿþ

ÿ€z3Quant2Int8MkldnnPass._gather_input_scales_from_fakec                 C   s®   |  ¡ D ]P}| ¡ | jv rT| d¡d }| ¡  d¡r<t | ¡  d¡¡ 	tj
¡}t | j| j | ¡ 	tj
¡| j|< q| d¡d }t |  | j|¡¡ 	tj
¡| j|< q|S )Nrg   r   Z	max_rangeZScales)rT   rM   r   ro   rO   r\   rq   rr   rm   rt   ru   r)   r&   rs   r   )r,   r?   rO   rw   r(   rx   r-   r-   r.   r4   ×   s(   ÿÿ
þÿþ€z8Quant2Int8MkldnnPass._gather_weight_thresholds_from_fakec           	      C   s    |  ¡ D ]I}| ¡  d¡rM| ¡  d¡}|dkrqt d| ¡ tj¡}d||tjk< |  	|¡}d}| ¡  
¡ D ]}| ¡  |¡D ]
}|  |g||¡ qAq8q|S )NZout_thresholdrk   rj   F)rT   rO   r\   rm   rq   rr   rt   ru   rv   rG   Úoutputsrp   rd   )	r,   r?   rO   Z
attr_scaler   Zscale_lod_tensorr`   ry   Zout_var_namer-   r-   r.   r6   ê   s"   
ÿÿ€z4Quant2Int8MkldnnPass._gather_output_scales_from_attrc                    s`   ‡fdd„‰ ‡ ‡fdd„}||ƒ}t ƒ }t|ƒdkr.||kr.|}||ƒ}t|ƒdkr.||ks|S )Nc                    sJ   ˆ j | \}}t |¡|  ¡  d¡ }ˆ  | tj¡¡}||fˆ j |< d S )Nr   )r'   rq   rr   rO   rm   rG   rt   ru   )rO   ro   rp   ZunsignedrF   r   Z
new_tensorrI   r-   r.   Ú_update_scale_op_in_scaleý   s   zIQuant2Int8MkldnnPass._propagate_scales.<locals>._update_scale_op_in_scalec                    sV  t ƒ }|  ¡ D ]¡}| ¡ ˆjv rc| ¡ dks| ¡ dkr$| d¡d }n| d¡d }| d¡d }||g}t‡fdd„|D ƒƒrG| |¡ q|ˆjv rUˆj| ˆj|< q|ˆjv rbˆj| ˆj|< q| ¡ d	krˆ| d¡d }|ˆjv r‡| d¡}|D ]
}ˆj| ˆj|< q|q| ¡ ˆj	v r¨| d¡d }| d¡d }|ˆjv r¨ˆ |||ƒ q|S )
Nr
   r   ZInputr   rg   ri   c                 3   s    | ]}|ˆ j vV  qd S rD   r_   )rN   rM   rI   r-   r.   rR     s
   € ÿ
ÿzQQuant2Int8MkldnnPass._propagate_scales.<locals>._update_scales.<locals>.<genexpr>Úconcat)
rE   rT   rM   r   ro   rp   ÚallÚupdater'   r   )r?   Úwaiting_for_scalerO   rw   ry   Ztensor_namesÚinput_names©r{   r,   r-   r.   Ú_update_scales  sR   þ

þÿ
þÿ€

þÿ€
€z>Quant2Int8MkldnnPass._propagate_scales.<locals>._update_scalesr   )rE   rH   )r,   r?   r‚   r   Zwaiting_for_scale_prevr-   r   r.   r;   ü   s   )üz&Quant2Int8MkldnnPass._propagate_scalesc                 C   s   t  | |¡ ¡ ¡S rD   )rq   rr   Úfind_varÚ
get_tensor)r,   ÚscopeÚ
param_namer-   r-   r.   rs   8  s   z Quant2Int8MkldnnPass._load_paramc                 C   sd   |  ¡ D ]+}| ¡ | jv r|  ||¡ q| ¡ | jv r"|  ||¡ q| ¡ | jv r/|  ||¡ q|S rD   )rT   rM   r   Ú_remove_fake_quantizer   Ú_remove_fake_dequantizer   )r,   r?   rO   r-   r-   r.   r7   ;  s   €z%Quant2Int8MkldnnPass._remove_fake_opsc           	      C   s    |  |j| d¡d ¡}|  |j| d¡d ¡}|  |j| d¡d ¡}|  |j| d¡d ¡}|j}|D ]}|  |||¡ | ||¡ q5| ||||h¡ |S )Nrg   r   rh   ri   ZOutScale©Z_find_node_by_namer]   ro   rz   rp   Ú_swap_inputsÚlink_toÚsafe_remove_nodes)	r,   r?   rO   Zfake_quant_inZfake_quant_in_scaleZfake_quant_outZfake_quant_out_scaleÚnext_opsÚnext_opr-   r-   r.   r‡   F  s$   ÿÿÿ
ÿz*Quant2Int8MkldnnPass._remove_fake_quantizec                 C   sl   |  |j| d¡d ¡}|  |j| d¡d ¡}|j}|D ]}|  |||¡ | ||¡ q| ||h¡ |S )Nrg   r   ri   r‰   )r,   r?   rO   Zfake_dequant_inZfake_dequant_outr   rŽ   r-   r-   r.   rˆ   \  s   ÿz,Quant2Int8MkldnnPass._remove_fake_dequantizec                    sN   |  ¡  ¡ D ]}ˆ ¡ | |¡v r$|  ¡  |‡ ‡fdd„| |¡D ƒ¡ qd S )Nc                    s$   g | ]}|ˆ  ¡ krˆ   ¡ n|‘qS r-   rL   )rN   Úx©Ú	new_inputÚ	old_inputr-   r.   Ú
<listcomp>o  s    ÿÿz5Quant2Int8MkldnnPass._swap_inputs.<locals>.<listcomp>)rO   r€   rM   ro   Z	set_input)r,   rO   r’   r‘   rw   r-   r   r.   rŠ   j  s   þþ€þz!Quant2Int8MkldnnPass._swap_inputsc                    sz   ‡ fdd„}ˆ j ˆ j }| ¡ D ]*}| ¡ ˆ jv r'||dƒr'ˆ  ||dd¡ q| ¡ |v r:||dƒr:ˆ  ||dd¡ q|S )Nc                    sF   |   |¡d }ˆ j |¡d u rdS ˆ  ˆ j|¡}t t |d¡dk¡S )Nr   Fé   )ro   r   rƒ   rs   rq   r}   Úmod)r^   Úweight_nameÚweight_var_nameÚweightrI   r-   r.   Ú_is_int8_weightsv  s
   zBQuant2Int8MkldnnPass._dequantize_weights.<locals>._is_int8_weightsÚFilterÚOutputÚYri   )r    r#   rT   rM   r   Ú_dequantize_op_weights)r,   r?   r™   Zmul_and_matmul_opsrO   r-   rI   r.   r8   u  s   €z(Quant2Int8MkldnnPass._dequantize_weightsc           
      C   sÜ   |  |¡d }| |¡d }| j| }|  | j|¡}|jdks'|j|jd kr6t t 	|| j
¡j|j¡j}	n&t|jƒdkrQ|j|jd krQt t 	|| j
¡|¡}	ntd |j|j|¡ƒ‚|	 |j¡ tj¡}	|  ||	¡ d S )Nr   r”   zbThe size of weight scales vector ({}) does not match the dimensions ({}) of the weights tensor {}.)ro   rp   r&   rs   r   Úsizer   rq   ÚmultiplyÚdivider)   ÚTrH   Ú
ValueErrorrn   Úreshapert   Zfloat32Ú_restore_var)
r,   r?   r^   r–   ry   r—   Zoutput_var_namerb   r˜   Zw_fp32r-   r-   r.   r   †  s   

ÿÿz+Quant2Int8MkldnnPass._dequantize_op_weightsc                 C   s"   | j  |¡ ¡ }| || j¡ d S rD   )r   rƒ   r„   rE   r   )r,   rM   rr   rF   r-   r-   r.   r¤   ™  s   z!Quant2Int8MkldnnPass._restore_varc                 C   s^   |  ¡ D ](}| ¡ | jv r,| ¡  d¡s,d}| ¡  d¡r&| ¡  d¡r&d}| d|¡ q|S )NÚfuse_activationÚ Z	fuse_relur   )rT   rM   r   rO   r\   rm   Úset_attr)r,   r?   rO   Z
activationr-   r-   r.   Ú_update_activations  s   ÿ€z(Quant2Int8MkldnnPass._update_activationsc                 C   s4   t ƒ }| ¡ D ]}| ¡ r| |¡ q| |¡ |S rD   )rE   Úall_var_nodesZis_ctrl_varÚaddrŒ   )r,   r?   Zremove_ctr_varsÚnoder-   r-   r.   Ú_remove_ctrl_vars¨  s   
€
z&Quant2Int8MkldnnPass._remove_ctrl_varsc                 C   sT  |   |¡}|  |¡}|  |ddgtƒ g¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d	¡}|  |d
¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d¡}|  |d ¡}|  |d!¡}|  |d"¡}|  |d#d$d%gd&d&g¡}|  |d¡}|  |¡rø|  |d'¡}|  |d(¡}|  |d)¡}|  |d*¡}|  |d+¡}|  |d,¡}|  |d-¡}|  |d.¡}|  |d/¡}|  |d0¡}|S )1NZmkldnn_placement_passZmkldnn_enabled_op_typesZsimplify_with_basic_ops_passZlayer_norm_fuse_passZattention_lstm_fuse_passZseqconv_eltadd_relu_fuse_passZfc_lstm_fuse_passZmul_lstm_fuse_passZfc_gru_fuse_passZmul_gru_fuse_passZmulti_gru_fuse_passZmulti_gru_seq_fuse_passZseq_concat_fc_fuse_passZ!gpu_cpu_squeeze2_matmul_fuse_passZ!gpu_cpu_reshape2_matmul_fuse_passZ!gpu_cpu_flatten2_matmul_fuse_passZmatmul_v2_scale_fuse_passZsquared_mat_sub_fuse_passZis_test_passZ!gpu_cpu_map_matmul_v2_to_mul_passZ$gpu_cpu_map_matmul_v2_to_matmul_passZmatmul_scale_fuse_passZgpu_cpu_map_matmul_to_mul_passZrepeated_fc_relu_fuse_passZdepthwise_conv_mkldnn_passZconv_bn_fuse_passZconv_eltwiseadd_bn_fuse_passZ$conv_affine_channel_mkldnn_fuse_passZconv_transpose_bn_fuse_passZ&conv_transpose_eltwiseadd_bn_fuse_passZconv_bias_mkldnn_fuse_passZ$conv_transpose_bias_mkldnn_fuse_passZ%conv_elementwise_add_mkldnn_fuse_passZ conv_activation_mkldnn_fuse_passZfc_fuse_passZuse_gpuZuse_fc_paddingFZfc_mkldnn_passZfc_act_mkldnn_fuse_passZ)matmul_transpose_reshape_mkldnn_fuse_passZ'matmul_elementwise_add_mkldnn_fuse_passZ"matmul_activation_mkldnn_fuse_passZbatch_norm_act_fuse_passZ$softplus_activation_onednn_fuse_passÚscale_matmul_fuse_passÚ)reshape_transpose_matmul_mkldnn_fuse_passZruntime_context_cache_pass)r¨   r¬   Ú_apply_passrE   rZ   r>   r-   r-   r.   r9   °  sz   

ÿÿÿ
ÿÿÿz)Quant2Int8MkldnnPass._optimize_fp32_graphc           	      C   sº   t  |¡}|j}| d¡s| d| j¡ |r4|r t|ƒt|ƒks$J dƒ‚t||ƒD ]
\}}| ||¡ q)| 	|¡ | j
rO| d| j› d| j› d|› | ¡ ¡ |  |¡ |  jd7  _|S )NZ__param_scope__z5Different number of pass attributes and their values.Ú.Ú_r”   )r   Zget_passr?   ÚhasZset_not_ownedr   rH   ÚziprE   r@   r   Zdrawr+   r*   rT   Ú_remove_unused_var_nodes)	r,   r?   Z	pass_nameÚattrsZattr_valuesZir_passZ	cpp_graphrm   Úvaluer-   r-   r.   r¯   ò  s,   

ÿþ
ý
z Quant2Int8MkldnnPass._apply_passc                 C   s   |   |¡}|  |¡}|S rD   )r´   Ú_set_op_role_forwardr>   r-   r-   r.   r=     s   

zQuant2Int8MkldnnPass._cleanupc                    sz   t ƒ ‰ | ¡ }|D ]}|jD ]}ˆ  |¡ q|jD ]}ˆ  |¡ qq	dd„ ˆ D ƒ‰ t t‡ fdd„| ¡ ƒƒ}| |¡ |S )Nc                 S   s   h | ]}|j ’qS r-   ©r«   )rN   Únr-   r-   r.   Ú	<setcomp>  s    z@Quant2Int8MkldnnPass._remove_unused_var_nodes.<locals>.<setcomp>c                    s
   | j ˆ vS rD   r¸   r¸   ©Zall_used_varsr-   r.   Ú<lambda>  s   
 z?Quant2Int8MkldnnPass._remove_unused_var_nodes.<locals>.<lambda>)rE   rT   r]   rª   rz   Úfilterr©   rŒ   )r,   r?   Úopsr^   Z
input_nodeZoutput_nodeZall_unused_varsr-   r»   r.   r´     s"   

ÿ
þÿ
z-Quant2Int8MkldnnPass._remove_unused_var_nodesc                 C   s$   |  ¡ }|D ]	}| dtj¡ q|S )NZop_role)rT   r§   ÚOpRoleZForward)r,   r?   r¾   rO   r-   r-   r.   r·      s   z)Quant2Int8MkldnnPass._set_op_role_forwardc                    sž   ‡‡fdd„}‡fdd„‰ ‡ ‡‡fdd„}‡fdd„‰‡‡‡fd	d
„}|ˆj ddd |ˆjddd |ˆjddd |ˆjddd |ddƒ |ddƒ ˆS )Nc           	   	      sœ   ˆ   ¡ D ]G}| ¡  ¡ | v rK| |¡d }t ˆ ˆj|¡¡}dtjt 	| 
|jd d¡¡ tj¡|d }d||tjk< ˆ |¡}d}||fˆj|< qd S )Nr   rj   r   ©Úaxisrk   F)rT   rO   Útypero   rq   rr   rs   r   ZamaxÚabsr£   r   rt   ru   rv   rG   r'   )	r¾   Zw_namerÁ   rO   r—   Úweightsrb   ra   r`   )r?   r,   r-   r.   Ú_compute_var_scales'  s(   ÿÿü

þ€ðzHQuant2Int8MkldnnPass._compute_weight_scales.<locals>._compute_var_scalesc                    s   t  ˆ  ˆ j| ¡¡}t  ˆ  ˆ j|¡¡}|jd }dt jt  t j|d d …d d| …f | ¡ d d| | …  	|d| ¡gdd¡dd }dt jt  t j|d d …d| d …f | ¡ d| | d …  	||¡gdd¡dd }t  ||g¡ 
d¡}ˆ  |¡S )Nr   rj   é   rÀ   Úfloat)rq   rr   rs   r   r   ÚmaxrÃ   ÚconcatenateÚflattenr£   rt   rG   )Úwx_var_nameÚwh_var_nameÚwxÚwhZOCZscale_urZscale_oZgru_weights_scalerI   r-   r.   Ú!_compute_single_gru_weight_scales=  s:   
"þûÿ	öþûÿ	öÿ
zVQuant2Int8MkldnnPass._compute_weight_scales.<locals>._compute_single_gru_weight_scalesc              	      ó¦   ˆ  ¡ D ]L}| ¡  ¡ ˆjv rPt| | ¡ƒt| |¡ƒks/J d t| | ¡ƒt| |¡ƒ¡ƒ‚t| | ¡ƒD ]\}}| |¡| }d}ˆ ||ƒ}||fˆj|< q6qd S ©NzIMismatch in number of weights inputs ({} for WeightX vs. {} for WeightH).F)	rT   rO   rÂ   r$   rH   ro   rn   Ú	enumerater'   ©Zwx_nameZwh_namerO   ÚirË   rÌ   r`   ra   )rÏ   r?   r,   r-   r.   Ú_compute_gru_weight_scales`  ó(   ÿÿþÿþ€ózOQuant2Int8MkldnnPass._compute_weight_scales.<locals>._compute_gru_weight_scalesc              
      s‚   t  ˆ  ˆ j| ¡¡}t  ˆ  ˆ j|¡¡}dt jt  t j|d d …d d …f |d d …d d …f gdd¡dd }| d¡}ˆ  |¡S )Nrj   r   rÀ   rÇ   )	rq   rr   rs   r   rÈ   rÃ   rÉ   rt   rG   )rË   rÌ   rÍ   rÎ   Zlstm_weights_scalerI   r-   r.   Ú"_compute_single_lstm_weight_scaless  s   8ÿ

zWQuant2Int8MkldnnPass._compute_weight_scales.<locals>._compute_single_lstm_weight_scalesc              	      rÐ   rÑ   )	rT   rO   rÂ   r%   rH   ro   rn   rÒ   r'   rÓ   )r×   r?   r,   r-   r.   Ú_compute_lstm_weight_scales~  rÖ   zPQuant2Int8MkldnnPass._compute_weight_scales.<locals>._compute_lstm_weight_scalesrš   r”   rÀ   ÚWr   ZWeightHZWeightX)r   r!   r$   r%   )r,   r?   rÅ   rÕ   rØ   r-   )rÏ   r×   r?   r,   r.   r:   &  s   #

z+Quant2Int8MkldnnPass._compute_weight_scalesc                    s\   ‡ fdd„}‡ fdd„}||ˆ j d|ƒ}‡ fdd„}||ˆ jd|ƒ}||ˆ jdd	d
„ ƒ}|S )Nc           	         s†   |   ¡ D ]<}| ¡ |v r@| |¡d }|ˆ jv r@|| ¡ ƒr@ˆ j| \}}|du r9t |¡d }ˆ  | tj	¡¡}d|fˆ j|< q| S )zæ
            Sets the type of an output scale of a passed op type(s) to 'unsigned int8' if the
            predicate applied on op passes. Typically, the predicate checks if op's
            activation is set to relu.
            r   FrÆ   T)
rT   rM   rp   r'   rO   rq   rr   rG   rt   ru   )	r?   r¾   Zop_out_nameÚ	predicaterO   Zout_nameZis_unsignedrF   r   rI   r-   r.   Ú_set_unsigned_scaleš  s   ÿ
ÿ€zLQuant2Int8MkldnnPass._update_relu_output_scales.<locals>._set_unsigned_scalec                    ó   |   d¡ˆ jv S )Nr¥   ©rm   r"   ©rO   rI   r-   r.   Úconv_predicate²  ó   zGQuant2Int8MkldnnPass._update_relu_output_scales.<locals>.conv_predicater›   c                    rÜ   )NZactivation_typerÝ   rÞ   rI   r-   r.   Úfc_predicate¹  rà   zEQuant2Int8MkldnnPass._update_relu_output_scales.<locals>.fc_predicateri   c                 S   s   dS )NTr-   rÞ   r-   r-   r.   r¼   ¿  s    zAQuant2Int8MkldnnPass._update_relu_output_scales.<locals>.<lambda>)r   r!   r"   )r,   r?   rÛ   rß   rá   r-   rI   r.   Ú_update_relu_output_scales™  s   
ÿÿz/Quant2Int8MkldnnPass._update_relu_output_scalesc                 C   s   |   |¡rdS dS )NZNHWCZNCHW)rY   r>   r-   r-   r.   Ú_get_data_layoutÄ  s   z%Quant2Int8MkldnnPass._get_data_layoutc              	   C   sv   |   |d¡}|   |d¡}|   |ddg| jg¡}|   |dddg| j|  |¡g¡}|   |d¡}|   |d	¡}|   |d
¡}|S )Nr­   r®   Zcpu_quantize_placement_passZquantize_enabled_op_typesZcpu_quantize_passZquant_var_scalesZdata_layoutZcpu_quantize_squash_passZ"int8_scale_calculation_mkldnn_passZparams_quantization_mkldnn_pass)r¯   r   r'   rã   r>   r-   r-   r.   r<   Ç  s(   ÿüüz)Quant2Int8MkldnnPass._quantize_fp32_graph)NNNNF)NN)'Ú__name__Ú
__module__Ú__qualname__Ú__doc__r/   r@   rA   r2   rG   rJ   rV   rW   rY   rZ   r3   rd   r5   r4   r6   r;   rs   r7   r‡   rˆ   rŠ   r8   r   r¤   r¨   r¬   r9   r¯   r=   r´   r·   r:   râ   rã   r<   r-   r-   r-   r.   r      sT    
ù9
	<
Bs+r   )
Únumpyrq   Zbase.frameworkr   Z	frameworkr   r   Zop_proto_and_checker_makerr¿   r   r-   r-   r-   r.   Ú<module>   s
   