o
    #j|H                    @   sR  d dl Z d dlZd dlZd dlZzd dlmZ W n
   ddlmZ Y d dlmZm	Z	 ddl
mZmZ ddlmZ ddlmZ d	d
lmZ ddl
mZ ddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZmZmZmZm Z m!Z!m"Z" ee#e j$ddZ%dd Z&dd Z'dd Z(	d!ddZ)G dd dZ*G dd de*Z+G dd  d Z,dS )"    N)tqdm   )IrGraph_get_var   )iostatic)core)unique_name   )
get_logger)utils)run_adaround)cal_kl_threshold)SUPPORT_QUANTIZATION_OP_DICTARMCPUQuantizerBaseQuantizerMKLDNNQuantizerTensorRTQuantizer)AddQuantDequantForInferencePassAddQuantDequantPassAddQuantDequantPassV2QuantizationFreezePassQuantizationTransformPassQuantizationTransformPassV2QuantWeightPassz&%(asctime)s-%(levelname)s: %(message)s)fmtc                 C   s(   g }|   D ]}|jr||j q|S N)	list_varspersistableappendname)programpersistable_var_namesvar r%   v/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/static/quantization/post_training_quantization.py_all_persistable_var_names9   s   r'   c                    sz   t   |  }|D ]}|jD ]} | q|jD ]} | qq	dd  D  t t fdd|  }| | | S )Nc                 S   s   h | ]}|j qS r%   node).0nr%   r%   r&   	<setcomp>J   s    z+_remove_unused_var_nodes.<locals>.<setcomp>c                    s
   | j  vS r   r(   r(   Zall_used_varsr%   r&   <lambda>M   s   
 z*_remove_unused_var_nodes.<locals>.<lambda>)setall_op_nodesinputsaddoutputsfilterall_var_nodessafe_remove_nodes)graphopsop_nodeZ
input_nodeZoutput_nodeZall_unused_varsr%   r-   r&   _remove_unused_var_nodesA   s    


r:   c                 C   s4   t  }|  D ]}| r|| q| | | S r   )r/   r5   Zis_ctrl_varr2   r6   )r7   Zremove_ctr_varsr)   r%   r%   r&   _remove_ctrl_varsT   s   

r;   Fc           
      C   s   t |}|j}|ds|d|  |r3|rt|t|ks#J dt||D ]
\}}	|||	 q(|| |rF|	dd| |
  t| |S )NZ__param_scope__z5Different number of pass attributes and their values..Z	qat_fp32_)r	   Zget_passr7   hasZset_not_ownedlenzipr/   applyZdrawr0   r:   )
scoper7   Z	pass_nameattrsZattr_valuesdebugZir_passZ	cpp_graphattrvaluer%   r%   r&   _apply_pass]   s"   


rF   c                   @   s  e Zd ZdZddddddddddg ddddd	d	d
dddddddddddfddZdd Zdd Z	d?ddZdd Zdd Z	dd Z
dd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Zd=d> ZdS )@PostTrainingQuantizationz
    Utilizing post training quantization methon to quantize the FP32 model,
    and it uses calibrate data to get the quantization information for all
    quantized variables.
    N
   KLwJ?roundMbP?F   range_abs_maxchannel_wise_abs_maxTc            "      C   s  g d| _ ddg| _g d| _|dv sJ || _|| _dg| _|dus'J d|dus/J d	t|tjs9J d
|	dksAJ d|| jv sJJ d|| j v sXJ d	|| j || jv sfJ d	|| j|| _
|| _|du rtt n|| _|| _|| _|| _|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _| jrdnd| _|| _|| _| jj | _!d| _"d| _#d| _$|| _%t& | _'t& | _(i | _)i | _*i | _+i | _,i | _-d| _.i | _/i | _0i | _1i | _2i | _3t& | _4|| _5|| _6|| _7|| _8d| _9| j"durd| _9|| _:|rt;t<= }n|r*|D ]} | t;t<= v s(J | d q||ks3J dg d}!|sCt>||d| _?dS |@ dkrStA||d| _?dS |@ dkrctB||d| _?dS |@ dkrstC||d| _?dS d	||!s|J dS )a"  
        Constructor.

        Args:
            executor(static.Executor): The executor to load, run and save the
                quantized model.
            scope(static.Scope, optional): The scope of the program, use it to load
                and save variables. If scope=None, get scope by static.global_scope().
            model_dir(str): The path of the fp32 model that will be quantized,
                and the model and params files are under the path.
            model_filename(str, optional): The name of file to load the inference
                program. If it is None, the default filename '__model__' will
                be used. Default is 'None'.
            params_filename(str, optional): The name of file to load all parameters.
                When all parameters were saved in a single binary file, set it
                as the real filename. If parameters were saved in separate files,
                set it as 'None'. Default is 'None'.
            batch_generator(Python Generator, depreceated): The batch generator provides
                calibrate data for DataLoader, and it returns a batch every
                time. Note that, sample_generator and batch_generator, only one
                should be set. Beisdes, batch_generator supports lod tensor.
            sample_generator(Python Generator, depreceated): The sample generator provides
                calibrate data for DataLoader, and it only returns a sample every
                time. Note that, sample_generator and batch_generator, only one
                should be set. Beisdes, sample_generator dose not support lod tensor.
            data_loader(Paddle.io.DataLoader): The
                Dataloader provides calibrate data, and it could
                return a batch every time.
            batch_size(int, optional): The batch size of DataLoader. Default is 10.
            batch_nums(int, optional): If batch_nums is not None, the number of
                calibrate data is batch_size*batch_nums. If batch_nums is None, use
                all data provided by sample_generator as calibrate data.
            algo(str, optional): If algo='KL', use KL-divergenc method to
                get the KL threshold for quantized activations and get the abs_max
                value for quantized weights. If algo='abs_max', get the abs max
                value for activations and weights. If algo= 'min_max', get the min
                and max value for quantized activations and weights. If algo='avg',
                get the average value among the max values for activations. If
                algo= 'hist', get the value of 'hist_percent' quantile as the threshold.
                If algo='mse', get the value which makes the quantization mse loss
                minimal. Default is KL.
            hist_percent(float, optional): The threshold of algo 'hist' for activations.
                Default is 0.99999.
            quantizable_op_type(list[str], optional): List the type of ops
                that will be quantized. Default is []. If quantizable_op_type is [],
                it will use the default quantization op type of the qunat config in
                the current deploy_backend.
            round_type(str, optional): The method of converting the quantized weights
                value float->int. Currently supports ['round', 'adaround'] methods.
                Default is `round`, which is rounding nearest to the integer.
                'adaround' is refer to https://arxiv.org/abs/2004.10568.
            learning_rate(float, optional): The learning rate of adaround method.
            is_full_quantized(bool, optional): If set is_full_quantized as True,
                apply quantization to all supported quantizable op type. If set
                is_full_quantized as False, it will apply quantization to the op type
                according to the input quantizable_op_type or quant config of deploy_backend.
            bias_correction(bool, optional): If set as True, use the bias correction
                method of https://arxiv.org/abs/1810.05723. Default is False.
            activation_bits(int): quantization bit number for activation.
            weight_bits(int, optional): quantization bit number for weights.
            activation_quantize_type(str): quantization type for activation,
                now support 'range_abs_max', 'moving_average_abs_max' and 'abs_max'.
                This param only specifies the fake ops in saving quantized model.
                If it is 'range_abs_max' or 'moving_average_abs_max', we save the scale
                obtained by post training quantization in fake ops. Note that, if it
                is 'abs_max', the scale will not be saved in fake ops.
            weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. This param only specifies
                the fake ops in saving quantized model, and we save the scale obtained
                by post training quantization in fake ops. Compared to 'abs_max',
                the model accuracy is usually higher when it is 'channel_wise_abs_max'.
            onnx_format(bool): Whether to export the quantized model with format of ONNX.
                Default is False.
            freeze_model(bool): Whether to convert quantized and trained ``program`` to final
                quantized ``program``. Default: True.
            skip_tensor_list(list): List of skip quant tensor name. Default: None.
            same_scale_tensor_list(list(list)): The list of tensor keep same scale in the outermost
                list, the final scale about every list is the max of the scale in the list
                of tensor. Default: None.
            optimize_model(bool, optional): If set optimize_model as True, it applies
                some passes to the model before quantization, and it supports
                `conv2d/depthwise_conv2d + bn` pass so far. Some targets require the
                weights are quantized by tensor-wise method, which means the weights
                scale for all channel are the same. However, if fuse
                `conv2d/depthwise_conv2d + bn`, the weights scale for all channel will
                be different. In address this problem, fuse the pattern before
                quantization. Default False.
            is_use_cache_file(bool, optional): This param is deprecated.
            cache_dir(str, optional): This param is deprecated.
            deploy_backend(str, optional): Deploy backend, it can be None, `TensorRT`,
                `MKLDNN`, `ARM`. And it will extend the new backend. Default is None,
                which means to use the default general quantization configuration.
        Returns:
            None

        Examples:
            .. code-block:: python

                >>> # doctest: +SKIP("There are some example variables in the code.")
                >>> import paddle.static as static
                >>> from paddle.static.quantization import PostTrainingQuantization

                >>> exe = static.Executor(paddle.CPUPlace())
                >>> model_dir = "path/to/fp32_model_params"
                >>> # set model_filename as None when the filename is __model__,
                >>> # otherwise set it as the real filename
                >>> model_filename = None
                >>> # set params_filename as None when all parameters were saved in
                >>> # separate files, otherwise set it as the real filename
                >>> params_filename = None
                >>> save_model_path = "path/to/save_model_path"
                >>> # prepare the sample generator according to the model, and the
                >>> # sample generator must return a sample every time. The reference
                >>> # document: https://www.paddlepaddle.org.cn/documentation/docs/zh
                >>> # /user_guides/howto/prepare_data/use_py_reader.html
                >>> data_loader = your_data_loader
                >>> batch_size = 10
                >>> batch_nums = 10
                >>> algo = "KL"
                >>> quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
                >>> ptq = PostTrainingQuantization(
                ...     executor=exe,
                ...     sample_generator=None,
                ...     data_loader=data_loader,
                ...     model_dir=model_dir,
                ...     model_filename=model_filename,
                ...     params_filename=params_filename,
                ...     batch_size=batch_size,
                ...     batch_nums=batch_nums,
                ...     algo=algo,
                ...     quantizable_op_type=quantizable_op_type
                ... )
                >>> ptq.quantize()
                >>> ptq.save_quantized_model(save_model_path)
        )rN   Zmoving_average_abs_maxabs_maxrP   rO   )rI   histavgmseemdrP   min_maxptf)adaroundrK   ZlstmNzThe executor cannot be None.zdata_loader cannot be None.z0data_loader only accepts `paddle.io.DataLoader`.r   z(The batch_size should be greater than 0.z?The algo should be KL, hist, mse, avg, abs_max, min_max or ptf.z1The activation_quantize_type ({}) should in ({}).z,The weight_quantize_type ({}) shoud in ({}).TFi   # is not supported for quantization.zPactivation_bits and weight_bits must be the same, other cases are not supported.)Ntensorrtmkldnnarm)quantizable_op_type
quant_bitsrY   rZ   r[   z7Deploy Backend {} not support, please choose one of {}.)DZ!_support_activation_quantize_typeZ_support_weight_quantize_typeZ_support_algo_type_round_type_learning_rate_dynamic_quantize_op_type
isinstancer   Z
DataLoaderformat_bias_correction	_executorr   global_scope_scope
_model_dir_model_filename_params_filenameZ_sample_generatorZ_batch_generatorZ_batch_size_batch_nums_algo_hist_percent_activation_bits_weight_bits_activation_quantize_type_weight_quantize_type_onnx_format_clip_extra_skip_tensor_list_optimize_modelplace_place_program
_feed_list_fetch_list_data_loaderr/   _quantized_weight_var_name_quantized_act_var_name_weight_op_pairs_sampling_act_abs_min_max_sampling_act_histogramZ_sampling_data_quantized_var_threshold_histogram_bins_quantized_var_min_quantized_var_max_quantized_var_avg_best_calibration_loss_quantized_threshold_zero_size_var_names_same_scale_tensor_list_freeze_model_scale_dict_return_graphFLAG_is_full_quantizelistr   keysr   quant_configlowerr   r   r   )"selfexecutor	model_dirrA   model_filenameparams_filenamebatch_generatorsample_generatordata_loader
batch_size
batch_numsalgohist_percentr\   
round_typelearning_rateis_full_quantizebias_correctionactivation_bitsweight_bitsactivation_quantize_typeweight_quantize_typeonnx_formatfreeze_modeloptimize_modelis_use_cache_fileskip_tensor_listsame_scale_tensor_list	cache_dir
scale_dictreturn_graphZdeploy_backendop_typeZsupport_deploy_backendr%   r%   r&   __init__x   s    
+



z!PostTrainingQuantization.__init__c              	      s(             jdv rZd}t jddd3}  D ]&} jj j	| j
d jd    |d7 }|   jrF| jkrF nq W d	   n1 sQw   Y     d}t jd
dd3}  D ]&} jj j	| j
d jd    |d7 }|   jr| jkr nqiW d	   n1 sw   Y   jdkr jD ]}| jvrqt j|   j|< q jdv rƈ    jdkrψ       jdkr݈   n    js   t fdd jD r   j t!" j	  j#s j	S t$t%& j	j'dd}|S )a7  
        Load the FP32 model, and use the calibrate data to calculate the forward-stage.
        Based on the sample data, we can get the quantization information, and obtain
        the final quantized model.

        Args:
            None
        Returns:
            the program of quantized model.
        rI   rQ   r   z8Preparation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}P   )totalZ
bar_formatZncolsF)r"   feed
fetch_listZreturn_numpyrA   r   Nz5Sampling stage, Run batch:|{bar}| {n_fmt}/{total_fmt}rR   rW   rU   c                 3   s    | ]	}| j jv V  qd S r   )r    activation_quant_operation_types)r*   r   r   r%   r&   	<genexpr>  s
    

z4PostTrainingQuantization.quantize.<locals>.<genexpr>Tfor_test)(_load_model_data_collect_target_varnames_set_activation_persistablerk   r   rj   rz   rd   runrw   ry   rf   _collect_activation_abs_min_maxupdate_init_sampling_act_histogram	_samplingr|   r   nparraymeanr   _calculate_kl_hist_thresholdr^   _adaround_apply_reset_activation_persistable_save_input_threhold_update_programr   _save_output_thresholdanyr`   &_collect_dynamic_quantize_op_thresholdr   Z$move_persistable_var_to_global_blockr   r   r	   Graphdesc)r   Zbatch_idtdatavar_nameZ
main_graphr%   r   r&   quantize  s   








z!PostTrainingQuantization.quantizec                 C   sd   | j dks	J d| j dv r| j}n| j}t| j| j| j| j| j| j	| j
| j|| j| j| jd d S )NrU   zThe algo should not be min_max.r   )Znum_iterationsr   lr)rk   r   r   r   rz   rw   ry   rd   rf   rv   _quantized_op_pairsr}   rj   rc   r_   )r   r   r%   r%   r&   r     s$   

z(PostTrainingQuantization._adaround_applyc                    s   d}|du r	d}n| dr|ddd }n|}tj||} fdd jD }tj|| j j	 j
 jd	 td
|  dS )a  
        Save the quantized model to the disk.

        Args:
            save_model_path(str): The path to save the quantized model.
            model_filename(str, optional): If the model_filename is None,
                save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
                'model_name.pdiparams". Default: None.
        Returns:
            None
        Nmodel.pdmodelr<   r   r   c                    s   g | ]
} j  |qS r%   )rw   global_blockr$   r*   r!   r   r%   r&   
<listcomp>;  s    zAPostTrainingQuantization.save_quantized_model.<locals>.<listcomp>)r   r"   Z
clip_extraz The quantized model is saved in )endswithrsplitospathjoinrx   r   save_inference_modelry   rd   rw   rr   _loggerinfo)r   Zsave_model_pathr   r   
model_namepath_prefix	feed_varsr%   r   r&   save_quantized_model$  s&   

z-PostTrainingQuantization.save_quantized_modelc                    s~    j du rtd tj j j j jd\ _  _	 _
 jr$    fdd j	D } jr7 j _dS t j _dS )z1
        Load model and set data loader.
        Nz"Load model and set data loader ...r   r   r   c                    s   g | ]
}t t| jqS r%   )r   strrw   )r*   r   r   r%   r&   r   \  s    z=PostTrainingQuantization._load_model_data.<locals>.<listcomp>)rw   r   r   r   load_inference_modelrg   rd   rh   ri   rx   ry   rt   _optimize_fp32_modelrj   r>   rz   )r   r   r%   r   r&   r   H  s*   



z)PostTrainingQuantization._load_model_datac                 C   s|   t d tt| jjdd}t|}t| j	|d}t| j	|d}t| j	|d}t| j	|d}t| j	|d}|
 | _d	S )
zH
        Fuse the `conv2d/depthwise_conv2d + bn` in FP32 model.
        zOptimize FP32 model ...Tr   Zconv_bn_fuse_passZdepthwise_conv_bn_fuse_passZconv_transpose_bn_fuse_passZconv_eltwiseadd_bn_fuse_passZ&depthwise_conv_eltwiseadd_bn_fuse_passN)r   r   r   r	   r   rw   r   r;   rF   rf   
to_program)r   r7   r%   r%   r&   r   e  s   
z-PostTrainingQuantization._optimize_fp32_modelc                    s  t d i  _ fdd}t j}tt jjD ]} jj| jD ]} j	dur>t
|D ]}| j	v r=|dd q0|j}|dkrw|dd	 } jj| jD ]#}t
|}	||	v rvt
|D ]}
|
|vru|dd |dd qcqS jr|tt vrt |d
  g }|D ]}d|v r|| q|dkot
|d	 |v ot
|d	 |v }| jjv s| jjv s|r|dko|d}|r|d n|}|t
||| |t
||| t
|D ]}t
|D ]}||v r| j|< qqq$| jjv r
|t
||| q$qdS )zr
        Collect the variable names for sampling, and set activation
        variables to be persistable.
        z$Collect quantized variable names ...c                    s:   | D ]}||v r j | | j|< q j| qd S r   )r{   r2   r}   r|   )Zvar_name_listr#   r   r   r   r%   r&   collect_var_name  s   zKPostTrainingQuantization._collect_target_varnames.<locals>.collect_var_nameNZop_namescopeZ
skip_quantZconv2d_transposeFilterr   rX   Zconv1dZ
unsqueeze2Z	matmul_v2trans_yZ_trans_y)r   r   r   r'   rw   ranger>   blocksr8   rs   r   _get_op_input_var_names	_set_attrtypeinput_get_op_output_var_namesr   r   r   r   warningr    r   weight_quant_operation_typesr   rD   observer_operation_types)r   r   r#   block_idopZinp_namer   Zin_nameZ_opr   r!   Zconv1d_persistable_var_namesopnameZis_conv1d_quantr   out_var_nameZin_var_namer%   r   r&   r   v  s   






z1PostTrainingQuantization._collect_target_varnamesc                 C   s&   | j  D ]}|j| jv rd|_qdS )zr
        Set activation variables to be persistable, so can obtain
        the tensor data in sample_data
        TN)rw   r   r!   r|   r   r   r$   r%   r%   r&   r     s
   z4PostTrainingQuantization._set_activation_persistablec                 C   s<   | j  D ]}|j| jv rd|_| j|j   qdS )z:
        Reset activations to be not persistable.
        FN)	rw   r   r!   r|   r   rf   Zfind_varZ
get_tensor_clearr   r%   r%   r&   r     s   z6PostTrainingQuantization._reset_activation_persistablec                 C   s   | j dkr|   dS | j dkr|   dS | j dkr!|   dS | j dkr,|   dS | j dkr7|   dS | j dkrB|   dS | j dv rM|   dS dS )	zO
        Sample the min/max, abs_max or histogram in every iterations.
        rP   rR   rU   rS   rT   rV   r   N)rk   _sample_abs_max_sample_avg_sample_min_max_sample_mse_sample_emd_sample_ptf_sample_histogramr   r%   r%   r&   r     s   






z"PostTrainingQuantization._samplingc                 C   s$  | j i krm| jD ]d}t| j|}| jdkr!ttt	|}nF| jdkrgg }| j
| tjv rNt|jd D ]}|ttt	|d d |f  q7nt|jd D ]}|ttt	||  qU|| j |< qtd | jD ]}t| j|}|jdkr| j| qu| }ttt	|}|dkrdn|}d}|| jvrtd	| j|< |d
kr|| }|d7 }d| jd  d }| jrtt|| | | d |}|| | }	ntt|d|| | | | }	||	 d  }
|
| j| kr|
| j|< || j |< |d
ksqud S )NrP   rO   r   r   zMSE searching stage ...        :0yE>333333?inf      ?{Gz?r   )r   r{   r   load_variable_datarf   rp   floatr   maxabsr}   _channelwise_quant_axis1_opsr   shaper    r   r   r|   sizer   r2   flattenr   rm   rq   cliprK   r   )r   r   
var_tensorabs_max_valueisscalebins	quant_varquant_dequant_varZmse_lossr%   r%   r&   r    sl   










z$PostTrainingQuantization._sample_msec                 C   sH  | j i krm| jD ]d}t| j|}| jdkr!ttt	|}nF| jdkrgg }| j
| tjv rNt|jd D ]}|ttt	|d d |f  q7nt|jd D ]}|ttt	||  qU|| j |< qtd | jD ]}t| j|}|jdkr| j| qu| }ttt	|}|dkrdn|}d}|| jvrtd	| j|< |d
kr!|| }|d7 }d| jd  d }| jrtt|| | | d |}|| | }	ntt|d|| | | | }	t	t|t|	 t	t|t|	  }
|
| j| kr|
| j|< || j |< |d
ksqud S )NrP   rO   r   r   zEMD searching stage ...r  r  r  r	  r
  r  r   )r   r{   r   r  rf   rp   r  r   r  r  r}   r  r   r  r    r   r   r|   r  r   r2   r  r   rm   rq   r  rK   r   Zstd)r   r   r  r  r  r  r  r  r  r  Zemd_lossr%   r%   r&   r  .  st   










z$PostTrainingQuantization._sample_emdc                 C   st  | j i krm| jD ]d}t| j|}| jdkr!ttt	|}nF| jdkrgg }| j
| tjv rNt|jd D ]}|ttt	|d d |f  q7nt|jd D ]}|ttt	||  qU|| j |< q| jD ]G}t| j|}|jdkr| j| qpttt	|}|| jvrg | j|< tttjt	||jd ddd}| j| | qpd S )NrP   rO   r   r   )Zaxis)r   r{   r   r  rf   rp   r  r   r  r  r}   r  r   r  r    r|   r  r   r2   r   r   Zreshape)r   r   r  r  r  Zabs_avg_valuer%   r%   r&   r   f  sL   







z$PostTrainingQuantization._sample_avgc                 C   sF  | j i krm| jD ]d}t| j|}| jdkr!ttt	|}nF| jdkrgg }| j
| tjv rNt|jd D ]}|ttt	|d d |f  q7nt|jd D ]}|ttt	||  qU|| j |< q| jD ]0}t| j|}|jdkr| j| qpttt	|}|| j vs|| j | kr|| j |< qpd S NrP   rO   r   r   )r   r{   r   r  rf   rp   r  r   r  r  r}   r  r   r  r    r|   r  r   r2   )r   r   r  r  r  r%   r%   r&   r     s>   







z(PostTrainingQuantization._sample_abs_maxc              
   C   s  | j i kr| ji kr| jD ]}t| j|}| jdkr*tt	|}tt
|}n^| jdkrg }g }| j| tjv rft|jd D ]"}|tt	|d d |f  |tt
|d d |f  qBn"t|jd D ]}|tt	||  |tt
||  qm|| j |< || j|< q| jD ]E}t| j|}|jdkr| j| qtt	|}tt
|}|| j vs|| j | k r|| j |< || jvs|| j| kr|| j|< qd S r  )r   r   r{   r   r  rf   rp   r  r   minr  r}   r  r   r  r    r|   r  r   r2   )r   r   r  	min_value	max_valuer  r%   r%   r&   r    sH   


 "






z(PostTrainingQuantization._sample_min_maxc                 C   s   | j D ]:}t| j|}|jdks|| jvr| j| qt	|}| j| d }tj
||d\}}| j| d  |7  < qd S )Nr   r   )r  )r|   r   r  rf   r  r   r   r2   r   r  	histogram)r   r   r  Zvar_tensor_absr  rQ   _r%   r%   r&   r    s   



z*PostTrainingQuantization._sample_histogramc                 C   s.  | j i krm| jD ]d}t| j|}| jdkr!ttt	|}nF| jdkrgg }| j
| tjv rNt|jd D ]}|ttt	|dd|f  q7nt|jd D ]}|ttt	||  qU|| j |< q| jD ]}t| j|}|jdkr| j| qpttt	|}d| jd  d }|| }|d }|d }|d }	tt||	 d||	 }
tt|| d|| }tt|| d|| }tt|| d|| }t||
}t||}t||}t||}||||g}d|t| }|	| }|| }|| j |< qpdS )zj
        The following code are modified from:
        https://github.com/megvii-research/FQ-ViT/
        rP   rO   r   Nr   r   )r   r{   r   r  rf   rp   r  r   r  r  r}   r  r   r  r    r|   r  r   r2   rm   r  rK   Zl2_lossindexr  )r   r   r  r  r  Zq_maxZscale8Zscale4Zscale2Zscale1Zquant_dequant_var_scale1Zquant_dequant_var_scale2Zquant_dequant_var_scale4Zquant_dequant_var_scale8Zscore1Zscore2Zscore4Zscore8Zscoremaskr  	thresholdr%   r%   r&   r    sb   





z$PostTrainingQuantization._sample_ptfc                 C   s   | j dks	J dtt| jjD ]L}| jj| jD ]B}|j| jjv s*|j| jj	v r\t
|D ],}|| jv s8J || jv s?J ||d | j|  ||d | j|  |dd q/qqdS )z;
        Save input threshold to the quantized op.
        rU   z3The algo should be min_max to save input threshold.z.minz.maxwith_quant_attrTN)rk   r   r>   rw   r   r8   r   r   r   r   r   r   r   r   r   )r   r   r   r   r%   r%   r&   r   "  s,   z-PostTrainingQuantization._save_input_threholdc                 C   s   | j D ]U}t| j|}|jdkr| j| qt|}t	t
|}t	t|}|| jvr8||g| j|< q|| j| d k rH|| j| d< || j| d krX|| j| d< qdS )z
        Collect the abs_min and abs_max for all activation. When algo = KL,
        get the min and max value, and then calculate the threshold.
        r   r   N)r|   r   r  rf   r  r   r2   r   r  r  r  r  r~   )r   r   r  r   r!  r%   r%   r&   r   ;  s$   



z8PostTrainingQuantization._collect_activation_abs_min_maxc                 C   st   | j D ]4}|| jv r|| jvrq|| jvr7| j| d }| j| d }tjg | j||fd\}}||g| j|< qdS )zN
        Based on the min/max value, init the sampling_act_histogram.
        r   r   r  r   N)r|   r   r~   r   r   r"  r   )r   r   Zmin_valZmax_valrQ   hist_edegesr%   r%   r&   r   S  s   




z5PostTrainingQuantization._init_sampling_act_histogramc                 C   sv  t d| j d | jdv sJ d| jD ]d}t| j|}| jdkr/tt	
t	|}nF| jdkrug }| j| tjv r\t|jd D ]}|tt	
t	|dd|f  qEnt|jd	 D ]}|tt	
t	||  qc|| j|< q| jD ]:}|| jv r|| jvrq~| j| \}}| jd
kr|d |d	  }t||| j| j|< q~| jdkr| ||| j|< q~dS )zL
        Calculate the KL or hist threshold of quantized variables.
        z
Calculate z threshold ...r   zThe algo should be KL or hist.rP   rO   r   Nr   rI   rQ   )r   r   rk   r{   r   r  rf   rp   r  r   r  r  r}   r  r   r  r    r   r|   r   r   r   rm   _get_hist_scaling_factor)r   r   weight_dataZweight_thresholdr  rQ   r)  	bin_widthr%   r%   r&   r   d  sL   








z5PostTrainingQuantization._calculate_kl_hist_thresholdc              
   C   s  t d tt| jjdd}| js't| j	| j
| j| j| j| j| jjd}nt| j	| j
| j| j| j| j| jjd}| D ]
}d|_|| q>| jsXt| j	| j
| jjd}nt| j	| j
| jjd}| D ]
}d|_|| qg| jdu r6| jdv r| j}n| j}| jdur3| jD ]}d}|D ]\}d|v r|d\}	}
}|	| vrq|
d	krt||	 t| ||	< n|
d
krt||	 t| ||	< |du r||	 nt |||	 }q|| vrq|du r|| nt ||| }q|D ]?}d|v r%|d\}	}
}|	| vr	q|
d	kr|t| ||	< q|
d
kr$|t| ||	< q|| vr-q|||< qq|| _| j! D ]+\}}t"#| j	| j
|d t$j%|gt$j&d t"#| j	| j
|d t$j%|gt$j&d q;| js| j'rt(| j	| j
| j)| j| j*| j| j| jjd}| D ]}d|_|| qn@t+| j	| j
}| D ]}d|_|| q| jj| jj | jj, }t-| j	| j
| j|| jd}| D ]}d|_|| q|. | _dS )z
        Use QuantizationTransformPass and AddQuantDequantPass to insert
        fake_quantize, fake_dequantize and fake_quant_dequant op.
        Besides, save all threshold to the scale var node.
        zUpdate the program ...Tr   )rA   ru   r   r   r   r   r\   )rA   ru   r\   Nr   #*/z@scaledtypez.quant_dequant@scale)rA   ru   r   r   r   r   r   r\   )rA   ru   r]   r\   Zcalibration_range_dict)/r   r   r   r	   r   rw   r   rq   r   rf   rv   rn   rm   ro   rp   r   r   r   Zall_sub_graphsZ	_for_testr@   r   r   r   r   rk   r   r   r   splitr   r  r  itemsr   set_variable_datar   r   float32r   r   rc   r^   r   r   r   r   )r   r7   Ztransform_passZ	sub_graphZadd_quant_dequant_passr   Ztensor_listZ	max_scaleZtensor_nameZreal_tensor_nameZoperaZscalarkeyvalZfreeze_passZquant_weight_passZinfer_pass_quant_op_typesZout_scale_infer_passr%   r%   r&   r     s*  













z(PostTrainingQuantization._update_programc                    s   i _  fddfdd}ttjjD ]*}jj| jD ]   jjjjj	 jj
 v rBt }|D ]}| | q:q"qdS )z<
        Save output threshold to the quantized op.
        c                    s   |j v r||vrtd| d S ||v s J d|| jjr3i j|< || j| d< d S | |||  | |d t|d  d ||  | dd | jj	j
v sa| jj	jv ri d	| d S d S )
NzA{} is zero-size tensor and unable to calibrate, so skip quant it.z3The output ({}) of {} node does not have threshold.r  r   r   
_thresholdr'  Tquantization_type)r   r   r   rb   r   rq   _calibration_scalesr   r   r   r   r   )r9   r   Zthreshold_mapZout_info_nameargname_indexZquantized_type)r   r   r%   r&   	save_infoC  s@   


zBPostTrainingQuantization._save_output_threshold.<locals>.save_infoc              	      s   t | |}|d usJ |d jdv r( | |jd|dtj   d S jdv r> | |jd|dtj  d S jdkrY | |jd|d  | |jd	|d d S d S )
Nz is not the output of the opr   Zout_thresholdpost_)rR   rP   rS   rT   rV   rU   Zout_minZpost_min_maxZout_max)	r   Z_get_output_name_indexrk   r   r   r   r   r   r   )r9   r   r;  )r<  r   r%   r&   analysis_and_save_infoo  sP   



zOPostTrainingQuantization._save_output_threshold.<locals>.analysis_and_save_infoN)r:  r   r>   rw   r   r8   r   r   r   r   r   r   r   )r   r>  r   Zout_var_namesr   r%   )r   r<  r   r&   r   =  s$   ,*
z/PostTrainingQuantization._save_output_thresholdc                 C   s   g }t | jjD ]}| j|jD ]}|j|v r|| qqtd| j 	 }t
| j}|D ]F}t|D ]>}||v rtt| j|}ttt|}	t||\}
}||
t| d |	 |d| |d| j |dd q6q/dS )z
        Collect and save the weight threshold for dynamic quantize ops,
        such as lstm and gru.
        Args:
            target_ops_type(list): the op type of target ops
        Returns:
            None
        r=  r8  r9  
bit_lengthr'  TN)r   rw   
num_blocksblockr8   r   r    r   rk   r   r'   r   r   r  rf   r  r   r  r  Z_get_input_name_indexr   rn   )r   Ztarget_ops_typeZ
target_opsr$  r   r9  r#   r   Zvar_datar&  argnamer%   r%   r&   r     s,   



z?PostTrainingQuantization._collect_dynamic_quantize_op_thresholdc                 C   sl   | j }|tt| }d}d}tt|D ]}||| 7 }||kr'|d } nq|d |d  }|d | S )zB
        Using the hist method to get the scaling factor.
        r   r   g      ?)rl   r  sumr   r>   )r   rQ   Z
hist_edgesthreshold_ratehist_sum
hist_indexr  r,  r%   r%   r&   r*    s   z1PostTrainingQuantization._get_hist_scaling_factorNN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r  r  r   r   r  r  r  r   r   r   r   r   r   r   r*  r%   r%   r%   r&   rG   q   sx    

  7b
$]		68)"(<- -grG   c                       sZ   e Zd Zddddddddddg dddddd	d	d
ddddddddddf fdd	Z  ZS )PostTrainingQuantizationProgramNrH   rI   rJ   conv2ddepthwise_conv2dmulrK   rL   FrM   rN   rO   Tc                    s   t  j||d d d ||||	|
|||||||||||||||||||| d| _|| _| jd ur1d| _|d us9J d|d usAJ d|| _|| _d S )NFTzFeed list should not be None.zFetch list should not be None.)superr   r   rw   rx   ry   )r   r   r"   	feed_listr   rA   r   r   r   r   r   r   r   r\   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__r%   r&   r     sP   ! 

z(PostTrainingQuantizationProgram.__init__)rH  rI  rJ  r   __classcell__r%   r%   rS  r&   rL    s<    rL  c                   @   s   e Zd Zg dZddgZd!ddZddddgd	dd
dfddZdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zd"dd ZdS )#WeightQuantizationrM  rO   rP   Nc                 C   s   || _ || _|| _dS )a  
        This class quantizes the weight of some ops to reduce the size of model
        or improve the perforemace.

        Args:
            model_dir(str): The path of the fp32 model that will be quantized,
                and the model and params files are under the path.
            model_filename(str, optional): The name of file to load the inference
                program. If it is None, the default filename '__model__' will
                be used. Default is 'None'.
            params_filename(str, optional): The name of file to load all parameters.
                When all parameters were saved in a single binary file, set it
                as the real filename. If parameters were saved in separate files,
                set it as 'None'. Default is 'None'.
        N)rg   rh   ri   )r   r   r   r   r%   r%   r&   r   "  s   
zWeightQuantization.__init__rN  rP  rM   Fr  c	              
   C   s   |D ]}	|	| j v sJ d|	 d q|dv sJ d|| jv s'J d| jtj|d}
| |
|||||d| |rQtj|d}| ||||||d	| d
S d
S )ab  
        In order to reduce the size of model, this api quantizes the weight
        of some ops from float32 to int8/16. In the inference stage, the
        quantized weight will be dequantized to float32 again.

        Args:
            save_model_dir(str): The path to save the quantized model.
            save_model_filename(str, optional): The name of file to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is 'None'.
            save_params_filename(str, optional): The name of file to
                save all parameters. If it is None, parameters were
                saved in separate files. If it is not None, all
                parameters were saved in a single binary file.
            quantizable_op_type(list[str], optional): The list of ops
                that will be quantized, and the quantized ops should be
                contained in ["conv2d", "depthwise_conv2d", "mul"].
                Default is ["conv2d","mul"].
            weight_bits(int, optional): The bits for the quantized weight,
                and it should be 8 or 16. Default is 8.
            weight_quantize_type(str, optional): quantization type for weights,
                support 'channel_wise_abs_max' and 'abs_max'. Set it as
                'channel_wise_abs_max', the accuracy performs better.
            generate_test_model(bool, optional): If set generate_test_model
                as True, it saves a fake quantized model, in which the weights
                are quantized and dequantized. We can use PaddlePaddle to load
                the fake quantized model and test the accuracy on GPU or CPU.
            threshold_rate(float, optional): This api uses abs_max methd to
                quantize the weight from float32 to int8/16, and the abs max
                value is important for quantization diff. When the abs_max
                value is far away from the center of the numerical distribution,
                we can set threshold_rate between 1e-6 and 1e-8, so the abs max
                value will be optimized. Default is 0.0.
        zInput error:z* is not supported for weight quantization.)rM      z+Input error: weight_bits should be 8 or 16.z.Input error: weight_quantize_type should in {}Zquantized_modelFZ
test_modelTN)_supported_quantizable_op_type_supported_weight_quantize_typerb   r   r   r   _quantize_weight_to_int)r   save_model_dirsave_model_filenamesave_params_filenamer\   r   r   Zgenerate_test_modelrD  r   Zquantized_model_dirZtest_model_dirr%   r%   r&   quantize_weight_to_int6  sP   -
z)WeightQuantization.quantize_weight_to_intc                 C   s  t  }t|}t }tj| j|| j| jd\}}}t	 }|
 }	i }
| D ]J}|jt jjjksD|jrD|jdv sD|jt jjjkrEq*|	|}| jdurU||
|j< q*tjtj||j}|	jdd|gii tj|ddd q*| jdurg }t|
 D ]	}||
|  q|	jt jjjtd	d
}|j !d tjtj|| j}|	jdd|id|i|ddd |"  |#| | jdu rdn| j}tj| j|}tj||}t$%|| dS )a  
        Convert all presistable vars from fp32 to fp16.
        Note that, this api only changes the data type of variables in
        __params__ file, and the __model__ file remains unchanged.

        Args:
            save_model_dir(str): The path to save the fp16 model.
        r   )r   fetchNsaveXT)	file_pathZsave_as_fp16)r   r1   r3   rB   Zsaved_params)r   r!   Zsave_combineYZ	__model__)&r	   CPUPlacer   Executorre   r   rg   rh   ri   ZProgramr   r   r   ZVarDescZVarTypeZRAWr   r!   r1  ZFP32Z_clone_variabler   r   r   normpathZ	append_opsortedr   r    Z
create_varr
   generater   Zset_persistableZ_sync_with_cppr   shutilcopyfile)r   r[  ru   exerA   Zinfer_programrR  r   Zsave_programZ
save_blockZsave_var_mapr$   Znew_varZsave_file_pathZsave_var_listr!   Zsaved_params_varZ	save_pathr   Z	src_modelZ
dest_modelr%   r%   r&   convert_weight_to_fp16  sx   








z)WeightQuantization.convert_weight_to_fp16c	                    sB  t  }	t|	}
t }tj| j|
| j| jd\ }}g }t	 j
D ]} |}|jD ]}|j|v r9|| q-q#t }|D ]*}|jD ]$}||v rj|dkr\| ||	||||| qF|dkrj| ||	|||| qFqAd}|du rud}n|dr|ddd	 }n|}tj||} fd
d|D }tj||||
 d dS )zC
        Generate quantized model or fake quantized model.
        r   rP   rO   Nr   r   r<   r   r   c                    s   g | ]	}   |qS r%   )r   r$   r   r"   r%   r&   r     s    z>WeightQuantization._quantize_weight_to_int.<locals>.<listcomp>)r   r"   )r	   rd  r   re  re   r   rg   rh   ri   r   r@  rA  r8   r   r    r'   Zinput_arg_names_weight_abs_max_quantization)_weight_channel_wise_abs_max_quantizationr   r   r   r   r   r   )r   r[  r\  r]  r\   r   r   r   rD  ru   rk  rA   rR  r   Zquantized_opsr$  rA  r   r#   r   r   r   r   r%   rm  r&   rZ    sh   





	

z*WeightQuantization._quantize_weight_to_intc                 C   s  d|d > d }|dkrt jnt j}	t||}
t|dk r't t |
}n| |
|}||
|
|k< | |
|
| k < || }t |
| 	|	}|sTt
|||| n|| 	t j}t
|||| |dd |d| ||d |g |dd	 d
S )z8
        Use abs_max method to quantize weight.
        r   rM   g|=r9  Zpost_weight_abs_maxquantize_weight_bits_quant_scaler'  TN)r   int8int16r   r  r  r  _calculate_thresholdaroundastyper4  r5  r   )r   rA   ru   r   rD  r   r   r   quantize_rangesave_weight_dtyper+  Zthreshold_valuer  quantized_weight_datadequantized_weight_datar%   r%   r&   rn  (  s8   z/WeightQuantization._weight_abs_max_quantizationc                 C   s  d|d > d }|dkrt jnt j}t||}	|jdkr'| |	||\}
}n|jdv r6| |	||\}
}nt	|jd  |sIt
|||| n(|jdkrU| ||
}n|jdv ra| ||
}nt	|jd  t
|||| |dd |d| ||d	 |
 |d
d dS )zE
        Use channel_wise_abs_max method to quantize weight.
        r   rM   rP  )rN  rO  z( is not supported by weight quantizationr9  Z post_weight_channel_wise_abs_maxrp  rq  r'  TN)r   rr  rs  r   r  r   _mul_channel_wise_quantization_conv_channel_wise_quantizationr   errorr4   _mul_channel_wise_dequantization!_conv_channel_wise_dequantizationr   )r   rA   ru   r   r   r   r   rw  rx  r+  scalesry  rz  r%   r%   r&   ro  S  sL   




z<WeightQuantization._weight_channel_wise_abs_max_quantizationc           	      C   sp   g }t j||d}|jd }t|D ]!}t t || | }|| t || | |||< q||fS )z
        Get channel wise scale for the weights of conv2d and depthwise_conv2d,
        and quantize the weights.
        r0  r   	r   
zeros_liker  r   r  r  r    ru  rv  	r   r+  rw  rx  r  ry  Zchannel_numr  r  r%   r%   r&   r|    s   


z2WeightQuantization._conv_channel_wise_quantizationc                 C   sB   t j|t jd}tt|D ]}|| ||  t j||< q|S )zR
        For conv2d and depthwise_conv2d, dequantize the weights to fp32.
        r0  r   r  r5  r   r>   rv  r   ry  r  rz  r  r%   r%   r&   r    s   z4WeightQuantization._conv_channel_wise_dequantizationc           	      C   s   g }t j||d}|jd }t|D ]-}t t |dd|f | }|| t |dd|f | ||dd|f< q||fS )r  r0  r  Nr  r  r%   r%   r&   r{    s   
 
z1WeightQuantization._mul_channel_wise_quantizationc                 C   sR   t j|t jd}tt|D ]}|dd|f ||  t j|dd|f< q|S )z:
        For mul, dequantize the weights to fp32.
        r0  Nr  r  r%   r%   r&   r~    s   z3WeightQuantization._mul_channel_wise_dequantization  c                 C   s   t |}t j||dt |fd\}}|tt| }d}d}tt|D ]}	|||	 7 }|d| kr:|	d } nq&|d |d  }
||
 S )Nr   r(  r
  r   )r   r  r"  r  r  rC  r   r>   )r   r   rD  Zhistogram_binsZ	input_absrQ   r)  rE  rF  r  r,  r%   r%   r&   rt    s   

z'WeightQuantization._calculate_thresholdrG  )r  )rH  rI  rJ  rX  rY  r   r^  rl  rZ  rn  ro  r|  r  r{  r~  rt  r%   r%   r%   r&   rV    s*    

VWE+7rV  )NNF)-loggingr   ri  numpyr   r   r   Zpaddle.base.frameworkr   r    r   r   Z	frameworkr	   r
   Z
log_helperr   rW   r   r   r   r   r   r   r   r   Zquantization_passr   r   r   r   r   r   r   rH  INFOr   r'   r:   r;   rF   rG   rL  rV  r%   r%   r%   r&   <module>   sN   $


          kL