o
    #jX                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZmZ d dl	m
Z
 d dlmZ ddlmZmZmZmZ ddlmZ e
eejd	d
ZejjjejjjejjjgZdZdd Zdd Zdd Z dd Z!dd Z"dd Z#edd Z$dd Z%	d%ddZ&	d&dd Z'd'd!d"Z(d%d#d$Z)dS )(    N)core	frameworkglobal_scope)
get_logger)signature_safe_contextmanager   )_rename_arg_rename_op_inputfind_true_post_opfind_true_prev_op   )AutoMixedPrecisionListsBF16z&%(asctime)s-%(levelname)s: %(message)s)fmtZ__use_bf16__c                 C   s4   t | } t jdd t jgd| j}t || jS )Nc                 S   s   t dt d| d d? S )Nz<Iz<fr      )structunpackpack)x r   a/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/static/amp/bf16/amp_utils.py<lambda>2   s    z)convert_float_to_uint16.<locals>.<lambda>)Zotypes)npZasarrayZ	vectorizeZuint16ZflatZreshapeshape)Zin_listoutr   r   r   convert_float_to_uint16/   s   
r   c                 C   s   | t jjjkr	dS dS )zx
    Convert specific variable type to its corresponding string.

    Args:
        dtype (VarType): Variable type.
    Zbf16Zfp32)r   VarDescVarTypeBF16)dtyper   r   r   _dtype_to_str8   s   r   c              
   C   s  d}|j D ]}}|tjjjkr|jdv r|dvrq||D ]d}| |}|jtvs.|j	|kr/q|j	|krv|j
d t| }	| j|	}
|
du sL|
j	|krm| j|	|d|jd}
| j|dd	|id
|
i|j	|
j	dd |d7 }t||j
|
j
 q|dr|d| qq|tjjjkr|tjjjkr|jD ]>}|jdv r|dkrq||D ],}| |}
|
jtvrq|
j	tjjjkr|
jtjjj |dr|dtjjj qq|S )a  
    Insert cast op and rename args of input and output.

    Args:
        block (Program): The block in which the operator is.
        op (Operator): The operator to insert cast op.
        idx (int): The index of current operator.
        src_dtype (VarType): The input variable dtype of cast op.
        dest_dtype (VarType): The output variable dtype of cast op.

    Returns:
        num_cast_op (int): The number of cast ops that have been inserted.
    r   )
batch_normfused_bn_add_activation
layer_norm   XZ.cast_NFnamer   Zpersistablestop_gradientcastr$   Outin_dtype	out_dtypetypeZinputsZoutputsattrsr   r-   Yr.   )input_namesr   r   r   FP32r0   inputvar_valid_typesr   r(   r   varsget
create_varr)   
_insert_opr   has_attr	_set_attrr   output_namesoutputdesc	set_dtype)blockopidx	src_dtype
dest_dtypenum_cast_opsin_namein_var_namein_var	cast_nameout_varout_nameout_var_namer   r   r   _insert_cast_opE   sj   








rO   c                 C   s   d}|  |}|jtvs|j|kr|S |j|ks%J dt|jt||jd t| }	| j|	}
|
d u s=|
j|krg| j	|	|d|j
d}
| j|dd|id|
i|j|
jd	d
 |d7 }|
j|| j |j< |S )Nr   z4The real dtype({}) is not equal to the src dtype({})r&   Fr'   r*   r$   r+   r,   r/   r   )r6   r0   r7   r   formatr   r(   r8   r9   r:   r)   r;   rD   )rB   rC   rD   rE   rF   target_nameop_var_rename_maprG   Z
target_varrK   Zcast_varr   r   r   _insert_cast_post_op   s8   
rS   c                 C   sF   |j sdS | jD ]
}||j v r dS q| jD ]
}||j v r  dS qdS NFT)fp32_varnamesinput_arg_namesoutput_arg_names)rC   	amp_listsrH   rM   r   r   r   _is_in_fp32_varnames   s   



rY   c                 C   sr   | j |v rdS g }|t| j |t| j |D ]	}d|v r$ dS q|r7| dr5t| dv r5dS dS dS )NTZlearning_rateZop_namescopeF)r0   extendlistrV   rW   r<   _bf16_guard_patternattr)rC   Zunsupported_op_listuse_bf16_guardZin_out_arg_namesr(   r   r   r   _need_keep_fp32   s   

r_   c                   c   s:    t jtd dV  W d   dS 1 sw   Y  dS )av  
    As for the pure bf16 training, if users set `use_bf16_guard` to True,
    only those ops created in the context manager `bf16_guard` will be
    transformed as float16 type.

    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> import paddle
            >>> import paddle.nn.functional as F
            >>> paddle.enable_static()
            >>> data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
            >>> conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)

            >>> with paddle.static.amp.bf16.bf16_guard():
            ...     bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
            ...     pool = F.max_pool2d(bn, kernel_size=2, stride=2)
            ...     hidden = paddle.static.nn.fc(pool, size=10)
            ...     loss = paddle.mean(hidden)
    )prefixN)r   Z
name_scoper\   r   r   r   r   
bf16_guard   s   "ra   c                 C   s(   | D ]}|D ]
}||v r  dS qqdS rT   r   )post_opskeep_fp32_opspost_oprC   r   r   r   are_post_ops_bf16   s   re   c                 C   s  |   j}|D ]}t|j|jv rd}g }	g }
|jD ]-}||D ]%}||}t|||d}|d u s8|jt	vr<d} n|	
| |

| q!q|rt|	|r|
D ]"}|jtjjjkrd|jtjjj |d urs|j|v rs||j qQ|dr|dtjjjkr|dtjjj qd S )NTFr   )global_blockopsstrr0   Zbf16_initializer_listr>   r?   r6   r
   r7   appendre   r   r   r   r   r4   r@   rA   r   r(   remover<   r]   r=   )startup_progrX   rB   Zall_opsrc   to_bf16_var_namesZprepend_opsrC   Z	change_opZop_post_opsZop_out_varsrM   rN   rL   rd   r   r   r   cast_initializers_to_bf16  s<   




rm   Tc                 C   s  |du rt  }|  }t }t }t }g }| jD ]}	||	j q| jD ]_}	|	j}
|
D ]H}|jdks:|jdkr;q-t||j|rH|	| q-|j
D ]w}|jdv rW|dvrWqK||D ]e}d}z|	|}W n- ty } z!td| d ||}|durtd| d	 W Y d}~nd}~ww |du s|jtvrq\|jtjjjkr|jtjjj |	| td
|j||j q\qK|jD ]w}|jdv r|dkrq||D ]e}d}z|	|}W n/ ty } z"td| d ||}|durtd| d	 W Y d}~nd}~ww |du s|jtvrq|jtjjjkr0|jtjjj td|j||j qqdD ]}||r\||tjjjkr\||tjjj q@|drj|dd |drv|dd q-|durt||||
|| q%dd t t!| jD }| jD ]}	|	j}
d}|t!|
k r<|
| }d}||vr||v rt"|	||tjjjtjjj}||7 }nit"|	||tjjjtjjj}||7 }|j#D ]S}|	j$%|}|du s|jtvrq|jtjjjkr-|jtjjj t&|
||}|D ]"}||v rq
t'|	||| d tjjjtjjj||}||7 }q
q||d 7 }|t!|
k sqt(| ||| |S )a  
    Traverse all ops in the whole model and set their inputs and outputs
    to the bf16 data type. This function will do some special processing for
    the batch normalization, which will keep the batchnorm's computations in FP32.
    Args:
        program (Program): The used program.
        amp_lists (AutoMixedPrecisionListsBF16): An AutoMixedPrecisionListsBF16 object.
        use_bf16_guard(bool): Determine whether to use `bf16_guard` when
                              constructing the program. Default True.
    Ncreate_py_readerread>   r    r!   r"   r#   z-- z&, try to get it in the global block --z-- var z is got in the global block --z4-- op type: {}, in var name: {}, in var dtype: {} --r2   z6-- op type: {}, out var name: {}, out var dtype: {} --)r-   r.   r   
use_mkldnnTmkldnn_data_typebfloat16c                 S   s   g | ]}t  qS r   )collectionsOrderedDict).0_r   r   r   
<listcomp>  s    z&cast_model_to_bf16.<locals>.<listcomp>r   r   ))r   rf   setblocksrZ   rg   r0   r_   Zunsupported_listaddr3   r5   r6   
ValueError_loggerdebugr7   r   r   r   r   r4   r@   rA   r   rP   r>   r?   r<   r]   r=   rm   rangelenrO   rW   r8   r9   r
   rS   r	   )programrk   rX   r^   rf   rc   rl   Zto_bf16_pre_cast_opsZ
origin_opsrB   rg   rC   rH   rI   rJ   erM   rN   rL   	attr_namerR   rD   rG   Zin_var_cast_numZpre_cast_numrb   rd   Zpost_cast_numr   r   r   cast_model_to_bf16+  s2  

























	,r   c                 C   s   g }|j D ]	}||  q|r|nt }|r|nt }|D ]&}|j|v rEtd|j d ||j	 }	t
|	}
|	t|
|  qdS )a  
    Traverse all parameters in the whole model and set them to the BF16 data type.
    Whereas, this function will keep parameters of batchnorms in FP32.
    Args:
        place(base.CPUPlace|base.CUDAPlace): `place` is used to restore the BF16 weight tensors.
        program (Program): The used program.
        scope(base.Scope, optional): `scope` is used to get the FP32 weight tensor values.
                                      Default is None.
        to_bf16_var_names(set|list, optional): The data types of vars in `to_bf16_var_names`
                                               will be set to BF16. Usually, it is the returned
                                               value of `cast_model_to_bf16` API.
    z
---- cast z to bf16 dtype ----N)ry   rZ   all_parametersrx   r   r(   r|   r}   Zfind_varZ
get_tensorr   arrayr   )Zplacer   scoperl   r   rB   Zbf16_var_namesZ	var_scopeparamZparam_tdatar   r   r   cast_parameters_to_bf16  s   


r   c                 C   sP  |du rt  }|  }|j}t }t }|D ]}|jdks"|jdkr#q|jdur3t||r3|| q|j|jv r?|| q|j|j	v rK|| q|j|j
v rd}d}|jD ]D}	|	r||	D ]:}
||
}|jdu rnqa|j|u rt|||
}|du r~qan|j}||v s|j|jv rd}qa||v s|j|j	v rd}qaqX|r|| q|r|| q	 q|| qd}|t|k r&|| }d}||v rt|||tjjjtjjj}nA||v r|dr|dd |dd	 n|d
r|d
tjjjkr|d
tjjj t|||tjjjtjjj}n	 ||d 7 }|t|k sdS dS )a/  
    Traverse all ops in current block and insert cast op according to
    which set current op belongs to.

    1. When an op belongs to the fp32 list, add it to fp32 set
    2. When an op belongs to the bf16 list, add it to bf16 set
    3. When an op belongs to the gray list. If one
       of its inputs is the output of fp32 set op or fp32 list op,
       add it to fp32 set. If all of its previous ops are not fp32
       op and one of its inputs is the output of bf16 set op or
       bf16 list op, add it to bf16 set.
    4. When an op isn't in the lists, add it to fp32 op set.
    5. Add necessary cast ops to make sure that fp32 set op will be
       computed in fp32 mode, while bf16 set op will be computed in
       bf16 mode.

    Args:
        main_prog (Program): The main program for training.
    Nrn   ro   FTr   rp   rq   rr   r   r   )r   rf   rg   rx   r0   rU   rY   rz   Z	fp32_listZ	bf16_listZ	gray_listr3   r5   r6   rC   r   r   rO   r   r   r   r   r4   r<   r=   r]   )Z	main_progrX   rB   rg   Zbf16_op_setZfp32_op_setrC   Z
is_fp32_opZ
is_bf16_oprH   rI   rJ   Zprev_oprD   rG   r   r   r   rewrite_program_bf16  s   






r   )N)NNT)NN)*rs   loggingr   numpyr   Zpaddle.baser   r   r   Zpaddle.base.log_helperr   Zpaddle.base.wrapped_decoratorr   Z
fp16_utilsr   r	   r
   r   rX   r   __name__INFOr|   r   r   Z
LOD_TENSORZSELECTED_ROWSZLOD_TENSOR_ARRAYr7   r\   r   r   rO   rS   rY   r_   ra   re   rm   r   r   r   r   r   r   r   <module>   sB   	L$

*
 
/