o
    "j#                     @   s   d dl mZ d dlmZ d dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ G dd dejZG dd dZdd ZeddddG dd dZdS )    )defaultdict)
MethodTypeN)_legacy_C_opsnn)	framework)baseto_variable)fleet) obtain_optimizer_parameters_list)core)
deprecatedc                       sN   e Zd Zd fdd	Zdd Zdd Z				
dddZejdddZ	  Z
S )MixPrecisionLayerfloat16c                    sb   t  | d  || _|| _| jdv sJ | j D ]}t|ds.d |_|| 	| qd S )NZ_mix_precision)r   Zbfloat16	main_grad)
super__init__Z	full_name_layersZ_dtype
parametershasattrr   Z_register_grad_hook_update_main_grad_hook)selfZlayersdtypeparam	__class__ s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/fleet/utils/mix_precision_utils.pyr   %   s   
zMixPrecisionLayer.__init__c                    s   t j  fdd}|S )z.Create the update_main_grad hook for backprop.c                    sr    j d u sJ d j|  r7 jd u r+tjj| t	j
 | jd j d _n j|  |   d S d S )NzJIn main_grad node, param.grad should be None, but find param[{}] has grad.z
main_grad@)valueplacename)Zgradformatr   Z_is_initializedr   r   eagerZTensorcastpaddleZfloat32r   r   Zadd_Z_clear_data)Ztmp_gradr   r   r   
param_hook6   s   

z<MixPrecisionLayer._update_main_grad_hook.<locals>.param_hook)r#   Zautogradno_grad)r   r   r%   r   r$   r   r   2   s   z(MixPrecisionLayer._update_main_grad_hookc                 O   s   | j |i |}|S N)r   )r   ZinputskwargsZoutputsr   r   r   forwardL   s   zMixPrecisionLayer.forwardNT c                 C   s   | j j|||dS )N)destinationinclude_sublayersstructured_name_prefix)r   
state_dict)r   r+   r,   r-   r   r   r   r.   Q   s
   zMixPrecisionLayer.state_dictc                 C   s   | j j||d d S )N)use_structured_name)r   set_state_dict)r   r.   r/   r   r   r   r0   ]   s   
z MixPrecisionLayer.set_state_dict)r   )NTr*   T)__name__
__module____qualname__r   r   r)   r.   r   Zdeprecate_stat_dictr0   __classcell__r   r   r   r   r   $   s    
r   c                   @   s@   e Zd Zdd Zejejdd ZejdddZ	dd	 Z
d
S )MixPrecisionOptimizerc                 C   s   || _ t|| _d S r'   )
_inner_optr
   _parameter_list)r   	optimizerr   r   r   r   e   s   zMixPrecisionOptimizer.__init__c                 C   sr  t | jd tsTg }| jD ];}|jrq|j}t r.t|dr-| r-| j	j
d ur-tdnt|drA| rA| j	j
d urAtd|||f q| j	jd d |d}d S | j	jD ]^}tdd }|d D ]=}|jrjqd|j}t rt|dr| r| j	j
d urtdnt|dr| r| j	j
d urtd|d ||f qd|d	d
 | D  | jd d |d qXd S )Nr   is_selected_rowszOAdamW don't support weight_decay with sparse parameters, please set it to None.
_is_sparse)ZlossZstartup_programparams_gradsc                   S   s   g S r'   r   r   r   r   r   <lambda>   s    z,MixPrecisionOptimizer.step.<locals>.<lambda>paramsc                 S   s   i | ]\}}|d kr||qS )r>   r   ).0kvr   r   r   
<dictcomp>   s    z.MixPrecisionOptimizer.step.<locals>.<dictcomp>)
isinstancer8   dictstop_gradientr   r#   Zin_dynamic_moder   r:   r7   ZregularizationRuntimeErrorr;   appendZ_apply_optimize_param_groupsr   updateitems)r   r<   r   Zgrad_varZoptimize_opsparam_groupr   r   r   stepi   sx   

zMixPrecisionOptimizer.stepTc                 C   s   g }| j d u st| j d ts| j D ]
}|js|| qn| jD ]}|d D ]
}|js1|| q'q!|D ]'}t|drR|jd urR|rI|j  q5|j	  d |_q5t|ds\|
| q5d S )Nr   r>   r   )r8   rC   rD   rE   rG   rH   r   r   Zzero__clearZclear_gradient)r   Zset_to_zeroZ
param_listprK   r   r   r   
clear_grad   s2   







z MixPrecisionOptimizer.clear_gradc                 C      t | j|S r'   )getattrr7   r   itemr   r   r   __getattr__      z!MixPrecisionOptimizer.__getattr__Nr1   )r2   r3   r4   r   imperative_baser&   r   Zdygraph_onlyrL   rO   rT   r   r   r   r   r6   d   s    @r6   c                 C   sH  | j sd S g }t|dd r9t|jd tr9|jD ]}|d D ]}|jd ur6|jjtjj	j
ks0J ||j qqn|jD ]}|jd urT|jjtjj	j
ksNJ ||j q<ttdgtj}t|rnt|| j|| |rrdnd| _t }|d ur|j| krtj| jgdd}tjj|tjjj d d t!|| _d S d S d S )NrH   r   r>      Zint32)r   )opgroup)"Z_enablerQ   rC   rH   rD   r   r   r   ZVarDescZVarTypeZFP32rG   r8   r   nparrayZastypeZbool_lenr   Zcheck_finite_and_unscaleZ_scaleZ
_found_infr	   Zget_hybrid_communicate_groupZnranksZget_data_parallel_world_sizer#   Z	to_tensordistributedZ
all_reduceZReduceOpMAXint)r   r9   Zparam_gradsrY   r   Ztemp_found_infZhcgZis_found_infr   r   r   unscale_method   sH   




r`   z2.5.0zpaddle.distributed_scalerrW   )ZsinceZ	update_tolevelc                   @   s   e Zd Zdd Zdd ZdS )MixPrecisionScalerc                 C   s   || _ tt|| j _d S r'   )_inner_scalerr   r`   Z_unscale)r   Zscalerr   r   r   r      s   zMixPrecisionScaler.__init__c                 C   rP   r'   )rQ   rc   rR   r   r   r   rT      rU   zMixPrecisionScaler.__getattr__N)r2   r3   r4   r   rT   r   r   r   r   rb      s    rb   )collectionsr   typesr   numpyrZ   r#   r   r   Zpaddle.baser   Zpaddle.base.dygraphr   rV   r   Zpaddle.distributedr	   Z3paddle.distributed.fleet.utils.hybrid_parallel_utilr
   Zpaddle.frameworkr   Zpaddle.utilsr   ZLayerr   r6   r`   rb   r   r   r   r   <module>   s(   @d&