o
    "jn%                     @   s   d dl Z d dl mZ d dlmZ d dlmZmZmZmZ ddl	m
Z
 g Zdd Zd#d	d
Z	d#ddZdd Zdd Zdd Zdd Zdd Z	d$ddZdd Zdd Zdd Zd%d!d"ZdS )&    N)	framework)core)_split_tensorsbuild_groupsin_dynamic_modesync_params_buffers   )loggerc                 C   sV   t | dd r$t| jd tr$g }| jD ]}|d D ]}|| qq|S t| j}|S )N_param_groupsr   params)getattr
isinstancer
   dictappendlistZ_parameter_list)	optimizerZparameters_listgroupparam r   t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/fleet/utils/hybrid_parallel_util.py obtain_optimizer_parameters_list    s   


r   c                 C   s  t  }g }g }| D ]'}|jr0| d ur0| }| r J d|| ||vs+J || q	t||}	|d u r?tj	 n|j
}
|d u rH|
nd| }|dkrRd n|}|	D ]-\}}}|d ur{tj||jd}tjj jd||dd|iddid	 tjj||d
 qVt|	 d S )N)Now, it doesn't support sparse parameters      ?dtypeZelementwise_div)XYZOutZaxis)typeinputsZoutputsattrsr   )set	trainable
_grad_ivarZ
_is_sparser   addr   paddledistributedget_world_sizenranksZ	to_tensorr   baser   Z_dygraph_tracerZtrace_op
all_reducer   )
parameters
comm_groupbucket_sizescalegrad_var_set	grad_varsZsparse_grad_varsr   g_varcoalesced_grads_and_varsr)   coalesced_grad_Z
div_factorr   r   r   _apply_collective_grads.   s@   



r6   c                 C   s  t  }g }| D ]B}d }|jr| d ur| }|jr-t|dr-| d u s*J d|j}|d urI| r9J d|| ||vsDJ || qt||}|d u rXt	j
 n|j}	|d u rcd|	 n|}|dkrkd n|}|D ]\}
}}|d ur}|
| t	j
j|
|d qot| d S )N	main_gradzparam.grad is not Noner   r   r!   )r"   r#   r$   hasattrr7   Z	is_sparser   r%   r   r&   r'   r(   r)   Zscale_r+   r   )r,   r-   r.   r/   r0   r1   r   r2   r3   r)   r4   r5   r   r   r   _apply_collective_grads_eagerW   s<   




r9   c           	      C   s   |  }| }| }|| jd}tjj|||dd |dkr)tj||d}n| }tjj|||dd |dkr^t	 rG| 
  ||  d S |     |   |   d S d S )NFT)srcr   Zsync_opr   r   )get_model_parallel_group!get_model_parallel_group_src_rankget_model_parallel_rank_copy_toplacer&   r'   	broadcastZzerosr   _clear_data_share_buffer_tovalueZ
get_tensor_clearZ_share_data_with)	datashaper   hcgmodel_parallel_groupsrc_rankmp_rankZ	shape_gpuZ
input_datar   r   r   _broadcast_data_help   s,   
rK   c                 C   s.   |  }| }| }tjj| ||d d S )N)r:   r   )r;   r<   r=   r&   r'   Zbroadcast_object_list)Zobject_listrG   rH   rI   rJ   r   r   r   _broadcast_object_list_help   s   
rL   c           
   	   O   s  t  }|dd }|dv s|t j v sJ d| t|dd }|dkr0t |}n|t j v r@t ||}d}ntd|	  d	|}|D ]I}t
|tjjrt . t rwtd
| d sw||d}|  || t|t ||j|  W d    n1 sw   Y  qNt||  qN| D ]Q\}	}t
|tjjrt . t rtd
| d s||d}|  || t|t ||j|  W d    n1 sw   Y  |||	< qt|| ||	< q||fS )N:r   )Zxpugpuz9Only support xpu, gpu and custom_device now, but this is r   rN   Zcustomzpaddle.ZPlacezv.place.is_Z_placeT)r&   Z
get_devicesplitZdeviceZget_all_custom_device_typeintZ	CUDAPlaceZCustomPlaceevalupperr   r   eagerZTensorr   no_gradr   r>   rA   rB   rK   rF   r   rL   items)
rG   r   kwargsZ
cur_devicedevZdev_idxr?   vZv_gpukr   r   r   broadcast_input_data   sL   




rZ   c                 C   $   |  }| }t| ||dd d S )NTZis_model_parallel)r;   r<   r   )modelrG   rH   rI   r   r   r   broadcast_mp_parameters   
   
r^   c                 C   r[   )NFr\   )get_data_parallel_groupZ get_data_parallel_group_src_rankr   )r]   rG   Zdata_parallel_grouprI   r   r   r   broadcast_dp_parameters   r_   ra      c                 C   sJ   t  rtnt}t  || ||| W d    d S 1 sw   Y  d S N)r   r9   r6   r   rT   )parameter_listr   r.   r/   Z
apply_funcr   r   r   $fused_allreduce_gradients_with_group   s   
"re   c                 C   s   d }d }|d urC|  dk}| dk}|s"|s"J d| d| d }d}|r1| }||j }|rC| }| }|d u rA|n|}td t| ||d d S )Nr   zdp_enabled z; sep_enabled r   z(dp or sep start fuse allreduce gradients)r/   )	Zget_data_parallel_world_sizeZget_sep_parallel_world_sizer`   r)   get_sep_parallel_groupZget_dp_sep_parallel_groupr	   debugre   )rd   rG   r   r/   Z
dp_enabledZsep_enabled	sep_groupZdp_sep_groupr   r   r   fused_allreduce_gradients   s,   

ri   c                 C   .   t d | }| }t| ||dd d S )Nz#sharding start init parameters syncFr\   )r	   rg   Zget_sharding_parallel_groupZ$get_sharding_parallel_group_src_rankr   )r]   rG   Zsharding_parallel_grouprI   r   r   r   broadcast_sharding_parameters	  s   

rk   c                 C   rj   )Nzsep start init parameters syncFr\   )r	   rg   rf   Zget_sep_parallel_group_src_rankr   )r]   rG   rh   rI   r   r   r   broadcast_sep_parameters  s   
rl   r   c                 C   s"   | }t ||r|j}t ||s|S rc   )r   
_inner_opt)r   Zoptimizer_instancesrm   r   r   r   unwrap_optimizer  s
   

rn   rc   )rb   N)r   )r&   r   Zpaddle.baser   Zpaddle.distributed.parallelr   r   r   r   Zlog_utilr	   __all__r   r6   r9   rK   rL   rZ   r^   ra   re   ri   rk   rl   rn   r   r   r   r   <module>   s*   
*
)
-	

