o
    "j*R                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddgZdaejd	ejj ZG d
d dZG dd dZG dd dZG dd dZdS )    N)reduce)product)check_nccl_version_for_p2p   )loggerCommunicateTopologyHybridCommunicateGroupZPADDLE_USE_FOUR_DIRECTIONS_P2Pc                   @   s$   e Zd ZdZdZdZdZdZdZdS )ParallelModea  

    There are all the parallel modes currently supported:

        - DATA_PARALLEL: Distribute input data to different devices.
        - TENSOR_PARALLEL: Shards tensors in the network to different devices.
        - PIPELINE_PARALLEL: Place different layers of the network on different devices.
        - SHARDING_PARALLEL: Segment the model parameters, parameter gradients and optimizer states corresponding to the parameters to each device.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle
            >>> parallel_mode = paddle.distributed.ParallelMode
            >>> print(parallel_mode.DATA_PARALLEL)
            0

    r      r         N)	__name__
__module____qualname____doc__DATA_PARALLELTENSOR_PARALLELPIPELINE_PARALLELSHARDING_PARALLELSEGMENT_PARALLEL r   r   g/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/fleet/base/topology.pyr	   !   s    r	   c                   @   sr   e Zd Zg dg dfddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )r   )datapipeshardingsepmodel)r
   r
   r
   r
   r
   c                    s   | _ | _td j  _tdd  jd _dd  jD } fddt| D }tt	|t
t| _tt	 j  j  _d S )NZ
Coordinatec                 S   s   | | S Nr   )xyr   r   r   <lambda>H   s    z.CommunicateTopology.__init__.<locals>.<lambda>r
   c                 S   s   g | ]}t |qS r   )range).0dr   r   r   
<listcomp>J   s    z0CommunicateTopology.__init__.<locals>.<listcomp>c                    s   g | ]} j | qS r   )
coordinate)r"   r   selfr   r   r$   K   s    )_parallel_names_dimscollections
namedtupler%   r   _world_sizer   dictzipr!   len_coord2rankvalueskeys_rank2coord)r'   Zhybrid_group_namesdimsrangesZall_coordinater   r&   r   __init__>   s   
zCommunicateTopology.__init__c                 C      | j S r   )r(   r&   r   r   r   get_hybrid_group_namesR      z*CommunicateTopology.get_hybrid_group_namesc                 C   s   | j | j| S r   )r)   r(   indexr'   	axis_namer   r   r   get_dimU   s   zCommunicateTopology.get_dimc                 C   r7   r   )r,   r&   r   r   r   
world_sizeX   r9   zCommunicateTopology.world_sizec                 K   sB   t |t | jksJ | jdi |}|| j v sJ | j| S Nr   )r/   r)   r%   r0   r2   )r'   argskeyr   r   r   get_rank[   s   
zCommunicateTopology.get_rankc                 C   s*   || j k sJ || j v sJ | j| S r   )r,   r3   r2   )r'   rankr   r   r   	get_coorda   s   
zCommunicateTopology.get_coordc                    s4   j |  fddj D }|  |S )Nc                    s"   g | ]}|  krj | qS r   )r0   )r"   coordZaxisr:   r'   r   r   r$   h   s
    z5CommunicateTopology.get_axis_list.<locals>.<listcomp>)r(   r:   r0   r2   sort)r'   r<   r:   ranksr   rF   r   get_axis_listf   s   z!CommunicateTopology.get_axis_listc                 C   s    || j v sJ | j| j | S r   )r(   r)   r:   r;   r   r   r   get_dim_sizep   s   z CommunicateTopology.get_dim_sizec              	   C   s   t t| j|}g }|D ]}|t| j| j|  qg }|D ]}|t| j| j|  q#g }t| D ]>}i }g }	t	|D ]
\}
}||||
 < qDt| D ]}t	|D ]
\}
}||||
 < qY|	| j
| jdi |  qS||	 q:|S r?   )listsetr(   
differenceappendr!   r)   r:   r   	enumerater0   r%   )r'   Z
fused_axisZnon_fused_axisZnon_fused_rangesr<   Zfused_rangesZ	rank_listZnon_fused_ranksZ
coord_dictrH   iZnon_fused_rankZfused_ranksZ
fused_rankr   r   r   get_fused_rankst   s.   z#CommunicateTopology.get_fused_ranksc              	      s    | j v sJ  fdd| j D }g }|D ]}| |}|t| qg }t| D ]6}i }|D ]}	|||	 ||	< q0g }
td|  D ]}|| < |
| j| jdi |  qF||
 q*|S )Nc                    s   g | ]}| kr|qS r   r   )r"   namer<   r   r   r$      s    z5CommunicateTopology.get_comm_list.<locals>.<listcomp>r   r   )r(   rJ   rN   r!   r   r:   r0   r%   )r'   r<   Zother_axis_namesr5   rR   Zdim_numZ
all_resultr   Z	key_coordZ
other_nameresultrP   r   rS   r   get_comm_list   s&   

z!CommunicateTopology.get_comm_listc                 K   s.   |  |}|jdi | }| jdi |S r?   )rD   _replace_asdictrB   )r'   global_rankkwargsrE   tfr   r   r   get_rank_from_stage   s   
z'CommunicateTopology.get_rank_from_stageN)r   r   r   r6   r8   r=   r>   rB   rD   rI   rJ   rQ   rU   r[   r   r   r   r   r   =   s    

c                   @   sb  e Zd Zdd Zdd Zdd Zdd ZdXd
dZdXddZdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dYdMdNZ(dOdP Z)dQdR Z*dSdT Z+dUdV Z,dWS )Zr   c              	   C   sn  t j | _t j | _|| _| jd| _| jd| _	| jd| _
| jd| _| jd| _|  | _|  | _|  | _|  | _|  | _|  s_J d| j| j	| j| j
| j| d\| _| _t jjt jdgdd	t jjj| jd
 | d\| _ | _!| d\| _"| _#| d\| _$| _%d | _&| jdkr| d\| _&| _'| (d\| _)| _*| jdkr| (d\| _+| _,| jdkr| -ddg\| _.| _/| -ddg\| _0| _1| (d\| _+| _,| jdk| _2| j| j
d k| _3| j
dkrt j4j56 rt7  | 8  t9r| :  d| j| j	| j| j
| j| jf }|d| j"| j$| j| j | j&| j)7 }t;<| | a=d S )Nr   r   r   r   r   zAmp_num: {}, sharding_num: {}, pp_num: {}, dp_num: {}, sep_num: {}r
   Zint32)Zdtype)opgroupr   zqHybridParallelInfo: rank_id: %d, mp_degree: %d, sharding_degree: %d, pp_degree: %d, dp_degree: %d, sep_degree: %dzd, mp_group: {},  sharding_group: {}, pp_group: {}, dp_group: {}, sep:group: {}, check/clip group: {})>paddledistributedZget_world_sizenranksrB   rX   _topor=   
_dp_degree
_mp_degree
_pp_degree_sharding_degree_sep_degree_get_data_parallel_id_data_parallel_id_get_model_parallel_id_model_parallel_id_get_sharding_parallel_id_sharding_parallel_id_get_sep_parallel_id_sep_parallel_id_get_pipe_parallel_idstage_id_check_vaild_topoformat_set_comm_groupZ	_pp_group_pp_comm_groupZ
all_reduceZzerosZReduceOpZSUMZ	_dp_group_dp_comm_groupZ	_mp_group_mp_comm_groupZ_sharding_group_sharding_comm_groupZ
_sep_group_sep_comm_group_set_check_groupZ_check_group_check_comm_groupZsharding_check_groupsharding_check_comm_groupcreate_fuse_groupZ_dp_sep_group_dp_sep_comm_groupZ_pp_mp_group_pp_mp_comm_groupZis_first_stageZis_last_stageZ	frameworkcoreZis_compiled_with_ncclr   _set_p2p_prev_next_use_four_directions_set_four_directions_p2p_groupr   info_HYBRID_PARALLEL_GROUP)r'   topologyZ	debug_strr   r   r   r6      s   








zHybridCommunicateGroup.__init__c                 C   s   | j dkr| jdkr| jdkr| jdkr| jdkrtjS | j dkr3| jdkr3| jdkr3| jdkr3tjS | j dkrE| jdkrE| jdkrEtjS | j dkrR| jdkrRtj	S | j dkrZtj
S d S )Nr
   )rd   rc   rf   re   rb   r	   r   r   r   r   r   r&   r   r   r   get_parallel_mode%  s(   












z(HybridCommunicateGroup.get_parallel_modec                 C   s$   | j | j | j | j | j | jkS r   )rb   rc   rd   re   rf   r`   r&   r   r   r   rq   K  s   z(HybridCommunicateGroup._check_vaild_topoc                 C   s   | j dks	J dd S )Nr
   zsep not existrf   r&   r   r   r   _check_sep_existU  s   z'HybridCommunicateGroup._check_sep_existr   c                 C   sx   g }d }| j |}|D ]}tjj|d}| j|v r|}|}qt|dks'J |d us-J td	t|| ||fS )NrH   r   z.Total {} {} comm group(s) create successfully!)
ra   rU   r^   r_   	new_grouprX   r/   r   r   rr   )r'   parallel_methodparallel_groupparallel_comm_groupparallel_groupsr]   
comm_groupr   r   r   rs   X  s"   
z&HybridCommunicateGroup._set_comm_groupc                 C   st   g }d }| j |}t|D ]}| j ||}tjj|d}| j|v r'|}|}qt|dks0J |d us6J ||fS )Nr   r   )	ra   r=   r!   rI   r^   r_   r   rX   r/   )r'   r   r   r   Zparallel_sizeidxr   r   r   r   r   ry   m  s   
z'HybridCommunicateGroup._set_check_groupc                 C      t | ds	J d| jS )N	next_rankznext_rank has not been inited)hasattrr   r&   r   r   r   _get_p2p_next_rank}     z)HybridCommunicateGroup._get_p2p_next_rankc                 C   r   )N	prev_rankzprev_rank has not been inited)r   r   r&   r   r   r   _get_p2p_prev_rank  r   z)HybridCommunicateGroup._get_p2p_prev_rankc                 C   s|   | j d}|D ]3}t|| jksJ t|D ]#\}}|}||d | j  }||d | j  }| j|kr:|| _|| _qqd S )Nr   r
   )ra   rU   r/   rd   rO   rX   r   r   )r'   
comm_lists
comm_ranksr   rC   	curr_rankr   r   r   r   r   r     s   
z)HybridCommunicateGroup._set_p2p_prev_nextc           
      C   s  | j d}d | _d | _d | _d | _|D ]\}t|| jksJ t|D ]L\}}|}||d | j  }||d | j  }t	j
j||gd}| j|krM|| _n| j|krU|| _t	j
j||gd}	| j|krg|	| _q#| j|kro|	| _q#q| jd usxJ | jd usJ | jd usJ | jd usJ d S )Nr   r
   r   )ra   rU   send_next_groupsend_prev_grouprecv_next_grouprecv_prev_groupr/   rd   rO   r^   r_   r   rX   )
r'   r   r   r   rC   r   r   r   Z
next_groupZ
prev_groupr   r   r   r     s>   



z5HybridCommunicateGroup._set_four_directions_p2p_groupc                 C   r7   r   )ra   r&   r   r   r   r     r9   zHybridCommunicateGroup.topologyc                 C   r7   r   )rX   r&   r   r   r   get_global_rank  r9   z&HybridCommunicateGroup.get_global_rankc                 C      | j | jjS r   )ra   rD   rX   r   r&   r   r   r   rg        z,HybridCommunicateGroup._get_data_parallel_idc                 C   r7   r   )rh   r&   r   r   r   get_data_parallel_rank  r9   z-HybridCommunicateGroup.get_data_parallel_rankc                 C   r7   r   )rb   r&   r   r   r   get_data_parallel_world_size  r9   z3HybridCommunicateGroup.get_data_parallel_world_sizec                 C   r7   r   )ru   r&   r   r   r   get_data_parallel_group  r9   z.HybridCommunicateGroup.get_data_parallel_groupc                 C      | j jd S Nr   )ru   rH   r&   r   r   r    get_data_parallel_group_src_rank     z7HybridCommunicateGroup.get_data_parallel_group_src_rankc                 C   r   r   )ra   rD   rX   r   r&   r   r   r   ri     r   z-HybridCommunicateGroup._get_model_parallel_idc                 C   r7   r   )rj   r&   r   r   r   get_model_parallel_rank  r9   z.HybridCommunicateGroup.get_model_parallel_rankc                 C   r7   r   )rc   r&   r   r   r   get_model_parallel_world_size  r9   z4HybridCommunicateGroup.get_model_parallel_world_sizec                 C   r7   r   )rv   r&   r   r   r   get_model_parallel_group  r9   z/HybridCommunicateGroup.get_model_parallel_groupc                 C   r   r   )rv   rH   r&   r   r   r   !get_model_parallel_group_src_rank  r   z8HybridCommunicateGroup.get_model_parallel_group_src_rankc                 C   r   r   )ra   rD   rX   r   r&   r   r   r   ro     r   z,HybridCommunicateGroup._get_pipe_parallel_idc                 C   r7   r   )rp   r&   r   r   r   get_stage_id  r9   z#HybridCommunicateGroup.get_stage_idc                 C   r7   r   )rd   r&   r   r   r   get_pipe_parallel_world_size  r9   z3HybridCommunicateGroup.get_pipe_parallel_world_sizec                 C   r   r   )ra   rD   rX   r   r&   r   r   r   rm     r   z+HybridCommunicateGroup._get_sep_parallel_idc                 C   r7   r   )rn   r&   r   r   r   get_sep_parallel_rank  r9   z,HybridCommunicateGroup.get_sep_parallel_rankc                 C   r7   r   r   r&   r   r   r   get_sep_parallel_world_size  r9   z2HybridCommunicateGroup.get_sep_parallel_world_sizec                 C      |    | jS r   )r   rx   r&   r   r   r   get_sep_parallel_group     z-HybridCommunicateGroup.get_sep_parallel_groupc                 C   s   |    | jjd S r   )r   rx   rH   r&   r   r   r   get_sep_parallel_group_src_rank  s   z6HybridCommunicateGroup.get_sep_parallel_group_src_rankc                 C   r7   r   )rt   r&   r   r   r   get_pipe_parallel_group  r9   z.HybridCommunicateGroup.get_pipe_parallel_groupc                 C   s    t sJ d| j| j| j| jfS )NzrIf you want to use four directions p2p group, set the environment variable PADDLE_USE_FOUR_DIRECTIONS_P2P to True.)r   r   r   r   r   r&   r   r   r   get_p2p_groups  s   z%HybridCommunicateGroup.get_p2p_groupsc                 C   r   r   )ra   rD   rX   r   r&   r   r   r   rk     r   z0HybridCommunicateGroup._get_sharding_parallel_idc                 C   r7   r   )rl   r&   r   r   r   get_sharding_parallel_rank  r9   z1HybridCommunicateGroup.get_sharding_parallel_rankc                 C   r7   r   )re   r&   r   r   r    get_sharding_parallel_world_size  r9   z7HybridCommunicateGroup.get_sharding_parallel_world_sizec                 C   r7   r   )rw   r&   r   r   r   get_sharding_parallel_group  r9   z2HybridCommunicateGroup.get_sharding_parallel_groupc                 C   r   r   )rw   rH   r&   r   r   r   $get_sharding_parallel_group_src_rank  s   z;HybridCommunicateGroup.get_sharding_parallel_group_src_rankFc                 C   s   |r| j S | jS r   )r{   rz   )r'   r   r   r   r   get_check_parallel_group  s   z/HybridCommunicateGroup.get_check_parallel_groupc                 K   s   | j j| jfd|i|S )Nr   )ra   r[   rX   )r'   rp   rY   r   r   r   r[     s   z*HybridCommunicateGroup.get_rank_from_stagec                 C   r   r   )r   r}   r&   r   r   r   get_dp_sep_parallel_group%  r   z0HybridCommunicateGroup.get_dp_sep_parallel_groupc                 C   r   r   )r   r~   r&   r   r   r   get_pp_mp_parallel_group)  r   z/HybridCommunicateGroup.get_pp_mp_parallel_groupc                 C   s   t |dks
J dg }g }| j|}|  |D ]}tjj|d}| j|v r2|| || qt |dks;J t |dksCJ t	
dt || t |dkrX||fS |d |d fS )Nr   z9the length of fused_strategy_list must be greater than 0.r   z7Total {} comm group(s) of fused {} create successfully!r
   )r/   ra   rQ   rG   r^   r_   r   rX   rN   r   r   rr   )r'   Zfused_strategy_listr   r   r   r]   r   r   r   r   r|   -  s.   


z(HybridCommunicateGroup.create_fuse_groupN)r   )F)-r   r   r   r6   r   rq   r   rs   ry   r   r   r   r   r   r   rg   r   r   r   r   ri   r   r   r   r   ro   r   r   rm   r   r   r   r   r   r   rk   r   r   r   r   r   r[   r   r   r|   r   r   r   r   r      sV    v&


%
c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )_CommunicateGroupztmp for staticc                 C   s   | a i | _d S r   )r   groupsr&   r   r   r   r6   N  s   
z_CommunicateGroup.__init__c                 C   s    t jj|||}|| j|< d S r   )r^   r_   Z
collectiveGroupr   )r'   
group_nameZ
group_rankZ
group_sizeZring_idZgroup_ranksr]   r   r   r   set_comm_groupS  s   z _CommunicateGroup.set_comm_groupc                 C   s   || j v sJ | j | S r   )r   )r'   r   r   r   r   	get_group[  s   
z_CommunicateGroup.get_groupc                 C   s
   |  dS Nr   )r   r&   r   r   r   r   _  s   
z*_CommunicateGroup.get_model_parallel_groupc                 C      |  djS r   )r   r`   r&   r   r   r   r   b  r   z/_CommunicateGroup.get_model_parallel_world_sizec                 C   r   r   )r   rC   r&   r   r   r   r   e  r   z)_CommunicateGroup.get_model_parallel_rankN)
r   r   r   r   r6   r   r   r   r   r   r   r   r   r   r   K  s    r   )r*   os	functoolsr   	itertoolsr   r^   Z#paddle.distributed.utils.nccl_utilsr   Zutils.log_utilr   __all__r   environgetbaser   Zis_compiled_with_xpur   r	   r   r   r   r   r   r   r   <module>   s&   q    