o
    "j                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZmZ dadd Zdd Zdd ZG dd dZG dd dZG dd dZG dd dZdS )    N)defaultdict)PassContext)IrGraphcore	set_flags   )ProcessMesh   )DistributedOperator)DistributedTensor)__no_shape_var_type___copy_dist_attr_to_cppis_loss_grad_opc                  C   s   t d u rt } t|  t S N)_g_default_distributed_contextDistributedContextset_default_distributed_contextZdist_context r   u/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_parallel/static/dist_context.pyget_default_distributed_context"   s   r   c                 C   s   | a d S r   )r   r   r   r   r   r   *   s   r   c                 C   s   | j  | j  fS r   )nodegraph_ididr   r   r   r   _node_id/   s   r   c                	   @   s  e Zd ZdZddddi i dddf	ddZedd Zedd Zed	d
 Zedd Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zejdd Zed d! Zed"d# Zed$d% Zed&d' Zed(d) Zejd*d) Zed+d, Zed-d. Zejd/d, Zd0d1 Zd2d3 Zd}d5d6Zd7d8 Zd9d: Zd;d< Z d~d>d?Z!d~d@dAZ"	4	=	4	=ddBdCZ#ddEdFZ$dGdH Z%dIdJ Z&dKdL Z'dMdN Z(dOdP Z)dQdR Z*dSdT Z+dUdV Z,dWdX Z-dYdZ Z.d[d\ Z/d]d^ Z0d_d` Z1dadb Z2dcdd Z3dedf Z4dgdh Z5ddidjZ6dkdl Z7dmdn Z8dodp Z9dqdr Z:dsdt Z;dudv Z<dwdx Z=dydz Z>d{d| Z?dS )r   z
    DistributedContext is used to collect related distributed information for program and graph.
    One auto-parallel run should use its own DistributedContext to avoid interfering other run.
    Nc
           
      C   s  || _ || _|| _|| _|| _|| _d | _d | _d | _d | _	i | _
i | _d | _i | _i | _d | _i | _i | _i | _i | _i | _i | _t | _g | _|| _|| _t | _t | _g | _g | _ g | _!d| _"d| _#g | _$g | _%g | _&g | _'g | _(g | _)d| _*d| _+t, | _-|	| _.d| _/d S )NFTr   )0_original_serial_main_program _original_serial_startup_program_original_serial_optimizer_original_serial_loss_original_serial_feed_vars_original_serial_fetch_vars_serial_main_program_serial_startup_program_serial_loss_serial_optimizer_serial_feed_vars_serial_fetch_varsZ_lr_optimizer_dist_tensors_for_program_dist_ops_for_program_serial_graph_dist_tensors_for_graph_dist_ops_for_graph_node_id_to_tensor_id_node_id_to_op_id_dist_main_programs_dist_startup_programsDistributedOperatorContext_dist_op_context_process_meshes_cluster	_strategyr   _pass_context
BlockState_block_state_serial_ordered_tensor_nodes_serial_ordered_op_nodes_serial_ordered_nodes_is_initialized_need_copy_dist_attr_to_graph_backup_pass_context_stack_backup_block_state_stack&_backup_dist_tensors_for_program_stack"_backup_dist_ops_for_program_stack!_backup_serial_main_program_stack$_backup_serial_startup_program_stack_gradient_scale_data_parallelUpDownStream_up_down_streamsZ_json_configZ_num_model_chunks)
selfZserial_main_progZserial_startup_progserial_optimizerserial_lossZ	feed_varsZ
fetch_varsclusterstrategyZjson_configr   r   r   __init__9   sX   
zDistributedContext.__init__c                 C      | j S r   )r"   rH   r   r   r   serial_main_program      z&DistributedContext.serial_main_programc                 C   rN   r   )r#   rO   r   r   r   serial_startup_program   rQ   z)DistributedContext.serial_startup_programc                 C   rN   r   )r$   rO   r   r   r   rJ      rQ   zDistributedContext.serial_lossc                 C   rN   r   )r%   rO   r   r   r   rI      rQ   z#DistributedContext.serial_optimizerc                 C   rN   r   )r&   rO   r   r   r   serial_feed_vars   rQ   z#DistributedContext.serial_feed_varsc                 C   rN   r   )r'   rO   r   r   r   serial_fetch_vars   rQ   z$DistributedContext.serial_fetch_varsc                 C   rN   r   )r/   rO   r   r   r   dist_main_programs   rQ   z%DistributedContext.dist_main_programsc                 C   rN   r   )r0   rO   r   r   r   dist_startup_programs   rQ   z(DistributedContext.dist_startup_programsc                 C   rN   r   )r4   rO   r   r   r   rK      rQ   zDistributedContext.clusterc                 C   rN   r   )r5   rO   r   r   r   rL      rQ   zDistributedContext.strategyc                 C   rN   r   )r*   rO   r   r   r   serial_graph   rQ   zDistributedContext.serial_graphc                 C   rN   r   )r;   rO   r   r   r   serial_ordered_nodes   rQ   z'DistributedContext.serial_ordered_nodesc                 C   rN   r   r3   rO   r   r   r   process_meshes   rQ   z!DistributedContext.process_meshesc                 C   
   || _ d S r   rY   )rH   valr   r   r   rZ         
c                 C   rN   r   )r6   rO   r   r   r   pass_context   rQ   zDistributedContext.pass_contextc                 C   rN   r   )r2   rO   r   r   r   dist_op_context   rQ   z"DistributedContext.dist_op_contextc                 C   rN   r   )r8   rO   r   r   r   block_state   rQ   zDistributedContext.block_statec                 C   s   t | jp	t | jS r   )lenr(   r)   rO   r   r   r   has_annotation   s   z!DistributedContext.has_annotationc                 C   rN   r   rD   rO   r   r   r   gradient_scale   rQ   z!DistributedContext.gradient_scalec                 C   r[   r   rc   )rH   gsr   r   r   rd      r]   c                 C   rN   r   rE   rO   r   r   r   data_parallel   rQ   z DistributedContext.data_parallelc                 C   rN   r   )rG   rO   r   r   r   up_down_streams   rQ   z"DistributedContext.up_down_streamsc                 C   r[   r   rf   )rH   Zdpr   r   r   rg      r]   c                 C   sP   | j | j  | j| j  | jt| j	 | j
t| j d S r   )rB   appendr"   clonerC   r#   r>   copydeepcopyr6   r?   r8   rH   moder   r   r   _backup_serial_info   s   
z&DistributedContext._backup_serial_infoc                 C   s,   | j t| j | jt| j d S r   )r@   ri   rk   rl   r(   rA   r)   rm   r   r   r   _backup_dist_info   s   

z$DistributedContext._backup_dist_infoTc                 C   $   |r|  | |r| | d S d S r   )ro   rp   rH   serialZserial_modedistZ	dist_moder   r   r   _backup   s
   
zDistributedContext._backupc                 C   s   | j rQt| j tr:t| j dkr*| j d }|jj}|j}| jj| 	|}|| _
d S t| j dkr6g | _
d S td| j jj}| j j}| jj| 	|}|| _
d S d S )Nr	   r   z"multi loss vars are not supported.)r   
isinstancelistra   blockidxnamer"   blocks_var_recursiver$   
ValueError)rH   Zloss	block_idxvar_namevarr   r   r   _restore_serial_loss  s0   




z'DistributedContext._restore_serial_lossc                 C   sZ   | j  D ]%\}}g }|D ]}|jj}|j}| jj| |}|| q|| j	|< qd S r   )
r    itemsrx   ry   rz   r"   r{   r|   ri   r&   )rH   keyvar_listnew_var_listr   r~   r   r   r   r   _restore_serial_feed_vars  s   z,DistributedContext._restore_serial_feed_varsc           	      C   s   | j  D ]P\}}g }|dkr6|D ]#}g }|D ]}|jj}|j}| jj| |}|| q|| qn|D ]}|jj}|j}| jj| |}|| q8|| j	|< qd S )NZmetrics)
r!   r   rx   ry   rz   r"   r{   r|   ri   r'   )	rH   r   r   r   Zinner_var_listZnew_inner_var_listr   r~   r   r   r   r   _restore_serial_fetch_vars)  s6   z-DistributedContext._restore_serial_fetch_vars	to_backupc                 C   s   |dkr| j  | _| j | _n|dkr/| jd usJ | jd us#J | j | _| j | _|   | 	  | 
  | j| _| j | _| j | _d S )Nr   to_original)rB   popr"   rC   r#   r   r   rj   r   r   r   r   r%   r>   r6   r?   r8   rm   r   r   r   _restore_serial_infoB  s$   z'DistributedContext._restore_serial_infoc                 C   s  |dkr| j  | _| j | _n|dkr.| jsJ | jsJ t| j| _t| j| _n|dkrg }| j	 D ]\}}|| j
v rH|j  q9|| q9|D ]}| j| qPg }| j	 D ]\}}|| jv ro|j  q`|| q`|D ]}| j| qwn8g }| j	 D ]	\}}|| q|D ]}| j| qg }| j	 D ]	\}}|| q|D ]}| j| qi | _i | _t | _d| _g | _d S )Nr   r   Z
to_defaultT)r@   r   r(   rA   r)   "_original_dist_tensors_for_program_original_dist_ops_for_programrk   rl   r   _tensors_ids	dist_attrresetri   _ops_idsr/   r0   r1   r2   r=   r3   )rH   rn   Znew_tensors_ids	tensor_iddist_tensorZnew_ops_idsop_iddist_opr   r   r   _restore_dist_info[  sf   




z%DistributedContext._restore_dist_infoc                 C   rq   r   )r   r   rr   r   r   r   _restore  s
   
zDistributedContext._restoreFc                 C   s  | j s|| js| jr| j | _| js| jr| j | _| js"|   | js)| j	| _| j
s0|   | js7|   | | t| j| _t| j| _t| j | _t| j | _d| _ |rct|  |r|tddi tt| jj| _|    d| _!| j!r|r| "  d S d S d S )NTZFLAGS_convert_all_blocksF)#r<   r"   r   rj   r#   r   r$   r   r%   r   r&   r   r'   r   _init_dist_attr_for_programrk   rl   r(   r   r)   r   rw   keysr   r   r   r   r   r   ZGraphdescr*   _init_dist_attr_for_graphr=   $copy_dist_attr_from_program_to_graph)rH   Z
with_graphZwith_cpp
no_defaultr   r   r   
initialize  sN   

zDistributedContext.initializec                 C   s6   t |ttjfsJ d|| jvr| j| d S d S )Nz,The type of dim_mapping must be ProcessMesh.)rv   r   r   rZ   r3   ri   )rH   process_meshr   r   r   add_process_mesh  s   

z#DistributedContext.add_process_meshc                 C      |j }|j }|| j|< d S r   )serial_tensorr   original_idr(   )rH   r   Zinner_serial_tensorZinner_serial_tensor_idr   r   r   add_dist_tensor_for_program     
z.DistributedContext.add_dist_tensor_for_programc                 C   r   r   )	serial_opr   r   r)   )rH   r   Zinner_serial_opZinner_serial_op_idr   r   r   add_dist_op_for_program  r   z*DistributedContext.add_dist_op_for_programc                 C   D   |j  }| j|d }|r|S |j  }| j|d }|r |S d S r   )r   r   r(   getr   rH   r   serial_tensor_idr   r   r   r   get_dist_tensor_for_program  s   

z.DistributedContext.get_dist_tensor_for_programc                 C      t |}| j|d S r   )r   r+   r   )rH   serial_tensor_nodeserial_tensor_node_idr   r   r   get_dist_tensor_for_graph     z,DistributedContext.get_dist_tensor_for_graphc                 C   r   r   )r   r   r)   r   r   rH   r   Zserial_op_idr   r   r   r   get_dist_op_for_program  s   

z*DistributedContext.get_dist_op_for_programc                 C   s(   |j  }| j|d r| j|= d S d S r   )r   r   r)   r   )rH   r   r   r   r   r   del_dist_op_for_program  s   
z*DistributedContext.del_dist_op_for_programc                 C   r   r   )r   r,   r   )rH   serial_op_nodeserial_op_node_idr   r   r   get_dist_op_for_graph  r   z(DistributedContext.get_dist_op_for_graphc                 C   H   |j  }| j|d }|r|jS |j  }| j|d }|r"|jS d S r   )r   r   r(   r   r   r   r   r   r   r    get_tensor_dist_attr_for_program  s   

z3DistributedContext.get_tensor_dist_attr_for_programc                 C      | j |d }|r|jS d S r   )r(   r   r   )rH   r   r   r   r   r   (get_tensor_dist_attr_for_program_with_id     z;DistributedContext.get_tensor_dist_attr_for_program_with_idc                 C      t ||}| | d S r   )r   r   )rH   r   r   r   r   r   r    set_tensor_dist_attr_for_program!     
z3DistributedContext.set_tensor_dist_attr_for_programc                 C   $   t |}| j|d }|r|jS d S r   )r   r+   r   r   )rH   r   r   r   r   r   r   get_tensor_dist_attr_for_graph%  s   z1DistributedContext.get_tensor_dist_attr_for_graphc                 C   r   r   )r   r   r)   r   r   r   r   r   r   r   get_op_dist_attr_for_program/  s   

z/DistributedContext.get_op_dist_attr_for_programc                 C   r   r   )r)   r   r   )rH   r   r   r   r   r   $get_op_dist_attr_for_program_with_id<  r   z7DistributedContext.get_op_dist_attr_for_program_with_idc                 C   r   r   )r
   r   )rH   r   r   r   r   r   r   set_op_dist_attr_for_programC  r   z/DistributedContext.set_op_dist_attr_for_programc                 C   r   r   )r   r,   r   r   )rH   r   r   r   r   r   r   get_op_dist_attr_for_graphG  s
   z-DistributedContext.get_op_dist_attr_for_graphc                 C   st   |  r| d urt|}| j|d }|r|jS d S | r8| d ur8t|}| j|d }|r6|jS d S d S r   )	is_varr   r   r+   r   r   is_opopr,   )rH   Zserial_noder   r   r   r   r   r   r   get_dist_attr_for_graphO  s   z*DistributedContext.get_dist_attr_for_graphc                 C   s$  |st  }t|j| _n| }|j| _| jjD ]j}|j	
 D ]/}||}|r;|| ur;t|}t|j|_| | | |}|d u rMt|}| | q|jD ]/}||}	|	rn|| urnt|}
t|	j|
_| |
 | |}|d u rt|}
| |
 qQqt| j| _t| j| _d S r   )r   rk   rl   rZ   r3   rg   rE   r"   r{   varsvaluesr   r   r   r   opsr   r
   r   r(   r   r)   r   )rH   r   Zdefault_ctxrx   tensorZdefault_dist_tensorr   Zcurrent_dist_tensorr   Zdefault_dist_opr   Zcurrent_dist_opr   r   r   r   b  sN   








z.DistributedContext._init_dist_attr_for_programc                 C   s  g }g }g }i }t | j D ]\}}| D ]}|| qq|D ]&}| r9| d ur9|| d|t|< | rH|	 d urH|| q"|j
dd d |j
dd d t|t| }g }	g }
g }|D ]v}g }|jD ]"}| r| d ur|t| s|| |	| d|t|< qp|j
dd d || || |
| g }|jD ]"}| r| d ur|t| s|| |	| d|t|< q|j
dd d || qi|	j
d	d d |
j
d
d d |	| _|
| _|| _t| jt| jt| j ksJ tt| _t | jD ];\}}| rO| d urO|j }|  }| j| |d d u rCg | j| |< | j| | ||f qg | _|D ]}|t| se| j| qVt| j|krutd d S d S )NFc                 S   
   | j  S r   r   original_desc_idr   r   r   r   <lambda>     
 zBDistributedContext._order_nodes_by_program_order.<locals>.<lambda>)r   c                 S   r   r   r   r   r   r   r   r     r   Tc                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   zRWARNING: there are some orphan tensors or ops which are not used in the execution.)	enumerater*   Zall_sub_graphs	all_nodesri   r   r   r   r   r   sortra   ZinputsextendZoutputsr9   r:   r;   r   dict_tensor_nodes_with_same_namer   r   rz   r   _serial_orphan_tensor_nodesprint)rH   Zserial_ordered_tensor_nodesZserial_ordered_op_nodesr   visitedry   graphr   Znum_nodes_beforeZnew_serial_ordered_tensor_nodesZnew_serial_ordered_op_nodesZnew_serial_ordered_nodesZop_nodeZtensor_nodesZtensor_noder   Ztensor_namer   r   r   _order_nodes_by_program_order  s   
















z0DistributedContext._order_nodes_by_program_orderc                 C   s  |    i | _i | _| j D ]\}}|jj }|| j|< q| j D ]\}}|j	j }|| j|< q$| j
D ]}| r| d urd }|j }| j|d }|d urX|}	n| j| }	| j|	d }|}|	| jt|< |d usuJ dt|}
t|j|j}|| j|
< | r| d urd }|j }| j|d }|d ur|}n| j| }| j|d }|}|| jt|< |d usJ dt|}t|j	|j}|| j|< q7d S NzKTensor must have a distributed tensor after the initialization for program.zOOperator must have a distributed operator after the initialization for program.)r   _tensor_original_id_to_id_op_original_id_to_idr(   r   r   r   r   r)   r   rX   r   r   r   r   r   r-   r   r   r   r+   r   r   r.   r
   r,   )rH   r   r   r   r   r   r   r   cur_dist_tensorcur_tensor_idr   new_dist_tensorr   cur_dist_op	cur_op_idr   new_dist_opr   r   r   r     sr   







z,DistributedContext._init_dist_attr_for_graphc                 C      | j   | j  d S r   )r(   clearr)   rO   r   r   r   clear_dist_info_for_program'  r   z.DistributedContext.clear_dist_info_for_programc                 C   r   r   )r+   r   r,   rO   r   r   r   clear_dist_info_for_graph+  r   z,DistributedContext.clear_dist_info_for_graphc                 C   s$  | j D ]}| rJ| d urJd }|j }| j|d }|d ur$|}n| j| }| j|d }|}|d us:J dt|}t	|j
|j}|| j|< | r| d urd }|j }	| j|	d }
|
d uri|	}n| j|	 }| j|d }
|
}|d usJ dt|}t|j|j}|| j|< qd S r   )rX   r   r   r   r   r(   r   r   r   r   r   r   r+   r   r   r)   r   r
   r   r,   )rH   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   /  s\   







z7DistributedContext.copy_dist_attr_from_program_to_graphc                 C   sP  | j sJ di }| j}| jd g}|D ]b}| rL| d urL| jt| }||d}|sL| |}| j	| }||_
d||< |j}	|	|vrL||	 | rv| d urv| jt| }
| |}| j|
 }||_
|j}	|	|vrv||	 qt|| _| jD ]%}|  }| j	|d }|s|  }| j	|d }| jd |j
_qd S )Nz+Both program and graph must be initialized.r   FT)r<   r;   rZ   r   r   r-   r   r   r   r(   r   r   ri   r   r   r.   r   r)   rk   rl   r   r   r   )rH   Zupdated_tensorsr   rZ   r   r   updatedZtensor_dist_attr_for_graphZdist_tensor_for_programr   r   Zop_dist_attr_for_graphZdist_op_for_programZorphan_noder   r   r   r   r   $copy_dist_attr_from_graph_to_program^  sZ   




z7DistributedContext.copy_dist_attr_from_graph_to_programc                 C   s  | j  D ]T}|j}|j}|jtv rg }n|j}|j}|jj}|jj	}t
t|D ],}|| dkrE|| dkrE|||  || krEd||< || dkrUt|dkrUd||< q)||_q| j D ]}	|	j}
|	j}|jj}|jj	}|
jD ][}|	|d u r~g }n|	|jtv rg }n|	|j}||}t
t|D ],}|| dkr|| dkr|||  || krd||< || dkrt|dkrd||< q||| qr|
jD ]V}|	|jtv rg }n|	|j}||}t
t|D ]1}|| dkr|| dkr|||  || krd||< || dkr t|dkr d||< q||| qt|dkr>|	jjdkr>d|	j_d|	j_q_d S )Nr   r	   Zdropoutdefault)r(   r   r   r   typer   shapedims_mappingr   Zprocess_idsrangera   r)   r   Zinput_arg_namesZget_serial_inputZget_input_dims_mappingZset_input_dims_mappingZoutput_arg_namesZget_serial_outputZget_output_dims_mappingZset_output_dims_mappingZ	impl_typeZimpl_idx)rH   r   r   r   Ztensor_shaper   Zprocess_mesh_shapeZprocess_mesh_processesir   r   Zarg_namer   r   r   amend_dist_attr_for_program  s   








z.DistributedContext.amend_dist_attr_for_programc              	   C   s   | j std| jjD ]n}|j D ]1}| |}|d us&J d|jj	|d urC|
 sCtd|jj	|jj |jj |jq|jD ]1}| |}|d us[J d|jj|d urx|
 sxtd|jj|jj |jj |jqGqdS )NzHProgram must be initialized before validating its distributed attributesz0Tensor {} does not have a distributed attribute.zJTensor {} (id: {}, original_id: {}) has a wrong distributed attributes {}.z2Operator {} does not have a distributed attribute.zMOperator {} (id: {}, original_id: {}) has a wrong distributed attributes {} .T)r<   AssertionErrorrP   r{   r   r   r   formatr   rz   Zvalidate_dist_attrr   r   r   r   r   r   r   r   )rH   rx   r   r   r   r   r   r   r   validate_dist_attr_for_program  sV   








z1DistributedContext.validate_dist_attr_for_programc                 C   sz   | j }||}||t| < | j D ]\}}|dv r"t||| qt||t|| q|j	 D ]}||j| _
q2|S )N)r   r   r"   r#   r*   r/   r0   r;   r9   r:   r   r    r!   r$   r&   r'   r%   rB   rC   r6   r   )	__class____new__r   __dict__r   setattrrk   rl   r(   r   Z_dist_context)rH   memoclsresultkvr   r   r   r   __deepcopy__  s   
zDistributedContext.__deepcopy__)TNTN)r   )Tr   Tr   )TFF)F)@__name__
__module____qualname____doc__rM   propertyrP   rR   rJ   rI   rS   rT   rU   rV   rK   rL   rW   rX   rZ   setterr^   r_   r`   rb   rd   rg   rh   ro   rp   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   3   s    
T

























;

0

+_;/4Y*r   c                   @   s   e Zd ZdZdd Zdd Zedd Zejdd Zed	d
 Z	edd Z
e
jdd Z
edd Zedd Zejdd Zedd Zdd Zdd ZdS )r1   z
    DistributedOperatorContext is used to create a dist op desc in Program.
    Every time to create a new dist op, the context should be updated for it accordingly.
    c                 C   sX   d | _ d | _d | _d | _d | _d | _i | _tt| _	d | _
t | _d | _d | _d| _d S )NF)_dst_main_program_main_block_dst_startup_program_startup_block_cur_src_opZ_cur_dist_attrZgrad_op_id_to_op_idr   r   Zgrad_var_to_var_work_blocksetZalready_init_sync_varsvarname_mappingZrank_id_exceed_backward_init_oprO   r   r   r   rM   A  s   

z#DistributedOperatorContext.__init__c                 C   s^   | j }||}||t| < | j D ]\}}|dv r"t||| qt||t|| q|S )N)r  r
  r  r  r	  r  )r   r   r   r   r   r   rk   rl   )rH   r   r   r   r   r   r   r   r   r  T  s   
z'DistributedOperatorContext.__deepcopy__c                 C   rN   r   )r  rO   r   r   r   dst_main_programf  rQ   z+DistributedOperatorContext.dst_main_programc                 C      || _ |jd | _d S Nr   )r  r{   r	  rH   progr   r   r   r  j     c                 C   rN   r   )r	  rO   r   r   r   
main_blocko  rQ   z%DistributedOperatorContext.main_blockc                 C   rN   r   )r
  rO   r   r   r   dst_startup_programs  rQ   z.DistributedOperatorContext.dst_startup_programc                 C   r  r  )r
  r{   r  r  r   r   r   r  w  r  c                 C   rN   r   )r  rO   r   r   r   startup_block|  rQ   z(DistributedOperatorContext.startup_blockc                 C      | j d usJ | j S r   r  rO   r   r   r   
work_block     z%DistributedOperatorContext.work_blockc                 C   s   |d usJ || _ d S r   r  )rH   rx   r   r   r   r    s   
c                 C   r  r   )r  rO   r   r   r   
cur_src_op  r  z%DistributedOperatorContext.cur_src_opc                 C   rN   r   )r  rO   r   r   r   in_backward_phase  s   z,DistributedOperatorContext.in_backward_phasec                 C   s   || _ t|r
d| _i }|j D ](}g }|j|D ]}|| j|jj v s(J |	| j|jj |  q|||< qi }|j
 D ](}g }|j|D ]}|| j|jj v sXJ |	| j|jj |  qK|||< qA||fS )NT)r  r   r  r   Zinput_namesinputr  rx   ry   ri   Zoutput_namesoutput)rH   Zsrc_opZkinputsZ
input_namevarnamesvarnameZkoutputsZoutput_namer   r   r   prepare_context  s$   

z*DistributedOperatorContext.prepare_contextN)r  r  r  r  rM   r  r  r  r  r  r  r  r  r  r  r$  r   r   r   r   r1   ;  s0    








r1   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )r7   c                 C   s   d| _ g | _g | _i | _d S r  )nblockforward_indicesbackward_indicesbackward_to_forward_index_maprO   r   r   r   rM     s   
zBlockState.__init__c                 C   s   |   |jdksJ t|jD ](\}}||jksJ d|jdks+J d||j| j| |  j	d7  _	q| j	dks@J d S )Nr   index doesn't matchr   z3forward_block_idx of forward block [{}] is not [{}]r	   )
Z_roll_to_global_blockZcurrent_block_idxr   r{   ry   forward_block_idxr   r&  ri   r%  rH   programry   rx   r   r   r   parse_forward_blocks  s   zBlockState.parse_forward_blocksc                 C   s   d| j v sJ d| j  d| jd< t|jD ]0\}}|t| j k r#q||jks,J d|j| j v s4J | j| |j| j|< |  j	d7  _	q| j	t|jksRJ d S )Nr   zforward block idx arer)  r	   )
r&  r(  r   r{   ra   ry   r*  r'  ri   r%  r+  r   r   r   parse_backward_blocks  s   

z BlockState.parse_backward_blocksN)r  r  r  rM   r-  r.  r   r   r   r   r7     s    r7   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )rF   c                 C   s   i | _ i | _d S r   )_ups_downsrO   r   r   r   rM     s   
zUpDownStream.__init__c                 C   X   | j |d }|s|g| j |< d S |dkr*ttdd |}|| || j |< d S d S )Nr   c                 S      | dkS Nr   r   ar   r   r   r         z,UpDownStream.add_up_stream.<locals>.<lambda>)r/  r   rw   filterri   )rH   rankZ	up_streamupsr   r   r   add_up_stream     
zUpDownStream.add_up_streamc                 C   r1  )Nr   c                 S   r2  r3  r   r4  r   r   r   r     r6  z.UpDownStream.add_down_stream.<locals>.<lambda>)r0  r   rw   r7  ri   )rH   r8  Zdown_streamdownsr   r   r   add_down_stream  r;  zUpDownStream.add_down_streamc                 C   s4   |  |d |  || | || | |d d S r3  )r:  r=  )rH   upZdownr   r   r   add_pair_stream  s   zUpDownStream.add_pair_streamc                 C   "   | j |d }|sd S tt|S r   )r/  r   rw   r  )rH   r8  r9  r   r   r   r9       zUpDownStream.upsc                 C   r@  r   )r0  r   rw   r  )rH   r8  r<  r   r   r   r<    rA  zUpDownStream.downsN)	r  r  r  rM   r:  r=  r?  r9  r<  r   r   r   r   rF     s    		rF   )rk   collectionsr   Zpaddle.distributed.passesr   Zpaddle.frameworkr   r   r   r   r   r   r
   r   r   utilsr   r   r   r   r   r   r   r   r1   r7   rF   r   r   r   r   <module>   s0           r*