o
    "j                    @   s\  d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	m
Z
mZmZ d dlmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZm Z  ddga!ddgZ"dd Z#G dd dZ$G dd dZ%G dd dZ&G dd dZ'G dd dZ(G dd dZ)G dd  d Z*G d!d" d"Z+G d#d$ d$Z,G d%d& d&Z-dS )'    N)OrderedDict)reduce)OpRole)LayerHelperOpProtoHolderProgramcore)unique_name   )AllgatherOpCostCommContextConcatOpCost
SendOpCostSliceOpCostSplitOpCostbuild_comm_desc)TensorDistAttr)DistributedContext)new_process_group)_g_gradient_clip_opsis_gradient_clip_opis_optimize_opZcheck_finite_and_unscaleZupdate_loss_scalingwhileconditional_blockc                 C   s@   d}| |j v r|j |  }n|| }|dusJ |j d|S )z=Get var in the parent block if not found in the current blockNz is not found)varsZ_var_recursivename)var_nameblockprogramvar r    p/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_parallel/static/reshard.pyget_var_with_recursion,   s   

r"   c                   @   s,   e Zd ZdZdd Zedd Zdd ZdS )		EndOpDescz
    Describe to end reshard parse process.
    It is supposed to contain a list of variables which are the outputs of one reshard process.

    Args:
        vars (list): a list of variables.
    c                 C   s
   || _ d S NZ_vars)selfr   r    r    r!   __init__D      
zEndOpDesc.__init__c                 C      | j S r$   r%   r&   r    r    r!   r   G      zEndOpDesc.varsc                 C   s   d| j  dS )NzEnd vars : .r%   r*   r    r    r!   __repr__K   s   zEndOpDesc.__repr__N)__name__
__module____qualname____doc__r'   propertyr   r-   r    r    r    r!   r#   ;   s    
r#   c                   @   s^   e Zd ZdZdddZedd Zedd	 Zed
d Zedd Z	edd Z
dd ZdS )AllGatherOpDescz
    Describe the allgather op in the reshard phase.

    Args:
        group (list): Process group.
        shape (list): The tensor shape.
        is_bool (bool): Whether allgather bool data. Default: False.
    FTc                 C   s"   || _ d| _|| _|| _|| _d S )NZ
all_gather)_group_desc_shape_is_bool_need_split)r&   groupshapeis_bool
need_splitr    r    r!   r'   Y   
   
zAllGatherOpDesc.__init__c                 C   r)   r$   r7   r*   r    r    r!   r;   `   r+   zAllGatherOpDesc.is_boolc                 C   r)   r$   r4   r*   r    r    r!   r9   d   r+   zAllGatherOpDesc.groupc                 C   r)   r$   r5   r*   r    r    r!   desch   r+   zAllGatherOpDesc.descc                 C   r)   r$   r6   r*   r    r    r!   r:   l   r+   zAllGatherOpDesc.shapec                 C   r)   r$   )r8   r*   r    r    r!   r<   p   r+   zAllGatherOpDesc.need_splitc                 C   .   d| j  d| j d| j d| j d| j dS )Nop: 	, group: 	, shape: , is_bool: z, need_split: r,   )r5   r4   r6   r7   r8   r*   r    r    r!   r-   t      .zAllGatherOpDesc.__repr__N)FT)r.   r/   r0   r1   r'   r2   r;   r9   rA   r:   r<   r-   r    r    r    r!   r3   O   s    
	




r3   c                   @   sR   e Zd ZdZdddZedd Zedd Zed	d
 Zedd Z	dd Z
dS )AllGatherConcatOpDescz
    Describe the c_concat op in the reshard phase.

    Args:
        group (list): Process group.
        shape (list): The tensor shape.
        is_bool (bool): Whether c_concat bool data. Default: False.
    Fc                 C   s   || _ d| _|| _|| _d S )Nc_concat)r4   r5   r6   r7   )r&   r9   r:   r;   r    r    r!   r'      s   
zAllGatherConcatOpDesc.__init__c                 C   r)   r$   r>   r*   r    r    r!   r;      r+   zAllGatherConcatOpDesc.is_boolc                 C   r)   r$   r?   r*   r    r    r!   r9      r+   zAllGatherConcatOpDesc.groupc                 C   r)   r$   r@   r*   r    r    r!   rA      r+   zAllGatherConcatOpDesc.descc                 C   r)   r$   rB   r*   r    r    r!   r:      r+   zAllGatherConcatOpDesc.shapec              	   C   s&   d| j  d| j d| j d| j d	S )NrD   rE   rF   rG   r,   )r5   r4   r6   r7   r*   r    r    r!   r-      s   &zAllGatherConcatOpDesc.__repr__NF)r.   r/   r0   r1   r'   r2   r;   r9   rA   r:   r-   r    r    r    r!   rI   x   s    
	



rI   c                   @   j   e Zd ZdZdddZedd Zedd Zed	d
 Zedd Z	edd Z
edd Zdd ZdS )
SendOpDesca0  
    Describe the send op in the reshard phase.

    Args:
        partition_index (list): The index of partition in complete tensor.
        src (int): The source process to send.
        dst (int): The destination process to receive.
        is_bool (bool): Whether send bool data. Default: False.
    Fc                 C   s(   || _ || _d| _g | _|| _|| _d S )Nsend)_dst_partition_indexr5   r6   r7   _srcr&   partition_indexsrcdstr;   r    r    r!   r'         
zSendOpDesc.__init__c                 C   r)   r$   rQ   r*   r    r    r!   rT      r+   zSendOpDesc.srcc                 C   r)   r$   r>   r*   r    r    r!   r;      r+   zSendOpDesc.is_boolc                 C   r)   r$   rP   r*   r    r    r!   rS      r+   zSendOpDesc.partition_indexc                 C   r)   r$   rO   r*   r    r    r!   rU      r+   zSendOpDesc.dstc                 C   r)   r$   r@   r*   r    r    r!   rA      r+   zSendOpDesc.descc                 C   0   | j s| jD ]}| j |d |d   q| j S Nr
   r   r6   rS   appendr&   itemr    r    r!   r:         
zSendOpDesc.shapec                 C   rC   NrD   z, partition_index: z, dst: rF   rG   r,   r5   rP   rO   r6   r7   r*   r    r    r!   r-      rH   zSendOpDesc.__repr__NrK   )r.   r/   r0   r1   r'   r2   rT   r;   rS   rU   rA   r:   r-   r    r    r    r!   rM           







rM   c                   @   rL   )
RecvOpDesca0  
    Describe the recv op in the reshard op.

    Args:
        partition_index (list): The index of partition in complete tensor.
        src (int): The source process to send.
        dst (int): The destination process to receive.
        is_bool (bool): Whether receive bool data. Default: False.
    Fc                 C   s(   || _ || _d| _g | _|| _|| _d S )Nrecv)rQ   rP   r5   r6   r7   rO   rR   r    r    r!   r'      rV   zRecvOpDesc.__init__c                 C   r)   r$   rY   r*   r    r    r!   rU      r+   zRecvOpDesc.dstc                 C   r)   r$   r>   r*   r    r    r!   r;      r+   zRecvOpDesc.is_boolc                 C   r)   r$   rX   r*   r    r    r!   rS      r+   zRecvOpDesc.partition_indexc                 C   r)   r$   rW   r*   r    r    r!   rT      r+   zRecvOpDesc.srcc                 C   r)   r$   r@   r*   r    r    r!   rA      r+   zRecvOpDesc.descc                 C   rZ   r[   r\   r^   r    r    r!   r:      r`   zRecvOpDesc.shapec                 C   rC   ra   rb   r*   r    r    r!   r-      rH   zRecvOpDesc.__repr__NrK   )r.   r/   r0   r1   r'   r2   rU   r;   rS   rT   rA   r:   r-   r    r    r    r!   rd      rc   rd   c                   @   s^   e Zd ZdZdddZedd Zedd Zed	d
 Zedd Z	edd Z
dd ZdS )SliceOpDescac  
    Describe the slice op in the reshard phase.

    Args:
        starts (list): It represents start indices of corresponding axis in ``axes``.
        ends (list):  It represents end indices of corresponding axis in ``axes``.
        axes (list):  Axes that `starts` and `ends` apply to.
        shape (list): The shape of the tensor to be sliced.
    Nc                 C   s"   || _ || _|| _d| _|| _d S )Nslice)_starts_ends_axesr5   r6   )r&   startsendsaxesr:   r    r    r!   r'     r=   zSliceOpDesc.__init__c                 C   r)   r$   )rh   r*   r    r    r!   rk     r+   zSliceOpDesc.startsc                 C   r)   r$   )ri   r*   r    r    r!   rl     r+   zSliceOpDesc.endsc                 C   r)   r$   )rj   r*   r    r    r!   rm     r+   zSliceOpDesc.axesc                 C   r)   r$   r@   r*   r    r    r!   rA     r+   zSliceOpDesc.descc                 C   r)   r$   rB   r*   r    r    r!   r:   "  r+   zSliceOpDesc.shapec                 C   s^   | j d urd| j d| j d| j d| j d| j  dS d| j d| j d| j d| j d	S )NrD   z
, starts: z, ends: z, axes: rF   r,   )r6   r5   rh   ri   rj   r*   r    r    r!   r-   &  s   
.&zSliceOpDesc.__repr__r$   )r.   r/   r0   r1   r'   r2   rk   rl   rm   rA   r:   r-   r    r    r    r!   rf      s    






rf   c                   @   s8   e Zd ZdZdd Zedd Zedd Zdd	 Zd
S )ConcatOpDescz
    Describe the concat op in the reshard phase.

    Args:
        partition_index_list (list): The list contains all partition index.
    c                 C   s   || _ d| _d S )Nconcat)_partition_index_listr5   )r&   partition_index_listr    r    r!   r'   5  s   
zConcatOpDesc.__init__c                 C   r)   r$   )rp   r*   r    r    r!   rq   9  r+   z!ConcatOpDesc.partition_index_listc                 C   r)   r$   r@   r*   r    r    r!   rA   =  r+   zConcatOpDesc.descc                 C   s   d| j  d| j dS )NrD   z, partition_index_list: r,   )r5   rp   r*   r    r    r!   r-   A  s   zConcatOpDesc.__repr__N)	r.   r/   r0   r1   r'   r2   rq   rA   r-   r    r    r    r!   rn   -  s    

rn   c                   @   s   e Zd ZdZedd Zedd Zedd Zedd	 Zed
d Z	edd Z
edddZedd Zedd Zedd Zedd ZdS )Inserterz*Insert op required in the reshard process.c              	   C   sj   t jjdddg}| j|||j|jd}| j|dd|gid|gi|j	|j	|dd	}|
d
d |S )Nr,   zcast@RESHARDtmpr   dtypetype	lod_levelcastXOutZin_dtypeZ	out_dtypeop_rolerv   inputsoutputsattrsop_namescope/auto_parallel/reshard)paddleutilsr	   generate_with_ignorable_keyjoin
create_varrv   rw   
_insert_opru   	_set_attr)r   idxtensorr|   Ztensor_typenew_var_nameoutcast_opr    r    r!   insert_cast_opH  s*   zInserter.insert_cast_opc           	   
   C   sR   d}t ||gdd}| j||d|gi|j|j|d|ddd}|dd	 d
S )z-Insert send op into block at the given index.send_v2p2pZ
group_typery   T)ring_idpeeruse_calc_streamr|   dynamic_shape)rv   r~   r   r   r   N)r   r   idranksindexr   )	r   r   r   rT   rU   r|   op_typeprocess_groupZsend_opr    r    r!   insert_send_opb  s   
zInserter.insert_send_opc           	      C   sb   d}t ||gdd}| j||d|gid|gi|j|j||j|jd|ddd}|d	d
 dS )z-Insert recv op into block at the given index.Zrecv_v2r   r   ry   rz   T)r   r   Z	out_shaperu   r   r|   r   r}   r   r   N)r   r   r   r   r   r:   ru   r   )	r   r   r   rT   rU   r|   r   r   Zrecv_opr    r    r!   insert_recv_opv  s"   
zInserter.insert_recv_opc                 C   sf   t jjdddg}| j||j|j|j|j	d}| j
|d||dd|id|id	}|d
d |S )z2Insert reset_lod op into block at the given index.r,   zreset_lod@RESHARDrs   r   r:   rv   ru   rw   	lod_resetry   Yrz   r|   r}   r   r   )r   r   r	   r   r   r   r:   rv   ru   rw   r   r   )r   r   ry   r   r|   r   reset_lod_outZreset_opr    r    r!   insert_reset_lod_op  s&   zInserter.insert_reset_lod_opc           
   
   C   s   d|i}i }||d< ||d< t di t }tj| j( | jtjj	d
|jdg|d jd|d j|d jd	d	d
}W d   n1 sHw   Y  | j|d|d|gi|d}	|	dd |S )z/Insert concat op into block at the given block.ry   axisr|   concat@RESHARDr,   rs   r   NFr   ru   r:   rw   rv   persistablestop_gradientro   rz   r}   r   r   )r   )r   localsr   staticprogram_guardr   r   r   r	   r   r   r   ru   rw   rv   r   r   )
r   r   Ztensorsr   r|   r~   r   helperr   Z	concat_opr    r    r!   insert_concat_op  s6   zInserter.insert_concat_opc                    s&  j }fddttD }	g }
t|	D ]\}}||| kr&|
| qt|
dkr[ j|jj|	jd}dgi}d|gi}d|d} j	|d	|||d
}|
dd |S t|
dkr|
d }|| |	|  }|}| |	|  }|}di}|||d}g }tj D ]\}}||kr|| q|||  qtj j  fddt|D }|| }W d   n1 sw   Y   j	|d|d|i|d
}|
dd |S di}dd tt|D }|||d} j|jjjd} j	|d|d|gi|d
}|
dd |S )z.Insert slice op into block at the given block.c                    s   g | ]
} | |  qS r    r    .0i)rl   rk   r    r!   
<listcomp>  s    z,Inserter.insert_slice_op.<locals>.<listcomp>r   )r   ru   rv   r:   rw   ry   rz   F)Zin_placer|   assignr}   r   r   r
   numr   r|   c                    s>   g | ]} j tjjd ddgjdjdjddqS )r,   split@RESHARDrs   NF)r   ru   r:   rv   r   rw   r   )	r   r   r   r	   r   r   ru   rv   rw   r   )r   r   r    r!   r     s    NsplitInputc                 S      g | ]}d qS r
   r    r   r    r    r!   r         )rm   rk   rl   infer_flagsr|   rt   rg   )r:   rangelen	enumerater]   r   ru   rv   rw   r   r   r   r   r   r   )r   r   r   rk   rl   rm   r   r|   Zglobal_shapeslice_shapeZ	diff_dimsr   r_   r   r~   r   r   Z	assign_opZdiff_dimnum_or_sectionsr   Zcur_idxinput_shape	new_shapeoutssplit_opr   Zslice_opr    )r   rl   rk   r   r!   insert_slice_op  s   	





zInserter.insert_slice_opr   c                    s   t di t j}di}|||d}g }	tjD ]\}
}|
|kr*|	| q|	||  qtj j  fddt	|D }W d   n1 sQw   Y   j
|d|d|i|d	}|d
d |S )z.Insert split op into block at the given index.r   ry   r   c                    s@   g | ]} j tjjd jdgjdjj	dddqS )r,   rs   NFr   )
r   r   r   r	   r   r   r   ru   rw   rv   r   r   r   r   r    r!   r   9  s    z,Inserter.insert_split_op.<locals>.<listcomp>Nr   rz   r}   r   r   )r   )r   r   r:   r   r]   r   r   r   r   r   r   r   )r   r   r   r   r|   r   r   r~   r   r   r   r_   r   r   r    r   r!   insert_split_op+  s&   zInserter.insert_split_opc           	   	   C   s   t di t }tj| j" | jtjj	d
|jdgtjdtjjjddd}W d   n1 s4w   Y  i }ddi}ttd|d	< td|d
< |j|d< ||d< tjj|||dd | j|d|d|gi|d}d|_|dd |S )z6Insert fill constant op into block at the given index.fill_constant@RESHARDr,   rs   NF)r   ru   r:   rv   r   r   Z	force_cpu1Z	str_valuevalueru   r|   Zfill_constant)r~   r   r:   r   rz   r}   Tr   r   )r   )r   r   r   r   r   r   r   r   r	   r   r   r   int64r   ZVarDescZVarTypeZ
LOD_TENSORstrintru   Zget_shape_tensor_inputsr   r   r   )	r   r   r|   r:   r   r   r~   r   Zfillconstant_opr    r    r!   insert_fill_constant_opM  s@   
z Inserter.insert_fill_constant_opc              
   C   s  g }t |}d}d}	t|	d fi t }
tj| j" | jtjj	
d|
jdg|jd|j|jddd}W d   n1 sAw   Y  | j|| |	d	|gid
|gi|jd|j|dd}|dd |d7 }|rt| || ||j|}|d7 }|| ||fS ||g ||fS )z2Insert allgather op into block at the given index.r   c_allgather@RESHARDr,   rs   NFr   ry   rz   T)r   r   nranksr|   r}   r   r   r
   )r   r   r   r   r   r   r   r   r   r	   r   r   r   ru   rw   rv   r   r   r   r   rr   r   extend)r   r   r   r   r|   r<   tensor_listr9   
idx_offsetr   r   Zallgather_outZallgather_opZ	split_outr    r    r!   insert_allgather_opr  sP   
zInserter.insert_allgather_opc                 C   s   t |}d}d}t|d fi t }tj| j" | jtjj	
d|jdg|jd|j|jddd}	W d   n1 s?w   Y  tj }
| j|| |d	|gid
|	gi|jdd|j||
|v rh|j|
nddd}|dd |	S )z1Insert c_concat op into block at the given index.r   rJ   r   r,   rs   NFr   ry   rz   T)r   r   Zuse_model_parallelr   r|   rankr}   r   r   )r   r   r   r   r   r   r   r   r   r	   r   r   r   ru   rw   rv   distributedZget_rankr   r   r   r   r   r   )r   r   r   r   r|   r9   r   r   r   Zc_concat_outZcur_rankZc_concat_opr    r    r!   insert_c_concat_op  sB   
zInserter.insert_c_concat_opc                 C   s   | s|  ||f dS d}d}|t| k rpt| | d |\}}	}
|dkrfd}|	dkr=t||d | | d |g||nt||d || | d g||}| | |d  d7  < t| ||
||| n
|d7 }|t| k s|s{|  ||f dS dS )z(Concat the tensors and insert concat op.r   Fr
   TN)r]   r   	Reshardercompute_concat_inforr   r   popconcat_partitions_with_op)partition_tensor_listr   rS   r   r   r|   r   
has_concatconcat_axisfirst_ordernew_partition_r    r    r!   r     s\   	
'z"Inserter.concat_partitions_with_opNr   )r.   r/   r0   r1   staticmethodr   r   r   r   r   r   r   r   r   r   r   r    r    r    r!   rr   E  s0    





e!
$
0
'rr   c                   @   s@   e Zd ZdZedd Zedd Zedd Zedd	 Zd
S )Removerz)Remove var and op in the reshard process.c              	   C   s  g d}g }t jD ]}|| q	t| jD ]\}}||vr#|| q|D ]}g }| j| }|j}|j}	t|D ]\}
}|jdkrog }|jD ]}|	t
||| j qGt|
ddD ]}|| jdkrm|| d|  nqZq9|jdkrg }|jD ]}|t
||| j}||jv r|| qy|s||
 q9t |j}|j|jd j| |j|jd j| q9||}|dur|j}||jvr|j|vr||
 q9|ddd D ]	}
|j|
d	d
 q|  q&dS )z&Remove no need ops in the main program)create_py_readercreate_double_buffer_readerreadr   r   r   Zshape_concatc_sync_comm_streamr   NFsync)r   while_block_infor]   r   blocksopsr   rv   output_arg_namesr   r"   r:   r   r   input_arg_namesZ get_tensor_dist_attr_for_programprocess_meshprocess_idsr   instanceget_op_protorA   	set_inputr~   r   
set_outputr   get_op_dist_attr_for_program
_remove_op_sync_with_cpp)auto_parallel_main_progdist_contextrank_idZnot_remove_op_refZremove_block_order	block_idxr   remove_op_idxr   r   r   opZdim_listr   r   Z	need_saver   protoop_dist_attrop_process_meshr    r    r!   remove_no_need_ops  sx   














zRemover.remove_no_need_opsc                 C   s  t | jD ]\}}t }|j}|j}t }|D ] }	|	jD ]}
|
|v r'||
 q|	jD ]}
|
|v r6||
 q+q|D ]}||vrE|| q:|dkri }|D ]*}	t|		dtt
jkrxd|	jv rxd|	jv rx|	dd }|	dd }|||< qNg }t |D ]\}}|d j| vr|| q|ddd D ]}|| qd}|t|k r|| d j}|| d j}||| kr|| |||  f||< |d7 }|t|k s|D ]}||v rq|| qqdS )z'Remove no need vars in the main programr   r|   ParamZGradNr   r
   )r   r   setr   r   r   addr   r   attrr   ZOptimizeZinput_namesinputr   keysr]   r   r   _remove_var)r   dist_params_gradsfeed_var_namesr   r   remove_varsr   r   	need_varsr   r   r   Zparam_grad_map
param_nameZ	grad_nameZneed_remove_idxr   r_   r    r    r!   remove_no_need_varsN  sh   









zRemover.remove_no_need_varsc                 C   sV   t | || t| | g }tt|j g D ]}||j	 qt 
| || dS )z0Remove no need vars and ops in the main program.N)r   r  r    change_while_op_input_and_outputsumlistZserial_feed_varsvaluesr]   r   r  )r   r   r   r  r  r   r    r    r!   remove_no_need_in_main  s   zRemover.remove_no_need_in_mainc                 C   s<  t  }|  j}|D ]}|jD ]}|| qq
| }t  }|j}|D ]}|jdkr,q$|jD ]}|| q/q$t  }	|D ]}||v rH|	| q=|j}t  }
t|D ]2\}}d}|jdkr_qS|jD ]
}||	v rld} nqb|r|jD ]}|
| qr|jD ]}|
| q}qSt  }|jD ]}||
vr|| q|D ]}|	| qg }|j}t|jD ]Y\}}d}|jdkrg }|jD ]}||v r|
| q|s|
| nt |j}|j|jd j| |j|jd j| q|jD ]
}||vrd} nq|r|
| q|ddd D ]
}|j|dd q|  dS )z3Remove no need vars and ops in the startup program.r   FTr   Nr   r   )r  Zglobal_blockr   r   r  rv   r   r   r   r  r]   r   r   r   rA   r   r~   r   r   r   r   r   )r   auto_parallel_startup_progZmain_input_varsZmain_opsr   r   Zstartup_blockZstartup_output_varsZstartup_opsr  Zactual_need_varsr   Z
is_need_opr  r   r   r   Zis_no_need_opZ	var_namesr  r    r    r!   remove_no_need_in_startup  s   















z!Remover.remove_no_need_in_startupN)	r.   r/   r0   r1   r   r  r  r  r  r    r    r    r!   r      s    
J
9
r   c                   @   s  e Zd ZdZi Z	dSddZedd Zedd Zed	d
 Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zd%d& Zd'd( Zd)d* Zd+d, ZdTd.d/Zd0d1 ZdUd3d4Zd5d6 Zd7d8 Z d9d: Z!d;d< Z"d=d> Z#d?d@ Z$dAdB Z%dCdD Z&dEdF Z'dGdH Z(dIdJ Z)dKdL Z*dMdN Z+dOdP Z,dQdR Z-dS )Vr   a!  
    Reshard tensor in the program according to its distributed attribute and corresponding op distributed attribute.

    Args:
        auto_parallel_main_prog (Program): An auto parallel main program.
        auto_parallel_startup_prog (Program): An auto parallel startup program.
        rank_id (int): The process id.
        dist_context (DistributedContext): The distributed context of this rank.
        dist_params_grads (list): The list contains the tuple of param and grad.
        batch_size (int): The batch size. Default: None.
    Nc                 C   s   t |tsJ dt| d|d ur"t |ts"J dt| dt |ts1J dt| dt |ts@J dt| d|d urSt |tsSJ dt| d|| _|| _|| _|| _|| _	|| _
i | _i | _i | _i | _d S )Nz?The type of auto_parallel_main_prog should be Program, but got r,   zJThe type of auto_parallel_startup_prog should be Program or None, but got z+The type of rank_id should be int, but got z?The type of dist_context should be DistributedContext, but got z.The type of batch_size should be int, but got )
isinstancer   rv   r   r   _auto_parallel_main_prog_auto_parallel_startup_prog_rank_id_dist_context_dist_params_grads_batch_size	_has_sent	_has_recv_has_allgather_has_resharded)r&   r   r  r   r   r  
batch_sizer    r    r!   r'     sF   	
zResharder.__init__c                 C   r)   r$   )r  r*   r    r    r!   r   #  r+   z!Resharder.auto_parallel_main_progc                 C   r)   r$   )r  r*   r    r    r!   r  '  r+   z$Resharder.auto_parallel_startup_progc                 C   r)   r$   )r  r*   r    r    r!   r   +  r+   zResharder.rank_idc                 C   r)   r$   )r  r*   r    r    r!   r   /  r+   zResharder.dist_contextc                 C   r)   r$   )r  r*   r    r    r!   r  3  r+   zResharder.dist_params_gradsc                 C   r)   r$   )r  r*   r    r    r!   r$  7  r+   zResharder.batch_sizec                 C   r)   r$   )r   r*   r    r    r!   has_sent;  r+   zResharder.has_sentc                 C   r)   r$   )r!  r*   r    r    r!   has_recv?  r+   zResharder.has_recvc                 C   r)   r$   )r"  r*   r    r    r!   has_allgatherC  r+   zResharder.has_allgatherc                 C   sH   g }t | D ]\}}|| dkr|| q|||||    q|S )zCompute the shape of partition.r   r   r]   )complete_shapedims_mappingprocess_shapepartition_shaper   r_   r    r    r!   compute_partition_shapeG  s   z!Resharder.compute_partition_shapec                 C   sj   | | }g }tdd |d}tt|D ]}||||   }|||  }||| |  }|| q|S )z@Compute the index of process_shape corresponding to the process.c                 S   s   | | S r$   r    )xyr    r    r!   <lambda>X  s    z1Resharder.compute_process_index.<locals>.<lambda>r
   )r   r   r   r   r]   )processr   r+  Zrelative_processprocess_indexproductr   r   r    r    r!   compute_process_indexS  s   
zResharder.compute_process_indexc           	      C   s   t |||}t | ||}g }tt|D ]+}|| dkr(|d|| g q||||  ||  |||  d ||  g q|S )z/Compute the partition index in complete tensor.r   r   r
   )r   r-  r4  r   r   r]   )	r1  r)  r*  r+  r   r,  r2  rS   r   r    r    r!   compute_partition_indexd  s$   z!Resharder.compute_partition_indexc                 C   s   d}d}d}g }t | D ]]\}}||| krd|d7 }|d || d kr>|d || d k r>|}||d || d g q|d || d krc|d || d krcd}|}||| d |d g q|| q|dkrs|||fS d||fS )zYJudge whether two partition can be concatenated and compute concatenated partition index.r   r   r
   r(  )Zpartition_index_xZpartition_index_yZdiffer_countr   r   r   r   r_   r    r    r!   r     s*   

zResharder.compute_concat_infoc                 C   sH   g }t | D ]\}}|| dkr|| q|||||    q|S )zVcompute the complete shape of the slice tensor  with its process mesh and dims mappingr   r(  )r   r+  r*  r)  r   r_   r    r    r!   compute_complete_shape  s   z Resharder.compute_complete_shapec                 C   s   | s	|  | dS d}d}|t| k r:t| | |\}}}|dkr0d}| | t| | n
|d7 }|t| k s|sC|  | dS dS )z8Concat the given partitions without inserting concat op.r   Fr   Tr
   N)r]   r   r   r   r   concat_partitions)rq   rS   r   r   r   r   r   r    r    r!   r7    s(   

zResharder.concat_partitionsc                 C   s  t jD ]}| j| }t j| d }| j|j }t }g }|jD ]8}||}	|	s=|jdkr/|	r=|jdkr6|	r=|jdkrW|	sW|jD ]}
|
|vrK|	|
 q@|j
D ]}
||
 qOqd}|jD ]}|j |kro|jdkro|} nq]|du ruqt |j}g }|dD ]}
|
|v r|	|
 q|sJ |  |j|jd j| g }|d	D ](}
|ddd
 D ]}||
d
krt|
t|ksd|v r||vr|	| qq|sJ |j|jd j| qdS )zNChange while op input and output after the corresponding sub block ops removedop_idrg   r   r   Nr   ry   r   rz   r   r   )r   r   r   Z
parent_idxr  r   get_dist_op_for_programrv   r   r]   r   r  rA   r   r   r   r   r	  sortr   r~   r   outputfindr   r   r   )r   r   Zsub_block_idx	sub_blockZparent_while_op_idZparent_blockZsub_block_op_inputsZsub_block_op_outputsr   dist_opr   Zwhile_opr  Znew_XZnew_OutZoutput_namer    r    r!   r    sh   









z*Resharder.change_while_op_input_and_outputc                 C   sn   d}|d |d   kr|d k s%n |d |d   kr#|d k r'n nd}|ddgkr5|ddgkr5d}|S )zBJudge whether two partitions intersect on the specified dimension.Fr   r
   Tr    )r&   Zshape_xZshape_yZ
overlappedr    r    r!   is_overlapped  s   "$zResharder.is_overlappedc                 C   s   |D ]	}|dkr dS qdS )Nr   FTr    )r&   r*  dimr    r    r!   
is_unshard  s
   zResharder.is_unshardc                 C   s(   |j tv rdS t|r|j tv rdS dS )NTF)rv   _g_special_opsr   r   )r&   r   r    r    r!   is_special_op  s
   
zResharder.is_special_opc           
      C   s   | j j|dj }|jdkr|d}n
|jdkr|d}|D ]"}t||| j }| j|}|j	}|j
}|D ]
}	|	dkrB  dS q8q!dS )	Nr=  r   	Conditionr   ZCondr   FT)r   r   r  r   rv   r	  r"   r   get_dist_tensor_for_program	dist_attrr*  )
r&   r   r=  Z
input_condr   r   dist_tensortensor_dist_attrZvar_dims_mappingr@  r    r    r!   is_condition_replicative  s$   


z"Resharder.is_condition_replicativeTc                 C   s   d}|j }|j}|j}|d }	|r_|d }
tdd |||
|	fD r]||
krC|| jjvrA|D ]
}|dkr8tdq.|s=|S tdd	}||	krId	}||	kr]t|jt|	jkr]|j	j
r]d}|S |d }td
d ||||	fD r~||krxtd||	kr~d	}|S )z/Judge the tensor whether needs to be resharded.Fr   r
   c                 s       | ]}|V  qd S r$   r    r   r.  r    r    r!   	<genexpr>7  
    
z)Resharder.need_reshard.<locals>.<genexpr>r   z7The dim must be -1 when tensor process mesh is a union.zJit is not supported that tensor process mesh is a union and needs reshard.Tc                 s   rJ  r$   r    rK  r    r    r!   rL  d  rM  zVIt is not supported that tensor dims mapping is different from op output dims mapping.)rF  r*  r   allr   process_meshes
ValueErrorr   r   serial_tensoris_data)r&   rG  rF  Zop_inputr>  Z
is_reshardrH  Ztensor_dims_mappingtensor_process_meshr  op_input_dims_mappingr_   Zop_output_dims_mappingr    r    r!   need_reshard+  sn   
	zResharder.need_reshardc                 C   sj   g }| j |}|jj}| j jD ]}t|jt|j@ r+t|jt|jk r+|| q|s3|| |S )zEGet sub process meshes of the given op if op process mesh is a union.)	r   r9  rF  r   rO  r  r   r   r]   )r&   r   rO  r>  r  r   r    r    r!   get_op_process_meshesv  s   


zResharder.get_op_process_meshesFc           5   
   C   s8  |j }|j}|j}|j}|j}|j}	|d }
|d }|
j}|
j}|d }|jd dk rD|jd dks4J t|j}| j|d< |j	| |sNt
|j|	|n|j}t }t|t|rit|t|ri	 |S ||krg }|D ]R}t
||||	|}|s|||gdgg qrdd |D }dd |D }d	d |D }||dkr||}|| | || d qr|||gdgg qr|D ]G}g }t
|||||}g }g }|D ]}t
||||	|}d
}tdd tt| j||D r||vrdd |D |}dd |D | }dd |D | }d} | t|k r<||  s1||  }d|| < n| d7 } | t|k s#| t|krRdd |D }|d }d|d< |d
us[J d|| vrfg ||< || vrqg ||< || |jjtjk}!t||||!d}"t||||!d}#|| |" || |# || t
|| t|tt j!kr| j"j#$|| q|| t%| g }$g }%g }&|d }'g }(t&|'D ]/\}})|$|| d |)d   |%|| d |)d   |&| |(|)d |)d   q|| t'|$|%|&|(d q|S g }g }g }*|D ],}t
||||	|}||vr9|| |*|g|g q|*|| d | qt(t|*d d D ]G} g }+t(t|*D ]},|+|*|, d |   | dkrw||*|, d  q\|+D ]}-t)*|+}.t)*|}/t
|-||||}|+D ]'}0t
|0|||	|}tdd tt| j||D s|.+|0 |/+| qg }1|/D ]	}2t
|1|2 q|1d }'g }$g }%g }&g }(t&|'D ]$\}})|$|| d |)d   |%|| d |)d   |&| q|(|)d |)d   t'|$|%|&|(d}3|sd
n|j,|-d}4|dt|krJ|d
d dt|d
d krJ|d dkrJt-|+|4dg||-< q{|dd
 |dd
 kry|d dkry|d dkryt.|.|4|jtjkddt/d
g||-< q{t|.dkrt.|.|4|jtjkdt%|/d|3gn|3g||-< q{qQ|S )a  
        Find the op description sequence to reshard the source tensor for matching the op requirement.

        Args:
            dist_tensor (DistributedTensor): A distributed tensor.
            dist_attr (list): A list contains process_mesh and dims_mapping such as [process_mesh, dims_mapping].
            serial (bool): If serial is true, the dist tensor and dist op come from serial program. Otherwise, they come from auto program.

        Returns:
            Dict, the dict represents the required op description sequence corresponding to process, The key of dict is
            process and value is a list containing op description.
        r   r
      r   Fc                 S      g | ]}|d  qS r   r    r   r_   r    r    r!   r         z.Resharder.find_op_desc_seq.<locals>.<listcomp>c                 S   rX  r   r    rY  r    r    r!   r     rZ  c                 S   rX  rW  r    rY  r    r    r!   r     rZ  Nc                 s   rJ  r$   r    r   r   r    r    r!   rL    rM  z-Resharder.find_op_desc_seq.<locals>.<genexpr>c                 S   rX  r   r    rY  r    r    r!   r     rZ  c                 S   rX  r[  r    rY  r    r    r!   r     rZ  c                 S   rX  r   r    rY  r    r    r!   r     rZ  Tc                 S   r   rK   r    rK  r    r    r!   r     r   z Failed to find the send process.)r;   )r:   c                 s   rJ  r$   r    r\  r    r    r!   rL    rM  )rk   rl   rm   r:   )r   )r9   r:   )r9   r:   r;   r<   )r9   r:   r;   )rq   )0rF  rQ  r*  r   r   r:   r  r$  rA   	set_shaper   r6  r   r  intersection
differencer5  r]   countr   rN  mapr?  r   r
  ru   r   boolrM   rd   r7  r   r   ZForwardr   Zup_down_streamsZadd_pair_streamrn   r   rf   r   copydeepcopyremoveZlocal_sizesrI   r3   r#   )5r&   rG  rF  serialrH  source_tensorZsource_dims_mappingZsource_process_meshZsource_process_groupZsource_process_shapeZtarget_process_meshZtarget_dims_mappingZtarget_process_groupZtarget_process_shaper|   r   r)  op_desc_seqZpartition_process_mapping_listZsource_processZsource_partition_indexZpartition_listZprocess_listZhas_usedr   Ztarget_processr%  Ztarget_partition_indexrq   Zall_partition_index_listZto_send_processr   r   r;   Zsend_op_descZrecv_op_descZslice_startsZ
slice_endsZslices_axesZconcatenated_partition_indexto_slice_tensor_shaper_   r2  r9   jr1  Zmin_comm_groupZall_partition_index_list_copiedZ_processZ!concatenated_partition_index_listrS   Zslice_op_descZallgather_shaper    r    r!   find_op_desc_seq  st  

  =  







 # 

	


	






vzResharder.find_op_desc_seqc           1         sV	  |D ]@}|| }|D ]7}t |trt|j q
t |tr"t|j q
t |tr2t|j|jgdd q
t |trAt|j|jgdd q
qg }	g }
j	|
 vrPdS |j	 }d}tt jD ]\}}|jj|jjkrn|} nq^|dus|J dj	 d j| }t| j}dd }t|r||rtjn|d}d}|D ]}t |trf|j
 vrg j|< j| r|jd	d
 j| D vr?|jrt |||tj}tj |d ||j||jd\}	}||7 }g }|	D ]}t |||tj}||j  |d7 }qj| |j|g nLtj |||j||jd\}	}|dkr(|	}||7 }dd
 |	D }j| |j|g nj| D ]}|j|d kr\ fdd
|d D }	 nqD|	seJ dqt |tr|j!
 vryg j!|< |jj!| vr|jrt |||tj}t" |d ||j|j| |d7 }nt" |||j|j| |d7 }j!| |j qt |tr|j#
 vri j#|< |jj#| 
 vr|j$}g }|D ]}||d |d   q|jr1 j%t&'|d ||j(tj|j)d}t* |||j|j| t |d ||tj}|	| |d7 }|j#| |j< q j%t&'|d ||j(|j+|j)d}t* |||j|j| |j(dkrd}jj,D ]?}|j-D ]3}|j-| }|j.r|j(|j(krt/ |d |||}|	| |d7 }|j#| |j< d} nq`|r nq[|du sJ q|	| |d7 }|j#| |j< q|	j#| |j  qt |t0r|j1} |g}!t|	D ]\}}"t2|
|"| |  |!| q|!d }qt |t3tt4frd}#t |t3r,t5|
dks|
rJ t5|
dkr|
d d n|}$t&'|d }%tj6 ||$|j7|j8|j9|%|d}#n&t |tr=t: |||j|}#nt |t4sEJ t5|dksNJ |d }#|#dusYJ |d }&|d }'t; }(|'|(_<|&|(_=j>?|#|( |j)dkrdt@jA|dj 
 vri t@jA|dj d< |t@jA|dj d 
 vrg t@jA|dj d |< t@jA|dj d | ||#j g  jD ]}g })|jBD ]}*j>C|}+|*|kr|+dur|j |j krL|j)dkr|jD|*|#j  |*},|#j }%|,|%ksJ |+E|,}-|+F|%|- |+G|%|' |+G|%|' |)|% q|jD|*|#j  |*},|#j }%|,|%ks3J |+E|,}-|+F|%|- |+G|%|' |+G|%|' q|+j=}.|+H|}/|.|&kr|/|'kr|jD|*|#j  |*},|#j }%|,|%ksrJ |+E|,}-|+F|%|- |+G|%|' |+G|%|' q|)rtIJ K|j)}0|jL|0jMd j |Nd|)  qqdS )z1Parse op desc sequence and insert op in the blockr   r   Nz/The op for reshard cannot be found in the rank z	 program.c                 S   s
   |  dS )NZGRAD)endswithr   r    r    r!   is_grad#  r(   z(Resharder.parse_op_desc.<locals>.is_gradr|   c                 S   rX  r   r    rK  r    r    r!   r   3  rZ  z+Resharder.parse_op_desc.<locals>.<listcomp>r
   )r<   c                 S   s   g | ]}|j qS r    rm  )r   r   r    r    r!   r   b  s    r   c                    s   g | ]	}t | jqS r    )r"   r   )r   r   r   r&   r    r!   r   i  s    z6The result of parsing allgather op should not be None.rW  @recvr   r:   rw   ru   rv   FTr   )rk   rl   rm   r   r|   r   var_reshard_mappingr=  ry   )Or  r3   r   r9   rI   rM   rT   rU   rd   r   r
  r  r   r   rA   r   r"   r   r   r   ZBackwardr  r'  r;   rr   r   r   r   r   r<   rb  r]   r   r%  r   r&  rS   r   r	   generaterw   rv   r   ru   r   r   rR  r   rn   rq   r   rf   r#   r   r   rk   rl   rm   r   r   r*  r   r   Z set_tensor_dist_attr_for_programr   r   r   r   _rename_inputget_input_dist_attrset_input_dist_attrZset_input_dims_mappingget_input_dims_mappingr   r   r   r   r~   r	  )1r&   r   rh  r   Z
reshard_oprF  r   op_desc_listop_descr   r   r   r   r   Z
matched_oprg  rn  r|   Zend_varsZout_castr   Ztensor_name_listr   r_   rS   r:   Zrecv_tensorset_lod	tmp_blocktmp_var_nametmp_varr   rq   Zidx_listr   Ztarget_tensorZto_slice_tensornew_namer   r*  Ztensor_attrZwhile_op_X_appendr   r  old_nameop_input_dist_attrr  rT  r  r    ro  r!   parse_op_desc  s  	























	












  zResharder.parse_op_descc              	   C   s   |j tv sJ | jj|dj }|j}g }|D ]D}| j|}|s#q|j	}|j
D ]2}||kr[|j}	||}
d}|D ]}|	|d krM|
|d krMd} nq;|s[||	|
|dg q)q|S Nr=  Fr   r
   Tr|   )rv   _g_subblock_opsr   r   r  r   r   r   r9  rF  r   r   rw  r]   )r&   r   r   r=  r   input_attrsr>  rF  r   r   input_dims_mapping	has_exist
input_attrr    r    r!   _get_subblock_input_attrs  @   
z#Resharder._get_subblock_input_attrsc              	   C   s   |j tv sJ | jj|dj }|j}g }|D ]D}| j|}|s#q|j	}|j
D ]2}||kr[|j}	||}
d}|D ]}|	|d krM|
|d krMd} nq;|s[||	|
|dg q)q|S r  )rv   r  r   r   r  r   r   r   r9  rF  r   r   get_output_dims_mappingr]   )r&   r   r   r=  r   Zoutput_attrsr>  rF  r   r   Zoutput_dims_mappingr  output_attrr    r    r!   _get_subblock_output_attrs  r  z$Resharder._get_subblock_output_attrsc           
      C   s   g }| j |}|j}|j}| j jD ]}t|jt|j@ r-t|jt|jk r-|| q|s5|| |	|}g }	|D ]}|	|||
dg q>|	S )Nr|   )r   r9  rF  r   rO  r  r   r   r]   rw  r  )
r&   r   r   rO  r>  rF  r  r   r  r  r    r    r!   _get_common_op_input_attrs  s,   



z$Resharder._get_common_op_input_attrsc                 C   sR   g }|j tv r| ||}|s| ||}n| ||}|s'J d|j||S )NzCThe input '{}' of op '{}' has no distributed attributes in subblock)rv   r  r  r  r  formatr   )r&   r   r   op_input_attrsr    r    r!   get_op_input_attrs	  s   
zResharder.get_op_input_attrsc                 C   s   t  }t| jj}|dkrXg }d}| jjD ]}|jD ]}|| qqt| jjD ]\}}tt |jt|kr>|| q)t |j|k rGd}q)|rZt|D ]}| jj	| qNdS dS dS )z;Remove global process mesh from dist_context.process_meshesr
   FTN)
r  r   r   rO  r   r  r   r]   reversedr   )r&   r   Zprocess_mesh_countZglobal_process_mesh_idxZhas_sub_process_meshr   Z
process_idr   r    r    r!   _remove_global_process_mesh	  s.   
z%Resharder._remove_global_process_meshc                 C   sv  dt j| v rt j| d }|jD ]}|jD ]\}||v rr| j|}|j}d }|| D ]}	|j|	d d krG|||	d d krG|	d } nq+|d u rMq|j	
|| | j|}|j}
|}|}||ksgJ |
|}|
|| q|jD ]?}||v rt|| dkrtd|| d d }|j	|| | j|}|j}
|}|}||ksJ |
|}|
|| qvqd S d S )Nrr  r   r
   zpThe scene is not supported that the output is inplaced and the tensor has been resharded multiply when as input.)r   r   r   r   r   r9  rF  r   rw  rA   rt  ru  rv  r   r   rP  Z_rename_outputZget_output_dist_attrZset_output_dist_attr)r&   r   r   rr  r   r   r>  rF  target_namer_   r  r  r~  r  Zop_output_dist_attrr    r    r!   $_change_subblock_op_input_and_output1	  sr   



z.Resharder._change_subblock_op_input_and_outputc              	   C   s  d}|t |jk rt |j}|j| }| |r|d7 }q| j|}|d urg }|jtv rX| |s8td|	dj
tjvrJi tj|	dj
< |j
 tj|	dj
 d< |jdkrc|d}n|jdkrn|d	}n|j}|  d}|D ]w}	d
|	v rqyt|	|| j}
| j|
}d}|jj| jjvr| jjrd}|jjdt |jjksJ | ||	}|D ];}d }|rt|d jt|jjjkrq|d ur| ||r| ||}| |||	|| t |j}|| | }|}qqy|| d }n|d7 }|t |jk s
d S d S )Nr   r
   zFPlease check the condition due to the dims mapping is not replicative.r=  r8  r   ry   r   r   Zlod_tensor_blocking_queueFTr   )r   r   rC  r   r9  rv   r  rI  rP  r  r   r   r   rA   r	  r   r:  r"   r   rE  rF  r   rO  r*  r`  r  r  r   rU  rk  r  )r&   r   r   pre_op_countr   r>  Zop_input_dist_attrsZinput_var_namesr   r   r   rG  Zis_union_process_mesh_tensorr  r  Zinput_process_meshreshard_op_desccur_op_countr    r    r!   _reshard_inputn	  s   











zResharder._reshard_inputc                 C   sx  | j |kr:|jtjkr|jt|jd |j|j	tj
|jd}t||d ||||d d }|j	dkrd}	| jjD ]J}
|
jD ]@}|
j| }|jr|j	|j	kr|jt|jd |j|j|j|j	d}|d7 }|j|d	||d
d|id|did d}	 nqA|	r nq<|	du sJ |j|d dd|d u r|gn|gid|gi|j|j|ddd}|dd d S |j	dkr)|jt|jd |j|j	|j
|jd}t||d ||||d d}	| jjD ]9}
|
jD ].}|
j| }|jr|j	|j	kr|d7 }|j|d	||d
d|id|did d}	 nq|	r nq|	du s'J d S t||d ||||d d S d S )Nrp  rq  r
   r|   r   Fz	@RESETLODr   r   r   rz   r}   TrW  rx   ry   r{   r   r   )r   ru   r   rb  r   r	   rs  r   r:   rw   r   rv   rr   r   r  r   r   r   rR  r   r   )r&   r   r   r   r   	send_rank	recv_rankZrecv_cast_outr   rz  r{  r|  r}  r   Zrecv_outr    r    r!   _handle_recv	  s   


	

zResharder._handle_recvc              
   C   sn   |j tjkr&t||d ||dtj}t||d ||||d d S t||d ||||d d S )Nr
   r|   rW  )ru   r   rb  rr   r   r  r   r   )r&   r   r   r   r   r  r  Zcast_outr    r    r!   _handle_send@
  s   	zResharder._handle_sendc              
   C   s(  d}g d}|t 7 }|t7 }|t|jk rt|j}|j| }| j|}|d ur|j|vrd}|jD ]F}t||| j	}	| j
|	}
|
jj}|jj|j|g}|
d ur}| |
|dr}t|jt|jt|d j@  }|r}t|t|d jkr|
jjdt|
jjks|d dt|d krtdt|D ]m\}}|}|}|t|d jkr|t|d j t|d j }|d j| }||krq|	jd dkrt|	j}| j|d< |	j| | j|kr| |||	||| q| j|kr| |||	||| qt||gdd qn^t|D ]Y\}}|}|d j| }||kr*q|	jd dkrBt|	j}| j|d< |	j| | j|krT| |||	||| q| j|krf| |||	||| qt||gdd qt|j}|| | }|}q6|| d }n|d7 }|t|jk sd S d S )	Nr   )r   r   r   Zwrite_to_arrayZread_from_arrayZnopZdependFr   r
   zThe dims_mapping must be -1r   r   )rB  r  r   r   r   r9  rv   r   r"   r   rE  rF  r   r  rU  r  r   r*  r`  rP  r   r:   r  r$  rA   r]  r   r  r  r   )r&   r   r   Zskip_opsr  r   r>  r   r   r   rG  rS  r  Ztensor_processesr   Ztensor_processr  Zactual_indexr_   r   r  r    r    r!   _reshard_outputR
  s
  






4





 zResharder._reshard_outputc                 C   sz   |    t| jjD ]\}}|tjv r| || | | | | q
t	
| j| j| j| j t	| j| j i t_d S r$   )r  r   r   r   r   r   r  r  r  r   r  r   r   r  r  r  )r&   r   r   r    r    r!   reshard
  s    


zResharder.reshardc                 C   s  t dg }d }|j|v r|S |j}|dkr|S | j|}| j|}|r'|s)|S |j|j}	|jj}
|
|	|j	
dg}|d ur| ||r|| jvrS|g| j|< n'| j| D ]}|j}||}|j}|	|krq||
krq|  S qX| j| | | j||dd}|jj}| |||}|S )Nr   Zlod_tensor_blocking_queue_0r|   T)rf  )rB  rv   r   r   rE  r9  rF  rw  r   Z	serial_opr  rU  r#  r]   rk  rQ  ru   parse_op_desc_for_cost)r&   r   r   clusterZnot_supported_op_typeZreshard_op_costZtensor_namerG  r>  r*  r   rF  r_   Zitem_dist_attrZitem_dims_mappingZitem_process_meshr  ru   r    r    r!   get_cost	  s\   



zResharder.get_costc                 C   s  |s	| | d S d}d}|t|k r~t|| |\}	}
}|	dkrtd}i }d|d< d|	i|d< |
dkrCd	||| f||fgi|d
< nd	||f||| fgi|d
< || ||vr^g ||< ||  t||d | |||||| n
|d7 }|t|k s|s| | d S d S )Nr   Fr   Tro   r   r   r   ry   r~   ry  r  r
   )r]   r   r   r   r   r   _concat_partitions_for_cost)r&   r   rS   ru   r   local_rank_comp_costr  r   r   r   r   r   Zconcat_descr    r    r!   r  D  s`   	




+z%Resharder._concat_partitions_for_costc                 C   s  dd }t |}g }g }i }|D ]e}	g }
||	 }|D ]Y}t|tra|	|jg}|j}td|||}|||\}}|d u rQ||t||dfg |t| q|s`|| |t||df qt|t	r|j
}|j}td|||}g }t|D ]\}}|dkr||t|  qy|| qy|||\}}|d u r||t||dfg |t| n|s|| |t||df |	|vrg ||	< i }d|d< d	||fgi|d	< t|dd
|d< ||	 t||d qt|tr|j}t|D ]\}}| |
|||	|| qqt|tru|	|vrg ||	< t|
dks#|
r#J g }t|
dkr@|
d D ]}||d |d   q0n|j}i }d|d< dd tt|jD }|j|j|j|d|d< d||fgi|d	< ||	 t||d qq||f}|S )Nc                 S   s   d\}}d}|t | k r>| | t|krd}|D ]}|| | v r)|}| | | q|d u r3|d7 }n	 ||fS |t | k s||fS )N)NFr   Tr
   )r   r  r  )
comm_ranksgroup_ranksresis_the_samer   r   r    r    r!   _get_idx  s    
z2Resharder.parse_op_desc_for_cost.<locals>._get_idxr   )ry  comm_contextr   r   r   r   r~   )r   r   r   r  r
   rg   c                 S   r   r   r    r   r    r    r!   r     r   z4Resharder.parse_op_desc_for_cost.<locals>.<listcomp>)rm   rk   rl   r   r   )r   r  rM   rU   r:   r   r]   r   r  r3   r9   r   r   r   r   rn   rp   r  rf   r   rm   rk   rl   r   )r&   r  ru   r  r  r  Z
comm_costsr  r  keyr   rx  ry  r  r:   Z	send_descr   r  Zallgather_descZsplit_inputs_shaper@  Z
split_descrq   Zpartion_idexri  r_   Z
slice_descr   r  r    r    r!   r    s   




	







{z Resharder.parse_op_desc_for_costr$   )TNrK   ).r.   r/   r0   r1   r   r'   r2   r   r  r   r   r  r$  r%  r&  r'  r   r-  r4  r5  r   r6  r7  r  r?  rA  rC  rI  rU  rV  rk  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r    r    r    r!   r     s    	
,
















:
K
  n   1$$=]u ;<r   ).rc  collectionsr   	functoolsr   r   Z/paddle.distributed.fleet.meta_optimizers.commonr   Zpaddle.frameworkr   r   r   r   Zpaddle.utilsr	   Zcostr   r   r   r   r   r   r   Zdist_attributer   r   r   r   r   r   r   r   r   rB  r  r"   r#   r3   rI   rM   rd   rf   rn   rr   r   r   r    r    r    r!   <module>   s:   $	)$22-   > i