o
    "jz2                     @   s   d dl Zd dlmZ d dlmZ ddlmZmZm	Z	 	dddZ
dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Ze	dG dd deZdS )    N)core)unique_name   )PassBasePassTyperegister_passc                 C   s   t | }g }|d u rdd }d}	 ||k r)|| | s)|d7 }||k r)|| | r|d }||k rW|| | rW|| | | | rW|d7 }||k rW|| | rW|| | | | s@||k rf||krf|||f |d }||krq	 |S q)Nc                 S      dS NT )ref_opnew_opr
   r
   j/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/passes/fuse_all_reduce.py<lambda>   s    z/find_adjacent_match_sequences.<locals>.<lambda>r   Tr   )lenappend)iterableZfilter_funcZadjacent_filter_funcnZmatch_sequencesijr
   r
   r   find_adjacent_match_sequences   s8   

r   c                 C   s  | j td|d  |d}|d s|d }t|}t|D ]_\}	}
|
d }|dkrH| j| jdkrH|| |d8 }|dkrH| j| jdks3|	dkr~|
d }t| j}||k r~| j| jdkr~| j| 	d|ksmJ || ||k r~| j| jdksaqtt
|}|jd	d
 |}|d d }tj }g }g }|D ]}| |j}|| |t| qdd|i||ddd	d|d|d|||| id}|d s| j|dd|id|||| id |d7 }| j|dd|id|i|d |D ]}
| |
 q|S )NZFusedOutput_r   )namedtypeuse_calc_streamring_idr   Zc_sync_calc_streamZc_sync_comm_streamT)reverseZcoalesce_tensorInput)OutputZFusedOutputZ	use_alignr   concated_shapesconcated_ranks)typeinputsoutputsattrsXOut)r   r    r!   c_allreduce_sum)Z
create_varr   generatelist	enumerateopsr   r   r   attrsetsortr   op_proto_and_checker_makerkOpRoleAttrName_find_var_recursiveshapeextend_insert_op_without_syncZ
_remove_op)blockZreversed_op_indicesZinput_var_namesZoutput_var_namesr   r"   Z	fused_varr   Znew_op_indicesr   op_idxZprev_op_idxZnext_op_idxr   Z
insert_idxop_role_keyr   r   var_namer0   Zcoalesce_tensor_op_kwargsr
   r
   r   insert_fuse_all_reduce_ops1   s   




r7   c                 C   s(   |D ]}|  || |kr dS qdS )NFT)r*   )Zop1Zop2Z
attr_names	attr_namer
   r
   r   has_same_attrs   s
   r9   c                 C   s8   h d}g }t | jD ]\}}|j|v r|| q|S )N>   Zc_allgatherr%   Zc_allreduce_prodZc_allreduce_minZc_broadcastZc_allreduce_max)r(   r)   r   r   )r3   Zall_collective_opsZmatch_op_indicesr   opr
   r
   r    filter_all_collective_op_indices   s   	

r;   c                    s   t   fddD } fdd}ddtj tj g fdd}t|||}g }|D ]\}}|fd	dt||D  q0|S )
Nc                    s   g | ]} j | qS r
   )r)   ).0r   r3   r
   r   
<listcomp>   s    z3find_all_fuse_all_reduce_groups.<locals>.<listcomp>c                    s   | j dks
| drdS | dd }| dd }||kr dS  |}|d us+J |j tjjjkr5dS |j	}t
dd |D rCdS d	S )
Nr%   use_model_parallelFr#   r   r$   c                 s   s    | ]}|d kV  qdS )r   Nr
   )r<   sr
   r
   r   	<genexpr>   s    zQfind_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_op.<locals>.<genexpr>T)r   r*   inputoutputr/   r   ZVarDescZVarTypeZ
LOD_TENSORr0   any)r:   in_var_nameout_var_namein_varr0   r=   r
   r   is_valid_allreduce_op   s   
z>find_all_fuse_all_reduce_groups.<locals>.is_valid_allreduce_opr   r   c                    sL   t | |sdS  | dd } |dd }|j|jkr$dS dS )NFr#   r   T)r9   r/   rB   r   )r   r   Zref_op_in_varZnew_op_in_var)r3   same_attr_namesr
   r   is_same_adjacent_op   s   z<find_all_fuse_all_reduce_groups.<locals>.is_same_adjacent_opc                    s   g | ]} | qS r
   r
   )r<   k)collective_op_indicesr
   r   r>      s    )r;   r   r-   r.   kOpDeviceAttrNamer   r   range)r3   Zcollective_opsrH   rJ   Z
match_seqsZnew_match_seqsr   r   r
   )r3   rL   rI   r   find_all_fuse_all_reduce_groups   s    	 rO   c                    s   g   fdd}|D ]M}t |}|dksJ |dkrq
d}|d }||k rQd}t||D ]}	|||	  ||  }
|
tjjjkr@q,|||| |} |d7 }||k s%|||| q
 S )Nc                    s&   || dkr  | ||  d S d S )Nr   )r   )
op_indices	start_idxZend_idxZ
new_groupsr
   r   insert_new_group   s   z>split_fuse_all_reduce_groups_by_deps.<locals>.insert_new_groupr   r   F)r   rN   r   NodeZDepZNoDep)r3   groupsop_depsrS   rP   r   rQ   rK   Zfound_groupZprev_idxdepr
   rR   r   $split_fuse_all_reduce_groups_by_deps   s,   rX   c                 C   s,  |sd S i }t | jD ]$\}}|jD ]}||vr|dg||< q|jD ]}||vr.|dg||< q"qt| j}g }t |D ]?\}}	|	d d |	d d  }
|}d}|
D ]}||vr]d}d} n|| \}}|rgd}t||}qQ||	d d	< |||	f q;|jd
d dd |D ]\}}	| j|fi |	 qd S )NTFr    r   r!   r   r   r"   	copy_datac                 S   s   | d S )Nr   r
   )elementr
   r
   r   r     s    z,insert_coalesce_tensor_ops.<locals>.<lambda>)keyr   )	r(   r)   Zinput_arg_namesZoutput_arg_namesr   minr   r,   r2   )r3   coalesce_ops_kwargsZ	var_infosidxr:   varr   Zinsert_idx_and_kwargsZ	group_idxkwargsZall_varsZ
min_op_idxrY   Zmin_idxr4   Zis_inputr
   r
   r   insert_coalesce_tensor_ops   sD   


ra   c                 C   s  t j }t j }t j }g }t|D ]}| j|d  }|d}	|d}
|d}||}||}d|	d|
d|||||i}| |	dd j
}t |}d}g }g }g }g }t|D ]l}| j| }|	dd }|dd }| |}tt|j| }|| |krt|dkr|||< t| |||||}|| d}g }g }g }g }||7 }|| || || ||r||| qat|dkr|||< t| |||||}|| q|   t| | d S )Nr   r   r   r?   r#   r$   r   )r   r-   r.   ZkOpRoleVarAttrNamerM   reversedr)   r*   r/   rB   r   Zsize_of_dtyperC   intnpprodr0   r   r7   r   Zhas_attrr1   _sync_with_cppra   )r3   rU   max_memory_sizer5   Zop_role_var_keyZop_device_keyr]   groupZfirst_opr   r   r?   Zop_roleZ	op_devicer"   r   sizeofZcur_mem_sizeZop_role_varsZrecorded_op_indicesZin_var_namesZout_var_namesr4   r:   rE   rF   rG   Zmem_sizeZcoalesce_op_kwargsr
   r
   r   %insert_fuse_all_reduce_by_memory_size  s   
















rj   Zfuse_all_reducec                       s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )FuseAllReducePassc                    s   t    | dd d S )Nrg   )super__init__Zset_attrself	__class__r
   r   rn   c  s   
zFuseAllReducePass.__init__c                 C   s   |  d}|dkS )Nrg   r   )get_attr)rp   rg   r
   r
   r   _check_selfg  s   
zFuseAllReducePass._check_selfc                 C   r   r	   r
   )rp   Z
other_passr
   r
   r   _check_conflictk  s   z!FuseAllReducePass._check_conflictc                 C   s   t jS N)r   ZCOMM_OPTro   r
   r
   r   _typen  s   zFuseAllReducePass._typec           
      C   sb   |  d}|j }|j}t|D ]}||}t|}	t||	|| }	t||	| q|	  d S )Nrg   )
rs   descZget_op_deps
num_blocksrN   r3   rO   rX   rj   rf   )
rp   Zmain_programZstartup_programcontextrg   rV   ry   r   r3   rU   r
   r
   r   _apply_single_implv  s   



z$FuseAllReducePass._apply_single_impl)	__name__
__module____qualname__rn   rt   ru   rw   r{   __classcell__r
   r
   rq   r   rk   a  s    rk   rv   )numpyrd   Zpaddle.frameworkr   Zpaddle.utilsr   Z	pass_baser   r   r   r   r7   r9   r;   rO   rX   ra   rj   rk   r
   r
   r
   r   <module>   s   
Z- %K