o
    "j9                     @   s*  d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZ d dlmZmZmZ d dlmZ d dlmZ d	d
lmZmZmZ e	 Zdd Zdd Zdeeeef  deeeeef  eeef f fddZdeeeef  deeef fddZ dd Z!edG dd deZ"dS )    )AnyDictListTupleN)ProcessMesh)get_world_process_group)is_forward_opis_optimize_op6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attr)OP_ROLE_KEYOP_ROLE_VAR_KEYOpRole)core)device_guard   )PassBasePassTyperegister_passc                 C   s   |   }tj   }g }t|jD ]\}}t|r*|j }|	|j |
| q|d d d D ]	}|j|dd q2|  |S )NF)sync)global_blockpaddlestaticZProgram	enumerateopsr	   desc	append_op	copy_fromappendZ
_remove_op_sync_with_cpp)main_programdist_context
main_blockoptimize_ops_blockZremoved_op_idxidxopnew_op_desc r(   w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/passes/auto_parallel_gradient_merge.py_remove_and_get_optimizer_op)   s   

r*   c                 C   s  |   }tjjddgt|dddd}t||dgtj tjjddgddddd}t||dgtj tjjd	dgddddd}t||dgtj tjjd
dgtddddd}t||dgtj t	dd |j
dd|gid|giddttjid}t|ttjdg| |j
d||dd|iddddttjid}	t|	ttjdg| |j
d||dd|ittjid}
t|
ttjdg| W d    |S 1 sw   Y  |S )NZgradient_merge_kr   Zint32T)nameshapevaluedtypepersistableZ	force_cpur   Zgradient_merge_zeror   Zgradient_merge_stepZgradient_merge_condboolcpu	incrementXOutstep      ?typeZinputsoutputsattrsZelementwise_modr3   Yaxis
use_mkldnnFequal)r   r   r   Zcreate_global_varintr   world_process_groupZranksr0   r   r   r   r   Backwardr
   r   )r!   k_stepsr"   r#   Z
k_step_varZzero_varZstep_varcond_varZincrement_opZelementwise_mod_opZequal_opr(   r(   r)   _get_gm_cond_varB   s   


**rE   params_gradsreturnc                 C   s  |   }|  }i }|D ]\}}|jtjjjksJ d||f||j< qi }	g }
ttt	|j
D ]\}}t|dkr= nt|rC n|j D ]}||v r|| d }|d usZJ ||}|d useJ |j|jd |j|jdd}|j}|j}t|||| |j|jd |j|jdd}|jdd|i|j|jtddd	 || d
 }|d usJ |j|d
 d||dd|iddddttjid}|
||g |j|	|j< t|||| ||= qHq1t|dksJ dt||  |
|	fS )Nz@SELECTED_ROWS is not supported in GradientMergeOptimizer for nowr   z@GRAD@MERGET)r+   r,   r.   r/   Zfill_constantr4   )r,   r.   r-   )r8   r9   r:   r   Zelementwise_addr;   r=   r   r>   Fr7   z@grad_to_param_names must be empty right now, but it has {} items) r   r8   r   ZVarDescZVarTypeZSELECTED_ROWSr+   reversedlistr   r   lenr   r   output_arg_namesZ get_tensor_dist_attr_for_programZ
create_varr,   r.   Zprocess_meshZdims_mappingr   r   floatZ_insert_op_without_syncr   r   rB   r   r
   formatr    )r!   startup_programrF   r"   r#   Zstartup_blockZgrad_to_params_gradsparamZgradgrad_to_gradient_mergeZnew_params_gradsindexr&   Zout_nameZref_dist_attrZgradient_merge_varZref_process_meshZref_dims_mappingZstartup_gradient_merge_varZnew_grad_opr(   r(   r)   "_append_gradient_merge_backward_op   s   IrR   new_params_to_gradsrP   c           
         sL    fdd}t jjj||d d  jd }	|	ttj	 d S )Nc                     s  j }  } r*D ]\}}|jdd|id|id dddd}|ttj qtj	 D ]\}j
|}|j }|| | D ]}|v rU|||  qG| D ]}	|	v rh||	|	  qZ|trs|t t | v r|d	d
 }
|
d }|d	|g q1   |  |jD ]}t|r|}|r||_qD ])\}}|jdd|gid|gidtd
gd|jddgdg dg dg dg ttjid qd S )Nscaler3   r4   r6   g        F)rT   ZbiasZbias_after_scaler7   ZGradr   z@MERGE	set_valueZInputvaluesr.   r,   r   ZaxesZstartsZendsZsteps)Zcurrent_block_idxZcurrent_blockr   	_set_attrr   r   Optimizeranger   Zop_sizer&   r   Zinput_arg_namesZ_rename_inputrK   Z_rename_outputZhas_attrr   Zremove_attrr   Zgrad_var_suffixinputZ	set_inputr   r    r   r	   Zget_dist_op_for_programZ
_serial_oprL   r.   )Zcur_block_idxZ	cur_block_Znew_gradZscale_opZ
opt_op_idxZop_descr'   Z
input_nameZoutput_nameZ
grad_valueZgrad_merge_valuer&   Zdist_opavgr"   rP   rC   r!   rS   r$   r(   r)   true_apply_gradient  sz   







zD_create_cond_block_and_update_optimizer.<locals>.true_apply_gradient)Ztrue_fnZfalse_fnr   )
r   r   nnZcondr   r   rW   r   r   rX   )
r!   rD   rS   rP   r$   rC   r]   r"   r^   Zcond_opr(   r\   r)   '_create_cond_block_and_update_optimizer	  s   
Nr`   c           
   	   C   sB   t | |}t| |||\}}t| ||}	t| |	|||||| d S N)r*   rR   rE   r`   )
r!   rN   rF   rC   r]   r"   r$   rS   rP   rD   r(   r(   r)   parse_programf  s(   rb   Z!auto_parallel_gradient_merge_passc                       s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )GradientMergePassc                    s&   t    | dd | dd d S )NrC   r   r]   T)super__init__Zset_attrself	__class__r(   r)   re     s   
zGradientMergePass.__init__c                 C   s   |  ddk r	dS dS )NrC   r   FT)get_attrrf   r(   r(   r)   _check_self  s   zGradientMergePass._check_selfc                 C   s   dS )NTr(   )rg   Z
other_passr(   r(   r)   _check_conflict  s   z!GradientMergePass._check_conflictc                 C   s   t jS ra   )r   ZCOMM_OPTrf   r(   r(   r)   _type  s   zGradientMergePass._typec                 C   sx   |  dd}|  dd}|  d}|  d}tj|| t|||||| W d    n1 s1w   Y  |  d S )NrC   r   r]   Fr"   rF   )rj   r   r   Zprogram_guardrb   r    )rg   r!   rN   contextrC   r]   r"   rF   r(   r(   r)   _apply_single_impl  s   


z$GradientMergePass._apply_single_impl)	__name__
__module____qualname__re   rk   rl   rm   ro   __classcell__r(   r(   rh   r)   rc     s    rc   )#typingr   r   r   r   r   Z-paddle.distributed.auto_parallel.process_meshr   Z5paddle.distributed.auto_parallel.static.process_groupr   Z-paddle.distributed.auto_parallel.static.utilsr   r	   r
   r   Z/paddle.distributed.fleet.meta_optimizers.commonr   r   r   Zpaddle.frameworkr   Zpaddle.staticr   Z	pass_baser   r   r   rA   r*   rE   strrR   r`   rb   rc   r(   r(   r(   r)   <module>   s2   Y
n

] 