o
    "j                     @   s~  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ dZdZdZd	Zej Zej ZejjjZejjZej ZejjjZejjjZejjjZej  Z!g d
Z"g dZ#ddgZ$dddZ%dddZ&dZ'g dZ(dd e(D Z)dd Z*dZ+e*dddZ,G dd dZ-G dd dZ.dd  Z/g fd!d"Z0d#d$ Z1d%d& Z2d'd( Z3d)d* Z4d+d, Z5d-d. Z6d/d0 Z7d1d2 Z8d3d4 Z9d5d6 Z:d7d8 Z;d9d: Z<d;d< Z=d=d> Z>	?dd@dAZ?dBdC Z@dDdE ZAddFdGZBddHdIZCdJdK ZDdLdM ZEdNdO ZFdPdQ ZGdRdS ZHdTdU ZI	VddWdXZJdYdZ ZKd[d\ ZLd]d^ ZMd_d` ZNdadb ZOdcdd ZPdedf ZQ	gddhdiZRddjdkZSdldm ZTejUjVjWdnejUjVjXdoejUjVjYdpejUjVjZdnejUjVj[doejUjVj\dpejUjVj]dqejUjVj^dqiZ_drds Z`G dtdu duZadvdw Zbdxdy Zcdzd{ Zdd|d} Zed~d Zfdd Zgdd Zhdd Zidd Zjdd Zkdd Zldd Zmdd Zndd Zodd Zpdd Zqdd Zrdd Zsdd ZtdS )    N)reduce)generate_control_dev_var_name)is_persistable)coreZop_namescopeZgradient_clipz@PS_STEP_COUNTER@z@LR_DECAY_COUNTER@)cpuZgpuZxpu)sendrecvZfetch_barrierZsend_barrierlookup_tablelookup_table_v2W)r	   r
   )Zlookup_table_gradZlookup_table_v2_gradr   )z.batch_sizez
.batch_sumz.batch_square_sumc                 C   s   g | ]}|d  qS )@GRAD ).0xr   r   c/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/ps/utils/public.py
<listcomp>7   s    r   c                 C   sv   t |}|jt jd t j| dddd}|t j t d}|| t  }|t j	 |
| |
| |S )N)levelazUTF-8T)modeencodingdelayzD%(levelname)s - %(asctime)s - %(pathname)s: %(lineno)s - %(message)s)logging	getLoggersetLevelWARNINGFileHandlerINFO	FormattersetFormatterStreamHandlerDEBUG
addHandler)log_pathlogging_nameloggerhandler	formatterconsoler   r   r   logger_config:   s   



r(   z	./ps_log/z./ps_usr_print_logZps_usr_print_log)r"   r#   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )DistributedModer                  N)	__name__
__module____qualname__SYNCASYNC
HALF_ASYNCGEOZFLZNUr   r   r   r   r)   R   s    r)   c                   @   s   e Zd Zdd Zdd ZdS )TrainerRuntimeConfigc                 C   s   d | _ tdd}|}|jd }|js|dkrtj| _ |jr&|dkr&tj| _ |jr3|dkr3tj| _ |}i | _	td|| j	d< td|| j	d< td	d| j	d
< td|| j	d< tdd| j	d< tdd| j	d< tdd| j	d< d S )NCPU_NUM1k_stepsr   Z$FLAGS_communicator_max_merge_var_numcommunicator_max_merge_var_numZ"FLAGS_communicator_send_queue_sizecommunicator_send_queue_sizeZ*FLAGS_communicator_independent_recv_threadZ$communicator_independent_recv_threadZ0FLAGS_communicator_min_send_grad_num_before_recvZ*communicator_min_send_grad_num_before_recvZ#FLAGS_communicator_thread_pool_size5communicator_thread_pool_sizeZ"FLAGS_communicator_send_wait_timescommunicator_send_wait_timesZ#FLAGS_communicator_is_sgd_optimizerZcommunicator_is_sgd_optimizer)
r   osgetenvZa_sync_configsZa_syncr)   r2   r3   r5   runtime_configs)selfZvalid_strategynum_threadssend_queue_sizer9   r   r   r   __init__\   sJ   





zTrainerRuntimeConfig.__init__c                    s  g }t dd}d} jd u s jtjkr j }d}n$ jtjks) jtjkr0d}g d}n jtj	kr=d}g d}nt
d	 jtjksM jtjkr} jd
 } jd }||krjtd|||| | jd
< ||kr}td|||| | jd<  fdd|D S )Nr7   r8    asynczsync or half_async)r:   r>   r=   r;   r5   )r=   r>   r:   r;   zUnsupported Moder:   r;   zWARNING: In {} mode, communicator_max_merge_var_num must be equal to CPU_NUM. But received, communicator_max_merge_var_num = {}, CPU_NUM = {}. communicator_max_merge_var_num will be forced to {}.zWARNING: In {} mode, communicator_send_queue_size must be equal to CPU_NUM. But received, communicator_send_queue_size = {}, CPU_NUM = {}. communicator_send_queue_size will be forced to {}.c                    s   i | ]
}|t  j| qS r   )strrA   )r   keyrB   r   r   
<dictcomp>   s    z?TrainerRuntimeConfig.get_communicator_flags.<locals>.<dictcomp>)r?   r@   r   r)   r3   rA   keysr2   r4   r5   
ValueErrorprintformat)rB   Z	need_keysrC   Zmode_strZmax_merge_var_numrD   r   rJ   r   get_communicator_flags   sZ   




z+TrainerRuntimeConfig.get_communicator_flagsN)r/   r0   r1   rE   rP   r   r   r   r   r6   [   s    )r6   c                 C   sX   g }t |  jD ] \}}t|t}|ttks$|ttttB kr)|| q	|S N)		enumerateglobal_blockopsintattrRPC_OP_ROLE_ATTR_NAMELR_SCHED_OP_ROLE_ATTR_VALUEOPT_OP_ROLE_ATTR_VALUEappend)programZlr_opsindexoprole_idr   r   r   
get_lr_ops   s   
r_   c                 C   s   |   }g }|jD ]4}t|r=t|dkr|dd |vrq	t| v r8t|tv r8|	dt
tjjj q	|| q	|S )Nr   Paramop_role)rS   rT   _is_opt_role_opleninputOP_NAME_SCOPE	all_attrsCLIP_OP_NAME_SCOPErV   	_set_attrrU   r   op_proto_and_checker_makerOpRoleBackwardrZ   )_programremote_sparseblockopt_opsr]   r   r   r   get_optimize_ops   s"   

rp   c                 C   s0   |   }g }|jD ]}|jdkr|| q	|S )NZ	data_normrS   rT   typerZ   )rl   rn   ro   r]   r   r   r   get_datanorm_ops   s   


rs   c                  C   sZ   t tdd} d}d}d}tdr&td}|d|  }t|d}| |||dS )NZPADDLE_TRAINER_ID0rF   r   ZPADDLE_TRAINER_ENDPOINTS,)
trainer_idnum_trainerscurrent_endpointtrainer_endpoints)rU   r?   r@   splitrc   )rv   ry   rx   rw   r   r   r   get_dist_env   s   

r{   c                 C   &   z|   W S  ty   |   Y S w rQ   )Z_role_id	Exceptionr^   
role_makerr   r   r   get_role_id  
   
r   c                 C   s6   z	|   t|  W S  ty   |  t|   Y S w rQ   )_get_pserver_endpointsr   r}   get_pserver_endpointsr~   r   r   r   get_ps_endpoint  s
   r   c                 C   r|   rQ   )r   r}   r   r~   r   r   r   get_ps_endpoints  r   r   c                 C      |   S rQ   )Z_get_heter_worker_endpointr~   r   r   r   get_heter_worker_endpoint     r   c                 C   r   rQ   )Z_get_trainer_endpointr~   r   r   r   get_trainer_endpoint  r   r   c                 C   r   rQ   )Z_get_trainer_endpointsr~   r   r   r   get_trainer_endpoints#  r   r   c                 C   r|   rQ   )Z_get_previous_trainersr}   Zget_previous_trainersr~   r   r   r   get_previous_stage_trainers'  r   r   c                 C   s<   | j tv r| ddu rdS | j dkr| ddu rdS dS )Nis_distributedTdistributed_lookup_tableFrr   SPARSE_OP_LISTrV   r]   r   r   r   is_distributed_sparse_op.  s   
r   c                 C   s   |  dd S )Nr   r   )rd   r   r   r   r   get_sparse_tablename;  s   r   c                 C   sJ   | j tv r| ddu r| ddu rdS | j dkr#| ddu r#dS dS )N	is_sparseTr   Fr   r   r   r   r   r   is_sparse_op?  s   

r   c                 C   sj   t  }| D ]+}|r| jD ]}t|r|t| qq| jD ]}t|r/|t| q"qt|S rQ   )setrS   rT   r   addr   r   list)Zprogramsr   Z
tablenamesr[   r]   r   r   r   get_sparse_tablenamesP  s   r   c                 C   r|   rQ   )Z_worker_numr}   Z
worker_numr~   r   r   r   get_trainers^  r   r   Fc                 C   s`  t |dk r|S |sg }g }|D ]%}d}	|d }
|
jj}tD ]	}||r'd}	q|	r0|| q|| qg }d}|D ]!}|d }
||
jj |  j|
jj }|tdd |j	d7 }q<dt
| }d}ddlm} |||gd	g|g|||dd|ddt| g }|||< |d7 }t |dkr|S g }d}|D ]!}|d }
||
jj |  j|
jj }|td
d |j	d7 }qdt
| }d}ddlm} |||gd	g|g|||dd|ddt| g }|||< |d7 }|S |D ]C}|d }
|
jj}|  j| }tdd |j	d}|}d}ddlm} |||gd	g|g|g||dd|ddt| g }|||< |d7 }q|S )Nr*   FTr   c                 S      | | S rQ   r   r   yr   r   r   <lambda>      z(get_dense_send_context.<locals>.<lambda>zDense@GRAD_CommContext127.0.0.1:6071c                 S   r   rQ   r   r   r   r   r   r     r   zDataNorm@GRAD_c                 S   r   rQ   r   r   r   r   r   r     r   )rc   
merged_varnameDATA_NORM_GRAD_NAMEendswithrZ   rS   varsr   shaperH   paddle.base.corer   id)r[   send_ctxidxmerged_dense_pairsrv   split_dense_tabledense_pairsZdata_norm_pairsmergedZis_data_normgradvarnamer   origin_varnames	var_numelvar	grad_nameZ	aggregater   Z	dense_ctxZdata_norm_ctxZorigin_varnamer   r   r   get_dense_send_contexte  s   

r   c                 C   sv  | d t jkrtdt| d di }t| d }| d }d}t|d}t|D ]m\}}| d | }|D ]`}	|	\}
}|jj}|
jj}|| d	 v rIq4||v rOdnd
}|	 j
|jj }tdd |jdd  d}ddlm} td|| |||gdg|g|g|dd||d
d
t|g }|d7 }||| < q4q(t|dkrtdt| d dkr| d rt|| d \}}|||< |S )NZps_modezps mode: {} not matched {}get_geo_trainer_send_contextr   origin_main_programsr   Tmerged_sparse_pairsrm   Fc                 S   r   rQ   r   r   r   r   r   r     r   z.get_geo_trainer_send_context.<locals>.<lambda>r*   r   z(public get_the_geo_send_context sparse: r   z-GeoSGD require sparse parameters in your net.tensor_table	is_worker)r)   r5   rM   rO   r   r   rR   r   r   rS   r   r   r   r   r   rN   r   var_namerc   	_step_ctx)attrsr   rv   origin_programsr   distibuted_varnamesir[   r   r   paramr   r   
param_namer   r   r   r   
sparse_ctxr   ctxr   r   r   r     sf   
$r   c           	      C   sh   t }t|}t|}dgt| }|gt| }ddlm} ||||||g|ddd| dddg }||fS )Nr*   r   r   TF)STEP_COUNTERr   r   rc   r   r   )	r   r   r   rv   Z	endpointssectionsnamesr   r   r   r   r   r     s.   r   c                 C   s  |d u rdg}i }t | d }| d }td|  d}t|d}t|D ]\}}	| d | }
|
D ]}|\}}|jj}|jj}g }|| d v rL|| g }tt|D ]}|| d	|  qT||v rgdnd
}|		 j
|jj }t|j}|r{dn|d |d< ||v rq1ddlm} td||| ||||||g|dd||d
d
t|	|}|d7 }||| < q1q%t|D ]\}}	| d | }t|	|||||}qt| d dkr| d rt|| d \}}|||< |S )Nr   r   r   zis_heter_ps_mode? r   Tr   rm   .blockFr   z(public get_the_one_send_context sparse: r*   r   r   r   )r   rN   r   rR   r   r   rZ   rangerc   rS   r   r   r   r   r   r   r   r   r   )r   r   ep_listr   rv   r   r   r   r   r[   r   r   r   r   r   r   Zremote_sparse_idsZsplited_varnamer   r   r   r   r   r   r   r   r   r   r   get_the_one_send_context8  s   


4	r   c           *   	      sH  |t vrtd| dt  d dd}d dd}dd	 }|  }|   	 i }t j}t|}t|d
 ddD ]}	t j}||	 }
d|
jv rJ|
j	dd }|t
 v r|
ddu r|
t
| d }||v rt j}|||  }|jd  fdd|jD i}|jd  fdd|jD i} j|	d
 |j||| d  || d
  || |D ]
}||  d
7  < qq8|dkrI	 g }|
jD ]}|
|D ]}|dkrqd|v rq||	dd  qq|D ]\}||v rH	 t j}|||  }|jd  fdd|jD i}|jd  fdd|jD i} j|	d
 |j||| d  || d
  || |D ]}||  d
7  < q<qq8|
jdkr|
dd }d|v r|	dd }||	d
  }d|jv r|j	dd }|t
 v r|ddu r|t
| d }||kr|
d|dkrq8|	||< q8|dkrg }|jD ]}||D ]}|dkrqd|v rƐq|| qqg }|
jD ]}|
|D ]}|dkrqd|v rq|| qېqd}|D ]}||v rd} nq|r	q8|	||< q8|	||< q8|  }|   g }|i i}i }d}g }g } |}!d}" jD ]}
||
|!|rd}"t| d
krS| || |< ||  g } |d
7 }||
|!|rg|
d}!||
|| q0|d d}#|||# |< || |d
7 }g }|
d}!||
|| q0|"r|d d}#|||# |< || |d
7 }g }|}!d}"| |
 q0| |
 q0| g kr| || |< ||  |g kr|d d}#|||# |< || t|dkrtd d}$d}%| D ]}&||& }'|%t|'7 }%|' D ]\}(})|$t|)7 }$qqtdt j|$|% ||||fS )!NzGiven device z is not in device list r   c                 S   sf   t t}|| | d}| j}||v rdS |tv r!||kr!dS |d u s)||kr1| d| dS dS N	op_deviceTF)r   DEVICE_LISTremoverV   rr   COMMUNICATE_OPS_TYPErh   )r]   current_heter_devicedefault_deviceZheter_devicesr   Zop_typer   r   r   _is_heter_op  s   

z$find_heter_ops.<locals>._is_heter_opc                 S   s&   |  d}||krdS ||krdS dS r   )rV   )r]   Z
pre_devicer   r   r   r   r   _is_same_device  s   
z'find_heter_ops.<locals>._is_same_devicec                 S   s(   |  d}||vri ||< ||  d S )Nr   )rV   rZ   )r]   current_heter_block_ops	heter_opsr   r   r   r   _append_heter_op  s   
z(find_heter_ops.<locals>._append_heter_opr*   r   _gradr   remote_prefetchTc                       g | ]} j | qS r   r   r   rd   rn   r   r   r         z"find_heter_ops.<locals>.<listcomp>c                    r   r   r   r   outputr   r   r   r     r   r\   rr   inputsoutputsr   Zelementwise_mul@EMPTY@lod_tensor_blocking_queuer   c                    r   r   r   r   r   r   r   r     r   c                    r   r   r   r   r   r   r   r     r   sumOutr   FzsNo heterogeneous OP was found in your program ,  please using static.device_guard() to run OPs on different device.zeThere are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks.r   )r   rM   clonerS   r   rT   rc   r   rr   rz   SPARSE_OP_TYPE_DICTrL   rV   rd   input_namesinput_arg_namesoutput_namesoutput_arg_names
_insert_oprf   
_remove_oppopr   rZ   warningswarnitemsrN   rO   )*r[   r   r   r   r   Zorigin_porgramZvar2idxZop_listop_sizer   r]   Zforward_op_typer   Zsum_opZsum_op_inputsZsum_op_outputsZvar_Zoutput_vars_no_gradrI   r   Zno_grad_varr   Z
origin_varZpre_opZoutput_varsZ
input_varsZis_matchZprogram_block_opsZdefault_opsr   block_indexr   Zcurrent_default_block_opsr   Zis_heterr   Ztotal_heter_opsZheter_blocksdeviceZheter_block_dict_heter_blockr   r   r   find_heter_ops  s  






























r   c                 C   s   	 t | }g }|d dksJ dtd|d D ]}d| | i}|d| |d |  i || qg g d}| |d  D ]}d|jvrR|jd	ksR|d | q>|d | q>|| |S )
z
    before analyzing the input & output of each block in program_block_list, we should
    union the forward op and corresponding gradient op to elimincate the unnecessary variable
    transmit
    r+   r   z2the length of program_block_ops_list should be oddforwardbackwardr*   )r   r   r   r   )rc   r   updaterZ   rr   )program_block_ops_listZblock_lengthZunion_program_block_ops_listr   block_op_listr]   r   r   r   union_forward_gradient_op  s&   

r  c                 C   s(   t | |}t| |||}t| ||}|S rQ   )find_entrance_exit_privateentrance_exit_checkdelete_block_useless_exit)r[   r   r   block_var_detailr   r   r   find_block_joints  s   r  c                 C   sj   g }g }|D ]}t |  j|}|t|7 }t|  j|}|t|7 }qtt|}tt|}||fS rQ   )_get_input_map_from_oprS   r   get_varlist_from_op_map_get_output_map_from_opr   r   )r[   Zops_listinput_var_listoutput_var_listr]   r   r   r   r   r   find_ops_list_input_output  s   r  c              	   C   s  g }g }t |D ]\}}t| |d \}}t| |t| | }tt|t|@ }tt|t| }	tt|t| }
d|	|
||di}t| |d \}}t| |t| | }tt|t|@ }tt|t| }tt|t| }|d||||di || q|S )Nr   )entranceexitprivatepersistablesr   )rR   r  screen_persistablesr   r   r   rZ   )r[   r   r  r  r\   r  Zblock_inputZblock_outputZblock_private_varsZblock_entranceZ
block_exitdetailZbp_block_inputZbp_block_outputZbp_persistablesZbp_block_private_varsZbp_block_entranceZbp_block_exitr   r   r   r    sZ   

r  c                 C   sr  t t|d ddD ]}|d dk r n||d  d d }|  || d d }|| d d }|| d d || d d  || d d  }|D ]}	d	|	vr[|	|vr[||	 qL|  ||kreq
tt|t|@ }
tt|t|
 }||d  d d }||d  d d }|D ]}	|	|vr|	|vr||	 ||	 |	|vr||	 qq
t dt|d dD ]~}||d  d d }|  || d d }|  ||krqtt|t|@ }
tt|t|
 }g }|D ]}	d	|	vr||	 qtt|t|}||d  d d }||d  d d }|D ]}	|	|vr/|	|vr/||	 ||	 qq|S )
Nr*   r   r   r   r  r  r   r  r   )r   rc   sortrZ   r   r   
difference)r[   r   r  r   r\   Zprevious_block_exitZcurrent_block_entranceZbackward_entranceZforward_allr   Z
exist_varsZneed_add_varsZprevious_block_privateZprevious_block_entranceZneed_ignore_varsr   r   r   r    s   








r  c                 C   s  t t|D ]8}|t|d kr n-|| d d }||d  d d }g }|D ]}||vr3|| q(|D ]}|| q6qt t|d ddD ]7}|d dk rT |S || d d }||d  d d }g }|D ]}||vru|| qj|D ]}|| qxqI|S )Nr*   r   r  r  r   r   r   )r   rc   rZ   r   )r[   r   r  r\   Zcurrent_block_exitZnext_block_entranceZneed_delete_varr   r   r   r   r  Y  s<   

r  r   c                 C   s   g }g }|dkrd|d  d| d}nd|d  d| d}|   |D ]#}|  j| }|j}	dtdd	 |	d }
||
 || d
 q%|||d}|S )Nr   Zforward_joint_r*   r   z@HeterZbackward_joint_r   c                 S   r   rQ   r   r   r   r   r   r     r   z*get_communicate_var_info.<locals>.<lambda>z.input_reshape@Heter)input_var_reshape_diminput_var_reshape_nameblock_input_var_name)r  rS   r   r   r   rZ   )r[   r   Zentrance_var_listrr   r  r  r  r   r   r   Zrecv_var_diminfor   r   r   get_communicate_var_info{  s&   
r  c                 C   s\   | D ])}||  jvr+||jvr+|  j| }|jr$|  j|dd q|j|dd qd S )NFZforce_persistable)rS   r   persistable_clone_variable)Zvar_name_listorigin_programr[   rn   r   r   r   r   r   add_vars_by_var_list  s   
r  c                 C   p   t  }|jD ].}g }||D ]}|dkrqd|v rq|| |  qt|dkr1|d ||< q|||< q|S )z9Returns a dict from op output name to the vars in varmap.r   r   r*   r   )collectionsOrderedDictr   r   rZ   rc   Zvarmapr]   ZiomaprI   r   r   r   r   r   r
       

r
  c                 C   sP   g }|   D ]\}}t|ts|g}tt|D ]}|| }||j qq|S rQ   )r   
isinstancer   r   rc   rZ   r   )Zvar_mapvar_listrI   varlistr   r   r   r   r   r	    s   
r	  c                 C   r   )z8Returns a dict from op input name to the vars in varmap.r   r   r*   r   )r!  r"  r   rd   rZ   rc   r#  r   r   r   r    r$  r  c                 C   s   g }|D ]/}d|v r#d| dd krq| dd }|  j| }n|  j| }t|r3|| q|D ]}|| q6|S )Nr   GRAD@r   r   )rz   rS   r   r   rZ   r   )r[   r&  Zneed_remover   Zorigin_var_namer   r   r   r   r    s   
r  c                 C   s  |  j }||j t||}| D ]3\}}t|ts"|g}|D ]$}|j|   jvrH|j|jvrH|j	rA|   j
|dd q$|j
|dd q$qt|  j|}	|	 D ]3\}}t|tsb|g}|D ]$}|j|   jvr|j|jvr|j	r|   j
|dd qd|j
|dd qdqVd|jvr|j|j||	| dS |j}
tjjj}tj }|j }||
 |t| |j|r|
|}||| |  d S )NFr  r   rr   r   r   r   )rS   r   copyr   r  r   r%  r   r   r  r  r
  rr   	append_oprf   descr   ri   rj   rk   kOpDeviceAttrNameZ	copy_fromrh   rW   has_attrrV   Z_sync_with_cpp)r[   r  rn   r]   Zmerge_ordereddictr   rI   r'  r   r   Zop_descr   Zdevice_attr_nameZnew_op_descr   r   r   r   block_append_op  sX   








r0  c                 C   r|   rQ   )Z_get_next_trainersr}   Zget_next_trainersr~   r   r   r   get_next_stage_trainers(  r   r1  Tc                 C   s   |rt |}t|}	|| d d }
t| |d |
}nt |}t|}	||d  d d }
t| |d |
d}|j|dd|j|
d  id	g id
|rJdndd|
dg dg d|d d|d|	dt|d|tti	d |
S )Nr   r  r*   r   r  Zsend_and_recvXr   r   r   Zsend_var_nameZmicrobatch_idZrecv_var_nameZmessage_namer  Znext_endpointsZprevious_endpointsrv   r   r   )r1  r   r  r   r   r   rW   RPC_OP_ROLE_ATTR_VALUE)Zorign_programr   r   Zstage_idZfirst_op_indexr  r   Z
is_forwardZnext_heter_worker_endpointsZprevious_heter_worker_endpointsZentrance_varZ	comm_infor   r   r   insert_communicate_op/  sD   


r4  c                 C   s   i }i }|rAt | |d}t| D ],\}\}}| rq| r"q| }	g }
|	D ]}| d | }|
| q*|
|| < q|S t | dd d}t| D ]'\}\}}| sYqN| }	g }
|	D ]}| d | }|
| qa|
|| < qN|S )N)r   grad_name_to_param_nameF)r   r   )r   rR   r   r   Zis_tensor_tabler   rZ   table_id)contextZis_denser   Zrecv_id_mapsr5  r   r   r   r   Zorigin_grad_varnamesZparam_namesZgrad_varnamer   r   r   r   get_the_one_recv_contextb  s>   r8  c                 C   s   d}d}d}|  d}|dkr| |d d  }nt| }|  d}|dkr.| |d | }nt| }| dt|| }|||fS )NrF   z	.trainer_r   r*   r   )findrc   min)r   Zorig_var_nameZtrainer_partZ
block_partZtrainer_idxr   r   r   r   _get_varname_parts  s   


r;  r+   r-      r*   c                 C   s$   t dd | jd}|t| j 9 }|S )Nc                 S   r   rQ   r   r   r   r   r   r     r   z"get_var_mem_size.<locals>.<lambda>r*   )r   r   dtype_to_sizeZdtype)r   Zm_sizer   r   r   get_var_mem_size  s   r>  c                   @   s   e Zd Zdd ZdS )MergedVariablec                 C   s   || _ || _|| _d S rQ   )r   Zordered_varsoffsets)rB   r   orderedr@  r   r   r   rE     s   
zMergedVariable.__init__N)r/   r0   r1   rE   r   r   r   r   r?    s    r?  c                 C   s  | d }i }i }g | d< g | d< g | d< g | d< g | d< i | d< |D ]}t |\}}g }g }g }	g }
g }|D ]\}}|||f q6|D ]\}}|||f qD|D ]$}|\}}t||gdg}t||gdg}|||f |
||f qR|D ]$}|\}}t||gdg}t||gdg}|||f |	||f qy|D ]}|\}}|j| d |jj< |j| d |jj< qg }|| || |D ]\}}|j||j< |j||j< q| d | | d | | d |	 | d |
 q"|| d	< || d
< d S )Nr   Zorigin_sparse_pairsZorigin_dense_pairsr   r   merged_variables_pairsZmerged_variable_mapr   param_name_to_grad_namer5  )get_param_gradsrZ   r?  r   r   extend)r7  r   rC  r5  r  Zsparse_pairsr   Zorigin_for_sparseZorigin_for_denser   r   rB  r   r   Z
dense_pairZm_paramZm_gradZsparse_pairr   Zparam_mergesr   r   r   build_var_distributed  sn   

rF  c                 C   sB   t j}t jjj}| | jv rt|  |  t|krdS dS )NTF)r   ri   rj   OptimizekOpRoleAttrName
attr_namesrU   rf   )r]   Zop_makerZoptimize_roler   r   r   rb     s   
rb   c                    s2    fdd} fdd}| }||\}}||fS )Nc                    s      }g }g }t }   j}ttjjj}|jD ]S}t	|rmt
| v r4t|t
v r4|d| q|ts:q|trm|td }|td }	||vrm|| || ||	 f}
|| v rh||
 q||
 q||fS )Nra   r   r*   )rS   r   r   rU   r   ri   rj   rk   rT   rb   re   rf   rg   rV   rh   r/  OP_ROLE_VAR_ATTR_NAMEr   rZ   )sparse_varnamesrn   dense_param_gradssparse_param_gradsZoptimize_paramsZorigin_var_dictr^   r]   r   r   Z
param_gradr  r   r   _get_params_grads  s6   





z*get_param_grads.<locals>._get_params_gradsc                     sZ   g }    jD ]}|jt v r&|ddu r&|t|j d }| | qtt	| S )Nr   Tr   )
rS   rT   rr   r   rL   rV   rd   rZ   r   r   )varnamesr]   r   rN  r   r   _get_sparse_varnames<  s   
z-get_param_grads.<locals>._get_sparse_varnamesr   )r  rO  rQ  rK  rM  rL  r   rN  r   rD    s
   $rD  c                 C   sX   |D ]'}zt | j|}| | W q ty) } z
t| W Y d }~qd }~ww d S rQ   )r   rT   r\   r   r}   rN   )rn   rT   r]   r   er   r   r   
delete_opsN  s   rS  c                 C   s,   g }|   jD ]}|jdkr|| q|S )Nr   rq   )r[   send_op_listr]   r   r   r   find_send_opW  s   

rU  c                 C   sX   g }g }t |j|}|t|7 }t|j|}|t|7 }tt|}tt|}||fS rQ   )r  r   r	  r
  r   r   )r[   rn   r]   r  r  r   r   r   r   r   find_op_input_output_  s   rV  c                    s*  fdd}g }| }i }|D ]Q}d|vrqd| dd kr q||vr%q|| }|d}	|d}
|d	}|| |
|vrWi ||
< g ||
 d
< |	||
 d< |||
 d	< ||
 d
 | q|D ]/}
 jt d} fdd||
 d
 D } jdd|id|id	||
 d	 d|	d|
ttid qc|S )Nc                     s@   i } t  }|D ]}t   |\}}|D ]}|| |< qq| S rQ   )rU  rV  rS   )send_op_dictrT  r]   Z
input_listr   r   )r[   r   r   _get_send_op_dictl  s   

z&add_send_op.<locals>._get_send_op_dictr   r(  r)  r   r   r6  send_varnamesr&  )r   c                    r   r   r   )r   Z	union_varr   r   r   r     r   zadd_send_op.<locals>.<listcomp>r   r2  r   r*  )rz   rV   rZ   Z
create_varr   r,  rW   r3  )r[   rn   Z_varsrX  Zsend_grad_var_listrW  Z
table_dictZpersistable_varZsend_opr   r6  rY  Zdummy_outputZsend_input_varsr   )rn   r[   r   add_send_opk  sL   





rZ  c                 C   s   | j  }t|}|S rQ   )r   rL   r   )rn   Z	vars_listZvars_name_listr   r   r   get_vars_name_in_block  s   
r[  c           	      C   s   t t|}g }|  jD ]"}t| |  |\}}t t|t|}t t|t|}q||7 }t tt|  t|}|D ]	}|  | qE|S rQ   )	r   r   rS   rT   rV  unionr[  r  Z_remove_var)	r[   Z
static_varZprogram_useful_var_listr]   r  r  Zop_var_listZprogram_useless_var_listr   r   r   r   delete_trainer_useless_var  s&   
r]  c                 C   s   | j d }| |}t|D ]/\}}|jdkr6|d}d}	|D ]}
|
|  jvr2|
|jvr2d}	 nq |	r6qt| ||| q|d d d }t||| | |d d d	 }t||| | |S )
Nr*   r   rY  FTr   r   r  r  )	Z
num_blocksZ_create_blockrR   rr   rV   rS   r   r0  r  )r[   r  Zbp_ops_listr  Zpre_block_idxr   r   r]   rY  Zis_skipr   Zentrance_varsZ	exit_varsr   r   r   create_backward_block  s(   




r^  c                 C   s"   t | jv ot| t ttj@ S rQ   )op_role_attr_namerI  rU   rV   ra   rk   r   r   r   r   is_backward_op     
r`  c                 C   s"   t | jv ot| t ttjkS rQ   )r_  rI  rU   rV   ra   ZForwardr   r   r   r   is_forward_op  ra  rb  c                 C   s
   | j dkS )NZdistributed_push_sparse)rr   r   r   r   r   is_push_sparse_op  s   
rc  c                 C   8   g }t | j D ]}| j| }t|r|| q	|S rQ   )r   r-  r   rT   rc  rZ   )rn   Zpush_sparse_op_listop_idxr]   r   r   r   #get_distributed_push_sparse_op_list     

rf  c                 C   rd  rQ   )r   r-  r   rT   r`  rZ   )rn   Z
bp_op_listre  r]   r   r   r   get_bp_op_list  rg  rh  c                 C   sx   |D ]7}z| j D ]}t|t|kr!t| j |}| |  nqW q ty9 } z
t| W Y d }~qd }~ww d S rQ   )rT   rH   r   r\   r   r}   rN   )rn   rT   r]   Z	origin_opr   rR  r   r   r   delete_same_ops  s   

ri  c                 C   st   d}| j D ].}|jD ]$}|j }|j }|| D ]}|t|s-tdt||qq
|d7 }qt	d d S )Nr   z.var: {} needed by op is not found in block: {}r*   zprogram checked valid)
blocksrT   r-  r   r   Z_find_var_recursiverH   rM   rO   rN   )r[   Z	block_idxrn   r]   Zinput_var_namesZoutput_var_namesr   r   r   r   check_program	  s    




rk  c                 C   sT   t jt j| dd t| d}|t| W d    d S 1 s#w   Y  d S )NT)exist_okzw+)r?   makedirspathdirnameopenwriterH   )filer[   fr   r   r   debug_program  s   "rt  c                  C   s   t d} | d u rdS dS )NZTRAINING_ROLEFT)r?   r@   )Z	node_roler   r   r   is_distributed_env!  s   
ru  )F)FNr   )r   )T)TF)ur!  r   r?   r   	functoolsr   Zpaddle.base.frameworkr   Zpaddle.distributed.ior   Zpaddle.frameworkr   re   rg   r   ZLEARNING_RATE_DECAY_COUNTERri   ZkOpRoleVarAttrNamerJ  rH  rW   rj   ZRPCr3  ra   r_  ZLRSchedrX   rG  rY   rk   r   r.  ZOP_DEVICE_KEYr   r   r   r   ZSPARSE_GRAD_OP_TYPE_DICTZDEFAULT_DEVICEZDATA_NORM_NAMEr   r(   Zps_log_root_dirr$   r)   r6   r_   rp   rs   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r
  r	  r  r  r0  r1  r4  r8  r;  ZVarDescZVarTypeZFP16ZFP32ZFP64ZINT16ZINT32ZINT64ZBOOLZUINT8r=  r>  r?  rF  rb   rD  rS  rU  rV  rZ  r[  r]  r^  r`  rb  rc  rf  rh  ri  rk  rt  ru  r   r   r   r   <module>   s   








	m	
|<

S   3V#
":

3&







W7	7		