o
    "j                     @   sP   d dl Z d dlmZ d dlmZ d dlmZ g ZG dd dZG dd dZ	dS )	    N)is_optimizer_op)	FP16Utils)get_var_sizec                   @   sl   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )Shardc                 C   s$   t  | _d| _d| _i | _i | _d S N)setglobal_params
worker_idx
worker_numglobal_param2devicedevice2global_params)self r   x/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/fleet/meta_optimizers/sharding/shard.py__init__   s
   
zShard.__init__c                 C   s6   dd |D | _ || _|| _| |||\| _| _d S )Nc                 S   s   h | ]}|d  j qS r   )name.0xr   r   r   	<setcomp>&   s    zShard.setup.<locals>.<setcomp>)r	   r
   r   _split_paramsr   r   )r   params_gradsr
   r   r   r   r   setup$   s   zShard.setupc                 C   s   || j v o| || jkS N)r   _var_device_idr
   r   var_namer   r   r   	has_param1   s   
zShard.has_paramc                 C   s   |  || jkS r   r   r
   r   r   r   r   has_opt_var7   s   zShard.has_opt_varc                 C   s   |  |dkp|  || jkS r   r    r   r   r   r   has_var:   s   zShard.has_varc                 C   s   i }d}g }dd |D D ]}t |}||7 }||j|f qdd t|D }	d}
d}|D ]#\}}||d |
d  | krC|
d7 }
|	|
 | |
||< ||7 }q/||	fS )	N        c                 S   s   g | ]}|d  qS r   r   r   r   r   r   
<listcomp>D   s    z'Shard._split_params.<locals>.<listcomp>c                 S   s   i | ]}|g qS r   r   r   r   r   r   
<dictcomp>H       z'Shard._split_params.<locals>.<dictcomp>r   g      ?   )r   appendr   range)r   r   r
   r   Zparam2deviceZtotal_param_memZ	param2memparamZmemZdevice2paramsZ
device_idxZmem_accu
param_namer   r   r   r   @   s"   
zShard._split_paramsc                 C   sH   || j v r
| j | S dD ]}t|d|}|| j v r!| j |   S qdS )NZ
_moment1_0Z
_moment2_0Z_beta1_pow_acc_0Z_beta2_pow_acc_0Z_velocity_0 r   )r   resubr   r   suffix	base_namer   r   r   r   S   s   


zShard._var_device_idc                 C   s   t  }t  }i }dd | jD }|jD ]}t|rq|j D ]}|| jv r.||  d7  < qq|jD ]3}t||| js>q3|jd }|jd }|	| |	| |||< ||  d8  < | j
| | j
|< q3| D ]\}	}
|
dkrx|	|	 qk|S )Nc                 S   s   i | ]}|d qS r   r   r   r   r   r   r%   g   r&   z/Shard.find_broadcast_params.<locals>.<dictcomp>r'   r   )r   r	   opsr   descZinput_arg_namesr   Zis_fp16_cast_opZoutput_arg_namesaddr   items)r   blockZbroadcast_varsZfp16_paramsZfp16_to_fp32Zparam_usageopZ
input_nameZoutput_namer*   usager   r   r   find_broadcast_paramsb   s:   







zShard.find_broadcast_paramsc                 C   s
   |  |S r   )r   r   r   r   r   device      
zShard.devicec                 C   s
   || j v S r   )r	   r   r   r   r   is_param   r<   zShard.is_paramc                 C   s:   || j v rdS dD ]}t|d|}|| j v r dS q	dS )NTr,   r-   F)r	   r.   r/   r0   r   r   r   is_opti_var   s   

zShard.is_opti_varc                 C   s4   g }|D ]}| dd }| |r|| q|S )N@r   )splitr   r(   )r   ZgradsZgrads_in_shardZgradr*   r   r   r   filter_grads   s   

zShard.filter_gradsN)__name__
__module____qualname__r   r   r   r!   r"   r   r   r:   r;   r=   r>   rA   r   r   r   r   r      s    	r   c                   @   s   e Zd Zdd ZdS )ProgramSegmentc                 C   s:   || _ g | _d| _d| _i | _g | _i | _g | _d| _d S )Nr   r#   )	_blockZ_allreduce_varsZ
_start_idxZ_end_idxZ_param2broadcastZ_broadcast_varsZ	_cast_opsZ_fill_constant_varsZ
_param_mem)r   r7   r   r   r   r      s   
zProgramSegment.__init__N)rB   rC   rD   r   r   r   r   r   rE      s    rE   )
r.   Z/paddle.distributed.fleet.meta_optimizers.commonr   Z=paddle.distributed.fleet.meta_optimizers.sharding.fp16_helperr   Z7paddle.distributed.fleet.meta_optimizers.sharding.utilsr   __all__r   rE   r   r   r   r   <module>   s    