o
    "jܿ                     @   s@  d dl Z d dlZd dlZd dlZd dlZd dlmZ ddlmZ g dZ	d4ddZ
	d5d	d
Zd5ddZdd Zdd Zdd Zdd Zdd Zdd Zd4ddZdd Z	d6deeef fd d!Z	"d7deeef fd#d$Zdeeef fd%d&Z			'd8deeeef fd(d)Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z dS )9    N)Tuple   )_PRUNE_FUNC)fullZ	full_attnZ	core_attnFc                 C   st   | dkr| gS t  }d}| d d }||k r0| | dkr(|| || |  |d7 }||k st|}t||dS )z'Return the divisor of the given number.r      r   reverse)setaddlistsorted)numr   resultsimid r   d/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_tuner/utils.pydivisor   s   
r   c                    s    t | |||} fdd|D S )zZReturn the degree of different parallel modes by gpus and nodes num with customized range.c                       g | ]}| v r|qS r   r   ).0Zdegreecustomized_ranger   r   
<listcomp>0   s    z5dist_degree_with_customized_range.<locals>.<listcomp>)dist_degree)modenum_gpus	num_nodesr   	tuner_cfgZdist_degree_allr   r   r   !dist_degree_with_customized_range+   s   r   c                 C   s  | dv sJ g }g }| dkr&| dddkrt|dd}|S t|dd}|S | d	kri|d
kr?| ddr?tt|d
 dd}nt|dd}|D ]}d}|d  dd}|r]|| dkr]d}|sd|| qG|}|S | dkr| ddr|| }	| dddkrt|	dd}nt|	dd}n| dddkrt|dd}nt|dd}|D ]\}
d}|d  dd}|d  dd}|d  dd}|d  dd}| dd}|r||
 dkrd}|r||
 dkrd}|r||
 dkrd}|r||
 dkr|rd}|s||
 q|}|S | dkrt|dd}|S | dkr9| dddkr-t|d d dd}|S t|d d dd}|S | dkr]| dddkrSt|d d dd}|S t|d d dd}|S )zDReturn the degree of different parallel modes by gpus and nodes num.)	dp_degree	mp_degree	pp_degreesharding_degreemicro_batch_size
vpp_degreer   schedule_modememoryperformanceFr   Tr!   r   Zenable_pp_pruner   	model_cfg
num_layersNr    Zenable_mp_prunehidden_size
vocab_sizenum_attention_heads
seq_lengthuse_sequence_paralelr"   r#   global_batch_sizer$   )getr   r   rangeappend)r   r   r   r   r   Zprune_resultsr!   Z
prune_flagr*   gpus_per_noder    r+   r,   r-   r.   r/   r   r   r   r   3   s   WU
D




r   c                    s  i }d}d| v rd| d v r| d d }|du r| d n|}|du r&| d n|| d  }|dks2J dD ]}t | |d||}t||||| ||< q4t | d	d| d
 d d	}td	|||| |d	< t | dd| d
 d d}td|||| |d< | dd}	t | dddd  fdddD |d< |	dkrt|d dd|d< n
t|d dd|d< | dd}
t|
tr|
 dkr|	dkrddgnddg|d< d| d< nt|
tr|
g|d< n|
du rdg|d< ntd| dd}t|tr9| dkr|	dkrt	nt
tt	|d< |S | t	v r | g|d< |S |du r,dg|d< |S tddt	 d| tddt	 d| ) JReturn the default candidates of every hyper param which user defined autoNsearch_algoestimated_num_gpusr   nodesr4   r   )r   r    r!   r"   r$   r)   r*   r#   r0   r%   r&   sharding_stage   c                    r   r   r   )r   ZstageZsharding_stage_customized_ranger   r   r      s    z&default_candidates.<locals>.<listcomp>)r:   r   r   r'   Tr   Fuse_recomputeautorecompute_granularityz&use_recompute supports auto/True/Falsez)recompute_granularity only supports auto//
, but got )_param2ranger1   r   r   
isinstancestrlowerbool
ValueError#__SUPPORTED_RECOMPUTE_GRANULARITY__r   reversedjoin)r   
candidatesr7   r   r   ZstrategyZstrategy_customized_rangeZvpp_degree_customized_rangeZmbs_customized_ranger%   r<   r>   r   r;   r   default_candidates   s   

















rK   c           )   
   C   s  | d }|d }|d }|d }|d }|d }|d }|d }|d	 }	|d
 }
d| vs2d| d vr6| d n| d d }g }|D ]i}g }|| dkrKq@| | || }|D ]R}|| dkr_qV| | || }|D ]9}|| dkrsqj| | || }|D ] }||krq~| | t|dksJ | t| |  q~|  qj|  qVq@tt||||	|
}g }|D ]?}|D ]:}|\}}}}t|\}}}}}| d d || |  dkrq| d d ||  dkrqt|t| }| | qqdddddddd	d
d	}g } |D ]}i }!t|D ]\}"}#|#|!||" < q|  |! q	g }$|| d< | D ] }%d}&t	D ]}'|'| |%g }(|(rAd}& nq2|&sK|$ |% q,|$S )-Permutate the candidates of all hyper params.rJ   r   r    r!   r$   r#   r9   r"   r<   r>   r6   r7   r   r      r)   r0   r*   )	r   r   r   r:   rM               FT)
r3   lencopydeepcopypopr   	itertoolsproduct	enumerater   ))r   rJ   dp_degree_candidatesmp_degree_candidatespp_degree_candidatesZvpp_degree_candidatesmbs_candidatessharding_stage_candidatessharding_degree_candidatesuse_recompute_candidates recompute_granularity_candidatesr   Zvalid_degreesr    degreesZsharding_resr"   Zpp_resr!   Zdp_resr   Zother_dim_cfgsall_cfgsZvalid_degreeZother_dim_cfgr9   ZmbsZvppr<   r>   cfgmappingnew_all_cfgsnew_cfgidxvalZpruned_all_cfgscur_cfgZprunedfuncresultr   r   r   
search_all	  s   











rl   c                 C   s   d}t | tr d|  v rttd|d }|S td| dt | trH| dd}| dd}|r5|s=td| dtt||d }|S t | trQ| }|S t | tr[| g}|S | du rddg}|S td| d	)
z3Convert a param from json file to candidates range.Nr=   r   zIllegal param found: z , only support auto in str type.minmaxz/, min and max should be specified in dict type.z,, only support str, dict, list and int type.)	rB   rC   rD   r   r2   rF   dictr1   int)Zparam_from_json_fileZ	max_valueZ	param_keyZselected_rangeZcustomized_min_valueZcustomized_max_valuer   r   r   rA   |  s>   




	

rA   c           
      C   s  t | }| d dd }|d usJ g }|D ]X}t|d |d  |d< d|d< d|d< d|d< |d |d	  |d
< |d
 | d krM|d
 | d  |d< nd|d< | d d |d  |d< ||vrn|d | d krn|| q| d dd rg }|D ]i}t|}| d | d  }||d
  }|dkr||d< d|d< d |d< |d |d	  |d  |d
< |d
 | d kr|d
 | d  |d< nd|d< |d | |d< d|d< || t|}	d|	d< ||	 q{|| |S )Nr6   r7   r   r"   Zestimated_dp_degreer   r9   r    r!   r   r4   r8   r)   r0   sharding_overlapFT)rl   r1   rp   r3   rS   rT   extend)
r   rb   r7   re   taskZsharding_all_cfgsZnew_taskZgiven_num_gpusr"   Zoverlap_new_taskr   r   r   search_by_dp_estimation  sl   







rt   c                 C   sB  | |d d  rd}d}|D ]0}g d}d}|D ]}|| | | kr&d} nq|r>d|vr0|}q|d s>|d | d kr>|}q|dusEJ |dusKJ ||d d  }	| |d d  }
||d d  }|r|
r|	r|
|	 |	 }t t| }|D ]$}|d	r|| rd
| }t|| d|  d||< qzdS dS dS dS dS )z
    In single dp search scenario,
    the overlay acceleration ratio is obtained by automatically running overlap and non overlap tasks,
    and the estimated performance of the multi dp after overlap is obtained.
    Z
metric_cfgnameN)r   r    r!   r$   r#   r<   r>   r9   TFrq   r"   Zbw_Zoverlap_r   rN   )rS   rT   r   keys
startswithround)ri   r   Zhistory_cfgsZnon_overlap_cfgZraw_cfgrc   rv   ZsamekeyZbefore_overlap_performanceZoverlap_performanceZraw_performanceratioZmew_keyr   r   r   add_overlap_performance  sV   

!	r{   c                 C   s>  d|d vrdS t |d d }|ddrg d}|D ]}||vr/td| d| dq|d	 }|d
ks:J |d }|d }d}	|d
krO|d
krOd}	n|d
krZ|d
krZd}	n|d
kre|d
kred}	ndS |	dusmJ |	|v rd||	 d v r| ||	  dS d||	 d v r| ||	  dS d||	 d v rddl}
||	 d }zt|d}|
|}W d   n1 sw   Y  W n   td||	 d
 d}d}|dt	|d
  D ]}|r|| }q|| }q|r||	 d ||d < n
||	 d ||d < |

|t||	 d d dS d||	 d v rddl}||	 d }zt|d}||}W d   n	1 s>w   Y  W n   td||	 d
 d}d}|dt	|d
  D ]}|rl|| }qa|| }qa|r||	 d ||d < n
||	 d ||d < |
|t||	 d d dS dS dS dS )z"Generate args of sharding overlap.rq   r6   NF)sharding_mpsharding_ppsharding_mp_ppzOnly support r@   .r"   r   r    r!   r|   r}   r~   --r   -o.jsonr0Please check your auto tuner json whether valid.r   r(   w.yaml)rS   rT   r1   rF   rr   jsonopenloadsplitrR   dumpyaml	safe_load)res_argsrc   r   cmdZvalid_hybrid_strategyry   r"   r    r!   argr   	file_pathfcmd_cfgrv   valuer   r   r   r   gen_sharding_overlap_args  s   


r   c              	   C   s  dd }d|v s
J t |d }t | }|d|||| |d|||| |d|||| |d|||| |d|||| |d	|||| |d
|||| |d|||| |d|||| |d|||| |d|||| |d ddr|st |d d }|D ]%}d|| d v r|||  qd|| d v r|||  qd|| d v r(ddl}|| d }	zt|	d}
||
}W d   n1 sw   Y  W n   td|| d d}d}|dt	|d  D ]}|r|| }q|| }q|r|| d ||d < n
|| d ||d < |
|t|| d d qd|| d v rddl}|| d }	zt|	d}
||
}W d   n	1 sRw   Y  W n   td|| d d}d}|dt	|d  D ]}|r|| }qu|| }qu|r|| d ||d < n
|| d ||d < |
|t|| d d q|d ddr|rt |d d }|D ]/}d|| d v r|||  qd|| d v r|||  qd|| d v roddl}|| d }	zt|	d}
||
}W d   n	1 sw   Y  W n   td|| d d}d}|dt	|d  D ]}|rB|| }q7|| }q7|rV|| d ||d < n
|| d ||d < |
|t|| d d qd|| d v rddl}|| d }	zt|	d}
||
}W d   n	1 sw   Y  W n   td|| d d}d}|dt	|d  D ]}|r|| }q|| }q|r|| d ||d < n
|| d ||d < |
|t|| d d qt||| |S )zGenerate new script args.c                 S   s	  | |v r| |v rd||  d v r+||  d t ||   ||  d< |||   d S d||  d v rN||  d d t ||   ||  d< |||   d S d||  d v rdd l}||  d }d}t||  dkrp||  d	 }zt|d
}||}	W d    n1 sw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s|	| }q|| }q|r|r|t ||   n||  ||
d < n|r|t ||   n||  |	|
d < ||	t||  d d d S d||  d v rdd l	}||  d }d}t||  dkr||  d	 }zt|d
}|
|}	W d    n	1 s(w   Y  W n   td||  d d}
d }|
d t|
d  D ]}|sV|	| }qK|| }qK|rs|rj|t ||   n||  ||
d < n|r~|t ||   n||  |	|
d < ||	t||  d d d S d S | dkrB| |v rBd|d d v r|d n|d d }||d  |d  }d|d d v r|d d t | |d d< ||d  d S d|d d v r |d d d t | |d d< ||d  d S d||  d v rdd l}||  d }d}t||  dkr$||  d	 }zt|d
}||}	W d    n	1 s;w   Y  W n   td||  d d}
d }|
d t|
d  D ]}|si|	| }q^|| }q^|r|r{|t | n|||
d < n|r|t | n||	|
d < ||	t||  d d d S d||  d v r@dd l	}||  d }d}t||  dkr||  d	 }zt|d
}|
|}	W d    n	1 sw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s	|	| }q|| }q|r"|r|t | n|||
d < n|r+|t | n||	|
d < ||	t||  d d d S d S | dkr| |v rz#d|d d v rZ|d n|d d }||d  |d  |d  }W n   Y d S d|d d v r|d d t | |d d< ||d  d S d|d d v r|d d d t | |d d< ||d  d S d||  d v rXdd l}||  d }d}t||  dkr||  d	 }zt|d
}||}	W d    n	1 sw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s!|	| }q|| }q|r:|r3|t | n|||
d < n|rC|t | n||	|
d < ||	t||  d d d S d||  d v rdd l	}||  d }d}t||  dkr|||  d	 }zt|d
}|
|}	W d    n	1 sw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s|	| }q|| }q|r|r|t | n|||
d < n|r|t | n||	|
d < ||	t||  d d d S d S d S d S )Nr   r   r   r   =r    r:   r   r   r   r   r(   r   r   local_batch_sizer0   r)   r"   r   gradient_accumulation_stepsr#   )rC   rr   r   rR   r   r   rF   r   r   r   r   )r   r   rc   r   r   r   r   prefixr   r   rv   r   ry   r   r0   r   r   r   r   r   _gen_new_argd  s   $








#





?z"gen_new_args.<locals>._gen_new_argZrun_cmdr   r    r!   r$   r#   r"   r9   r<   r>   r   r   Zsearch_stageNr   r   r   r   r   r   r   r   r   r(   r   r   Zrun_best_stage)rS   rT   r1   rr   r   r   r   rF   r   rR   r   r   r   r   )Zraw_argsrc   r   Zrun_bestr   r   r   r   r   r   r   r   rv   r   ry   r   r   r   r   gen_new_argsa  s     




r   c                 C   s   t | }d|v rhd|d v rh|d dksJ |d dksJ |d |d  |d  }||d	 krId
dd t|D |j_|jjrGd|j_|S d
dd t|d	 D |j_||d	  }| d| |j_|S )zGenerate new running context.r6   r7   r   r   r9   r    r!   r"   r4   ,c                 S      g | ]}t |qS r   rC   r   r   r   r   r   r         zgen_new_ctx.<locals>.<listcomp>z1:1c                 S   r   r   r   r   r   r   r   r     r   :)rS   rT   rI   r2   argsZdevicesZmasternnodes)ctxri   r   Znew_ctxZactual_cardsr   r   r   r   gen_new_ctx  s0   
r   workerlog.0step/sreturnc              	   C   sr  	 d}| d | }t j|sdS t|d}|d | }d}d}g }	| }
|
D ]6}t||}t||tj}|r[d}|d D ]}zt|}|		| W  n   Y qA|dus[J |r_d}q)|rjd	}||d> B }|	ssd	}|dB }n,t
|	d
k r~|	d }n!t
|	dk rt|	dd t
|	dd  }n
t|	dd d
 }t|d}W d   n1 sw   Y  ||f}|S )z$For extracting metric from log file.r   r?   )        r   r   !:* *(\d+(\.\d*)?)|(\d+(\.\d*)?) *zOut of memoryNr   r   
   r(      	   rN   )ospathexistsr   	readlinesrefindall
IGNORECASEfloatr3   rR   sumrx   )r   filetarget_metricerr_codetarget_filer   re_metric_patternZre_out_of_memory_patternZout_of_memory_flagmetric_listlineslinemetricZout_of_memoryr   item
metric_averesr   r   r   read_metric_log  sZ   



"*r   interval_runtimec              	   C   sB  | d | }t j|sd S t|d}|d | }g }| }|D ]*}t||}	|	rLd }
|	d D ]}zt|}
||
 W  n   Y q2|
d usLJ q"|sZd }	 W d    d S t	|dk re|d }n!t	|dk r|t
|dd  t	|dd   }n
t
|d	d  d }t|d
}W d    |}|S 1 sw   Y  |}|S )Nr?   r   r   r   r   r(   r   r   r   rN   )r   r   r   r   r   r   r   r   r3   rR   r   rx   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   read_step_time_log@  sJ   


"
r   c                 C   s   t j| |}t j|sdS g }g }g }t|dK}t|}d}|s6t|}	t|	dkr4d|	v r4d}|r$|D ]%}	t|	dkr]|	\}
}}}}}|	t
|
 |	t
| |	t
| q8W d    n1 shw   Y  t|dfS )N)r   Tr   FrO   memory_usedT)r   r   rI   r   r   csvreadernextrR   r3   rp   rn   )r   r   log_pathr   Zutilization_gpuZindexsr   r   flagrowindexZutil_gpu_Zmem_usedr   r   r   read_memory_logh  s0   
r   	0.gpu.logc                 C   s   d}t | D ] \}}}|D ]}|dsqt| ||\}	}
|
r&|
d@ |B }qqt| ||\}}
|
|B }zt| |\}}|d> |B }W n
   d}d|B }Y |||fS )z
    extract metric and max memory usage from log file
    return:
        metric: average metric of last 10 steps
        memory: max memory used
        err_code: 00: no error, 01: no metric, 10: out of memory, 100: no memory log
    r   Z	workerlogr   r   rM   )r   walkrw   r   r   )r   Zmetric_filer   Zmemory_filer   rootdirsfilesr   r   Zmetric_flagZ
res_metricZ
res_memoryZmemory_flagr   r   r   read_log  s&   


r   c                 C   sl   g }t d| d d D ](}| | dkr3t || d d D ]}| | | dkr2|||| | | f qq|S )zAReturn the combinations of three numbers which product is target.r   r:   r   r   )r2   r3   )targetr   r   jr   r   r   three_mul_combinations  s   r   c                 C   sx   t |d }t|ddD ]+}|| dkr7|| }t |d }|| dkr.|d8 }|| dks$|||| f  S qtd)z&Return middle candidates of dp, mp, ppgUUUUUU?r   r(   g      ?r   zCannot distribute GPUs equally)rx   r2   rF   )r   r   r   startr   	remainingr   r   r   r   gbs_dp_mp_pp_candidates  s   r   c                    s   i }| d }| d }|dksJ |  di  dd}|dkrft| ||\ } }dg|d< |g|d	< g|d
< |g|d< dg|d< dg|d< dg|d< dd tddD |d<  fdd|d D |d< |S )r5   r   r8   r   r)   r0   r=   r   r   r    r!   r"   r9   Fr<   Nr>   c                 S   s   g | ]}d | qS )r   r   r   r   r   r   r     r   z*gbs_default_candidates.<locals>.<listcomp>r   r#   c                    s   g | ]}  | qS r   r   )r   eZdp_candidateZpp_candidater   r   r     s    
)r1   r   r2   )r   rJ   r   r   r0   Zmp_candidateZsharding_dgree_candidater   r   r   gbs_default_candidates  s0   








r   c                 C   s   | d }|d }|d }|d }|d }|d }|d }|d }|d	 }	t t||||||||	}
dddddddd	d
}g }|
D ]&}i }t|D ]
\}}|||| < qI|d |d  |d  |d< || qA|S )rL   rJ   r   r    r!   r#   r9   r"   r<   r>   )r   r   r   r:   rN   rM   rO   rP   r0   )r   rV   rW   rX   r3   )r   rJ   rY   rZ   r[   r\   r]   r^   r_   r`   rb   rd   re   rc   rf   rg   rh   r   r   r   gbs_search_all  sV   r   c              
   C   s:  g }g d}ddg}t | d}t|}t|}W d   n1 s#w   Y  |D ]p}i }|D ] }	||	d}
zt|
||	< W q0 tyP   t|	 d|
 w |dd}| dv sfJ | d	| | d
k|d< |dd}|dks| tv sJ | dt d| d|dkr|nd|d< |	| q*|S )zLoad the configs from csv file.)r   r    r!   r$   r#   r"   r9   r<   r>   r   Nr   z must be integer, but got )truefalsez  must be true or false, but got r   z must be one of r@   r   )
r   r   
DictReaderr   r1   rp   rF   rD   rG   r3   )Zconfigs_csvZall_configsZextract_keys_integerZextract_keys_stringr   r   Zraw_configsZ
raw_configconfigZextract_keyrh   r<   r>   r   r   r   load_configs_from_csv  sD   	

r   )F)N)r   r   )r   r   )r   r   r   )!rS   r   rV   r   r   typingr   Zpruner   rG   r   r   r   rK   rl   rA   rt   r{   r   r   r   r   rp   r   r   rE   r   r   r   r   r   r   r   r   r   r   r   <module>   sV   


hns!?6
O   

?

(
$3