o
    *jB                     @   sf  d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z- G dd deZ.dd Z/G dd deeeZ0ej1ej2dG dd deZ2dS )    N)partialmethod)DeepSpeedEngine)mpuprint_rank_0)HfTrainerDeepSpeedConfig)Hooks)LoadCheckpointHook)HOOKS)BestCkptSaverHookCheckpointHook)CheckpointProcessor)Hook)LrSchedulerHookLrSchedulerProcessor)OptimizerHookOptimizerProcessor)Priority)save_checkpoint)DistributedParallelType)create_device)
get_logger)get_dist_infoget_local_rank	init_distc                   @   s    e Zd ZdZdd Zdd ZdS )DeepSpeedConfigz
    The `DeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    c                 C   s   |  |}|d u rdS |dkS )NFauto)	get_value)selfZds_key_longval r   u/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/hooks/distributed/deepspeed_hook.pyis_auto)   s   
zDeepSpeedConfig.is_autoc                    s6  g d} fdd|D }t |dkrQt|jdr|jj}nt|jdr+t|jj}ntd| d d	||    rQ d
d| |   dd|  |j	j
di }|di }|dd}	|dd}
|	dkrq|	nt||
 }	 d|  d|	 t  jdkrd j}td| ddS )z
        This stage runs after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        )$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdc                    s   g | ]	}  |r|qS r   )r!   ).0xr   r   r    
<listcomp>>   s
    
z;DeepSpeedConfig.trainer_config_finalize.<locals>.<listcomp>r   hidden_sizehidden_sizeszThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.r"   r#   ?r$   
   optionswarmupwarmup_stepswarmup_ratio        z scheduler.params.total_num_stepsz!scheduler.params.warmup_num_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.N)lenhasattrconfigr)   maxr*   
ValueErrorZ	fill_onlyZis_zero3train	optimizergetmathceilZ
fill_match
mismatchesjoin)r   argsmodelnum_training_stepsZhidden_size_based_keysZhidden_size_auto_keysr)   r-   r.   r/   r0   r=   r   r'   r    trainer_config_finalize0   sT   	


z'DeepSpeedConfig.trainer_config_finalizeN)__name__
__module____qualname____doc__r!   rB   r   r   r   r    r   #   s    r   c                 C   sL   |j }d }d|vr| rtd | j}d|d< d }d|vr"| j}||fS )Nr9   zDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)TZzero_allow_untested_optimizer	scheduler)r5   Z
is_offloadloggerinfor9   rG   )trainerZhf_deepspeed_configrA   r5   r9   lr_schedulerr   r   r    deepspeed_optim_schedl   s   rL   c                   @   sh   e Zd ZdZdd ZdddZ		ddd	Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )DeepspeedProcessorr@   c              	   C   sD   zt  }|dkrW dS t  }d|W S  ttfy!   Y dS w )N    z_mp_rank_{:02d})r   Z$get_tensor_model_parallel_world_sizeget_tensor_model_parallel_rankformatImportErrorAssertionError)r   Ztp_world_sizemp_rankr   r   r    	rank_name   s   zDeepspeedProcessor.rank_nameTc                 C   s&   |sdS t  }d|}d| dS )Nzpytorch_model.binz{:02d}Zmp_rank_z_model_states.pt)r   rP   rQ   )r   with_mpurT   rankr   r   r    get_bin_filename   s
   
z#DeepspeedProcessor.get_bin_filenameNc                 C   sN  | |j}||   tj }t||d d |dd tj|}tj	|}	t
  }
| |
}tj||}| jdks=|
rE|j||	 n
t||d d d dd | jdkrVd S |
rbtj|| j|}ntj||}tj|rtt| z	t|| W d S  ty } zt d| d| d| d t|| W Y d }~d S d }~ww )	NF)metaZ
with_model   )rY   Z	with_metazLink z to z error: z@, changing to copy the bin file, this may case more space usage.)unwrap_moduler@   rU   r   TRAINER_STATE_SUFFIXr   ospathdirnamebasenamer   Zis_unitializedrX   r>   
zero_stage_BIN_FILE_DIRisfileunlinklinkOSErrorr   errorshutilcopyfile)r   rJ   checkpoint_path_prefix
output_dirrY   Zsave_optimizersr@   _train_state_filesave_dirprefixrV   bin_fileZsrc_file	dest_fileer   r   r    save_checkpoints   sB   




z#DeepspeedProcessor.save_checkpointsc                 C   s:   ||    tj }tj|rt| tj|dd d S )NT)ignore_errors)	rU   r   r\   r]   r^   rc   removerh   rmtree)r   rJ   rj   rl   r   r   r    remove_checkpoints   s   

z%DeepspeedProcessor.remove_checkpointsc                 C   s  t j|sJ t j|}t j|}i }||   tj }t j|r,| 	|||}t
|jtr?|jj|||| d |S |}	|  }
t j|	|
}tj|dd d}|d }||j }|D ]}|| vrrtd|  qctd|  qc||jj||d |S )	N)Zload_module_strictZload_module_onlyc                 S   s   | S Nr   )Zstoragelocr   r   r    <lambda>   s    z5DeepspeedProcessor.load_checkpoints.<locals>.<lambda>)Zmap_locationmodulez
Skip key: zLoading key: )strict)r]   r^   isdirr_   r`   rU   r   r\   rc   Zload_trainer_state
isinstancer@   r   Zload_checkpointrX   r>   torchloadr[   Z
state_dictkeysr   Zload_state_dict)r   rj   rJ   Zload_all_stater{   r^   tagrY   rl   rm   ro   Z
model_file
checkpointZ
model_dictkeyr   r   r    load_checkpoints   sF   
z#DeepspeedProcessor.load_checkpointsc                 C   s.   |D ]}|j | }|j| q|j  d S rw   )Ztrain_outputsr@   backwardstep)r   rJ   Z	loss_keyscumulative_itersZ	grad_clipkZlossr   r   r    r      s   
zDeepspeedProcessor.backwardc                 C      d S rw   r   r   rJ   r   r   r    initialize_optimizer     z'DeepspeedProcessor.initialize_optimizerc                 C   r   rw   r   r   r   r   r    r     r   zDeepspeedProcessor.stepc                 C   s   dS NTr   r   r   r   r    should_save_on_rank  r   z&DeepspeedProcessor.should_save_on_rankc                 C   sz   t |jtjjst |jtjrdd |jjD }|S t |jtr9t }|j	 D ]\}}dd |jD ||< q(|S t
d)Nc                 S      g | ]}|d  qS lrr   r%   groupr   r   r    r(         z5DeepspeedProcessor.get_current_lr.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r    r(     r   z6lr is not applicable because optimizer does not exist.)r}   r9   r~   optimZ	Optimizer	deepspeedZDeepSpeedOptimizerZparam_groupsdictitemsRuntimeError)r   rJ   r   namer   r   r   r    get_current_lr
  s   z!DeepspeedProcessor.get_current_lr)Tr   )rC   rD   rE   rb   rU   rX   rr   rv   r   r   r   r   r   r   r   r   r   r    rM      s    

*'rM   )module_namec                   @   sV   e Zd ZejZ					dddZdd Zdd	 Zd
d Z	dd Z
dd Zdd ZdS )DeepspeedHookNTFc                 C   s:   || _ || _|| _|| _|d ur|dv sJ d|| _d S )N)r   rN      rZ   z zero_stage must in (0, 1, 2, 3)!)save_zero_checkpoint"deepspeed_activation_checkpointingrV   deepspeed_configra   )r   r5   r   r   rV   ra   r   r   r    __init__  s   

zDeepspeedHook.__init__c                 C   s  t  }|t}t|dkrt|d jt s|d | |t}t|dkr7t|d jt s7|d | |t}t|dkrQt|d jt sQ|d | |t	}t|dkrkt|d jt sk|d | |t
}t|dkrt|d jt s|d | || _d S )Nr   )rM   Zget_hookr   r3   r}   	processorZset_processorr   r
   r   r   )r   rJ   r   Zoptimizer_hookZ	ckpt_hookZbest_ckpt_hookZload_ckpt_hookZlr_scheduler_hookr   r   r    register_processor+  s6   









z DeepspeedHook.register_processorc                    s   j jdd _ j dd _ j jdd _ j jdd _ j jd	d
 _ j jdd _	 j jdd _
 j dd _ j dd _ j dd _ j dd _ j dd  _t fdd j jD  j _ js|d _ j dd _d S )NZbatch_size_per_gpu   Z	clip_gradg      ?r   gh㈵>
adam_beta1r+   
adam_beta2g+?adam_epsilong:0yE>weight_decayr1   Zuse_fp16Ffp16_backendampsave_on_each_nodefp16_opt_levelc                 3   s*    | ]}|d  dkr| d jV  qdS )typeZApexAMPOptimizerHook	opt_levelN)r:   r   )r%   itemr?   r   r    	<genexpr>T  s    z-DeepspeedHook.prepare_args.<locals>.<genexpr>ZO1bf16)r8   Z
dataloaderr:   Zper_device_train_batch_sizeZmax_grad_normr9   Zlearning_rater   r   r   r   Zfp16Zfp16_full_evalr   r   r   nexthooksr   )r   r?   r   r   r    prepare_argsF  s,   zDeepspeedHook.prepare_argsc                 C   s   t  \}|_| | tj| jr| j}n	tj|j| j}tj|s.t	d| j d| j
d|  t|}|| |||j| |S )Nz$No such DeepSpeed json config file: .zLoading deepspeed config from )r   Z
world_sizer   r]   r^   existsr   r>   Z	model_dirr   rH   rI   r   Ztrainer_config_processrB   r@   )r   rJ   r?   	max_steps_r   	ds_configr   r   r    get_deepspeed_config\  s    


z"DeepspeedHook.get_deepspeed_configc                 C   s<   t d t }td| |_|j|j d |jtj< d S )NZpytorchzcuda:)	r   r   r   Zdevicer@   toZparallel_groupsr   ZDP)r   rJ   Z
local_rankr   r   r    
after_initp  s
   zDeepspeedHook.after_initc                 C   r   rw   r   r   r   r   r    
before_valw  r   zDeepspeedHook.before_valc           
      C   s   t |ds
t | _n|j| _|j}|jjdi dd|_|j|j }t	
|j| }| |||}t|||\}}|j}| jd urJ| j|d d< |d dd| j_tj|j|||d\|_|_}	|_d S )	NrH   r-   r   rN   Zzero_optimizationZstager   )r@   r9   r5   rK   )r4   r   rH   cfgr8   r9   r:   Zgradient_accumulation_stepsZiters_per_epochr;   r<   Z_max_epochsr   rL   r5   ra   r   r   Z
initializer@   rK   )
r   rJ   r?   Znum_update_steps_per_epochr   r   r9   rK   r5   r   r   r   r    
before_runz  s2   


zDeepspeedHook.before_run)NTFTN)rC   rD   rE   r   Z	VERY_HIGHZPRIORITYr   r   r   r   r   r   r   r   r   r   r    r     s    
r   )3r;   r]   rh   	functoolsr   r   r~   r   Zmegatron_utilr   r   Ztransformers.deepspeedr   Zmodelscope.metainfor   Zmodelscope.trainers.hooksr   Z!modelscope.trainers.hooks.builderr	   Z4modelscope.trainers.hooks.checkpoint.checkpoint_hookr
   r   Z9modelscope.trainers.hooks.checkpoint.checkpoint_processorr   Zmodelscope.trainers.hooks.hookr   Z+modelscope.trainers.hooks.lr_scheduler_hookr   r   Z(modelscope.trainers.hooks.optimizer.baser   r   Z"modelscope.trainers.hooks.priorityr   Zmodelscope.utils.checkpointr   Zmodelscope.utils.constantr   Zmodelscope.utils.devicer   Zmodelscope.utils.loggerr   Zmodelscope.utils.torch_utilsr   r   r   r   rL   rM   Zregister_moduler   r   r   r   r    <module>   s>   I 