o
    *j                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ ddlmZ ddlmZ eje
jdG dd deZdS )    N)Union)DeepSpeedEngine)mpu)nn)Trainers)
TorchModel)DistributedPlug)BertLayerNorm)TextGenerator)ModeKeys   )TRAINERS)NlpEpochBasedTrainer)module_namec                   @   sd   e Zd Zdeejef fddZdeejef fddZdd Z	dd	 Z
d
d Zdd Zdd ZdS )PlugTrainerreturnc                 C   sb   t tjdd}tjdd}tjdd}t| j|f||d| jj}| j| |j_|jS )NZ
LOCAL_RANKZMASTER_ADDRz	127.0.0.1ZMASTER_PORTZ29500)	master_ipmaster_port)	intosenvirongetr   Z	model_dircfgmodelunwrap_module)selfrankr   r   r    r   e/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/nlp/plug_trainer.pybuild_model   s   zPlugTrainer.build_modelc                 C   s   ddl m} ||S )Nr   )DistributedDataParallel)Z modelscope.utils.nlp.distributedr!   )r   r   ZDDPr   r   r   to_parallel#   s   zPlugTrainer.to_parallelc                 C   s   dg i}g dd}|  D ]?}t|ttjjfr*|d dd t|j	 D  q|d dd t|j
 D  |d dd t|j
 D  q||fS )Nparams        )r#   weight_decayc                 S   s   g | ]}|d ur|qS )Nr   ).0pr   r   r   
<listcomp>-   s
    zIPlugTrainer._get_params_for_weight_decay_optimization.<locals>.<listcomp>c                 S   s4   g | ]\}}|d urd|vrd|vr|dkr|qS )NZ
mask_scoremaskbiasr   r&   nr'   r   r   r   r(   2   s    c                 S   s$   g | ]\}}|d ur|dkr|qS )Nr*   r   r+   r   r   r   r(   7   s
    )modules
isinstancer	   torchr   Z	LayerNormextendlist_parametersvaluesitems)r   moduleZweight_decay_paramsZno_weight_decay_paramsZmodule_r   r   r   )_get_params_for_weight_decay_optimization'   s   


z5PlugTrainer._get_params_for_weight_decay_optimizationc                 C   sV  | j \}}| jjdd }|d ur|di }ddlm} | j}|jjj	j
}|jjj	jj}|jjjj}	g }
|
t| |7 }
|
t| |7 }
|
t| |	7 }
|
D ]}|d D ]
}t|dsbd|_qXqR||
|j|jd}| jjd	d }|d ur|d usJ |di }dd
lm} | j}|||j|j| ||jdd}|| _|| _| j| j||fS )N	optimizeroptionsr   )DeepSpeedCPUAdamr#   model_parallelF)lrr%   lr_scheduler)AnnealingLRr   )Zstart_lrZwarmup_iter	num_itersdecay_styleZ	last_iter)Z
optimizersr   trainr   popZdeepspeed.ops.adamr9   r   r5   Zbert
embeddingsencoderlayerdecoderr1   r6   hasattrr:   r;   r%   Z&modelscope.models.nlp.plug.AnnealingLRr=   Z	max_itersZwarmupr?   r7   r<   )r   r7   r<   Zoptimizer_cfgZoptim_optionsr9   r   rB   ZlayersZ
dec_layersZparam_groupsZparam_groupparamZlr_scheduler_cfgZ
lr_optionsr=   r>   r   r   r   create_optimizer_and_scheduler>   s^   

z*PlugTrainer.create_optimizer_and_schedulerc           	      C   s   |  \}}d}ttj|||f|jd|d||}tj|  tj|jd}d|||k< tj|tj|jd}|	d
|}|||fS )N   )device)ZdtyperJ   r$   r   )sizer/   ZtrilZonesrJ   viewfloatZarangelongZ	unsqueezeZ	expand_as)	r   dataZ	eod_token
batch_sizeZ
seq_lengthZatt_mask_batchattention_maskZ	loss_maskposition_idsr   r   r   _get_masks_and_position_idsn   s(   

z'PlugTrainer._get_masks_and_position_idsc              	   C   s   t j| _t| jjdd}|d d d d df  }|d d d dd f  }| |d\}}}t| jjdd r=| }||d d |d	 ||||d
\}	}
t	
|
  |}|d}t|d| |  }d|i| _| j| j d S )Ncheckpoint_activationsTlabelsr   rI   r   Zfp16	input_idsrQ   )rT   loss)r   ZTRAIN_modegetattrr   r@   
contiguousrS   Zhalfr   Zvocab_parallel_cross_entropyrM   rL   r/   sumZtrain_outputsZ
log_bufferupdate)r   r   ZinputsrT   Z
tgt_tokensZ
tgt_labelsZtgt_attention_maskZdec_loss_maskrR   _outputZlossesrW   r   r   r   
train_step   s6   

	

zPlugTrainer.train_stepc                 C   sr  t | jtr| jj}n| j}|  | | jjj}|d jd }t	|| j
jd }t  |d  }|d  }|d  }|d d dd f  }	|d |g}
||
}|d }|	   }g |d< g |d< t|D ]8}|| d }d	|||d k< |   }| j
j|| d
d}| j
j|d
d}|d | |d | qnW d    |S 1 sw   Y  |S )NrV   r   rQ   rU   rI   ZpredictionspredsZtgtsd   T)Zskip_special_tokens)r.   r   r   r5   evalr   configZoriginal_vocab_sizeshaper
   Zeval_preprocessorZnlp_tokenizerr/   Zno_gradrN   byterZ   Ztranslate_batchcpunumpytolistrangedecodeappend)r   rO   r   Z
vocab_sizerP   Zbeam_generatortokensZpadding_maskZ
target_idsZtarget_labelsZencoder_inputsresultZ	pred_listZtarget_listiZpred_idsZgold_stringZpred_stringr   r   r   evaluation_step   sL   




zPlugTrainer.evaluation_stepN)__name__
__module____qualname__r   r   Moduler   r    r"   r6   rH   rS   r_   ro   r   r   r   r   r      s    0r   )r   typingr   r/   Z	deepspeedr   Zmegatron_utilr   r   Zmodelscope.metainfor   Zmodelscope.models.baser   Zmodelscope.models.nlp.plugr   Z#modelscope.models.nlp.plug.backboner	   Z$modelscope.models.nlp.plug.generatorr
   Zmodelscope.utils.constantr   baser   Znlp_trainerr   Zregister_moduleZnlp_plug_trainerr   r   r   r   r   <module>   s     