o
    *j                     @   s   d dl Z d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d
dlmZ d
dlmZ eje	jdG dd deZeje	jdG dd deZdS )    N)TupleUnion)nn)Trainersbuild_metric)Model
TorchModel)Preprocessor)Config)ModeKeys   )TRAINERS)EpochBasedTrainer)module_namec                       sd   e Zd ZdZ fddZdd ZdefddZd	ee	j
ef fd
dZd	eeef fddZ  ZS )NlpEpochBasedTrainera  Add code to adapt with nlp models.

    This trainer will accept the information of labels&text keys in the cfg, and then initialize
    the nlp models/preprocessors with this information.

    Labels&text key information may be carried in the cfg like this:

    >>> cfg = {
    >>>     ...
    >>>     "dataset": {
    >>>         "train": {
    >>>             "first_sequence": "text1",
    >>>             "second_sequence": "text2",
    >>>             "label": "label",
    >>>             "labels": [1, 2, 3, 4],
    >>>         },
    >>>         "val": {
    >>>             "first_sequence": "text3",
    >>>             "second_sequence": "text4",
    >>>             "label": "label2",
    >>>         },
    >>>     }
    >>> }

    To view some actual finetune examples, please check the test files listed below:
    tests/trainers/test_finetune_sequence_classification.py
    tests/trainers/test_finetune_token_classification.py
    c                    s4   d | _ d | _d | _d | _d | _t j|i | d S N)label2idid2label
num_labels
train_keys	eval_keyssuper__init__)selfargskwargs	__class__ `/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/nlp_trainer.pyr   2   s   zNlpEpochBasedTrainer.__init__c                 C   s   z |j jj}dd t|D | _dd t|D | _t|| _W n	 ty)   Y nw dd }||	d| _
||	d| _t| jdkrK| j
| _d S d S )	Nc                 S   s   i | ]\}}||qS r   r   .0idxlabelr   r   r    
<dictcomp>=       z7NlpEpochBasedTrainer.prepare_labels.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r   r   r!   r   r   r    r%   >   r&   c                 S   sD   | d urt | dd t | dd t | dd d}ni }dd | D S )Nfirst_sequencesecond_sequencer$   )r'   r(   r$   c                 S   s   i | ]\}}|d ur||qS r   r   )r"   kvr   r   r    r%   M   s    zSNlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keys.<locals>.<dictcomp>)getattritems)cfgZ
input_keysr   r   r    build_dataset_keysC   s   


z?NlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keyszdataset.trainzdataset.valr   )Zdatasettrainlabels	enumerater   r   lenr   AttributeErrorZsafe_getr   r   )r   r-   r0   r.   r   r   r    prepare_labels:   s   
z#NlpEpochBasedTrainer.prepare_labelsr-   c                 C   sf   | j d ur
|  |}| | t|jds1t|jds1| jd ur&| j|jd< | jd ur1| j|jd< |S )Nr   r   )Zcfg_modify_fnr4   hasattrmodelr   r   )r   r-   r   r   r    rebuild_configT   s   




z#NlpEpochBasedTrainer.rebuild_configreturnc                 C   sb   | j du ri nd| j i}tj| jfd| ji|}t|tjs't|dr'|j	S t|tjr/|S dS )z Instantiate a pytorch model and return.

        By default, we will create a model using config from configuration file. You can
        override this method in a subclass.

        Nr   cfg_dictr6   )
r   r   from_pretrained	model_dirr-   
isinstancer   Moduler5   r6   )r   Z
model_argsr6   r   r   r    build_model`   s   z NlpEpochBasedTrainer.build_modelc                 C   s   | j du ri nd| j i}tj| jf| jtjd|| jtjdd}tj| jf| jtjd|| j	tjdd}||fS )zBuild the preprocessor.

        User can override this method to implement custom logits.

        Returns: The preprocessor instance.

        Nr   )r9   Zpreprocessor_modeT)modeZuse_fast)
r   r
   r:   r;   r-   r   ZTRAINr   EVALr   )r   
extra_argsZtrain_preprocessoreval_preprocessorr   r   r    build_preprocessorq   s<   


z'NlpEpochBasedTrainer.build_preprocessor)__name__
__module____qualname____doc__r   r4   r   r7   r   r   r=   r	   r>   r   r
   rC   __classcell__r   r   r   r    r      s    r   c                   @   s   e Zd ZdddZdS )VecoTrainerNc                    s  ddl m} |durddlm} |||  | j  tj| _	i }| j
du r2| j| j| j	| jd| _
d}d}t| j
|rH| j
| t| j
j}	 | j| j
fi | jjdi | _| j| _d	d
 | jD }|D ]}| |_qh| | j| t|D ]#\}	}
d| d|vri |d| d< |
 |d| d | j|	 < qy|d7 }||k r| j
| nnqI| jD ]$fdd
| D }|d  D ] t fdd
|D | < qq|S )z1Veco evaluates the datasets one by one.

        r   )VecoDatasetN)LoadCheckpointHook)Z	model_cfgr?   Zpreprocessorr   TZ
dataloaderc                 S   s   g | ]}t |qS r   r   r"   Zmetricr   r   r    
<listcomp>       z(VecoTrainer.evaluate.<locals>.<listcomp>zeval_dataset[]c                       g | ]}|  qS r   r   )r"   m)metric_namer   r    rM      rN   c                    rP   r   r   rL   )keyr   r    rM      rN   ) Z1modelscope.msdatasets.dataset_cls.custom_datasetsrJ   Zmodelscope.trainers.hooksrK   Zload_checkpointr6   evalr   r@   _modeZeval_datasetZbuild_dataset_from_cfgr-   rB   r<   Zswitch_datasetr2   ZdatasetsZ_build_dataloader_with_datasetZ
evaluationgetZeval_dataloaderZdata_loaderZmetricstrainerZevaluation_loopr1   evaluatevalueskeysnpZaverage)r   Zcheckpoint_pathrJ   rK   Zmetric_valuesr#   Zdataset_cntZmetric_classesrQ   Zm_idxZ
metric_clsZall_metricsr   )rS   rR   r    rX      sb   



zVecoTrainer.evaluater   )rD   rE   rF   rX   r   r   r   r    rI      s    rI   )ostypingr   r   numpyr[   Ztorchr   Zmodelscope.metainfor   Zmodelscope.metrics.builderr   Zmodelscope.models.baser   r	   Zmodelscope.preprocessorsr
   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   baser   rW   r   Zregister_moduleZnlp_base_trainerr   Znlp_veco_trainerrI   r   r   r   r    <module>   s    ~