o
    *jZ                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% e% Z&eG dd dZ'ej(ej)dG dd de!Z*dS )    N)	dataclass)AnyCallableDictListOptionalTupleUnion)nn)
DataLoaderDataset)tqdm)Trainers)Model
TorchModel)BertForTextRanking)	MsDataset)Preprocessor)TRAINERS)NlpEpochBasedTrainer)DEFAULT_MODEL_REVISION)
get_loggerc                   @   s6   e Zd ZdZdeeeef  deeef fddZdS )GroupCollatorz
    Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg]
    and pass batch separately to the actual collator.
    Abstract out data detail for the model.
    featuresreturnc                 C   sr   t |d trt|g }|d  }dd |D }|D ]}| D ]\}}|| | q!qdd | D }|S )Nr   c                 S   s   i | ]}|t  qS  )list).0kr   r   m/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/nlp/text_ranking_trainer.py
<dictcomp>&       z*GroupCollator.__call__.<locals>.<dictcomp>c                 S   s    i | ]\}}|t j|d dqS )r   )dim)torchcat)r   r   vr   r   r   r    *   s     )
isinstancer   sumkeysitemsappend)selfr   r(   batcheler   r%   r   r   r   __call__"   s   
zGroupCollator.__call__N)	__name__
__module____qualname____doc__r   r   strr   r.   r   r   r   r   r      s    *r   )module_namec                       s   e Zd Zdddddddddef
deeeeje	f  dee	 dee
 dee
 dee
 deeeef  d	eeeef  d
ee deejjejjjf dee	 f fddZdddZdddZ	ddee	 dee	ef fddZ  ZS )TextRankingTrainerN)NNmodelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionc                    s:   |d u rt  }t jd|||||||	|||
d
| d S )N)
r6   r7   r8   r9   r:   r=   r>   r;   r<   r?   r   )r   super__init__)r+   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   kwargs	__class__r   r   rA   1   s    
zTextRankingTrainer.__init__
   c           	      C   sv   d}|  D ].}t|dd dd}d}t|d | D ]\}}t|d dkr/d|d  } nq||7 }q|t| S )	Nr   c                 S      | d S Nr   r   xr   r   r   <lambda>T       z0TextRankingTrainer.compute_mrr.<locals>.<lambda>Tkeyreverse   1g      ?)valuessorted	enumerater3   len)	r+   resultr   mrrres
sorted_resarindexr-   r   r   r   compute_mrrQ   s   
zTextRankingTrainer.compute_mrrc           	      C   s   d}ddl m} | D ],}t|dd dd}tdd |D g}td	d |D g}|t||||d
7 }q|t| }|S )Nr   )
ndcg_scorec                 S   s   dgS rG   r   rH   r   r   r   rJ   a   s    z1TextRankingTrainer.compute_ndcg.<locals>.<lambda>TrL   c                 S      g | ]}|d  qS )rO   r   r   r-   r   r   r   
<listcomp>b   r!   z3TextRankingTrainer.compute_ndcg.<locals>.<listcomp>c                 S   r]   )r   r   r^   r   r   r   r_   c   r!   r   )Zsklearnr\   rQ   rR   nparrayfloatrT   )	r+   rU   r   ndcgr\   rW   rX   labelsZscoresr   r   r   compute_ndcg]   s   zTextRankingTrainer.compute_ndcgcheckpoint_pathr   c              	      s  | j | jfi | jjdi d| ji| _|dur t|}n| j	}|
  d}t }t }t }d}	tj r;dnd |  tt| jD ]\}
}z fdd	| D }W n tyl   d
d	 | D }Y nw t }t % |d   }|d   }|di |}W d   n1 sw   Y  t }|	|| 7 }	|| jj7 }dd }|d d   }|| }|| || || qItd |	|	d |  i }t!|||D ]\}}}||vrg ||< || "||f q|D ]}t#|| dd d||< qt }| j$D ][}|%drI|&dd }t'|}| j(||d}td || |"||f q|%drs|&dd }t'|}| j)||d}td || |"d|f qt*d| t+|S )a  evaluate a dataset

        evaluate a dataset via a specific model from the `checkpoint_path` path, if the `checkpoint_path`
        does not exist, read from the config file.

        Args:
            checkpoint_path (Optional[str], optional): the model path. Defaults to None.

        Returns:
            Dict[str, float]: the results about the evaluation
            Example:
            {"accuracy": 0.5091743119266054, "f1": 0.673780487804878}
        Z
dataloaderZ
collate_fnNr   g        zcuda:0cpuc                    s,   i | ]\}}|t |tjr| n|qS r   )r&   r#   ZTensortor   rM   valZdevicer   r   r       s
    z/TextRankingTrainer.evaluate.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r   r   rj   r   r   r   r       s    re   qidc                 S   s   t | dt |   S )NrO   )ra   exp)logitsr   r   r   sigmoid   s   z,TextRankingTrainer.evaluate.<locals>.sigmoidro   z/Inference time = {:.2f}s, [{:.4f} ms / sample] i  c                 S   rF   rG   r   rH   r   r   r   rJ      rK   z-TextRankingTrainer.evaluate.<locals>.<lambda>)rM   rV   @r`   z{}: {}rd   zMetric %s not implementedr   ),Z_build_dataloader_with_datasetr<   cfgZ
evaluationgetZeval_data_collatorZeval_dataloaderr   Zfrom_pretrainedr6   evalr   r#   cudaZis_availableri   rS   r   r)   RuntimeErrortimeZno_gradpopdetachrh   numpyZ
batch_sizeZsqueezetolistextendloggerinfoformatzipr*   rR   Zmetrics
startswithsplitintr[   rf   NotImplementedErrordict)r+   rg   argsrB   r6   Ztotal_samplesZlogits_listZ
label_listZqid_listZtotal_spent_timeZ_stepr,   Zinfer_start_timeZ	label_idsZqidsZoutputsZinfer_end_timerp   ro   Zrank_resultrm   ZscorelabelZeval_outputsZmetricr   rV   rd   r   rl   r   evaluateh   s   







zTextRankingTrainer.evaluate)rE   )N)r/   r0   r1   r   r   r	   r   r
   Moduler3   r   r   r   r   r   r#   ZoptimZ	OptimizerZlr_schedulerZ_LRSchedulerrA   r[   rf   r   rc   r   __classcell__r   r   rC   r   r5   .   sT    	

 

r5   )+rx   dataclassesr   typingr   r   r   r   r   r   r	   r{   ra   r#   r
   Ztorch.utils.datar   r   r   Zmodelscope.metainfor   Zmodelscope.models.baser   r   Zmodelscope.models.nlpr   Z modelscope.msdatasets.ms_datasetr   Zmodelscope.preprocessors.baser   Zmodelscope.trainers.builderr   Zmodelscope.trainers.nlp_trainerr   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   r~   r   Zregister_moduleZnlp_text_ranking_trainerr5   r   r   r   r   <module>   s,   $