o
    *j1                     @   sz  d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- e+ Z.e j/dd Z0G dd dej1j2j3Z4eG dd dZ5G dd deZ6e"j7ej8dG dd de$Z9dS )    N)defaultdict)	dataclass)LooseVersion)partial)CallableDictListOptionalTupleUnion)nn)
DataLoaderDataset)DistributedSampler)Trainers)
TorchModel)	MsDataset)Preprocessor)TRAINERS)EpochBasedTrainer)worker_init_fn)DEFAULT_MODEL_REVISIONModeKeys)
get_logger)get_dist_infoc              	   g   sv    | du r
dV  dS t |dkrtt| g|R d } tj }tj|  zdV  W tj| dS tj| w )zgContext manager which seeds the NumPy PRNG with the specified seed and
    restores the state afterwardNr   g    .A)leninthashnprandomZ	get_stateseedZ	set_state)r    Z
addl_seedsstate r"   w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/nlp/faq_question_answering_trainer.py
numpy_seed   s   
r$   c                   @   s>   e Zd Zdd Zdd ZdddZdd	 Zd
d Zdd ZdS )EpisodeSamplerc
              
      s  |_ |_|_|_|_|_|_|	_d_d_	d_
d_d_|_i }
|}|jdkrEtdt| dt|  t|D ]B\}}||v rRqI|j	}|j
}|d u sh|d u riqI|jj}|
|i }||
|< ||g }|||< || qI|
_ttj _fd	d
jD  tt  fdd
tjD _d}j  D ]\}}|  D ]
\}}|t|7 }qq|jdkrt|j d d| dtj  |j_d S )Nr   labeltextdomaindefault_domaintrainznum. of bad sample ids:/c                    s   g | ]	}t  j| qS r"   )r   domain_label_tokens).0r(   selfr"   r#   
<listcomp>W   s    z+EpisodeSampler.__init__.<locals>.<listcomp>c                    s   g | ]
\}} |  qS r"   r"   )r-   ir(   )domain_label_cnttotalr"   r#   r0   [   s    
z: label size:z, data size:z,                 domain_size:)!datasetk_shotn_wayr_query
min_labelsr    rank
world_sizesteplabel_field
text_fielddomain_fieldr)   episodeget_bad_sampleidsmodeloggerinfor   	enumerate
_get_fieldgetappendremove_invalid_labelsr,   sortedlistkeysdomainsfloatsumdomain_to_probitems)r/   r4   r5   r6   r7   r8   r    n_iterr9   r:   domain_label_sampleidZbad_sample_idsZsample_indexsampler&   r'   r(   Zlabel_tokensZsample_list	data_sizetokensr"   )r2   r/   r3   r#   __init__1   st   




zEpisodeSampler.__init__c              	   c   s<   t | jD ]}| j| j | j }t|| jf { |  jd7  _tjj	| j
| jdddd }tt| j|  }t| jt|}tjj	|t|t|dd }g }|d | D ](}| j| | }	| j| j }
tt|	t|
}tjj	|	|dd }|| qYdd |D }|V  W d    n1 sw   Y  qd S )N   F)psizereplacer   )rY   rZ   c                 S   s   g | ]}t |qS r"   )r   )r-   nr"   r"   r#   r0          z+EpisodeSampler.__iter__.<locals>.<listcomp>)ranger?   r;   r:   r9   r$   r    r   r   choicerL   rO   rI   rJ   r,   rK   minr6   r   tolistr5   r7   r   extend)r/   r1   r    r(   Z
all_labelsNlabelsbatchr&   
candidatesZnum_samplesKtmpr"   r"   r#   __iter__i   sJ   
zEpisodeSampler.__iter__Nc                 C   s    | ||}|d urt|S d S N)rF   str)r/   objkeydefaultvaluer"   r"   r#   rE      s   zEpisodeSampler._get_fieldc           	      C   s   t  }t  }i }| D ]5\}}i ||< | D ]\}}t|| jk r)|| q||| |< qt|| | jk rA||= || q|S ri   )setrP   r   r5   addr8   )	r/   rR   Zremoved_labelsZremoved_domainsresultr(   Zlabel_to_samplesr&   samplesr"   r"   r#   rH      s   
z$EpisodeSampler.remove_invalid_labelsc                 C   s  t dd }t|D ]'\}}| j|| j| jd}| j|| jdd}|| | || || jf q
g }g }| D ]C\}}	g }
g }|	 D ],\}}t	dd |D }t
|dkrd|
dd |D  qF|d	d |d
d  D  qF||
 || q:t	t|}|t	t| |S )Nc                   S   s   t tS ri   )r   rJ   r"   r"   r"   r#   <lambda>   s    z2EpisodeSampler.get_bad_sampleids.<locals>.<lambda>)rm    c                 S      g | ]}|d  qS )rW   r"   r-   itemr"   r"   r#   r0      r\   z4EpisodeSampler.get_bad_sampleids.<locals>.<listcomp>   c                 S   ru   r   r"   rv   r"   r"   r#   r0      r\   c                 S   ru   ry   r"   rv   r"   r"   r#   r0      r\   rW   )r   rD   rE   r>   r)   r=   rG   r<   rP   ro   r   ra   rJ   update)r/   r4   Zdomain_text_to_samplesZlocal_indexrS   r(   idxZoverall_conflict_resultZoverall_duplicate_resultZtext_to_samplesZconflict_resultZduplicate_resultr'   rr   Z	label_cntrq   r"   r"   r#   r@      s0   

z EpisodeSampler.get_bad_sampleidsc                 C   s   | j S ri   )r?   r.   r"   r"   r#   __len__   s   zEpisodeSampler.__len__ri   )	__name__
__module____qualname__rV   rh   rE   rH   r@   r|   r"   r"   r"   r#   r%   /   s    8
r%   c                   @   s,   e Zd ZdefddZd	ddZdd ZdS )
FewShotCollatorpreprocessorc                 C   s"   || _ || _d| _d| _d| _d S )Nr&   r'   r(   )r   r5   r<   r=   r>   )r/   r   r5   r"   r"   r#   rV      s
   
zFewShotCollator.__init__Nc                 C   s   t |||p|||S ri   )getattrrF   )r/   rk   rl   rm   r"   r"   r#   rE      s   zFewShotCollator._get_fieldc                    s   t t}|D ]}|j}|j |  | qg }g }g }| D ].\ }|d j }	|jd  }
||
 | fdd|	D  | gt	|
  q(|||d}j
|tjd}|S )Nc                    s   g | ]
}j |j iqS r"   )r=   r<   )r-   tr&   r/   r"   r#   r0      s
    z,FewShotCollator.__call__.<locals>.<listcomp>)	query_setsupport_setZquery_label)rA   )r   rJ   rE   r=   r<   rG   rP   r5   ra   r   r   r   Z	INFERENCE)r/   rr   Zlabel_to_textsrS   r'   r   Zquery_labelsr   Ztextssqrq   r"   r   r#   __call__   s,   
zFewShotCollator.__call__ri   )r}   r~   r   r   rV   rE   r   r"   r"   r"   r#   r      s    
r   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )

FaqDatasetc                 C   s
   || _ d S ri   data)r/   r   r"   r"   r#   rV         
zFaqDataset.__init__c                 C   s
   | j | S ri   r   )r/   r1   r"   r"   r#   __getitem__   r   zFaqDataset.__getitem__c                 C   s   || j |< d S ri   r   )r/   rl   rn   r"   r"   r#   __setitem__   s   zFaqDataset.__setitem__c                 C   s
   t | jS ri   )r   r   r.   r"   r"   r#   r|      r   zFaqDataset.__len__N)r}   r~   r   rV   r   r   r|   r"   r"   r"   r#   r      s
    r   )module_namec                       s"  e Zd Zdddddddddedfdeeeeje	f  dee	 dee
 dee
 deee
ee	e
f f  d	eeeeef  d
eeeeef  deeeee	ef f  deejjejjjf dee	 def fddZedd ZedefddZ				ddedededededefddZ  ZS ) FaqQuestionAnsweringTrainerN)NN*   modelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetr   
optimizersmodel_revisionr    c                    s|   t |tr	t|}t |trt|}tt| j|||||||||	|
|fi | | jd}t| j	|| _
t| j|| _d S )Nztrain.sampler.k_shot)
isinstancerJ   r   superr   rV   cfgsafe_getr   Ztrain_preprocessorZtrain_data_collatorZeval_preprocessorZeval_data_collator)r/   r   r   r   r   r   r   r   r   r   r   r    kwargsr5   	__class__r"   r#   rV      s(   



z$FaqQuestionAnsweringTrainer.__init__c                 C   s   | j | j S ri   )Z_train_iters_per_epochZ
max_epochsr.   r"   r"   r#   	max_iters  s   z%FaqQuestionAnsweringTrainer.max_itersreturnc                 C   s   dS )Nr   r"   r.   r"   r"   r#   
inner_iter  s   z&FaqQuestionAnsweringTrainer.inner_iterFTr   r4   workers_per_gpudistshufflec              	   K   s   t  \}}	d }
| jdi }||d< |jtjkr!| jd|d< n| jd|d< ||d< |	|d< t|fi |}|d urEtt|||dnd }t	t
jt	d	krU||d
< n
|du r_| jd t|f|
|||dd|d|}|S )Nztrain.samplerr    ztrain.train_iters_per_epochrQ   zevaluation.val_iters_per_epochr9   r:   )num_workersr9   r    z1.7.0persistent_workersTzNpersistent_workers is invalid because your pytorch version is lower than 1.7.0
pin_memoryF)samplerr   batch_samplerr   r   )r   r   r   rA   r   ZTRAINr%   r   r   r   torch__version__rB   warningr   pop)r/   r4   r   r   r   r    r   r   r9   r:   r   Zsampler_cfgr   Zinit_fnZdata_loaderr"   r"   r#   _build_dataloader_with_dataset  sN   



	z:FaqQuestionAnsweringTrainer._build_dataloader_with_dataset)FTr   F)r}   r~   r   r   r	   r   r   r   Modulerj   r   r   r   r   r   r   r
   r   ZoptimZ	OptimizerZlr_schedulerZ_LRSchedulerr   rV   propertyr   r   boolr   r   __classcell__r"   r"   r   r#   r      s|    

	

!
r   ):
contextlibcollectionsr   dataclassesr   Zdistutils.versionr   	functoolsr   typingr   r   r   r	   r
   r   numpyr   r   r   Ztorch.utils.datar   r   Ztorch.utils.data.distributedr   Zmodelscope.metainfor   Zmodelscope.models.baser   Zmodelscope.msdatasetsr   Zmodelscope.preprocessorsr   Zmodelscope.trainers.builderr   Zmodelscope.trainers.nlp_trainerr   Zmodelscope.trainers.trainerr   Zmodelscope.utils.constantr   r   Zmodelscope.utils.loggerr   Zmodelscope.utils.torch_utilsr   rB   contextmanagerr$   utilsr   ZBatchSamplerr%   r   r   Zregister_moduleZfaq_question_answering_trainerr   r"   r"   r"   r#   <module>   s>    
 
'