o
    *j&                  	   @   s  d dl Z d dlmZmZmZmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ dgZejej ej dejej ej!dejej ej"dejej ej#dejej!ej!dG dd deZ$dS )    N)AnyDictListOptionalTupleUnion)	Pipelines)Model)
OutputKeys)InputPipeline)	PIPELINES)Preprocessor)	ModelFileTasks)torch_nested_detachtorch_nested_numpifyTokenClassificationPipeline)module_namec                	       s@  e Zd Z					d!deeef dee dedef fd	d
Zde	ee
f de	ee
f fddZde	ee
f de	ee
f fddZde	ee
f defddZdede	ee
f f fddZdee dedee	ee
f  f fddZdee defddZdee	ee
f  de	eeeef f dee	ee
f  fdd Z  ZS )"r   NgpuT   modelpreprocessorconfig_filedevicec                    s   t  j||||||dd|di d t| jts#J dtj |du r5tj	| jj
fd|i|| _| j  || _t| jdsEJ | jj| _dS )	a  use `model` and `preprocessor` to create a token classification pipeline for prediction

        Args:
            model (str or Model): A model instance or a model local dir or a model id in the model hub.
            preprocessor (Preprocessor): a preprocessor instance, must not be None.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.
        compileFcompile_options)r   r   r   r   auto_collater   r   z,please check whether model config exists in Nsequence_lengthid2label)super__init__pop
isinstancer   r	   r   ZCONFIGURATIONr   Zfrom_pretrainedZ	model_dirr   evalr   hasattrr   )selfr   r   r   r   r   r   kwargs	__class__ w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/nlp/token_classification_pipeline.pyr!   "   s0   

	

z$TokenClassificationPipeline.__init__inputsreturnc                 K   sZ   | tj}t  i | jdi ||tj|iW  d    S 1 s&w   Y  d S )Nr*   )r"   r
   ZTEXTtorchZno_gradr   )r&   r,   Zforward_paramstextr*   r*   r+   forwardI   s   
$z#TokenClassificationPipeline.forwardc                 K   s   | j |fi |}tj|iS )zProcess the prediction results

        Args:
            inputs (Dict[str, Any]): should be tensors from model

        Returns:
            Dict[str, Any]: the prediction results
        )_chunk_processr
   OUTPUT)r&   r,   postprocess_paramschunksr*   r*   r+   postprocessQ   s   

z'TokenClassificationPipeline.postprocessc                    s  |d }t j|vr!|t j }t|jdkr|d }tj|dd}n|t j }t|jdkr1|d }|d }t|jdkr@|d }|d}|d	url|d	 
  }t|dd|}t|jdkrg|d }||}tt|}tt|} fd
d|D }	|dd}
|
rt j|v r|t j }t|jdkr|d }tt|d}nd}
g }i }tt|	|D ]\}\}}|d dv r|r||d |d  |d< || |dd	 |d |d d}|
r|| ||  |d< |d dv r|s|dd	 |d |d d}|
r|| ||  |d< |d dv r8|s8|dd	 |d |d d}|
r8|| ||  |d< |d dv rH|rH|d |d< |d dv re|re||d |d  |d< || i }q|rz||d |d  |d< || |S )zprocess the prediction results and output as chunks

        Args:
            inputs (Dict[str, Any]): should be tensors from model

        Returns:
            List: The output chunks
        r/      r   )dim   offset_mapping
label_maskNc                    s   g | ]} j | qS r*   )r   ).0xr&   r*   r+   
<listcomp>   s    z>TokenClassificationPipeline._chunk_process.<locals>.<listcomp>return_probTFZBSstartendspan   )typerA   rB   ZprobIEZIESES)r
   ZPREDICTIONSZLOGITSlenshaper.   ZargmaxgetsumlongcpuitemZnarrowZmasked_selectr   r   r"   Zsoftmax	enumeratezipappend)r&   r,   r3   r/   ZlogitsZpredictionsr:   r;   Zmasked_lengthslabelsr@   Zprobsr4   chunkilabeloffsetsr*   r>   r+   r1   ^   s   













z*TokenClassificationPipeline._chunk_processinputc           	         s~   | dd}|dkrt j|g|R i |S | |g|\}}g }|D ]}|t j|g|R i | q$| ||d S )Nsplit_max_lengthr   )r"   r    _process_single_auto_splitrR   
_auto_join)	r&   rX   argsr'   rY   split_textsindex_mappingoutputsr/   r(   r*   r+   rZ      s   "z+TokenClassificationPipeline._process_single
batch_sizec           	         sl   | dd}|dkrt j|g|R d|i|S | ||\}}t j|g|R d|i|}| ||S )NrY   r   ra   )r"   r    _process_batchr[   r\   )	r&   rX   ra   r]   r'   rY   r^   r_   r`   r(   r*   r+   rb      s4   z*TokenClassificationPipeline._process_batchinput_textsrY   c                 C   s   g }i }d}t |D ]C\}}t||k r$|| |df||< |d7 }q
tt|| }t|D ]}	|	| }
|||
|
|   ||
f||< |d7 }q1q
||fS )Nr   rD   )rP   rI   rR   mathceilrange)r&   rc   rY   r^   r_   Znew_idxraw_idxr/   Zn_splitrU   offsetr*   r*   r+   r[      s    


z'TokenClassificationPipeline._auto_splitr`   r_   c           	      C   s   g }t |D ]8\}}|| \}}|t|kr|| q|tj D ]}|d  |7  < |d  |7  < || tj | q!q|S )NrA   rB   )rP   rI   rR   r
   r2   )	r&   r`   r_   Zjoined_outputsidxoutputrg   rh   rT   r*   r*   r+   r\      s   z&TokenClassificationPipeline._auto_join)NNr   Tr   )__name__
__module____qualname__r   r	   strr   r   r!   r   r   r0   r5   r   r1   r   rZ   intrb   r[   r   r\   __classcell__r*   r*   r(   r+   r      sH    
'




e)%rd   typingr   r   r   r   r   r   numpynpr.   Zmodelscope.metainfor   Zmodelscope.modelsr	   Zmodelscope.outputsr
   Zmodelscope.pipelines.baser   r   Zmodelscope.pipelines.builderr   Zmodelscope.preprocessorsr   Zmodelscope.utils.constantr   r   Zmodelscope.utils.tensor_utilsr   r   __all__Zregister_moduleZtoken_classificationZpart_of_speechZword_segmentationZnamed_entity_recognitionr   r*   r*   r*   r+   <module>   s:    