o
    *j7                     @   sN  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z( ee)e*eddf Z+e ,e-Z.dej/d< dgZ0e"j1e(j2ej2dG dd de Z3dS )    N)deepcopy)ceil)time)AnyDict	GeneratorListMappingOptionalUnion)softmax)autocast)tqdm)	Pipelines)Model)	MsDataset)
OutputKeys)Pipeline)	PIPELINES)PreprocessorSiameseUiePreprocessor)	ModelFileTaskszImage.Imageznumpy.ndarraytrueZTOKENIZERS_PARALLELISMSiameseUiePipeline)module_namec                	       s   e Zd Z				ddeeef dee dedef fdd	Zd
e	ee
f de	ee
f fddZdeeee f dee	ee
f ef fddZdd Zdd Zdd Zdd Zdd Zdd Z  ZS )r   NcpuTmodelpreprocessorconfig_filedevicec                    s   t  j||||||dd|di d t| jts#J dtj | jdu r4t	j
| jjfi || _| j  d| _d| _d	| _d
| _d| _dS )uv  Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.

        Examples:
            >>> from modelscope.pipelines import pipeline
            >>> pipeline_ins = pipeline(Tasks.siamese_uie,
            >>>    model='damo/nlp_structbert_siamese-uie_chinese-base')
            >>> sentence = '1944年毕业于北大的名古屋铁道会长谷口清太郎等人在日本积极筹资，共筹款2.7亿日元，参加捐款的日本企业有69家。'
            >>> print(pipeline_ins(sentence, schema={'人物': None, '地理位置': None, '组织机构': None}))

            To view other examples please check tests/pipelines/test_siamese_uie.py.
        compileFcompile_options)r   r   r   r    auto_collater!   r"   z,please check whether model config exists in Ni`  i        g      ?)super__init__pop
isinstancer   r   r   ZCONFIGURATIONr   r   Zfrom_pretrainedZ	model_direval	slide_lenmax_lenhint_max_leninference_batch_size	threshold)selfr   r   r   r    r#   kwargs	__class__ n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/nlp/siamese_uie_pipeline.pyr'   '   s0   

	



zSiameseUiePipeline.__init__inputsreturnc                 C   s   d S )Nr4   )r0   r6   r4   r4   r5   postprocessW   s   zSiameseUiePipeline.postprocessinputc                 O   s   d|v r| d}|r|dkrtd| jr| js|   |}| d}t|tkr/t|}| dd}| 	|gd }g }	g }
| 
|||
||	| d|	iS )	uU  
        Args:
            input(str): sentence to extract
            schema: (dict or str) schema of uie task
        Default Returns:
            List[List]:  predicted info list i.e.
            [[{'type': '人物', 'span': '谷口清太郎', 'offset': [18, 23]}],
            [{'type': '地理位置', 'span': '日本', 'offset': [26, 28]}],
            [{'type': '地理位置', 'span': '日本', 'offset': [48, 50]}],
            [{'type': '组织机构', 'span': '北大', 'offset': [8, 10]}],
            [{'type': '组织机构', 'span': '名古屋铁道', 'offset': [11, 16]}]]
        
batch_size   z,This pipeline do not support batch inferenceschemaoutput_all_prefixFr   output)r(   	Exceptionr   Z_model_prepareZprepare_modeltypestrjsonloadsr   forward)r0   r9   argsr1   r:   textr<   r=   tokenized_textpred_info_listprefix_infor4   r4   r5   __call__Z   s&   


zSiameseUiePipeline.__call__c                 C   s(   |d  |g| j t|d   7  < |S )N)r,   len)r0   Z	input_idsZpad_token_idr4   r4   r5   _pad   s   $zSiameseUiePipeline._padc                    s   j |dd jd}g }t jkr!tt j  j d nd} fddt|D } fddt|D }|dkrK |d} |d}tj	|tj
 jd}tj	|tj
 jd}|d j d }	t||	}
t||	}g }t 1 t  t|
|D ]\}} j||}|| qW d    n1 sw   Y  W d    n1 sw   Y  tj|dd	}tj|dd	}tt|D ]0}|| }|| }t|D ]!}| j }|d
 | ||||| |j|| |jd}|| qq|S )NT)paddingZ
truncation
max_lengthr;   c                    ,   g | ]}j | j | j  j  qS r4   )idsr+   r,   .0jr0   rG   r4   r5   
<listcomp>   s    z6SiameseUiePipeline.tokenize_sample.<locals>.<listcomp>c                    rP   r4   )attention_maskr+   r,   rR   rU   r4   r5   rV      s    r   Zdtyper    )dimz--)idhintrF   shiftsequence_outputhint_token_idsattention_maskscross_attention_masks)r   r-   rL   r,   r   r+   rangerM   torchtensorlongr    sizer.   tensor_splitno_gradr   zipr   Zget_plm_sequence_outputappendcatrQ   rW   )r0   rF   rG   hintsZtokenized_hintstokenized_dataZ	split_numZ	token_idsr_   	batch_numZall_token_idsZall_attention_masksZall_sequence_outputr]   ir[   Ztokenized_hintrT   aitemr4   rU   r5   tokenize_sample   s   






z"SiameseUiePipeline.tokenize_samplec           
      C   s   |  |||}tdd |D }tdd |D }tjdd |D tj| jd}tjdd |D tj| jd}|d| j d }	t||	}t||	}t||	}t||	}|||||ffS )	Nc                 S      g | ]}|d  qS )r]   r4   rS   rp   r4   r4   r5   rV          zISiameseUiePipeline.get_tokenized_data_and_data_loader.<locals>.<listcomp>c                 S   rr   )r_   r4   rs   r4   r4   r5   rV      rt   c                 S   rr   )r^   r4   rs   r4   r4   r5   rV      rt   rX   c                 S   rr   )r`   r4   rs   r4   r4   r5   rV      rt   r   r;   )	rq   rb   stackrc   rd   r    re   r.   rf   )
r0   rF   rG   rk   rl   r]   r_   r^   r`   rm   r4   r4   r5   "get_tokenized_data_and_data_loader   s6   z5SiameseUiePipeline.get_tokenized_data_and_data_loaderc                    s   g } fddt t D }|D ]1}t |t|D ]'}|| jkrB|| d }	|| d }
|	|
g||	|
 d}||  nqqt|dd d}|S )	Nc                    s   g | ]} | j kr|qS r4   )r/   rR   
head_probsr0   r4   r5   rV      s    z3SiameseUiePipeline.get_entities.<locals>.<listcomp>r   r;   )offsetspanc                 S   s   t | d S )Nry   )tuple)xr4   r4   r5   <lambda>   s    z1SiameseUiePipeline.get_entities.<locals>.<lambda>)key)ra   rL   r/   ri   sorted)r0   rF   offsetsrx   
tail_probsZsample_entitiesZpotential_headsphptZ	char_headZ	char_tailer4   rw   r5   get_entities   s(   


	zSiameseUiePipeline.get_entitiesc           $   	   C   s  g }|D ]#}d}|D ]}||d  d|d  d7 }q
|| d7 }| | q| |||\}	}
g }d }g }g }g }t ; t ( t|
 D ]}| jj| \}}| | }}||7 }||7 }qHW d    n1 snw   Y  W d    n1 s}w   Y  |	 ddi | d  | d  t|	||D ]\}}}|d }|	dd	||d
}|d ur3||kr3t
|j}dg| }dg| }|D ]Y}|d }|d }|d }t
|}t|D ]B}|| |k r|||  dkr|| n|||  ||  d ||| < |||  dkr|| n|||  ||  d ||| < qq|j}| ||||} | |  g }| | |}qg }!t||D ]#\}} | D ]}"t|}#||"d |"d d}|# | |! |# qHqB|!S )N r@   z: rz   z, rZ   ZWhatADifferentUUiDr\   r   )r\   headtailrK   r   r      ry   )r@   rz   ry   )ri   rv   rb   rg   r   rh   r   Zfast_inferencetolistgetrL   r   ra   r   r   )$r0   rF   rG   rI   Zschema_typesrk   str[   rp   Zall_valid_tokenized_dataZall_tensor_dataZprobsZ	last_uuidZall_pred_entitiesZall_head_probsZall_tail_probsZ
batch_dataZbatch_head_probsZbatch_tail_probsZtokenized_samplerx   r   uuidZprobZ
len_tokensZprob_tmpr\   r   r   Zlen_subrT   r   Zpred_entitiesnext_prefix_infosr   pir4   r4   r5   get_prefix_infos   s   

	














z#SiameseUiePipeline.get_prefix_infosc           	   	   C   sd   |  ||||}|D ]%}||d d  }|d u r|| q
|r%|| | |||||| q
d S )NrK   r@   )r   ri   rD   )	r0   rF   rG   rI   Zcurr_schema_dictrH   r=   r   Znext_schema_dictr4   r4   r5   rD   /  s   

zSiameseUiePipeline.forward)NNr   T)__name__
__module____qualname__r   r   rA   r
   r   r'   r   r   r8   Inputr   r   rJ   rM   rq   rv   r   r   rD   __classcell__r4   r4   r2   r5   r   #   s0    
"0
%7I)4loggingospathlibcopyr   mathr   r   typingr   r   r   r   r	   r
   r   rB   rb   Zscipy.specialr   Ztorch.cuda.ampr   r   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.msdatasetsr   Zmodelscope.outputsr   Zmodelscope.pipelines.baser   Zmodelscope.pipelines.builderr   Zmodelscope.preprocessorsr   r   Zmodelscope.utils.constantr   r   rA   r{   r   	getLoggerr   loggerenviron__all__Zregister_moduleZsiamese_uier   r4   r4   r4   r5   <module>   s8   $

