o
    *js                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e ZdgZejejejdG dd deZ dS )    N)AnyDictListUnion)Dataset)	Pipelines)Model)
OutputKeys)PipelineTensor)	PIPELINES),DocumentSegmentationTransformersPreprocessor)Tasks)
get_loggerExtractiveSummarizationPipeline)module_namec                	       s   e Zd Z				ddeeef dededef fdd	Zd
eee ef de	ee
f fddZd
eee ef de	ee
f fddZde	eef de	eef fddZdeee ef fddZdd Z  ZS )r   NgpuTmodelpreprocessorconfig_filedevicec                    sv   t  jd|||||d| |dd  |dd  | jj| _| jj| _|d u r9t| j| jjjfi || _	d S d S )N)r   r   r   r   auto_collatecompileZcompile_options )
super__init__popr   Z	model_dir	model_cfgr   configZmax_position_embeddingsr   )selfr   r   r   r   r   kwargs	__class__r   {/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/nlp/extractive_summarization_pipeline.pyr      s(   	

z(ExtractiveSummarizationPipeline.__init__	documentsreturnc                 C   s   |  |}| |}|S )N)predictpostprocess)r   r$   outputr   r   r#   __call__:   s   

z(ExtractiveSummarizationPipeline.__call__c                    s    |}t|} | j}t| jj }t| jj }|d}|d}| jj}	t	
  dd | D }
 jjdi |
j}W d    n1 sTw   Y  tj|dd}t|t|ksuJ d|t|t| fdd	t||D } fd
d	t||D }g }t|D ]}|g g g d qt||||	D ][\}}}}t|t|k r|d |d t|t|ksJ dt|t|t|t|ksJ dt|t||| d | || d | || d | q|S )Nlabels	sentencesc                 S   s   i | ]
\}}|t |qS r   )torchZtensor).0keyvalr   r   r#   
<dictcomp>P   s    
z;ExtractiveSummarizationPipeline.predict.<locals>.<dictcomp>   )Zaxisz(sample {}  infer_sample {} prediction {}c                    (   g | ]\}} fd dt ||D qS )c                    s$   g | ]\}}|d kr j j| qS ir   Z
label_listr-   plr   r   r#   
<listcomp>]       
FExtractiveSummarizationPipeline.predict.<locals>.<listcomp>.<listcomp>zipr-   
predictionlabelr8   r   r#   r9   \       
z;ExtractiveSummarizationPipeline.predict.<locals>.<listcomp>c                    r2   )c                    s$   g | ]\}}|d kr j j| qS r3   r4   r5   r8   r   r#   r9   d   r:   r;   r<   r>   r8   r   r#   r9   c   rA   )r+   r*   predictionsOz{} {}rB   r   )cut_documentsr   	from_dictr   r   lenZcontext_column_namer   Zexample_id_column_namer,   Zno_graditemsr   forwardlogitsnpZargmaxformatr=   rangeappendextend)r   r$   Zpred_samplesZpredict_examplesZpredict_datasetZnum_examplesZnum_samplesr*   r+   Zexample_idsinputrI   rB   Ztrue_predictionsZtrue_labelsoutir?   Zsentence_listr@   
example_idr   r8   r#   r&   ?   sj   










z'ExtractiveSummarizationPipeline.predictinputsc                 C   s   g }t |}t|D ]+}g }t|| d || d D ]\}}| }|dkr,|| q|d| q
|dkrAtj|d iS tj|iS )zprocess the prediction results

        Args:
            inputs (Dict[str, Any]): _description_

        Returns:
            Dict[str, str]: the prediction results
        r+   rB   B-EOP
   r   )rF   rL   r=   striprM   joinr	   ZTEXT)r   rS   resultZ
list_countnumressr6   r   r   r#   r'   ~   s   	


z+ExtractiveSummarizationPipeline.postprocessparac           
      C   s   t |tr	|g}n|}g }g }g }d}|D ]&}| |}dgt|d  dg }	|| ||	 || |d7 }q|||dS )Nr   rC   rV   rT   )rR   r+   r*   )
isinstancestrcut_sentencerF   rM   )
r   r]   Zdocument_listr+   r*   rR   idZdocumentZsentencer@   r   r   r#   rD      s$   





z-ExtractiveSummarizationPipeline.cut_documentsc                 C   sT   t dd|}t dd|}t dd|}t dd|}| }dd |dD S )	Nu   ([。！.!？\?])([^”’])z\1\n\2u   (\.{6})([^”’])u   (\…{2})([^”’])u*   ([。！？\?][”’])([^，。！？\?])c                 S   s   g | ]}|r|qS r   r   )r-   _r   r   r#   r9      s    z@ExtractiveSummarizationPipeline.cut_sentence.<locals>.<listcomp>rU   )resubrstripsplit)r   r]   r   r   r#   r`      s   z,ExtractiveSummarizationPipeline.cut_sentence)NNr   T)__name__
__module____qualname__r   r   r_   r   r   r   r   r   r)   r&   r   r'   rD   r`   __classcell__r   r   r!   r#   r      s&    
&&"?)!rc   typingr   r   r   r   numpyrJ   r,   Zdatasetsr   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.outputsr	   Zmodelscope.pipelines.baser
   r   Zmodelscope.pipelines.builderr   Zmodelscope.preprocessorsr   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   logger__all__Zregister_moduleZextractive_summarizationr   r   r   r   r#   <module>   s(   