o
    *jE&                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e ZdgZejejejdG dd deZ dS )    N)AnyDictListUnion)Dataset)	Pipelines)Model)
OutputKeys)PipelineTensor)	PIPELINES),DocumentSegmentationTransformersPreprocessor)Tasks)
get_loggerDocumentSegmentationPipeline)module_namec                	       s   e Zd Z				ddeeef dededef fdd	Zd
eeee  ee ef de	ee
f fddZd
eeee  ee ef de	ee
f fddZde	eef de	eef fddZdeeee  ee ef fddZdd Z  ZS )r   NgpuTmodelpreprocessorconfig_filedevicec                    sv   t  jd|||||d| |dd |dd | jj| _| jj| _|du r9t| j| jjjfi || _	dS dS )a8  The document segmentation pipeline.

        Args:
            model (str or Model): Supply either a local model dir or a model id from the model hub
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
        )r   r   r   r   auto_collatecompileNZcompile_options )
super__init__popr   Z	model_dir	model_cfgr   configZmax_position_embeddingsr   )selfr   r   r   r   r   kwargs	__class__r   x/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/nlp/document_segmentation_pipeline.pyr      s(   

z%DocumentSegmentationPipeline.__init__	documentsreturnc                 C   s   |  |}| |}|S )N)predictpostprocess)r   r$   outputr   r   r#   __call__>   s   

z%DocumentSegmentationPipeline.__call__c                    s    |} jd dkr|d}t|} | j}t| jj }t| jj } jd dkr9|d |d}|d}	| jj}
 j	sU j
r\ jd	 r\ js\   t   fd
d| D } j	jdi |j }W d    n1 sw   Y  tj|dd}t|	t|ksJ d|t|	t| fddt||D } fddt||D }g }t|D ]} jd dkr|g g g || d q|g g g d qt||	||
D ]f\}}}} jd dkr3t|t|k r	|d |d t|t|ksJ dt|t|t|t|ks3J dt|t||| d | || d | || d | q jd dkrt|D ]*}t|| d d t|| d ksrJ || d d || d d q[|S )Nleveltopic
paragraphstypeZbertZsegment_idslabels	sentencesr   c                    s$   i | ]\}}|t | jqS r   )torchZtensortor   ).0keyvalr   r   r#   
<dictcomp>c   s    z8DocumentSegmentationPipeline.predict.<locals>.<dictcomp>   )Zaxisz(sample {}  infer_sample {} prediction {}c                    (   g | ]\}} fd dt ||D qS )c                    s$   g | ]\}}|d kr j j| qS ir   Z
label_listr2   plr5   r   r#   
<listcomp>p       
CDocumentSegmentationPipeline.predict.<locals>.<listcomp>.<listcomp>zipr2   
predictionlabelr5   r   r#   r>   o       
z8DocumentSegmentationPipeline.predict.<locals>.<listcomp>c                    r8   )c                    s$   g | ]\}}|d kr j j| qS r9   r:   r;   r5   r   r#   r>   w   r?   r@   rA   rC   r5   r   r#   r>   v   rF   )r/   r.   predictionsr,   )r/   r.   rG   docB-EOPz{} {}rG      r   )cut_documentsr   r   r   	from_dictr   lenZcontext_column_nameZexample_id_column_namer   Zhas_multiple_modelsmodelsZ_model_prepareZprepare_modelr0   Zno_graditemsforwardZlogitscpunpZargmaxformatrB   rangeappendextend)r   r$   Zpred_samplesr,   Zpredict_examplesZpredict_datasetZnum_examplesZnum_samplesr.   r/   Zexample_idsinputrG   Ztrue_predictionsZtrue_labelsoutirD   Zsentence_listrE   
example_idr   r5   r#   r&   E   s   















z$DocumentSegmentationPipeline.predictinputsc                 C   sn  g }g }t |}| jd dkrlt|D ]W}g }g }t|| d || d || d D ]+\}}	}
| }|	dkrEd|dg}|d	 nd|d
g}|d || q+|| dd|  }|| qn;t|D ]6}g }t|| d || d D ]\}}	| }|	dkrd|d
g}|| qdd| }|| qp|d	krtj|d iS tj|iS )zprocess the prediction results

        Args:
            inputs (Dict[str, Any]): _description_

        Returns:
            Dict[str, str]: the prediction results
        r*   r+   r,   rG   r.   rI    z

	rJ   z
	r   	r/   )	rM   r   rT   rB   stripjoinrU   r	   ZTEXT)r   r[   resultZ	res_predsZ
list_countnumrespredsr<   r=   documentr   r   r#   r'      sH   	





z(DocumentSegmentationPipeline.postprocessparac                 C   sH  |}g }g }g }g }d}| j d dkrmt|tr|gg}n
t|d tr'|g}|D ]<}g }	g }
|D ]}| |}|	| |
dgt|d  dg  q1|| ||	 ||
 || |d7 }q)||||dS t|tru|g}|D ]&}| |}	dgt|	d  dg }
||	 ||
 || |d7 }qw|||d	S )
Nr   r*   r+   z-100rJ   rI   )rZ   r/   r,   r.   O)rZ   r/   r.   )r   
isinstancestrcut_sentencerV   rM   rU   )r   rf   Zdocument_listr,   r/   r.   rZ   idre   ZsentencerE   itemZsentence_of_current_paragraphr   r   r#   rK      sZ   















z*DocumentSegmentationPipeline.cut_documentsc                 C   sT   t dd|}t dd|}t dd|}t dd|}| }dd |dD S )	Nu   ([。！.!？\?])([^”’])z\1\n\2u   (\.{6})([^”’])u   (\…{2})([^”’])u*   ([。！？\?][”’])([^，。！？\?])c                 S   s   g | ]}|r|qS r   r   )r2   _r   r   r#   r>     s    z=DocumentSegmentationPipeline.cut_sentence.<locals>.<listcomp>
)resubrstripsplit)r   rf   r   r   r#   rj     s   z)DocumentSegmentationPipeline.cut_sentence)NNr   T)__name__
__module____qualname__r   r   ri   r   r   r   r   r   r)   r&   r   r'   rK   rj   __classcell__r   r   r!   r#   r      s>    
!



"]$15)!ro   typingr   r   r   r   numpyrR   r0   Zdatasetsr   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.outputsr	   Zmodelscope.pipelines.baser
   r   Zmodelscope.pipelines.builderr   Zmodelscope.preprocessorsr   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   logger__all__Zregister_moduleZdocument_segmentationr   r   r   r   r#   <module>   s&   