o
    *j                     @   s  d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZ d dlmZmZ ddgZejejejdG dd deZejejej dG dd deZ!ejejej"dG dd de!Z#dS )    )AnyDictOptionalUnionN)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)TokenClassificationPipeline)Preprocessor+TokenClassificationTransformersPreprocessor WordSegmentationPreprocessorThai)	ModelFileTasks)torch_nested_detachtorch_nested_numpifyWordSegmentationPipelineWordSegmentationThaiPipeline)module_namec                   @   s6   e Zd ZdZ	ddeeef deeef fddZdS )	r   u  Use `model` and `preprocessor` to create a nlp word segment pipeline for prediction.

    NOTE: The preprocessor will first split the sentence into single characters,
    then feed them into the tokenizer with the parameter is_split_into_words=True.

    Examples:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline_ins = pipeline(task='word-segmentation',
        >>>    model='damo/nlp_structbert_word-segmentation_chinese-base')
        >>> sentence1 = '今天天气不错，适合出去游玩'
        >>> print(pipeline_ins(sentence1))

    To view other examples please check tests/pipelines/test_word_segmentation.py.
    Tinputsreturnc                 K   sN   | j |fi |}|r dd |D }dd |D }tj|i}|S tj|i}|S )at  Process the prediction results

        Args:
            inputs (Dict[str, Any]): should be tensors from model
            output_final_sentence (bool): Output the cut sentence splitted by blanks or not.
                If False, the pipeline will output the original token-label information.

        Returns:
            Dict[str, Any]: The prediction results.
        c                 S   s    g | ]}|d    r|d  qS span)strip).0chunk r   t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/nlp/word_segmentation_pipeline.py
<listcomp>;   s
    z8WordSegmentationPipeline.postprocess.<locals>.<listcomp>c                 S   s   g | ]}|qS r   r   )r   r   r   r   r   r   >   s    Z_chunk_processr   ZOUTPUT)selfr   output_final_sentencepostprocess_paramschunksspansZ
seg_resultZoutputsr   r   r   postprocess)   s   

z$WordSegmentationPipeline.postprocessNT)__name__
__module____qualname____doc__r   strr   r&   r   r   r   r   r      s    

c                   @   s2   e Zd Z	ddeeef deeef fddZdS )$MultilingualWordSegmentationPipelineTr   r   c                 K   s*   | j |fi |}dd |D }tj|iS )Nc                 S   s   g | ]}|d  qS r   r   r   entityr   r   r   r   Q   s    zDMultilingualWordSegmentationPipeline.postprocess.<locals>.<listcomp>r    )r!   r   r"   r#   r$   word_segmentsr   r   r   r&   L   s   
z0MultilingualWordSegmentationPipeline.postprocessNr'   )r(   r)   r*   r   r,   r   r&   r   r   r   r   r-   G   s    

r-   c                	       sh   e Zd Z					ddeeef dee dedef fd	d
Zde	ee
f de	eef fddZ  ZS )r   NgpuT   modelpreprocessorconfig_filedevicec                    s\   t  j|||||d t| jtsJ dtj |d u r,t| jjfd|i|| _	d S d S )N)r3   r4   r5   r6   auto_collatez,please check whether model config exists in sequence_length)
super__init__
isinstancer3   r   r   ZCONFIGURATIONr   Z	model_dirr4   )r!   r3   r4   r5   r6   r7   r8   kwargs	__class__r   r   r:   Y   s&   
z%WordSegmentationThaiPipeline.__init__r   r   c                 K   s*   | j |fi |}dd |D }tj|iS )Nc                 S   s   g | ]
}|d   ddqS )r     )replacer.   r   r   r   r   t   s    z<WordSegmentationThaiPipeline.postprocess.<locals>.<listcomp>r    )r!   r   r#   r$   r0   r   r   r   r&   q   s   
z(WordSegmentationThaiPipeline.postprocess)NNr1   Tr2   )r(   r)   r*   r   r   r,   r   r   r:   r   r   r&   __classcell__r   r   r=   r   r   U   s$    

)$typingr   r   r   r   ZtorchZmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.outputsr   Zmodelscope.pipelines.baser	   Zmodelscope.pipelines.builderr
   Zmodelscope.pipelines.nlpr   Zmodelscope.preprocessorsr   r   r   Zmodelscope.utils.constantr   r   Zmodelscope.utils.tensor_utilsr   r   __all__Zregister_moduleZword_segmentationr   Zmultilingual_word_segmentationr-   Zword_segmentation_thair   r   r   r   r   <module>   s2   .