o
    0jf                     @   s   d dl mZmZmZmZmZmZ d dlZddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl	m"Z"m#Z# ej$G dd deZ%edG dd deZ&dS )    )AnyDictListOptionalTupleUnionN   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	HPIConfigPaddlePredictorOption)	DetResult)	benchmark   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByBoxes)	OCRResult   )LayoutParsingResult)get_sub_regions_ocr_ressorted_layout_boxesc                1       s^  e Zd ZdZddddddddedee dee deeeef  d	ee d
e	dee
eeef ef  ddf fddZdeddfddZ						d7dededededededee dee dee dee dee dee defddZdede	fd d!Zd"ee	 d#ee	 d$ee	 d%ee	 d&ee	 defd'd(Z																						d8d)e
eee ejeej f d"ee	 d#ee	 d*ee	 d$ee	 d%ee	 d&ee	 d+ee
eef  d,ee	 d-ee
eeeef ef  d.ee dee dee dee dee dee dee d/ee d0ee d1ee d2ee d3ee d4ee def0d5d6Z  ZS )9_LayoutParsingPipelinezLayout Parsing PipelineNFdeviceengineengine_config	pp_optionuse_hpip
hpi_configconfigr   r   r   r    r!   r"   returnc          	   	      sN   t  jd||||||d| | | tdd| _tdd| _t | _dS )a  Initializes the layout parsing pipeline.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
        r   r   )Z
batch_sizeZBGR)formatN )	super__init__inintial_predictorr   batch_samplerr   
img_readerr   Z_crop_by_boxes)	selfr#   r   r   r   r    r!   r"   kwargs	__class__r&   t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/layout_parsing/pipeline.pyr(   &   s   

z_LayoutParsingPipeline.__init__c                 C   s  | dd| _| dd| _| dd| _| dd| _| jr1| di  ddd	i}| || _| d
i  dddi}i }| dd }durM||d< | dd }dur[||d< | dd }duri||d< | dd }durw||d< | j|fi || _| di  dddi}	| |	| _	| jr| di  dddi}
| |
| _
| jr| di  dddi}| || _| jr| di  dddi}| || _dS )zInitializes the predictor based on the provided configuration.

        Args:
            config (Dict): A dictionary containing the configuration for the predictor.

        Returns:
            None
        use_doc_preprocessorTuse_table_recognitionuse_seal_recognitionuse_formula_recognitionZSubPipelinesZDocPreprocessorZpipeline_config_errorz+config error for doc_preprocessor_pipeline!Z
SubModulesZLayoutDetectionZmodel_config_errorz"config error for layout_det_model!	thresholdN
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modeZ
GeneralOCRz&config error for general_ocr_pipeline!ZSealRecognitionz+config error for seal_recognition_pipeline!ZTableRecognitionz,config error for table_recognition_pipeline!ZFormulaRecognitionz.config error for formula_recognition_pipeline!)getr1   r2   r3   r4   Zcreate_pipelinedoc_preprocessor_pipelineZcreate_modellayout_det_modelgeneral_ocr_pipelineseal_recognition_pipelinetable_recognition_pipelineformula_recognition_pipeline)r,   r#   Zdoc_preprocessor_configZlayout_det_configZlayout_kwargsr5   r6   r7   r8   Zgeneral_ocr_configZseal_recognition_configZtable_recognition_configZformula_recognition_configr&   r&   r0   r)   P   s   
z)_LayoutParsingPipeline.inintial_predictorimagelayout_det_resoverall_ocr_restable_res_listseal_res_listformula_res_listtext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshc           &      C   s  g }i }d}d}d}t |}g }t|d D ]\}}i }|d }|d  }||d< ||d< d|d< || |d	krYt|dkrXt||ksLJ d
|| d |d< |d7 }nr|dkrzt|dkryt||ksmJ d|| d |d< |d7 }nQ|dkrt|dkrt||ksJ dd|| d |d< |d7 }n-t||gdd\}}|D ]}||ddu r|g||< q|| | qd|d |d< || q|	 D ]]}t|dkr2|D ]Q}t j
|j|jdd }|| d }dd |D \}}} }!|||!|| ddf |||!|| ddf< t| j||||	|
||dd }"d|"d || d< qqt||dd}#t|#d |#d D ]\}$}%i }|$|d< d |d< |%|d< || qCt||jd d!}|S )"a  
        Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
        Args:
            image (list): The input image.
            layout_det_res (DetResult): The detection result containing the layout information of the document.
            overall_ocr_res (OCRResult): The overall OCR result containing text information.
            table_res_list (list): A list of table recognition results.
            seal_res_list (list): A list of seal recognition results.
            formula_res_list (list): A list of formula recognition results.
            text_det_limit_side_len (Optional[int], optional): The maximum side length of the text detection region. Defaults to `None`.
            text_det_limit_type (Optional[str], optional): The type of limit for the text detection region. Defaults to `None`.
            text_det_thresh (Optional[float], optional): The confidence threshold for text detection. Defaults to `None`.
            text_det_box_thresh (Optional[float], optional): The confidence threshold for text detection bounding boxes. Defaults to `None`
            text_det_unclip_ratio (Optional[float], optional): The unclip ratio for text detection. Defaults to `None`.
            text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to `None`.
        Returns:
            list: A list of dictionaries representing the layout parsing result.
        r   ZboxesZ
coordinatelabelZ
block_bboxZblock_label Zblock_contentZformulazThe number of                         formula regions of layout parsing pipeline                         and formula recognition pipeline are different!Zrec_formular   tablezThe number of                         table regions of layout parsing pipeline                         and table recognition pipeline are different!Z	pred_htmlZsealzThe number of                         seal regions of layout parsing pipeline                         and seal recognition pipeline are different!z, Z	rec_textsT)Zreturn_match_idxN
)dtype   c                 S   s   g | ]}t |qS r&   )int).0ir&   r&   r0   
<listcomp>
  s    zA_LayoutParsingPipeline.get_layout_parsing_res.<locals>.<listcomp>)rF   rG   rH   rI   rJ   rK   F)Zflag_withinZ	rec_boxesZ
other_text)w)nparray	enumeratelowerappendlenjoinr   r9   valuesZonesshaperP   listr<   zipr   )&r,   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   Zlayout_parsing_resZmatched_ocr_dictZformula_indexZtable_indexZ
seal_indexZobject_boxesZobject_box_idxZbox_infoZsingle_box_resboxrL   Zocr_res_in_boxZmatched_idxesZmatched_idxZlayout_box_idsidxZwht_imx1y1Zx2y2Zsub_ocr_resZocr_without_layout_boxesZocr_rec_boxZocr_rec_textr&   r&   r0   get_layout_parsing_res   s   !



0
z-_LayoutParsingPipeline.get_layout_parsing_resinput_paramsc                 C   sX   |d r| j std dS |d r| jstd dS |d r*| js*td dS dS )	a4  
        Check if the input parameters are valid based on the initialized models.

        Args:
            input_params (Dict): A dictionary containing input parameters.

        Returns:
            bool: True if all required models are initialized according to input parameters, False otherwise.
        r1   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.Fr3   zRSet use_seal_recognition, but the models for seal recognition are not initialized.r2   zTSet use_table_recognition, but the models for table recognition are not initialized.T)r1   r	   errorr3   r2   )r,   rh   r&   r&   r0   check_model_settings_valid,  s    z1_LayoutParsingPipeline.check_model_settings_validuse_doc_orientation_classifyuse_doc_unwarpingr3   r2   r4   c                 C   sl   |du r|du r| j }n|du s|du rd}nd}|du r | j}|du r'| j}|du r.| j}t||||dS )a'  
        Get the model settings based on the provided parameters or default values.

        Args:
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_seal_recognition (Optional[bool]): Whether to use seal recognition.
            use_table_recognition (Optional[bool]): Whether to use table recognition.

        Returns:
            dict: A dictionary containing the model settings.
        NTF)r1   r3   r2   r4   )r1   r3   r2   r4   dict)r,   rk   rl   r3   r2   r4   r1   r&   r&   r0   get_model_settingsK  s"   z)_LayoutParsingPipeline.get_model_settingsinputuse_textline_orientationlayout_thresholdr6   r7   r8   seal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_threshc           )      k   s   |  |||||}| |sddiV  t| |D ]\}}| |jd }|d r8t| j|||dd }nd|i}|d }t| j|||	|
|dd }t| j	||||||||dd } |d	 rxt| j
|d
d
d
d
| |dd }!|!d }"ng }"|d rt| j|d
d
d
|||||||dd }#|#d }$ng }$|d rt| j|d
d
d
|dd }%|%d }&ng }&| j||| |"|$|&||||||d}'|jd |jd ||| |"|$|&|'|d
}(t|(V  qdS )a
  
        This function predicts the layout parsing result for the given input.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): The input image(s) or pdf(s) to be processed.
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
            use_seal_recognition (Optional[bool]): Whether to use seal recognition.
            use_table_recognition (Optional[bool]): Whether to use table recognition.
            use_formula_recognition (Optional[bool]): Whether to use formula recognition.
            layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to `False`.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to `None`.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to `None`.
            text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
            text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
            text_det_thresh (Optional[float]): Threshold for text detection.
            text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
            text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
            text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
            seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
            seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
            seal_det_thresh (Optional[float]): Threshold for seal detection.
            seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
            seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
            seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.

            **kwargs: Additional keyword arguments.

        Returns:
            LayoutParsingResult: The predicted layout parsing result.
        ri   z0the input params for model settings are invalid!r   r1   )rk   rl   Z
output_img)r5   r6   r7   r8   )rp   rF   rG   rH   rI   rJ   rK   r2   F)rk   rl   use_layout_detectionZuse_ocr_modelrB   rA   rC   r3   )
rk   rl   rx   rA   rr   rs   rt   ru   rv   rw   rD   r4   )rx   rk   rl   rA   rE   )rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   )
Z
input_pathZ
page_indexdoc_preprocessor_resrA   rB   rC   rD   rE   parsing_res_listmodel_settingsN)rn   rj   rY   r*   r+   Z	instancesr`   r:   r;   r<   r>   r=   r?   rg   Zinput_pathsZpage_indexesr   ))r,   ro   rk   rl   rp   r3   r2   r4   rq   r6   r7   r8   rF   rG   rH   rI   rJ   rK   rr   rs   rt   ru   rv   rw   r-   r{   Zimg_idZ
batch_dataZimage_arrayry   Zdoc_preprocessor_imagerA   rB   Ztable_res_allrC   Zseal_res_allrD   Zformula_res_allrE   rz   Zsingle_img_resr&   r&   r0   predictw  s   A






	z_LayoutParsingPipeline.predict)NNNNNN)NNNNNNNNNNNNNNNNNNNNNN)__name__
__module____qualname____doc__r   r   strr   r   boolr   r   r(   r)   r`   r   r   rR   floatrg   rj   rm   rn   r   rW   Zndarrayr   r   r|   __classcell__r&   r&   r.   r0   r   "   s"   	*`	

 
/	
r   Zocrc                   @   s&   e Zd ZdgZedd Zdd ZdS )LayoutParsingPipelineZlayout_parsingc                 C   s   t S )N)r   )r,   r&   r&   r0   _pipeline_cls>  s   z#LayoutParsingPipeline._pipeline_clsc                 C   s   dS )Nr   r&   )r,   r#   r&   r&   r0   _get_batch_sizeB  s   z%LayoutParsingPipeline._get_batch_sizeN)r}   r~   r   entitiespropertyr   r   r&   r&   r&   r0   r   :  s
    
r   )'typingr   r   r   r   r   r   numpyrW   utilsr	   Z
utils.depsr
   Zcommon.batch_samplerr   Zcommon.readerr   modelsr   r   Zmodels.object_detection.resultr   Zutils.benchmarkr   Z	_parallelr   baser   
componentsr   Z
ocr.resultr   resultr   r   r   Ztime_methodsr   r   r&   r&   r&   r0   <module>   s.        