o
    0jO                    @   sn  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z
d dlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z, ddlm-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 ddl8m8Z8 ej9G dd deZ:edG dd deZ;dS )    N)AnyDictListOptionalTupleUnion)Image   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	HPIConfigPaddlePredictorOption)	DetResult)	benchmark   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)	OCRResult)MarkdownResult   )LayoutBlockLayoutRegion)LayoutParsingResultV2)BLOCK_LABEL_MAPBLOCK_SETTINGSREGION_SETTINGS)calculate_bbox_area calculate_minimum_enclosing_bboxcalculate_overlap_ratio!convert_formula_res_to_ocr_formatgather_imgsget_bbox_intersectionget_seg_flagget_sub_regions_ocr_resremove_overlap_blocksshrink_supplement_region_bboxupdate_region_box)xycut_enhancedc                E       s  e Zd ZdZdddddddddedee dee d	eeeef  d
ee de	dee
eeef ef  de	ddf fddZdd ZdeddfddZdededefddZdede	fddZ	dWdedededededed e
edf defd!d"Z	dWded#edededed$ed%ed&eded e
edf defd'd(Zd)edee fd*d+Zdg fdedededed$ed%ed&eded e
edf d,ee defd-d.Z	dWd/e
e	df d0e
e	df d1e
e	df d2e
e	df d3e
e	df d4e
e	df d5e
e	df d6e
e	df d,eee  defd7d8Z																																dXd9e
eee ejeej f d/e
e	df d0e
e	df d:ee	 d1e
e	df d2e
e	df d3e
e	df d4e
e	df d5e
e	df d6e
e	df d;ee
eef  d<ee	 d=ee
ee eef ef  d>ee d?e
e!df d@e
edf dAe
edf dBe
edf dCe
edf d e
edf dDe
e!df dEe
edf dFe
edf dGe
edf dHe
edf dIe
edf dJe	dKe	dLe	dMe	dNe	dOe	d,eee  de"fDdPdQZ#dRede$fdSdTZ%dUdV Z&  Z'S )Y_LayoutParsingPipelineV2zLayout Parsing Pipeline V2NFT)deviceengineengine_config	pp_optionuse_hpip
hpi_configinitial_predictorconfigr,   r-   r.   r/   r0   r1   r2   returnc          
   	      sR   t  jd||||||d|	 |r| | t|ddd| _tdd| _dS )	a  Initializes the layout parsing pipeline v2.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
            initial_predictor (bool, optional): Whether to initialize predictors.
                Defaults to `True`.
        )r,   r-   r.   r/   r0   r1   
batch_sizer   )r5   ZBGR)formatN )super__init__inintial_predictorr   getbatch_samplerr   
img_reader)
selfr3   r,   r-   r.   r/   r0   r1   r2   kwargs	__class__r7   w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/layout_parsing/pipeline_v2.pyr9   7   s   

z!_LayoutParsingPipelineV2.__init__c                 C   s   t | dd r| j  d S d S )Nchart_recognition_model)getattrrC   closer>   r7   r7   rB   rE   c   s   z_LayoutParsingPipelineV2.closec                 C   s`  | dds| dds| ddrd| _nd| _| dd| _| dd| _| dd| _| d	d| _| d
d| _| dd| _| jrX| di  dddi}| || _	| jrm| di  dddi}| 
|| _| di  dddi}i }| dd }dur||d< | dd }dur||d< | dd }dur||d< | dd }	dur|	|d< | j
|fi || _| di  dddi}
| |
| _| jr| di  dddi}| || _| jr| di  ddd i}| || _| jr| di  d!dd"i}| || _| jr%| di  d#dd$i}| 
|| _| d%g d&| _dS )'zInitializes the predictor based on the provided configuration.

        Args:
            config (Dict): A dictionary containing the configuration for the predictor.

        Returns:
            None
        use_doc_preprocessorTuse_doc_orientation_classifyuse_doc_unwarpingFuse_table_recognitionuse_seal_recognitionformat_block_contentuse_region_detectionuse_formula_recognitionuse_chart_recognitionZSubPipelinesZDocPreprocessorZpipeline_config_errorz+config error for doc_preprocessor_pipeline!Z
SubModulesZRegionDetectionZmodel_config_errorz.config error for block_region_detection_model!ZLayoutDetectionz"config error for layout_det_model!	thresholdN
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modeZ
GeneralOCRz&config error for general_ocr_pipeline!ZSealRecognitionz+config error for seal_recognition_pipeline!ZTableRecognitionz,config error for table_recognition_pipeline!ZFormulaRecognitionz.config error for formula_recognition_pipeline!ZChartRecognitionz)config error for chart_recognition_model!markdown_ignore_labels)numberfootnoteheaderZheader_imagefooterZfooter_imageZ
aside_text)r;   rG   rJ   rK   rL   rM   rN   rO   Zcreate_pipelinedoc_preprocessor_pipelineZcreate_modelregion_detection_modellayout_det_modelgeneral_ocr_pipelineseal_recognition_pipelinetable_recognition_pipelineformula_recognition_pipelinerC   rT   )r>   r3   Zdoc_preprocessor_configZregion_detection_configZlayout_det_configZlayout_kwargsrP   rQ   rR   rS   Zgeneral_ocr_configZseal_recognition_configZtable_recognition_configZformula_recognition_configZchart_recognition_configr7   r7   rB   r:   g   s   


z+_LayoutParsingPipelineV2.inintial_predictoroverall_ocr_reslayout_det_resc                 C   sL   g }|d D ]}|d   dv r||d  qt|}t||dd}|S )a  
        Retrieves the OCR results for text paragraphs, excluding those of formulas, tables, and seals.

        Args:
            overall_ocr_res (OCRResult): The overall OCR result containing text information.
            layout_det_res (DetResult): The detection result containing the layout information of the document.

        Returns:
            OCRResult: The OCR result for text paragraphs after excluding formulas, tables, and seals.
        boxeslabelformulatableseal
coordinateF)Zflag_within)lowerappendnparrayr&   )r>   r`   ra   object_boxesbox_infoZsub_regions_ocr_resr7   r7   rB   get_text_paragraphs_ocr_res   s   
z4_LayoutParsingPipelineV2.get_text_paragraphs_ocr_resinput_paramsc                 C   sX   |d r| j std dS |d r| jstd dS |d r*| js*td dS dS )	a4  
        Check if the input parameters are valid based on the initialized models.

        Args:
            input_params (Dict): A dictionary containing input parameters.

        Returns:
            bool: True if all required models are initialized according to input parameters, False otherwise.
        rG   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.FrK   zRSet use_seal_recognition, but the models for seal recognition are not initialized.rJ   zTSet use_table_recognition, but the models for table recognition are not initialized.T)rG   r
   errorrK   rJ   )r>   rp   r7   r7   rB   check_model_settings_valid  s    z3_LayoutParsingPipelineV2.check_model_settings_validimageregion_det_resformula_res_listtext_rec_modeltext_rec_score_threshc           C         s	  i }i }	i }
g }g }g }d}d}d}g d}t dddt|| td D ]u\}}|d }|d	  }|| |\}}}}t||}t|t|}|d
krW|| n	|dkr`|| |dkrit||}|dkrq|d7 }|dvrt||gdd\}}||
|< |D ]}|	|ddu r|g||< q|| | qq(|D ]}d | d d |k rdd | d	< qt
|dko|dk}|rtd |d  d }t	dd}||| krdd |d  d	< | D ]\}}t
|dkrd} t|d | }!t|d | }"|D ]}d | d }#t|!|#}$|
| D ]}%|d |% }&t|&|$d}'|'dkr7d|d |%< qdd |$D \}(})}*}t||)||(|*f }+t||+gd },t|"|#dd}-|,d }.|,d  }/|durp|n| jj}|.|kr| d7 } | dkr|-|d |< |$|d |< |-|d! |< |.|d" |< |/|d |< q|d |- t
|d dkrt|$g|d< nt|d |$f|d< |d! |- |d" |. |d |/ |d# d |
| | |
| t
|d d  qq|
 D ]\}0}1d$}2|1D ]}3|d |3 dkrd}2 nq|2sd |0 d	 t	d%g vrd |0 d }$d&d |$D \}(})}*}t||)||(|*f }+t||+gd },t|$|$dd}-|,d }.|,d  }/|durg|n| jj}|.|krt
|d dkrt|$g|d< nt|d |$f|d< |d! |- |d" |. |d |/ |d# d |
|0 t
|d d  qt
d dkrt
|d dkrt|d D ]\}3}4t|4|}d d|4|d" |3 d' |3g|
|3< qt	d(g t	d)g  t	d*g  d+d d D  t|d d,d- d.|d< t
|d dkr2|d/dd0g|d< tt
 |	d< nttt
 }5t|d D ]\}6}7g }g |	|6< |7d }8|5D ]'}9d |9 d	 v r_qPt|8 |9 dd1}:|:t	d2dkrv||9 qPg };t
|dkrt
|;t
|krt|};g } fd3d|;D }<t|<}=|5D ]'}9d |9 d	 v rqt|= |9 dd1}:|:t	d2dkr||9 qt
|;t
|ks|D ]}9|5|9 q||	|6< |=|d |6 d< q@t
|5dkr fd4d|5D }>t
|>dkrnt|>}?g }t|d D ]/\}6}7t
|	|6 dkrq|7d }8t|?|8}:|:dkr<t|?|8|jd |jd |5 \}?}qfd5d|D }t
|dkrafd6d|5D }t
|dkran6 fd7d|D }<t|<}?t
|d }6t||	|6< |D ]}9|5|9 q||d |?d/dd0 t
|5dksfd8dtt
d D }@|@D ]!}3d |3 d }At
|d }6|3g|	|6< |d |Ad/dd0 qt |	|
d9}B|B|fS ):a  
        Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
        Args:
            image (list): The input image.
            overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
                - "input_img": The image on which OCR was performed.
                - "dt_boxes": A list of detected text box coordinates.
                - "rec_texts": A list of recognized text corresponding to the detected boxes.

            layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
                - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.

            table_res_list (list): A list of table detection results, where each item is a dictionary containing:
                - "block_bbox": The bounding box of the table layout.
                - "pred_html": The predicted HTML representation of the table.

            formula_res_list (list): A list of formula recognition results.
            text_rec_model (Any): The text recognition model.
            text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to `None`.
        Returns:
            list: A list of dictionaries representing the layout parsing result.
        r   g          ry   r   r   g      ?T)rP   Zsmallerrb   rh   rc   rV   Zparagraph_titletextZ	doc_titler   rd   Zreturn_match_idxNr   Z%title_conversion_area_ratio_thresholdg333333?	rec_boxesdt_polyssmallg? 	rec_textsc                 S      g | ]}t |qS r7   int.0ir7   r7   rB   
<listcomp>      z>_LayoutParsingPipelineV2.standardized_data.<locals>.<listcomp>Zpoly)Zreturn_formatZ	rec_scoreZrec_text	rec_polys
rec_scores
rec_labelsFZvision_labelsc                 S   r   r7   r   r   r7   r7   rB   r     r   )rc   rh   scoreZunordered_labelsZheader_labelsZfooter_labelsc                 S      g | ]}|d  qS )rh   r7   )r   boxr7   r7   rB   r     r   c                 S   s   t | d S )Nrh   )r   )itemr7   r7   rB   <lambda>  s    z<_LayoutParsingPipelineV2.standardized_data.<locals>.<lambda>)keyZSupplementaryRegion)rh   rc   r   )modeZ#match_block_overlap_ratio_thresholdc                       g | ]} | qS r7   r7   r   idxblock_bboxesr7   rB   r   9      c                    r   r7   r7   r   r   r7   rB   r   R  r   c                    $   g | ]} d  | d vr|qS rb   rc   r7   r   ra   mask_labelsr7   rB   r   m  
    c                    r   r   r7   r   r   r7   rB   r   s  r   c                    r   r7   r7   r   r   r7   rB   r   z  r   c                    s$   g | ]} d  | d v r|qS r   r7   r   r   r7   rB   r     r   )region_to_block_mapblock_to_ocr_map)!r'   r"   	enumerateri   rj   r)   maxr   r&   r;   lenr   itemscopydeepcopyr$   r!   rk   rl   listr\   rw   vstackremover   sortedrangesetr   r    r(   shapedict)Cr>   rs   rt   ra   r`   ru   rv   rw   Zmatched_ocr_dictr   r   rm   Zfootnote_listZparagraph_title_listZbottom_text_y_maxZmax_block_areaZdoc_title_numZbase_region_bboxbox_idxrn   r   rc   _y2Zmatched_idxesZmatched_idxZfootnote_idxZonly_one_paragraph_titleZparagraph_title_block_areaZtitle_area_max_block_thresholdZoverall_ocr_idxZlayout_box_idsZ
matched_noZoverall_ocr_boxZoverall_ocr_dt_polyZ
layout_boxZcrop_boxZocr_idxZocr_boxZioux1y1Zx2Zcrop_imgZcrop_img_rec_resZcrop_img_dt_polyZcrop_img_rec_scoreZcrop_img_rec_textZlayout_box_idxZoverall_ocr_idxesZhas_textr   Zocr_rec_boxZblock_idxes_set
region_idxregion_inforegion_bboxZ	block_idxZoverlap_ratioZold_region_bbox_matched_idxesZmatched_bboxesZnew_region_bboxZunmatched_bboxesZsupplement_region_bboxZ
mask_idxesbboxregion_block_ocr_idx_mapr7   )r   ra   r   rB   standardized_data,  sb  !

















$












9

z*_LayoutParsingPipelineV2.standardized_datar   table_res_listseal_res_listchart_res_listc           %         s  d}d}d}g  t |d D ]\}}|d }|d }g g g d}t||d}|dkr<t|dkr<|| d |_|d	7 }ny|d
krUt|dkrUd|| d |_|d	7 }n`|dkrit|dkri|| |_|d	7 }nL|dkr~t||gdd\}}||d |< n|d |g }|D ]#}|d |d |  |d |d |  |d |d |  q|j|||	|
d |g dt	d  v rt
tt|\}}}}d|j d| d| d| d| d}t|||||dddf }||d|_ | qg d}g }t |d D ]/\}} t| d d}! fd d!|d" | D }"|"r4t|!|}t|!|"d#}#||# qtt|d|d#}$|$S )$ai  
        Extract structured information from OCR and layout detection results.

        Args:
            image (list): The input image.
            overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
                - "input_img": The image on which OCR was performed.
                - "dt_boxes": A list of detected text box coordinates.
                - "rec_texts": A list of recognized text corresponding to the detected boxes.

            layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
                - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.

            table_res_list (list): A list of table detection results, where each item is a dictionary containing:
                - "block_bbox": The bounding box of the table layout.
                - "pred_html": The predicted HTML representation of the table.

            seal_res_list (List): A list of seal detection results. The details of each item depend on the specific application context.
            text_rec_model (Any): A model for text recognition.
            text_rec_score_thresh (Union[float, None]): The minimum score required for a recognized character to be considered valid. If None, use the default value specified during initialization. Default is None.

        Returns:
            list: A list of structured boxes where each item is a dictionary containing:
                - "block_label": The label of the content (e.g., 'table', 'chart', 'image').
                - The label as a key with either table HTML or image data and text.
                - "block_bbox": The coordinates of the layout box.
        r   rb   rc   rh   )rb   r   r   )rc   r   rf   Z	pred_htmlr   rg   
r   chartre   Tr{   r   r|   r   )rs   Zocr_rec_resrv   rw   )rg   rf   re   r   Zimage_labelszimgs/img_in_Z_box_r   z.jpgN)pathimgrx   r   c                    r   r7   r7   r   layout_parsing_blocksr7   rB   r     s    zG_LayoutParsingPipelineV2.get_layout_parsing_objects.<locals>.<listcomp>r   )r   blocks)r   r   r   contentjoinr&   r;   rj   Zupdate_text_contentr   r   mapr   rc   r   Z	fromarrayrs   rk   rl   Zastyper)   r   )%r>   rs   r   rt   r`   ra   r   r   r   rv   rw   Ztable_indexZ
seal_indexZchart_indexr   rn   rc   Z
block_bboxZrec_resblockr   Zocr_idx_listZbox_nox_miny_minx_maxy_maximg_pathr   Zpage_region_bboxlayout_parsing_regionsr   r   r   Zregion_blocksregionlayout_parsing_pager7   r   rB   get_layout_parsing_objects  s   )






$"



z3_LayoutParsingPipelineV2.get_layout_parsing_objectsr   c                 C   s,   t |}g }|D ]}t |}|| q|S N)r*   extend)r>   r   r   parsing_res_listr   r   r7   r7   rB   sort_layout_parsing_blocks  s   z3_LayoutParsingPipelineV2.sort_layout_parsing_blocksrT   c                    s   | j |||||| jj|	d\}}}| j||||||||| jj| jjd
}| |}d} fddtd D }t|D ]\}}||_|j	|v rM||_
|d7 }q:|S )aE  
        Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
        Args:
            image (list): The input image.
            layout_det_res (DetResult): The detection result containing the layout information of the document.
            overall_ocr_res (OCRResult): The overall OCR result containing text information.
            table_res_list (list): A list of table recognition results.
            seal_res_list (list): A list of seal recognition results.
            formula_res_list (list): A list of formula recognition results.
            text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to `None`.
        Returns:
            list: A list of dictionaries representing the layout parsing result.
        )rs   rt   ra   r`   ru   rv   rw   )
rs   r   rt   r`   ra   r   r   r   rv   rw   r   c                    s   g | ]}| vr|qS r7   r7   )r   rc   rT   r7   rB   r   _  s
    zC_LayoutParsingPipelineV2.get_layout_parsing_res.<locals>.<listcomp>Zvisualize_index_labels)r   r\   rv   r   rw   r   r   r   indexrc   order_index)r>   rs   rt   ra   r`   r   r   r   ru   rw   rT   r   r   r   r   Zvisualize_order_labelsr   r   r7   r   rB   get_layout_parsing_res&  sD   


z/_LayoutParsingPipelineV2.get_layout_parsing_resrH   rI   rK   rJ   rN   rO   rM   rL   c
              
   C   s   |du r|du r| j }
n|du s|du rd}
nd}
|du r | j}|du r'| j}|du r.| j}|du r5| j}|du r<| j}|du rC| j}|	du rJ| j}	t|
|||||||	dS )a  
        Get the model settings based on the provided parameters or default values.

        Args:
            use_doc_orientation_classify (Union[bool, None]): Enables document orientation classification if True. Defaults to system setting if None.
            use_doc_unwarping (Union[bool, None]): Enables document unwarping if True. Defaults to system setting if None.
            use_seal_recognition (Union[bool, None]): Enables seal recognition if True. Defaults to system setting if None.
            use_table_recognition (Union[bool, None]): Enables table recognition if True. Defaults to system setting if None.
            use_formula_recognition (Union[bool, None]): Enables formula recognition if True. Defaults to system setting if None.
            format_block_content (Union[bool, None]): Enables block content formatting if True. Defaults to system setting if None.

        Returns:
            dict: A dictionary containing the model settings.

        NTF)rG   rK   rJ   rN   rO   rM   rL   rT   )	rG   rK   rJ   rN   rM   rO   rL   rT   r   )r>   rH   rI   rK   rJ   rN   rO   rM   rL   rT   rG   r7   r7   rB   get_model_settingsl  s:   z+_LayoutParsingPipelineV2.get_model_settingsinputuse_textline_orientationlayout_thresholdrQ   rR   rS   text_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratioseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_thresh#use_wired_table_cells_trans_to_html&use_wireless_table_cells_trans_to_htmluse_table_orientation_classify use_ocr_results_with_table_cellsuse_e2e_wired_table_rec_model use_e2e_wireless_table_rec_modelc"           P      k   s   |  |||||||	|
|!	}#| |#sddiV  | |D ]}$| |$j}%|#d r5t| j|%||d}&ndd |%D }&dd |&D }'t| j|'||||d}(d	d t|'|(D })|#d
 rht| j	|'ddd}*ndd |'D }*|#d rt| j
|'ddd|(d}+dd |+D },ndd |'D },t|'|,D ]#\}-}.|.D ]}/ttt|/d \}0}1}2}3d|-|1|3|0|2ddf< qqt| j|'|||||||d}4|4D ]}5dgt|5d  |5d< q|#d rg }6t|(|'|4|,|)D ]\}7}-}5}.}8t|5}9|.D ]q}/ttt|/d \}0}1}2}3|0|1f|2|1f|2|3f|0|3fg}:|9d |: |/d };|;dr(|;ds.d|; d};|9d |;  |9d jdkrIt|/d g|9d< nt|9d |/d gf|9d< |9d  |: |9d! d" q|8D ]_}<|<d# }=|<d$ \}0}1}2}3|0|1f|2|1f|2|3f|0|3fg}:|9d |: |9d d%|= d& |9d jdkrt|<d$ g|9d< nt|9d |<d$ f|9d< |9d  |: |9d! |<d'  qht| j|-dddd|9|7d|||||| d(}>d)d |>D }?|6|? qnd*d |'D }6|#d+ rt| j|'ddd|(||||||d,}@d-d |@D }And.d |'D }At|$j|$j|$j|'|&|(|*|4|6|A|,|)D ]\}B}C}D}-}E}7}F}5}G}H}.}8g }I|#d/ rg }J|7d0 D ].}K|Kd1 d2krr|Kd$ \}0}1}2}3|-t|1t|3t|0t|2ddf }L|Jd3|Li qE| j|Jd4D ]
}M|I|Md5  qz| j|-|F|7|5|G|H|I|.||#d6 d7
}N|.D ]}/ttt|/d \}0}1}2}3|/d8 |-|1|3|0|2ddf< qi d9|Bd:|Cd;|Dd<|-j d" d=|-j d d>|Ed?|7d@|FdA|5dB|GdC|HdD|IdE|.dF|NdG|8dH|#}Ot!|OV  q*qdS )Ia  
        Predicts the layout parsing result for the given input.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
                                                                        numpy array of an image, or list of numpy arrays.
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
            use_seal_recognition (Optional[bool]): Whether to use seal recognition.
            use_table_recognition (Optional[bool]): Whether to use table recognition.
            use_formula_recognition (Optional[bool]): Whether to use formula recognition.
            use_region_detection (Optional[bool]): Whether to use region detection.
            format_block_content (Optional[bool]): Whether to format block content.
            layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to `False`.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to `None`.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to `None`.
            text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
            text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
            text_det_thresh (Optional[float]): Threshold for text detection.
            text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
            text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
            text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
            seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
            seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
            seal_det_thresh (Optional[float]): Threshold for seal detection.
            seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
            seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
            seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
            use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
            use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
            use_table_orientation_classify (bool): Whether to use table orientation classification.
            use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
            use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
            use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
            markdown_ignore_labels (Optional[list[str]]): The list of ignored markdown labels. Default is None.
            **kwargs (Any): Additional settings to extend functionality.

        Returns:
            LayoutParsingResultV2: The predicted layout parsing result.
        rq   z0the input params for model settings are invalid!rG   )rH   rI   c                 S   s   g | ]}d |iqS Z
output_imgr7   )r   Zarrr7   r7   rB   r     r   z4_LayoutParsingPipelineV2.predict.<locals>.<listcomp>c                 S   r   r   r7   r   r   r7   r7   rB   r     r   )rP   rQ   rR   rS   c                 S   s   g | ]\}}t ||d  qS rb   )r#   )r   r   resr7   r7   rB   r   ,  s    rM   Tr~   )rQ   rS   c                 S   s   g | ]}d g iqS r   r7   r   r   r7   r7   rB   r   :  r   rN   F)use_layout_detectionrH   rI   ra   c                 S   r   )ru   r7   r   r7   r7   rB   r   F  r   c                 S      g | ]}g qS r7   r7   r   r7   r7   rB   r   J      r}   g     o@N)r   r   r   r   r   r   rw   rz   r   r   rJ   rec_formula$r|   r   r   r   r   r   rh   z+<div style="text-align: center;"><img src="z" alt="Image" /></div>r   )rH   rI   r   Zuse_ocr_modelr`   ra   Zcell_sort_by_y_projectionr   r   r   r   r   r   c                 S   r   )r   r7   r   r7   r7   rB   r     r   c                 S   r   r7   r7   r   r7   r7   rB   r     r   rK   )
rH   rI   r   ra   r   r   r   r   r   r   c                 S   r   )r   r7   r   r7   r7   rB   r     r   c                 S   r   r7   r7   r   r7   r7   rB   r     r   rO   rb   rc   r   rs   )r   resultrT   )	rt   ra   r`   r   r   r   ru   rw   rT   Z	input_img
input_path
page_index
page_countwidthheightdoc_preprocessor_resra   rt   r`   r   r   r   ru   r   imgs_in_docmodel_settings)"r   rr   r<   r=   Z	instancesr   rY   r[   ziprZ   r_   r   r   r\   r   r   r   rj   
startswithendswithsizerk   rl   r   r^   r   r]   Zinput_pathsZpage_indexesZpage_countsrC   r   r   r   )Pr>   r   rH   rI   r   rK   rJ   rN   rO   rM   rL   r   rQ   rR   rS   r   r   r   r   r   rw   r   r   r   r   r   r   r   r   r   r   r   r   rT   r?   r   Z
batch_dataZimage_arraysZdoc_preprocessor_resultsZdoc_preprocessor_imagesZlayout_det_resultsr   Zregion_det_resultsZformula_res_allZformula_res_listsZdoc_preprocessor_imageru   Zformula_resr   r   r   r   Zoverall_ocr_resultsr`   Ztable_res_listsra   Zimgs_in_doc_for_imgZtable_contents_for_imgZpoly_pointsr   r   r   Ztable_res_allZsingle_table_res_listsZseal_res_allZseal_res_listsr   r   r   r   rt   r   r   r   Zchart_imgs_listr   Z	chart_imgZchart_res_batchr   Zsingle_img_resr7   r7   rB   predict  s@  S

		




\

$


	
 z _LayoutParsingPipelineV2.predictmarkdown_listc                 C   s   d}d}|D ]X}|d d }|d d }|sT|sT|r|d nd}|d r*|d d nd}|r4t d|nd	}	|r>t d|nd	}
|	sM|
sM|d
|d  7 }n||d 7 }n|d|d  7 }|}qd|i}t|S )a  
        Concatenate Markdown content from multiple pages into a single document.

        Args:
            markdown_list (list): A list containing Markdown data for each page.

        Returns:
            tuple: A tuple containing the processed Markdown text.
        r   TZpage_continuation_flagsr   r   r   markdown_texts[\u4e00-\u9fff]F z

)rematchr   )r>   r  r  Z-previous_page_last_element_paragraph_end_flagr   Z'page_first_element_paragraph_start_flagZ$page_last_element_paragraph_end_flagZlast_char_of_markdownZfirst_char_of_handlerZlast_is_chinese_charZfirst_is_chinese_charZmarkdown_resultr7   r7   rB   concatenate_markdown_pages+  sD   
z3_LayoutParsingPipelineV2.concatenate_markdown_pagesc                 C   s  g }d }d}t |D ]\}}g }d }|D ]o}	t|	d| t|	|\}
}|	}|	jdk}|d uo2|jdk}|rx|rx|
sx|j}|	j}|rE|d nd}|rM|d nd}td|}td|}d}|sg|sg|rg|rgd}| j|| 7  _t|	d|j n||	 |	}|d7 }q|| q
|S )	Nr   group_idrz   r   r   r  r	  r   )	r   setattrr%   rc   r   r
  r  r  rj   )r>   Zblocks_by_pageZmerged_blocks_by_pageZglobal_prev_blockZglobal_block_idr   Zone_page_blocksZcurrent_page_new_blocksZ
prev_blockr   Zseg_start_flagZseg_end_flagZis_textZprev_is_textZ	prev_textZ	curr_textZ	last_charZ
first_charZis_last_chineseZis_first_chinese	separatorr7   r7   rB   merge_text_across_paged  sJ   


z/_LayoutParsingPipelineV2.merge_text_across_pager   ) NNNNNNNNNNNNNNNNNNNNNNNNNFFTTFTN)(__name__
__module____qualname____doc__r   r   strr   r   boolr   r   r9   rE   r   r:   r   r   ro   rr   r   floatr   r   r   r   r   r   r   r   rk   Zndarrayr   r   r   r  tupler  r  __classcell__r7   r7   r@   rB   r+   3   s   	
, 
'
	
   	


y
	


P







	


F





	














 !
"$
  ~9r+   Zocrc                   @   s&   e Zd ZdgZedd Zdd ZdS )LayoutParsingPipelineV2zPP-StructureV3c                 C   s   t S r   )r+   rF   r7   r7   rB   _pipeline_cls  s   z%LayoutParsingPipelineV2._pipeline_clsc                 C   s   | ddS )Nr5   r   )r;   )r>   r3   r7   r7   rB   _get_batch_size  s   z'LayoutParsingPipelineV2._get_batch_sizeN)r  r  r  entitiespropertyr  r  r7   r7   r7   rB   r    s
    
r  )<r   r
  typingr   r   r   r   r   r   numpyrk   ZPILr   utilsr
   Z
utils.depsr   Zcommon.batch_samplerr   Zcommon.readerr   modelsr   r   Zmodels.object_detection.resultr   Zutils.benchmarkr   Z	_parallelr   baser   Z
ocr.resultr   Zpp_doctranslation.resultr   Zlayout_objectsr   r   Z	result_v2r   Zsettingr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   Ztime_methodsr+   r  r7   r7   r7   rB   <module>   sF    4          x