o
    0jn                     @   sr  g d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZ d	d
lmZmZ dejdejdefddZ		dNdededededef
ddZdd Z		dOdee dee dedefdd Z	dPde	ejeef de	ejeef defd!d"Zd#d$ Z d%d& Z!d'd( Z"d)d* Z#d+d, Z$d-d. Z%d/d0 Z&	dQde	ee' ee'e'e'e'f f de	ee' ee'e'e'e'f f d1ed2edee	ee' ee'e'e'e'f f  f
d3d4Z(	dRd6eeeee' f  d7ed2edeeeeee' f  eeeee' f  f fd8d9Z)dSd;d<Z*defd=d>Z+d?d@ Z,dAedBe-fdCdDZ.dEdF Z/dGdH Z0dIdJ Z1dTdKedefdLdMZ2dS )U)get_sub_regions_ocr_resget_show_colorsorted_layout_boxes    N)deepcopy)DictListOptionalTupleUnion)Image   )convert_points_to_boxes)	OCRResult   )BLOCK_LABEL_MAPREGION_SETTINGS	src_boxes	ref_boxesreturnc                 C   s   g }t | }|dkrot |dkrott |D ]X}|| }t|d | dddf }t|d | dddf }t|d | dddf }t|d | dddf }	|| }
|	| }t|
dk|dk@ d }|| q|S )a  
    Get the indices of source boxes that overlap with reference boxes based on a specified threshold.

    Args:
        src_boxes (np.ndarray): A 2D numpy array of source bounding boxes.
        ref_boxes (np.ndarray): A 2D numpy array of reference bounding boxes.
    Returns:
        match_idx_list (list): A list of indices of source boxes that overlap with reference boxes.
    r   Nr   r      )lenrangenpmaximumminimumwhereextend)r   r   match_idx_listZsrc_boxes_numZrnoZref_boxx1y1x2y2Zpub_wZpub_hZ	match_idx r"   q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/layout_parsing/utils.pyget_overlap_boxes_idx!   s   
r$   TFoverall_ocr_resobject_boxesflag_withinreturn_match_idxc           
      C   s  i }g |d< g |d< g |d< g |d< | d }t ||}tt|}tt|D ]E}|r5||v r2d}nd}n	||vr<d}nd}|rl|d | d |  |d | d |  |d | d |  |d | d |  q'dD ]}	t||	 ||	< qo|r||fS |S )aX  
    Filters OCR results to only include text boxes within specified object boxes based on a flag.

    Args:
        overall_ocr_res (OCRResult): The original OCR result containing all text boxes.
        object_boxes (list): A list of bounding boxes for the objects of interest.
        flag_within (bool): If True, only include text boxes within the object boxes. If False, exclude text boxes within the object boxes.
        return_match_idx (bool): If True, return the list of matching indices.

    Returns:
        OCRResult: A filtered OCR result containing only the relevant text boxes.
    	rec_polys	rec_texts
rec_scores	rec_boxesTF)r)   r+   r,   )r$   listsetr   r   appendr   array)
r%   r&   r'   r(   Zsub_regions_ocr_resZoverall_text_boxesr   Zbox_noZ
flag_matchkeyr"   r"   r#   r   ;   sJ   




r   c           	      C   s4  t | }|dkr
| S t| dd d}t|}g }g }g }d}	 ||kr$nX|| d d |d k rJ|| d d	 d
| d k rJ|||  |d7 }n1|| d d d	| d krd|||  |d7 }n||7 }||7 }|||  g }g }|d7 }qt|dd d}t|dd d}|r||7 }|r||7 }|S )z
    Sort text boxes in order from top to bottom, left to right
    Args:
        res: List of dictionaries containing layout information.
        w: Width of image.

    Returns:
        List of dictionaries containing sorted layout information.
    r   c                 S   s   | d d | d d fS )N
block_bboxr   r   r"   xr"   r"   r#   <lambda>   s    z%sorted_layout_boxes.<locals>.<lambda>)r1   r   Tr2      r   r      c                 S      | d d S Nr2   r   r"   r3   r"   r"   r#   r5          c                 S   r8   r9   r"   r3   r"   r"   r#   r5      r:   )r   sortedr-   r/   )	reswZ	num_boxesZsorted_boxesZ_boxesZnew_resZres_leftZ	res_rightir"   r"   r#   r   w   sB   


r   
horizontalunionbbox1bbox2	directionc           
      C   s   d\}}|dkrd\}}t | | || }t| | || }|| }|dkr(dS |dkr?t | | || t| | ||  }	n4|dkrUt| | | |  || ||  }	n|dkrkt | | | |  || ||  }	ntd| d	|	dkr{||	 S d
S )a  
    Calculate the IoU of lines between two bounding boxes.

    Args:
        bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
        bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
        direction (str): direction of the projection, "horizontal" or "vertical".

    Returns:
        float: Line overlap ratio. Returns 0 if there is no overlap.
    )r   r   r?   )r   r   r   r@   smalllargeInvalid mode -, must be one of ['union', 'small', 'large'].        )maxmin
ValueError)
rA   rB   rC   modestart_indexZ	end_indexZintersection_startZintersection_endZoverlapZ	ref_widthr"   r"   r#   "calculate_projection_overlap_ratio   s0   
rN   c                 C   s  t j| t jd} t j|t jd}t | d |d }t | d |d }t | d |d }t | d |d }t d|| }t d|| }t j||t jd}	t| }
t|}|dkrd|
| |	 }n|dkrot |
|}n|dkrzt |
|}ntd	| d
|dkrdS |	| S )a  
    Calculate the overlap ratio between two bounding boxes using NumPy.

    Args:
        bbox1 (np.ndarray, list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]
        bbox2 (np.ndarray, list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]
        mode (str): The mode of calculation, either 'union', 'small', or 'large'.

    Returns:
        float: The overlap ratio value between the two bounding boxes
    Zdtyper   r   r   r   r@   rD   rE   rF   rG   rH   )r   r0   Zfloat64r   r   multiplycalculate_bbox_arearK   )rA   rB   rL   x_min_intery_min_interx_max_intery_max_interZinter_widthZinter_heightZ
inter_areaZ
bbox1_areaZ
bbox2_areaZref_arear"   r"   r#   calculate_overlap_ratio   s.   
rV   c                 C   s   | st dt| }t|dddf }t|dddf }t|dddf }t|dddf }t||||gS )a?  
    Calculate the minimum enclosing bounding box for a list of bounding boxes.

    Args:
        bboxes (list): A list of bounding boxes represented as lists of four integers [x1, y1, x2, y2].

    Returns:
        list: The minimum enclosing bounding box represented as a list of four integers [x1, y1, x2, y2].
    z$The list of bounding boxes is empty.Nr   r   r   r   )rK   r   r0   rJ   rI   )ZbboxesZbboxes_arrayZmin_xZmin_yZmax_xZmax_yr"   r"   r#    calculate_minimum_enclosing_bbox  s   

rW   c                 C      t td| S )z#check if the char is english letterz
^[A-Za-z]$boolrematchcharr"   r"   r#   is_english_letter(     r_   c                 C   rX   )zcheck if the char is numericz^[\d]+$rY   r]   r"   r"   r#   
is_numeric-  r`   ra   c                 C   s   h d}| |v S )z
    check if the char is non-breaking punctuation

    Args:
        char (str): character to check

    Returns:
        bool: True if the char is non-breaking punctuation
    >   ,u   “;:u   ：u   、"'u   ，-u   ；r"   )r^   Znon_breaking_punctuationsr"   r"   r#   is_non_breaking_punctuation2  s   
rh   c                 C   s:   t tt|\}}}}d|  d| d| d| d| dS )Nzimgs/img_in_Z_box__z.jpg)r-   mapint)labelboxx_miny_minx_maxy_maxr"   r"   r#   construct_img_pathM  s   $rr   c                 C   sr   | ||||f }|j dkrdS |jdkrt|S |jd dkr4t|dddf ddddf S t|S )z
    Crop `original_img` to [y_min:y_max, x_min:x_max], clamped to image bounds.
    Returns a C-contiguous array suitable for `PIL.Image.fromarray`.
    r   Nr   r   .)sizendimr   Zascontiguousarrayshape)original_imgrn   ro   rp   rq   cropr"   r"   r#   _crop_image_region_for_pilR  s   


$
ry   c              
   C   s  g }t | jd t | jd }}|D ]n}|d td v r|d }ttt |d \}}}	}
tdt||}tdt|	|}	tdt||}tdt|
|}
|	|ksV|
|krWqt||d }t| |||	|
}|d u rkqt	
|}|||||||	|
f|d d q|S )Nr   r   rl   Zimage_labels
coordinatescore)pathimgrl   rz   r{   )rk   rv   r   r-   rj   rI   rJ   rr   ry   r   Z	fromarrayr/   )rw   Zlayout_det_objsZimgs_in_dochr=   Zdet_objrl   rn   ro   rp   rq   Zimg_pathZarr_for_pilr}   r"   r"   r#   gather_imgsa  s:   


	r   ratiosmallerc                 C   sJ   t | }t |}t| |dd}||kr#||kr|s||kr!|s!dS dS dS )a  
    Determine if the overlap area between two bounding boxes exceeds a given ratio
    and return the smaller (or larger) bounding box based on the `smaller` flag.

    Args:
        bbox1 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the first bounding box [x_min, y_min, x_max, y_max].
        bbox2 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the second bounding box [x_min, y_min, x_max, y_max].
        ratio (float): The overlap ratio threshold.
        smaller (bool): If True, return the smaller bounding box; otherwise, return the larger one.

    Returns:
        Optional[Union[List[int], Tuple[int, int, int, int]]]:
            The selected bounding box or None if the overlap ratio is not exceeded.
    rD   rL   r   r   N)rQ   rV   )rA   rB   r   r   Zarea1Zarea2overlap_ratior"   r"   r#   _get_minbox_if_overlap_by_ratio  s   r   ?blocks	thresholdc                 C   s   t  }t| } g }t| d D ]^\}}t|d t| d D ]N}| d | }||v s.||v r/qt|d |d ||d}	|	durl|d dk}
|d dk}|
|kr_|
rS|n|}|| d |  n|	dkre|n|}|| qqt|dd	D ]}| d |= qt| S )
a  
    Remove overlapping blocks based on a specified overlap ratio threshold.

    Args:
        blocks (List[Dict[str, List[int]]]): List of block dictionaries, each containing a 'block_bbox' key.
        threshold (float): Ratio threshold to determine significant overlap.
        smaller (bool): If True, the smaller block in overlap is removed.

    Returns:
        Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
            A tuple containing the updated list of blocks and a list of dropped blocks.
    Zboxesr   rz   )r   Nrl   imageT)reverse)	r.   r   	enumerater   r   r   r/   addr;   )r   r   r   Zdropped_indexesZoverlap_image_blocksr>   Zblock1jZblock2Zoverlap_box_indexZis_block1_imageZis_block2_imageZ
drop_indexindexr"   r"   r#   remove_overlap_blocks  s6   
r   bboxc           	      C   s  t | } t |}t| jdkr| nt| gd }t|jdkr#|nt|gd }t|d |d }t|d |d }t|d |d }t|d |d }||ksV||krXdS |dkret ||||gS |dkr}t j||g||g||g||ggt jdS td	)
a_  
    Compute the intersection of two bounding boxes, supporting both 4-coordinate and 8-coordinate formats.

    Args:
        bbox1 (tuple): The first bounding box, either in 4-coordinate format (x_min, y_min, x_max, y_max)
                       or 8-coordinate format (x1, y1, x2, y2, x3, y3, x4, y4).
        bbox2 (tuple): The second bounding box in the same format as bbox1.
        return_format (str): The format of the output intersection, either 'bbox' or 'poly'.

    Returns:
        tuple or None: The intersection bounding box in the specified format, or None if there is no intersection.
    r   r   r   r   Nr   ZpolyrO   z.return_format must be either 'bbox' or 'poly'.)	r   r0   r   rv   r   rI   rJ   Zint16rK   )	rA   rB   Zreturn_formatZrect1Zrect2rR   rS   rT   rU   r"   r"   r#   get_bbox_intersection  s,   

  
r   c              	      s  | \}}}}	|\}
}}}ddddd}|
| | || | || | |	| | g}t |}t|}||| }t|dkrB| g fS tdD ]}|| }| dd }|| ||< g g }}|D ]&}t| | dd}|td	d
krx|| q_|tddkr|| q_t|dkrt|dkr|D ]M} | }|\}}}}	|\}
}}}|
| | || | || | |	| | g}t	|}||}|| }|| ||< t
||||| \}}t|dkrqq fdd|D }t|}  | |fS || t|}||| }qF| |fS )aG  
    Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.

    Args:
        supplement_region_bbox (list): The supplement region bbox.
        ref_region_bbox (list): The reference region bbox.
        image_width (int): The width of the image.
        image_height (int): The height of the image.
        block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
        block_bboxes (dict): The dictionary of block bboxes.

    Returns:
        list: The new region bbox and the matched block idxes.
    r   r   r   r   )r   r   r   r   NrD   r   Z#match_block_overlap_ratio_thresholdg?Z#split_block_overlap_ratio_thresholdg?c                    s   g | ]} | qS r"   r"   ).0idxblock_bboxesr"   r#   
<listcomp>T  s    z1shrink_supplement_region_bbox.<locals>.<listcomp>)r   rJ   r   r   r   rV   r   getr/   rI   shrink_supplement_region_bboxrW   remove)Zsupplement_region_bboxZref_region_bboxZimage_widthZimage_heightZblock_idxes_setr   r   r   r    r!   Zx1_primeZy1_primeZx2_primeZy2_primeZindex_conversion_mapZedge_distance_listZedge_distance_list_tmpZmin_distanceZ	src_indexri   Z	dst_indexZtmp_region_bboxZiner_block_idxesZsplit_block_idxesZ	block_idxr   Zsplit_block_idxZsplit_block_bboxZmax_distanceZ
iner_idxesZmatched_bboxesr"   r   r#   r     s   












r   c           
      C   sl   |du r| S | \}}}}|\}}}}	t t||}t t||}t t||}t t||	}	||||	g}|S )zUpdate region box with bboxN)rk   rJ   rI   )
r   Z
region_boxr   r   r    r!   Z	x1_regionZ	y1_regionZ	x2_regionZ	y2_regionr"   r"   r#   update_region_box^  s   r   formula_res_listocr_resc           	      C   s   | D ]d}t tt|d \}}}}||f||f||f||fg}|d | |d }|d | |d jdkrCt|d g|d< nt|d |d gf|d< |d d |d | |d	 d
 qdS )zConvert formula result to OCR result format

    Args:
        formula_res_list (List): Formula results
        ocr_res (dict): OCR result
    Returns:
        ocr_res (dict): Updated OCR result
    Zdt_polysZrec_formular*   r,   r   Z
rec_labelsformular)   r+   r   N)r-   rj   rk   r/   rt   r   r0   Zvstack)	r   r   Zformula_resrn   ro   rp   rq   Zpoly_pointsZformula_res_textr"   r"   r#   !convert_formula_res_to_ocr_formatp  s&   	r   c                 C   s*   t t| \}}}}t|| ||  }|S )zCalculate bounding box area)rj   floatabs)r   r   r   r    r!   Zarear"   r"   r#   rQ     s   rQ   c                 C   s,   | \}}|\}}|| d || d  d S )z/Calculate euclidean distance between two pointsr   g      ?r"   )Zpoint1Zpoint2r   r   r    r!   r"   r"   r#   caculate_euclidean_dist  s   r   c                 C   s   d}d}| j }| j}| j}| j}|durj|j}|j}	t|j|	 dk }
|dk}||jk o0||j k}|rJt|j |}t|j|}t||	 dk }
d}nt| j |j }|| dk }|
ri|ri|ri|t|j| jk rid}n|| dk rrd}|| dk rzd}||fS )a	  Get segment start flag and end flag based on previous block

    Args:
        block (Block): Current block
        prev_block (Block): Previous block

    Returns:
        seg_start_flag (bool): Segment start flag
        seg_end_flag (bool): Segment end flag
    TN
   r   r   F)	Zstart_coordinateZend_coordinateseg_start_coordinateseg_end_coordinateZnum_of_linesr   rJ   rI   width)blockZ
prev_blockZseg_start_flagZseg_end_flagZcontext_left_coordinateZcontext_right_coordinater   r   Znum_of_prev_linesZpre_block_seg_end_coordinateZprev_end_space_smallZprev_lines_more_than_oneZoverlap_blocksZedge_distanceZcurrent_start_space_smallr"   r"   r#   get_seg_flag  sR   
r   rl   c                 C   s   |rdddddddddd	d

}n<i dddddddddd	ddddddddddddddddddddddd dd!d"d"d#d$}d}| | |S )%N)         d   )r         r   )f   r   r   r   )r      r   r   )   r   3   r   )      r   r   )r   r   L   r   )5         r   )      r   r   )
	doc_titleZdoc_title_textparagraph_titleZsub_paragraph_titleZvisionZvision_titlevision_footnoteZnormal_textZcross_layoutZcross_referencer   r   Ztable_title)r   r   r   r   Zfigure_titleZchart_titler   textZvertical_textZinline_formular   )r   r   r   r   Zdisplay_formulaabstractcontent)(      \   r   Zseal)   r   r   r   table)   r   r   r   r   Zfigure)      r   r   )   r   r   r   )r         r   )chart	referenceZreference_content	algorithm)r   )rl   Zorder_labelZlabel_colorsZdefault_colorr"   r"   r#   r     sl   	"r   )TF)r?   r@   )r@   )T)r   T)r   )F)3__all__r[   copyr   typingr   r   r   r	   r
   numpyr   ZPILr   
componentsr   Z
ocr.resultr   Zsettingr   r   Zndarrayr$   rZ   r   r   r   strrN   r-   tuplerV   rW   r_   ra   rh   rr   ry   r   rk   r   r   r   r   r   dictr   rQ   r   r   r   r"   r"   r"   r#   <module>   s   
<:
2
1$
#*

3.
ZC