o
    0jy|                     @   s   d dl mZmZmZ d dlZddlmZmZ ddl	m
Z
mZmZmZmZ g dZG dd deZG d	d
 d
eZG dd deZG dd deZdS )    )AnyListUnionN   )BLOCK_LABEL_MAPLINE_SETTINGS)caculate_euclidean_dist"calculate_projection_overlap_ratiois_english_letteris_non_breaking_punctuation
is_numeric)TextSpanTextLineLayoutBlockLayoutRegionc                   @   s4   e Zd ZdZdd ZdefddZdefddZd	S )
r   zText span classc                 C   s   || _ || _|| _dS )z
        Initialize a TextSpan object.

        Args:
            box (list): The bounding box of the text span.
            text (str): The text content of the text span.
            label (int): The label of the text span.
        Nboxtextlabel)selfr   r   r    r   z/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/layout_parsing/layout_objects.py__init__&   s   	
zTextSpan.__init__returnc                 C      | j  S Nr   r   r   r   r   __str__3      zTextSpan.__str__c                 C   r   r   r   r   r   r   r   __repr__6   r   zTextSpan.__repr__N)__name__
__module____qualname____doc__r   strr   r    r   r   r   r   r   #   s
    r   c                   @   s   e Zd ZdZg dfdee fddZedd Zedd	 Z	ed
d Z
edd ZdefddZdefddZdeeee f fddZdd Z		d)dedededefddZdd  Zd*d"d#Z	$	%d+dededed&ededefd'd(ZdS ),r   zText line class
horizontalspansc                 C   s    || _ || _|  | _d| _dS )z
        Initialize a TextLine object.

        Args:
            spans (List[TextSpan]): A list of TextSpan objects. Defaults to [].
            direction (str): The direction of the text line. Defaults to "horizontal".
        FN)r'   	directionget_region_box
region_boxneed_new_line)r   r'   r(   r   r   r   r   =   s   

zTextLine.__init__c                 C      dd | j D S )Nc                 S      g | ]}|j qS r   )r   .0spanr   r   r   
<listcomp>L       z#TextLine.labels.<locals>.<listcomp>r'   r   r   r   r   labelsJ      zTextLine.labelsc                 C   r,   )Nc                 S   r-   r   r   r.   r   r   r   r1   P   r2   z"TextLine.boxes.<locals>.<listcomp>r3   r   r   r   r   boxesN   r5   zTextLine.boxesc                 C   <   | j dkrdnd}| j dkrdnd}t| j| | j|  S )Nr&   r   r         r(   absr*   r   	start_idxend_idxr   r   r   heightR      zTextLine.heightc                 C   r8   )Nr&   r   r   r:   r9   r;   r=   r   r   r   widthX   rA   zTextLine.widthr   c                 C      d dd | jD  dS )N c                 S      g | ]}t |jqS r   r%   r   r.   r   r   r   r1   _       z$TextLine.__str__.<locals>.<listcomp>
joinr'   r   r   r   r   r   ^      zTextLine.__str__c                 C   rC   )NrD   c                 S   rE   r   rF   r.   r   r   r   r1   b   rG   z%TextLine.__repr__.<locals>.<listcomp>rH   rI   r   r   r   r   r    a   rK   zTextLine.__repr__r0   c                 C   s2   t |tr| j| n| j| |  | _dS )z
        Add a span to the text line.

        Args:
            span (Union[TextSpan, List[TextSpan]]): A single TextSpan object or a list of TextSpan objects.
        N)
isinstancelistr'   extendappendr)   r*   )r   r0   r   r   r   add_spand   s   
zTextLine.add_spanc                 C   sv   | j sdS | j d j\}}}}| j D ]"}t||jd }t||jd }t||jd }t||jd }q||||gS )z{
        Get the region box of the text line.

        Returns:
            list: The region box of the text line.
        Nr   r   r:   r9   )r'   r   minmax)r   Zx_minZy_minZx_maxZy_maxr0   r   r   r   r)   q   s   
zTextLine.get_region_boxNblock_labelblock_text_widthblock_start_coordinateblock_stop_coordinatec                    s:  j dkrdndj dkrdnd jj fddd djv rj dkr+dnd }tjt|kr|jfd	dd g }	|D ]B}
|
j}|
jd
kr|t|d t|d t|d t|d f }t	||gd }|d }|d }||
_
||k rqH|	|
 qH|	_j|||jd |d}|S )aV  
        Get the text of the text line.

        Args:
            block_label (str): The label of the block.
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            ori_image (np.ndarray): The original image.
            text_rec_model (Any): The text recognition model.
            text_rec_score_thresh (float): The text recognition score threshold.

        Returns:
            str: The text of the text line.
        r&   r   r   r9   c                    s0   | j  d jdkr| j   fS | j    fS )Nr:   r&   )r   r(   r0   )lines_start_indexr   span_box_start_indexr   r   <lambda>   s   

z$TextLine.get_texts.<locals>.<lambda>)keyformulac                    s
   | j   S r   r6   rW   )
sort_indexr   r   rZ         
 r   r:   Z	rec_scoreZrec_text      ?)line_gap_limitrS   )r(   r'   sortr4   split_boxes_by_projectionlenr   r   intrM   r   rO   format_liner@   )r   rS   rT   rU   rV   	ori_imagetext_rec_modeltext_rec_score_threshZsplited_spans	new_spansr0   bboxZcrop_imgZcrop_img_rec_resZcrop_img_rec_scoreZcrop_img_rec_text	line_textr   )rX   r   r]   rY   r   	get_texts   sF   


zTextLine.get_textsc                 C   s    || || ko|| || kS )z<Check if box_a completely contains box_b in the x-direction.r   )r   box_abox_br>   r?   r   r   r   is_projection_contained   s    z TextLine.is_projection_containedh㈵>c              	   C   sn  g }| j dkrd\}}nd\}}tt| jD ]}| j| }d}t|t| jD ]}| j| j}	|j|j|j}
}}| |
|	||rd}|
| |	| k rt|	| | |
|  }|dkrt|
 }|	| | ||< |	t
t|||d |
| |	| kr|
| |	|  | }|dkr|	| | |
|< t
t|
||d}|t| jd kr|r|	| q(|s|	| q|S )a  
        Check if there is any complete containment in the x-direction
        between the bounding boxes and split the containing box accordingly.

        Args:
            offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
        Returns:
            A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
        r&   )r   r:   )r   r9   FTr   r   )r(   rangerc   r'   r   r   r   ro   copyrO   r   nparray)r   offsetri   Zprojection_start_indexZprojection_end_indexir0   Zis_splitjrn   rm   r   r   wZnew_bboxr   r   r   rb      sl   






z"TextLine.split_boxes_by_projection
   r   r`   c                 C   sF  | j d j}| j d j}d}| j D ]K}	|	jdkrA|dkrA|	j}
|
dsA|
dsAt| j dkr9d|	j d|	_nd|	j d|	_||	j7 }t|	jdkrSt|d sX|	jdkr\|d7 }q| jd	kred
}nd}|drr|dd }t|dkrzdS |d }t|st	|st
|r|||  |d kr| jd	kr|||  |ks| jdkr|||  |ks|d | |krd| _|dr|dd }|S t|dkrt|s|dr|d7 }t|dkrt|st
|r| jdkr|||  |d krt|dkrt	|s|d7 }d| _|S |||  || d kr!|d7 }d| _|S )ae  
        Format a line of text spans based on layout constraints.

        Args:
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            line_gap_limit (int): The limit for the number of pixels after the last span that should be considered part of the last line. Default is 10.
            block_label (str): The label associated with the entire block. Default is 'text'.
        Returns:
            str: Formatted line of text.
        r    r\   $r   z
$rD   r&   r:   r9   N333333?verticalT-rH         ?)r'   r   r   r   
startswithendswithrc   r
   r(   r   r   r+   )r   rT   rU   rV   r`   rS   Zfirst_span_boxZlast_span_boxrk   r0   Zformula_recZtext_stop_index	last_charr   r   r   re     s   














zTextLine.format_line)NN)rp   )ry   r   )r!   r"   r#   r$   r   r   r   propertyr4   r7   r@   rB   r%   r   r    r   rP   r)   rd   rl   ro   rb   re   r   r   r   r   r   :   sV    




C
Ir   c                   @   s   e Zd ZdZd'd(ddZdefddZdefd	d
ZdefddZ	d)d(ddZ
d(ddZd(ddZdefddZdefddZd*dedefddZ	d+deee  dedefddZdee fdd Z	d)d!ed"ed#ed$eedf ddf
d%d&ZdS ),r   zLayout Block Classr{   Nr   c                 C   s   || _ d| _ttt|| _|| _|| _td| _	td| _
|d |d  | _|d |d  | _t| jt| j | _d| _d| _d| _d| _d| _d| _g | _|   dS )a  
        Initialize a LayoutBlock object.

        Args:
            label (str): Label assigned to the block.
            bbox (list): Bounding box coordinates of the block.
            content (str, optional): Content of the block. Defaults to an empty string.
        Ninfz-infr:   r   r9   r   )r   order_labelrM   maprd   rj   contentgroup_idfloatseg_start_coordinateseg_end_coordinaterB   r@   Zareanum_of_linesimageindexZorder_indextext_line_widthtext_line_heightchild_blocksupdate_direction)r   r   rj   r   r   r   r   r   r   ~  s$   	

zLayoutBlock.__init__c                 C   2   d| j  d| j d| j d| j d| j d}|S Nz

#################
index:	z
label:	z
region_label:	z
bbox:	z

content:	z
#################r   r   r   rj   r   r   _strr   r   r   r        .zLayoutBlock.__str__c                 C   r   r   r   r   r   r   r   r      r   zLayoutBlock.__repr__c                 C   s   | j S r   )__dict__r   r   r   r   to_dict  s   zLayoutBlock.to_dictc                 C   s   |s|   }|| _|   dS )z
        Update the direction of the block based on its bounding box.

        Args:
            direction (str, optional): Direction of the block. If not provided, it will be determined automatically using the bounding box. Defaults to `None`.
        N)get_bbox_directionr(   update_direction_infor   r(   r   r   r   r     s   zLayoutBlock.update_directionc                 C   s   | j dkr*d| _| j| _| j| _| jd | _| jd | _| jd | _	| jd | _
dS d| _| j| _| j| _| jd | _| jd | _| jd | _	| jd | _
dS )zEUpdate the direction information of the block based on its direction.r&   r~   r   r:   r   r9   N)r(   secondary_directionr@   Zshort_side_lengthrB   Zlong_side_lengthrj   Zstart_coordinateZend_coordinateZ$secondary_direction_start_coordinateZ"secondary_direction_end_coordinater   r   r   r   r     s   
z!LayoutBlock.update_direction_infoc                 C   s   | j s	| j | _| j\}}}}|j\}}}}	t||t||t||t||	f}
|
| _|   |g}|j r=||  | j | dS )z
        Append a child block to the current block.

        Args:
            child_block (LayoutBlock): Child block to be added.
        Returns:
            None
        N)	r   rj   rr   ori_bboxrQ   rR   r   rN   get_child_blocks)r   Zchild_blockx1y1x2y2Zx1_childZy1_childZx2_childZy2_childZ
union_bboxr   r   r   r   append_child_block  s   	zLayoutBlock.append_child_blockc                 C   s   | j | _| j }g | _|S )z*Get all child blocks of the current block.)r   rj   r   rr   )r   r   r   r   r   r     s   
zLayoutBlock.get_child_blocksc                 C   s*   | j \}}}}|| d || d f}|S )z2Get the centroid of the bounding box of the block.r:   )rj   )r   r   r   r   r   Zcentroidr   r   r   get_centroid  s   zLayoutBlock.get_centroid      ?direction_ratioc                 C   s   | j | | jkr
dS dS )z
        Determine if a bounding box is horizontal or vertical.

        Args:
            direction_ratio (float): Ratio for determining direction. Default is 1.0.

        Returns:
            str: "horizontal" or "vertical".
        r&   r~   )rB   r@   )r   r   r   r   r   r     s   zLayoutBlock.get_bbox_directionr_   bboxesc                 C   sn   d}|D ]&}t |dkrtd|\}}}}|| }	|| }
||	| |
kr'dnd7 }q|t |d kr5dS dS )a+  
        Calculate the direction of the text based on the bounding boxes.

        Args:
            bboxes (list): A list of bounding boxes.
            direction_ratio (float): Ratio for determining direction. Default is 1.5.

        Returns:
            str: "horizontal" or "vertical".
        r      z9Invalid bounding box format. Expected a list of length 4.r   r   r&   r~   )rc   
ValueError)r   r   r   Zhorizontal_box_numrj   r   r   r   r   rB   r@   r   r   r   calculate_text_line_direction  s   z)LayoutBlock.calculate_text_line_directionc                    s  |d  |d }|d  fddt t D }| |}| | dd t |D }|s3g S | jdkrD|jdd	 d
d d}n|jdd	 dd d}g }t|d g| jd}	|dd D ]#}
t|	j	|
j
|dd}||krw|	|
 qa||	 t|
g| jd}	qa||	 |r| jdkrtdd |D }t|}t|}||d kr|d }t||k }|t|d k r||k}dd t||D }|rdd |D }dd |D }t|| _t|| _|S d| _d| _|S )aN  
        Group the bounding boxes into lines based on their direction.

        Args:
            ocr_rec_res (dict): The result of OCR recognition.
            line_height_iou_threshold (float): The minimum IOU value required for two spans to belong to the same line.

        Returns:
            list: A list of TextLines.
        r7   	rec_texts
rec_labelsc                    s    g | ]}| d kr | qS r   r   )r/   rv   	rec_boxesr   r   r   r1   %  s    z6LayoutBlock.group_boxes_into_lines.<locals>.<listcomp>c                 S   s   g | ]}t | qS r   )r   r.   r   r   r   r1   +      r~   c                 S   
   | j d S )Nr   r6   rW   r   r   r   rZ   3  r^   z4LayoutBlock.group_boxes_into_lines.<locals>.<lambda>T)r[   reverser&   c                 S   r   )Nr   r6   rW   r   r   r   rZ   8  r^   Fr   r(   r   NZsmall)modec                 S   r-   r   r@   r/   liner   r   r   r1   M  r2   r:   g?皙?c                 S   s   g | ]\}}|r|qS r   r   )r/   r   Zkeepr   r   r   r1   Y  s    c                 S   r-   r   r   r   r   r   r   r1   ]  r2   c                 S   r-   r   )rB   r   r   r   r   r1   ^  r2   )rq   rc   r   r   zipr(   ra   r   r	   r*   r   rP   rO   rs   rt   rQ   rR   summeanr   r   )r   ocr_rec_resline_height_iou_thresholdr   Z
text_boxesr(   r'   Zmatch_directionlinescurrent_liner0   Zoverlap_ratioZline_heightsZ
min_height
max_heightZnormal_height_thresholdZnormal_height_countZkeep_conditionZline_widthsr   r   r   group_boxes_into_lines  sd   







z"LayoutBlock.group_boxes_into_linesr   r   rg   rh   c              
      s  t |d dkrd| _dS | |tdd}| jdkrdndd	  | jd
krD|d }tfdd|D }t fdd|D }n
| j	 }| j	  }g }	g }
d}t
|D ]F\}}|}|
|j |j| jt|
|||||d}|jrz|d7 }|dkr|jd jd | _n|t |d kr|jd jd	 | _|	| qXtd | jd}|dkrNd}d}d}t
|	D ]\}}t |dkrq|| }|r|j | }||jd krt|st|r||| d kr|dsd| }|| 7 }t |d	kr|dr|d }n|d }t |dkr/|ds/t|s/t|s/t|s/|t |	d ks8|t |	d kr<|d7 }||j   || d krLd}qn||	}|| _t |	| _dS )a  
        Update the text content of the block based on the OCR result.

        Args:
            image (list): The input image.
            ocr_rec_res (dict): The result of OCR recognition.
            text_rec_model (Any): The model used for text recognition.
            text_rec_score_thresh (Union[float, None]): The score threshold for text recognition. If None, use the default setting.

        Returns:
            None
        r   r   r{   Nr   g?r&   r   r:   	referencer7   c                       g | ]}|  qS r   r   r/   r   )coord_start_idxr   r   r1     r   z3LayoutBlock.update_text_content.<locals>.<listcomp>c                    r   r   r   r   )coord_end_idxr   r   r1     r   )rS   rT   rU   rV   rf   rg   rh   rz   Zdelimiter_mapFr_   r   rH   rD   r   g333333?r}   T)rc   r   r   r   getr(   r   rQ   rR   rj   	enumeraterO   rB   rl   r+   r'   r   r   r   r*   r@   r
   r   r   r   rJ   r   )r   r   r   rg   rh   r   r   Zblock_startZ
block_stopZ
text_linesZtext_width_listZneed_new_line_numZline_idxr   rk   delimr   Zpre_line_endr   idxZstart_gep_lenr   )r   r   r   update_text_contentg  s   









%zLayoutBlock.update_text_content)r{   N)r   Nr   )r   )r_   )r!   r"   r#   r$   r   r%   r   r    dictr   r   r   r   rM   r   tupler   r   r   r   rd   boolr   r   r   r   r   r   r   r   r   r   r   {  sF    




X
r   c                       s^   e Zd ZdZg fdee ddf fddZdee ddfddZd	d
 Zd fdd	Z	  Z
S )r   zLayoutRegion classblocksr   Nc                    s   t  jd|dd || _i | _d| _g | _g | _g | _g | _g | _	g | _
ttj| _g | _g | _d| _d| _d| _| | |   dS )z
        Initialize a LayoutRegion object.

        Args:
            bbox (List[int]): The bounding box of the region.
            blocks (List[LayoutBlock]): A list of blocks that belong to this region.
        regionr{   )r   r&      ry   N)superr   rj   	block_mapr(   doc_title_block_idxesparagraph_title_block_idxesvision_block_idxesunordered_block_idxesvision_title_block_idxesnormal_text_block_idxesr   rs   r   euclidean_distanceheader_block_idxesfooter_block_idxesr   r   r   init_region_info_from_layoutupdate_euclidean_distance)r   rj   r   	__class__r   r   r     s$   
zLayoutRegion.__init__c                 C   s~  d}g }g }t |D ]\}}|| j|< ||_|jtd v r$| j| q
|jtd v r2| j| q
|jtd v r@| j| q
|jtd v rN| j	| q
|jtd v r\| j
| q
|jtd v rj| j| q
|jtd v rx| j| q
| j| ||j ||j |jd	kr|d
7 }q
|t| jd krd	nd}| | |rt|nd| _|rt|| _dS d| _dS )zInitialize the information about the layout region from the given blocks.

        Args:
            blocks (List[LayoutBlock]): A list of blocks that belong to this region.
        Returns:
            None
        r   Zheader_labelsZdoc_title_labelsZparagraph_title_labelsZvision_labelsZvision_title_labelsZfooter_labelsZunordered_labelsr&   r   r   r~   r   ry   N)r   r   r   r   r   r   rO   r   r   r   r   r   r   r   r   r   r(   rc   r   rs   r   )r   r   Z horizontal_normal_text_block_numZtext_line_height_listZtext_line_width_listr   blockr(   r   r   r   r      sN   


z)LayoutRegion.init_region_info_from_layoutc                    st   t | j }| jdkrd  fdd|D }n| jd df  fdd|D }t|dkr5t|| _dS d| _dS )	zDUpdate euclidean distance between each block and the reference pointr&   )r   r   c                    &   g | ]}t |jd  |jd f qS )r   r   r   rj   r/   r   Z	ref_pointr   r   r1   5      z:LayoutRegion.update_euclidean_distance.<locals>.<listcomp>r:   r   c                    r   )r:   r   r   r   r   r   r   r1   ;  r   N)rM   r   valuesr(   rj   rc   rQ   r   )r   r   Zblock_distancer   r   r   r   0  s   


$z&LayoutRegion.update_euclidean_distancec                    s   t  j|d | jdkrd| _d| _d| _d| _d| _nd| _d| _d| _d| _d| _| j| j | j| j  d | _	| j| j | j| j  d | _
dS )	z
        Update the direction of the layout region.

        Args:
            direction (str): The new direction of the layout region.
        r   r&   r   r:   r   r9   r~   N)r   r   r(   Zdirection_start_indexZdirection_end_indexZsecondary_direction_start_indexZsecondary_direction_end_indexr   rj   Zdirection_center_coordinateZ%secondary_direction_center_coordinater   r   r   r   r   A  s(   



zLayoutRegion.update_directionr   )r!   r"   r#   r$   r   r   r   r   r   r   __classcell__r   r   r   r   r     s    0r   )typingr   r   r   numpyrs   Zsettingr   r   utilsr   r	   r
   r   r   __all__objectr   r   r   r   r   r   r   r   <module>   s     C  e