o
    j_\                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ d	d
lmZ d	dlmZ d	dlmZ G dd dZG dd dZdS )z4Parsing table structure based on strokes and fills.
    N   )Element)RectType)	constants)ShapeStroke)Shapes   )
TableBlock)Row)Cellc                   @   sj   e Zd ZdZdefddZedd Zedd Zd	e	d
e	fddZ
defddZddedefddZdS )CellStructurez;Cell structure with properties bbox, borders, shading, etc.bboxc                 C   s.   t || _t || _d | _d | _d| _d S )Nr	   r	   )fitzRectr   merged_bboxbordersshadingmerged_cells)selfr    r   ^/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pdf2docx/table/TableStructure.py__init__   s
   
zCellStructure.__init__c                 C   s   | j d dkp| j d dkS Nr   r	   r   r   r   r   r   	is_merged%      zCellStructure.is_mergedc                 C   s   | j d dkp| j d dkS r   r   r   r   r   r   
is_merging(   r   zCellStructure.is_merging	h_strokes	v_strokesc                 C   s`   | j \}}}}| || d}| || d}| || d}	| || d}
|||	|
f| _dS )aS  Parse cell borders from strokes.
        
        Args:
            h_strokes (dict): A dict of y-coordinate v.s. horizontal strokes, e.g. 
                ``{y0: [h1,h2,..], y1: [h3,h4,...]}``
            v_strokes (dict): A dict of x-coordinates v.s. vertical strokes, e.g. 
                ``{x0: [v1,v2,..], x1: [v3,v4,...]}``
        rowcolN)r   _get_border_stroker   )r   r    r!   x0y0x1y1topbottomleftrightr   r   r   parse_borders,   s   	zCellStructure.parse_bordersfillsc                 C   s   | j \}}}}|j}|j}|j}|j}	| j\}
}}}|
|	d  ||d  ||d  ||d  f}t |}|D ]}|j|tjdrH|| _ dS q8d| _dS )zParse cell shading from fills.
        
        Args:
            fills (Shapes): Fill shapes representing cell shading.
               @)	thresholdN)	r   widthr   r   update_bboxcontainsr   ZFACTOR_MOSTr   )r   r.   r)   r*   r+   r,   w_topw_rightw_bottomw_leftr%   r&   r'   r(   Z
inner_bboxtargetshaper   r   r   parse_shading=   s   ,
zCellStructure.parse_shadingr"   strokes	directionc                 C   s  |st  S |dkrdnd}| j| | j|d  }}d}g }|D ];}|j|j|j|jf}	|	| |	|d  }
}||kr<q |
|krB nt||t||
 }|tj	k rRq ||7 }|
| q |||  tjk rht  S t|dkrr|d S tdd |D }t|dkr|d S t  S )z Find strokes representing cell borders.
        
        Args:
            strokes (Shapes): Candidate stroke shapes for cell border.
            direction (str): Either ``row`` or ``col``.
        r"   r   r	   r   g        c                 S   s   g | ]}|j qS r   )color).0stroker   r   r   
<listcomp>~   s    z4CellStructure._get_border_stroke.<locals>.<listcomp>)r   r   r%   r&   r'   r(   minmaxr   Z
MAJOR_DISTappendZFACTOR_MAJORlenset)r   r;   r<   idxr%   r'   LZborder_strokesr?   r   t0t1dl
propertiesr   r   r   r$   X   s$   

z CellStructure._get_border_strokeNr"   )__name__
__module____qualname____doc__listr   propertyr   r   dictr-   r   r:   strr$   r   r   r   r   r      s    

r   c                	   @   s  e Zd ZdZdefddZedd Zedd Zed	d
 Z	edd Z
edd ZdefddZdd Zdd ZedededefddZdd Zdd Zededed edefd!d"Zed5d$eded efd%d&Zed'efd(d)Zd*ed+efd,d-Zd.ed/ed0ed1efd2d3Zd4S )6TableStructureaQ  Parsing table structure based on strokes/fills.
    
    Steps to parse table structure::

            x0        x1       x2        x3
        y0  +----h1---+---h2---+----h3---+
            |         |        |         |
            v1        v2       v3        v4
            |         |        |         |
        y1  +----h4------------+----h5---+
            |                  |         |
            v5                 v6        v7
            |                  |         |
        y2  +--------h6--------+----h7---+
        

    1. Group horizontal and vertical strokes::
        
        self.h_strokes = {
            y0 : [h1, h2, h3],
            y1 : [h4, h5],
            y2 : [h6, h7]
        }
    
    These ``[x0, x1, x2, x3] x [y0, y1, y2]`` forms table lattices, i.e. 2 rows x 3 cols.

    2. Check merged cells in row/column direction.

    Let horizontal line ``y=(y0+y1)/2`` cross through table, it gets intersection with 
    ``v1``, ``v2`` and ``v3``, indicating no merging exists for cells in the first row.

    When ``y=(y1+y2)/2``, it has no intersection with vertical strokes at ``x=x1``, i.e. 
    merging status is ``[1, 0, 1]``, indicating ``Cell(2,2)`` is merged into ``Cell(2,1)``.

    So, the final merging status in this case::

        [
            [(1,1), (1,1), (1,1)],
            [(1,2), (0,0), (1,1)]
        ]
    r;   c                 K   sB   g | _ t||d |d \| _| _| jr| jsdS |  | _ dS )a  Parse table structure from strokes and fills shapes.
        
        Args:
            strokes (Shapes): Stroke shapes representing table border. 
                For lattice table, they're retrieved from PDF raw contents; 
                for stream table, they're determined from layout of text blocks.

        .. note::
            Strokes must be sorted in reading order in advance, required by checking merged cells.        
        min_border_clearancemax_border_widthN)cellsrU   _group_h_v_strokesr    r!   _init_cells)r   r;   settingsr   r   r   r      s   zTableStructure.__init__c                 C   sJ   | j st S | j d d jj\}}| j d d jj\}}t||||S )zUTable boundary bbox.

        Returns:
            fitz.Rect: bbox of table.
        r   )rX   r   r   r   tlbr)r   r%   r&   r'   r(   r   r   r   r      s   zTableStructure.bboxc                 C   s
   t | jS )N)rD   rX   r   r   r   r   num_rows   s   
zTableStructure.num_rowsc                 C   s   | j r
t| j d S dS )Nr   )rX   rD   r   r   r   r   num_cols   s   zTableStructure.num_colsc                 C   s6   | j sg S dd | j D }|| j d d jj |S )zlTop y-coordinate ``y0`` of each row.

        Returns:
            list: y-coordinates of each row.
        c                 S   s   g | ]}|d  j jqS )r   )r   r&   )r>   r"   r   r   r   r@      s    z)TableStructure.y_rows.<locals>.<listcomp>r\   r   )rX   rC   r   r(   )r   Yr   r   r   y_rows   s   
zTableStructure.y_rowsc                 C   s:   | j sg S dd | j d D }|| j d d jj |S )zsLeft x-coordinate ``x0`` of each column.

        Returns:
            list: x-coordinates of each column.
        c                 S   s   g | ]}|j jqS r   )r   r%   )r>   cellr   r   r   r@      s    z)TableStructure.x_cols.<locals>.<listcomp>r   r\   )rX   rC   r   r'   )r   Xr   r   r   x_cols   s   
zTableStructure.x_colsr.   c                 C   sN   | j s| S |   | j D ]}|D ]}|jrq|| j| j || qq| S )zyParse table structure.
        
        Args:
            fills (Shapes): Fill shapes representing cell shading.
        )rX   _check_merging_statusr   r-   r    r!   r:   )r   r.   r"   rc   r   r   r   parse   s   

zTableStructure.parsec              	   C   s   t  }| jD ]a}t }|d jj|d jj |_|D ]G}|jr&|t	  q|j
\}}}}|j}	|j}
|j}|j}|jr@|jjnd}t	||j|j|j|jf|	|
||f|jd|j}|| q|| q|rn|   |S )zConvert parsed table structure to ``TableBlock`` instance.

        Returns:
            TableBlock: Parsed table block instance.
        r   N)bg_colorZborder_colorZborder_widthr   )r
   rX   r   r   r(   r&   heightr   rC   r   r   r1   r   r=   r   r2   r   _finalize_strokes_fills)r   tableZrow_structuresr"   Zcell_structurer)   r*   r+   r,   r4   r5   r6   r7   rh   rc   r   r   r   to_table_block  s4   

zTableStructure.to_table_blockc                 C   sv   | j  D ]\}}|D ]}tj|_qq| j D ]\}}|D ]}tj|_qq| jD ]}|D ]
}|jr7tj|j_q-q)dS )zZFinalize table structure, so set strokes and fills type as BORDER and SHADING accordingly.N)	r    itemsr   ZBORDERtyper!   rX   r   ZSHADING)r   kr;   r?   r"   rc   r   r   r   rj   6  s   
z&TableStructure._finalize_strokes_fillsrV   rW   c                    s0  dt dtf fdd}i }i }tdtdtd td f\}}}}	| D ]$}
||
|
jr.|n| t||
j}t||
j}t||
j}t|	|
j	}	q%|rN|sPdS t
 ||||	f}t||d| t||d| t||d	| t||d
| | D ]\}}|  q~| D ]\}}|  q||fS )a  Split strokes in horizontal and vertical groups respectively.

        According to strokes below, the grouped h-strokes looks like::

            h_strokes = {
                y0 : [h1, h2, h3],
                y1 : [h4, h5],
                y2 : [h6, h7]
            }

               x0        x1        x2        x3
            y0  +----h1---+---h2---+----h3---+
                |         |        |         |
                v1        v2       v3        v4
                |         |        |         |
            y1  +----h4------------+----h5---+
                |                  |         |
                v5                 v6        v7
                |                  |         |
            y2  +--------h6--------+----h7---+

        r?   r;   c                    sx   | j r	t| jdnt| jd}|D ]!}t||  krq|| d }||||< || |   d S t| g||< d S )Nr	   r/   )
horizontalroundr&   r%   abspoprC   r   )r?   r;   tZt_rV   r   r   group_strokes]  s   z8TableStructure._group_h_v_strokes.<locals>.group_strokesinf)NNr)   r*   r+   r,   )r   rS   floatrp   rA   r%   rB   r'   r&   r(   r   r2   rU   _check_outer_strokesrm   sort_in_line_ordersort_in_reading_order)r;   rV   rW   rv   r    r!   ZX0ZY0ZX1ZY1r?   
table_bbox_r   r   ru   r   rY   E  s&   (z!TableStructure._group_h_v_strokesc                 C   s   t | j}t | j}g }tt|d D ]7}|| ||d  }}|g  tt|d D ]}|| ||d  }}	t|||	|g}
|d |
 q.q|S )zInitialize table lattices.r	   r\   )sortedr    r!   rangerD   rC   r   )r   rb   re   rX   ir&   r(   jr%   r'   rc   r   r   r   rZ     s   


zTableStructure._init_cellsc                    s   j  j}}g } fdd|D } jD ]}|d jj|d jj d }t||d}|| qg } fdd|D } jd D ]}	|	jj	|	jj
 d }
t|
|d}|| qBt jD ],}t jD ]$} j| | }	t|| |d }t|| |d }||f|	_qgq`t jD ]4}t jD ],} ||  j| | }	|	j\}}|| || |||  |||  f}t||	_qqdS )	zCheck cell merging status.c                       g | ]} j | qS r   )r!   r>   ro   r   r   r   r@         z8TableStructure._check_merging_status.<locals>.<listcomp>r   r/   r"   c                    r   r   )r    r   r   r   r   r@     r   columnN)re   rb   rX   r   r&   r(   rU   _check_merged_cellsrC   r%   r'   r   r_   r`   _count_merged_cellsr   _validate_merging_regionr   r   r   )r   re   rb   Zmerged_cells_rowsZordered_strokesr"   Zref_yZrow_structureZmerged_cells_colsrc   Zref_xZcol_structurer   r   n_coln_rowr   r   r   r   rf     s:   

$z$TableStructure._check_merging_statusr|   r   r<   c                    s  t | j}|dkrd}t|}||   n5|dkr'd}t|}||   n$|dkr8d}t|}||   n|dkrId}t|}||   nd	S || }t }|d d
 }	|d|	|kr`dnd  ||	< t|| |kr|t|	 
|g||< d	S |d d d   fdd|| D }
|
|  d	f | }g }|
D ]%\}}|}t|| tjkr||< ||d < ||	 
| |}q|| | d	S )aW  Add missing outer borders based on table bbox and grouped horizontal/vertical borders.
        
        Args:
            * table_bbox (Element): Table region.
            * borders (dict): Grouped horizontal (or vertical) borders at y-coordinates.
            * direction (str): Either ``top`` or ``bottom`` or ``left`` or ``right``.
        r)   r	   r*      r+   r   r,   r   N   g?r\   c                    s    g | ]}|j  |j   fqS r   )r   )r>   borderZidx_endZ	idx_startr   r   r@     s
    z7TableStructure._check_outer_strokes.<locals>.<listcomp>)rQ   r   rA   rz   rB   r{   r   rr   r   copyr2   rC   r   Z
MINOR_DISTextend)r|   r   r<   rW   r   rF   currentr8   Zsample_borderZidx1Zoccupiedstartsegmentsr+   r,   endr   r   r   ry     sP   

z#TableStructure._check_outer_strokesr"   refc                 C   s   g }|dd D ]B}|D ]8}|dkr|j |j}}n|j|j}}||   k r+|k r4n n|d  n| |kr9q| |k rD|d  nq|d q|S )a  Check merged cells in a row/column. 
        
        Args:
            * ref (float): y (or x) coordinate of horizontal (or vertical) passing-through line.
            * borders (list[Shapes]): A list of vertical (or horizontal) rects list in a column (or row).
            * direction (str): ``row`` - check merged cells in row; ``column`` - check merged cells in a column.

        Taking cells in a row for example, give a horizontal line ``y=ref`` passing through this row, 
        check the intersection with vertical borders. The ``n-th`` cell is merged if no intersection 
        with the ``n-th`` border.
            
                +-----+-----+-----+
                |     |     |     |
                |     |     |     |
                +-----+-----------+
                |           |     |
            ----1-----0-----1----------> [1,0,1]
                |           |     |
                |           |     |
                +-----------+-----+
        r   r\   r"   r	   )r&   r(   r%   r'   rC   )r   r   r<   resZshapesr   Zref0Zref1r   r   r   r     s$   


z"TableStructure._check_merged_cellsmerging_statusc                 C   s@   | d dkrdS d}| dd D ]}|dkr|d7 }q |S |S )zCount merged cells, 
        e.g. ``[1,0,0,1]`` -> the second and third cells are merged into the first one.
        
        Args:
            merging_status (list): A list of 0-1 representing cell merging status.
        r   r	   Nr   )r   numvalr   r   r   r   >  s   	
z"TableStructure._count_merged_cellsr   r   c           	      C   s   | j | | }|jrdS |j\}}|dkr|dkrdS | ||| ||| sMt||| D ]}t||| D ]}| j | | }|jrFd|_q7q.d|_dS dS )zCheck whether the merging region of Cell (i,j) is valid. If not, unset merging status. 

        Args:
            i (int): Row index of the target cell.
            j (int): Column index of the target cell.
        Nr	   r   )rX   r   r   _is_valid_regionr   )	r   r   r   rc   r   r   mnr8   r   r   r   r   S  s   


z'TableStructure._validate_merging_region	row_startrow_end	col_startcol_endc                 C   sN   t ||D ]}t ||D ]}||kr||krq| j| | js#  dS qqdS )a  Check whether all cells in given region are marked to merge.

        Args:
            row_start (int): Start row index (included) of the target region.
            row_end (int): End row index (excluded) of the target region.
            col_start (int): Start column index (included) of the target region.
            col_end (int): Start column index (excluded) of the target region.
        FT)r   rX   r   )r   r   r   r   r   r   r   r   r   r   r   k  s   	zTableStructure._is_valid_regionNrL   )rM   rN   rO   rP   r   r   rR   r   r_   r`   rb   re   rg   rl   rj   staticmethodrx   rY   rZ   rf   r   rS   rT   ry   rQ   r   r   intr   r   r   r   r   r   rU      s8    *




1C*>8rU   )rP   r   Zcommon.Elementr   Zcommon.sharer   commonr   Zshape.Shaper   r   Zshape.Shapesr   r
   r   r   r   rU   r   r   r   r   <module>   s   q