o
    0j                     @   s`   d e de fddZdd Zdd Zdd	 ZdddZdd Zdd Zdd Zdd Z	dd Z
dS )textreturnc                 C   sV   g }| D ]!}t |}d|  krdkr n n
|t|d  q|| qd|S )Ni  i^  i   )ordappendchrjoin)r   resultcharcode r   w/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/layout_parsing/merge_table.pyfull_to_half   s   
r   c                 C   s   |  d}|s	dS d}i }t|D ]e\}}d}| ddg}||vr&i ||< |D ]M}||| v r:|d7 }||| v s0t|dd}	t|dd}
t|||
 D ]}||vr[i ||< t|||	 D ]}d|| |< qbqQ||	7 }t||}q(q|S )	z\
    calculate total columns including colspan and rowspan, accounting for merged cells
    tr    tdth   colspanrowspanT)find_all	enumerateintgetrangemax)ZsouprowsZmax_colsZoccupiedZrow_idxrowZcol_idxcellscellr   r   rcr   r   r   calculate_table_total_columns   s4   
r!   c                 C   s   t dd | ddgD S )z@
    Calculate the actual number of columns in a single row
    c                 s   s     | ]}t |d dV  qdS )r   r   N)r   r   ).0r   r   r   r   	<genexpr>>   s    z(calculate_row_columns.<locals>.<genexpr>r   r   )sumr   r   r   r   r   calculate_row_columns:   s   r&   c                 C   s   t | ddgS )zo
    Calculate the visual number of columns in a single row, excluding colspan (merged cells count as one)
    r   r   )lenr   r%   r   r   r   calculate_visual_columnsA   s   r(      c                 C   s  |  d}| d}tt|t||}d}d}t|D ]g}||  ddg}	||  ddg}
t|	t|
kr>|dk} nFd}t|	|
D ]2\}}dt|  }dt|  }||ksst	|
ddt	|
ddkrwd	} nqE|r|d7 }q|dk} |dkrd	}||fS )
zP
    Determine how many identical rows exist at the beginning of two tables
    r   r   Tr   r   r   r   r   F)r   minr'   r   zipr   r   Zget_textsplitr   r   )soup1soup2Zmax_header_rowsrows1rows2Zmin_rowsZheader_rowsZheaders_matchiZcells1Zcells2matchc1c2Ztext1Ztext2r   r   r   detect_table_headersH   s4   

(
r5   c                 C   s   |  d}| d}|r|sdS |d }t| |\}}t||kr%|| nd }|s+dS t|}t|}	t|}
t|}||	kpB|
|kS )Nr   F)r   r5   r'   r&   r(   )r-   r.   r/   r0   last_rowheader_count_Zfirst_data_rowZ	last_colsZ
first_colsZlast_visualZfirst_visualr   r   r   check_rows_matchk   s   

r:   c                    s   g d}| j |v rdS tt| ddpd }tt| ddpd }tt| ddp+d }tt| ddp7d }| d| d| d|  t fd	d
|D rUdS dS )N)continue	continuedzcont'du   续u   cont‘du   續Tr   r   Zfigure_titleZ	doc_titleZparagraph_title c                 3   s    | ]}| v V  qd S Nr   )r"   kwZfull_contentr   r   r#      s    zis_skippable.<locals>.<genexpr>F)labelstrgetattrlowerany)blockZallowed_labelsZcontinue_keywordsZb_textZb_fig_titleZb_doc_titleZb_para_titler   r@   r   is_skippable|   s   
rG   c                    s.  ddl m} |j\}}}}|| }	|j\}
}}}||
 }|dks$|	dkr&dS t||	 t||	 dkr5dS | |}tdd | |d d  D }|sMdS ||}g d t fd	d|d | D }|sidS |j}|j}|rs|sudS ||d
}||d
}t|}t|}||k}t	||}|p|||fS )Nr   )BeautifulSoup)FNNg?c                 s   s    | ]}|j d v V  qdS ))footerZvision_footnotenumberZfootnoteZfooter_imagesealN)rA   r"   br   r   r   r#      s    
z#can_merge_tables.<locals>.<genexpr>r   )headerZheader_imagerJ   rK   c                 3   s    | ]}t | V  qd S r>   )rG   rL   Zcurr_allowed_labelsr   r   r#      s    

zhtml.parser)
Zbs4rH   Zbboxabsr*   indexallcontentr!   r:   )Z	prev_page
prev_blockZ	curr_page
curr_blockrH   Zx0Zy0x1y1Z
prev_widthZx2y2Zx3Zy4Z
curr_widthZ
prev_indexZallowed_followZ
curr_indexZallowed_beforeZ	html_prevZ	html_curr	soup_prev	soup_currZtotal_cols_prevZtotal_cols_currZtables_matchZ
rows_matchr   rO   r   can_merge_tables   sB   





r[   c                 C   sT   t | |\}}| d}|d}||d  D ]}|  |d j| qt| S )Nr   r6   )r5   r   extractparentr   rB   )rY   rZ   r8   r9   Z	rows_prevZ	rows_currr   r   r   r   perform_table_merge   s   

r^   c                 C   s   t t| d ddD ]R}| | }| |d  }|D ]}|jdkr#|} nqd }t|D ]}|jdkr5|} nq*d }|rG|rGt||||\}}}	nd}|r\t||	}
|
|_|j}d|_||_q
dd | D }| D ]}|D ]}|j|jkry||j j|_qjqf| S )	Nr   r   r6   tableFr   c                 S   s   g | ]	}|D ]}|qqS r   r   )r"   pagerF   r   r   r   
<listcomp>   s    z-merge_tables_across_pages.<locals>.<listcomp>)	r   r'   rA   reversedr[   r^   rS   Zglobal_block_idZglobal_group_id)Zpagesr1   Z	page_currZ	page_prevrF   rU   rT   Z	can_mergerY   rZ   Zmerged_htmlZprev_block_global_idZ
all_blocksr`   r   r   r   merge_tables_across_pages   sN   


rc   N)r)   )rB   r   r!   r&   r(   r5   r:   rG   r[   r^   rc   r   r   r   r   <module>   s   
#.
