o
    'j                     @   s`  d Z ddlZddlZddlZddlZddlZddlZddlZddlZ	ddl
mZmZ 	 dd Zdd Zdd	 ZdCddZdd Zdd Zdd Zdd ZdCddZdDddZdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Z d.d/ Z!dEd1d2Z"d3d4 Z#d5d6 Z$d7d8 Z%d9d: Z&d;d< Z'd=d> Z(G d?d@ d@Z)G dAdB dBe)Z*dS )Fzp
This code is refer from:
https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/match.py
    N)Polygon
MultiPointc                 C   s0   g }| D ]}t |dkrq|| qt|S )z~
    remove [0., 0., 0., 0.] in structure master bboxes.
    len(bboxes.shape) must be 2.
    :param bboxes:
    :return:
    g        )sumappendnparray)bboxes
new_bboxesbbox r   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddleocr/ppstructure/table/table_master_match.pyremove_empty_bboxes"   s   
r   c                 C   s\  t | jdkr>t| }| d | d d  |d< | d | d d  |d< | d | d d  |d< | d | d d  |d< |S t | jdkrt| }| d d df | d d df d  |d d df< | d d df | d d df d  |d d df< | d d df | d d df d  |d d df< | d d df | d d df d  |d d df< |S tN   r         lenshaper   Z
empty_like
ValueErrorr   r	   r   r   r   	xywh2xyxy1   s   

0000r   c                 C   s|  t | jdkrBt| }| d | d | d  d  |d< | d | d | d  d  |d< | d | d  |d< | d | d  |d< |S t | jdkrt| }| d d df | d d df | d d df  d  |d d df< | d d df | d d df | d d df  d  |d d df< | d d df | d d df  |d d df< | d d df | d d df  |d d df< |S tr   r   r   r   r   r   	xyxy2xywhD   s   
  
@@,,r   end2endc                 C   s|   t j| rtt| d}|S t j| r<t }t j| d	|}t

|}|D ]}tt|d}|| q*|S t)Nrbz{}_*.pkl)ospathisfilepickleloadopenisdirdictjoinformatglobupdater   )r   prefixdatasearch_pathZpklsZpklZ	this_datar   r   r   pickle_loadW   s   

r*   c                 C   s   t jddgt jd}| d | d |d< |d< | d | d |d< |d	< | d | d
 |d< |d< | d | d
 |d< |d< |S )zX
    Convert two points format to four points format.
    :param xyxy:
    :return:
       r   )Zdtyper   r   )r   r   r   r   )r   r   )r   r   r   )r   r   )r   r   )r   r   )r   r   )r   ZzerosZfloat32)ZxyxyZnew_bboxr   r   r   convert_coordf   s   r-   c                 C   sl   t | j}t |j}t| |f}||sd}|S ||j}t|jj}|dkr.d}|S t|| }|S )Nr   )	r   Zconvex_hullr   ZconcatenateZ
intersectsintersectionZarear   float)Zbbox1Zbbox2Z
bbox1_polyZ
bbox2_polyZ
union_polyiouZ
inter_areaZ
union_arear   r   r   cal_iout   s   


r1   c                 C   s:   | d |d  }| d |d  }t |d |d  }|S )Nr   r   r   )mathsqrt)p1Zp2Zdelta_xZdelta_ydr   r   r   cal_distance   s   r6   c                 C   sp   d}d}| d |d d kr| d |d d krd}| d |d d kr0| d |d d kr0d}|r6|r6dS dS )z
    Find if center_point inside the bbox(corner_point) or not.
    :param center_point: center point (x, y)
    :param corner_point: corner point ((x1,y1),(x2,y2))
    :return:
    Fr   r   Tr   )Zcenter_pointZcorner_pointZx_flagZy_flagr   r   r   	is_inside   s   r7   c                    sZ   |dkrd n	|dkrd nt g } fdd| D }t|D ]}||vr*|| q|S )a  
    Find out no match end2end bbox in previous match list.
    :param match_list: matching pairs.
    :param all_end2end_nums: numbers of end2end_xywh
    :param type: 'end2end' corresponding to idx 0, 'master' corresponding to idx 1.
    :return: no match pse bbox index list
    r   r   masterr   c                    s   g | ]}|  qS r   r   ).0midxr   r   
<listcomp>       z!find_no_match.<locals>.<listcomp>)r   ranger   )
match_listZall_end2end_numstypeZno_match_indexsZmatched_bbox_indexsnr   r;   r   find_no_match   s   
rC   r   c                 C   s$   t | d |d  }||k rdS dS )Nr   TF)abs)	this_bboxZtarget_bbox	thresholddeltar   r   r   is_abs_lower_than_threshold   s   rH   c           	      C   sl   dd |D }t |}dgt| }dgt| }t| |D ]\}}||d }|||< |||< q||fS )z
    Sorted the bbox in the same line(group)
    compare coord 'x' value, where 'y' value is closed in the same group.
    :param g: index in the same group
    :param bg: bbox in the same group
    :return:
    c                 S   s   g | ]}|d  qS )r   r   )r9   bg_itemr   r   r   r=      r>   z"sort_line_bbox.<locals>.<listcomp>Nr   )sortedr   zipindex)	gbgZxsZ	xs_sortedg_sorted	bg_sortedZg_itemrI   r<   r   r   r   sort_line_bbox   s   	
rQ   c                 C   sL   g }g }t | |D ]\}}t ||D ]\}}|| || qq	||fS N)rK   r   )sorted_groupssorted_bbox_groupsZidxsr   groupZ
bbox_grouprM   rN   r   r   r   flatten   s   
rV   c                 C   sf  g }g }t || D ]H\}}|}t|dkr"||g ||g q	d}t ||D ]\}}	t||	d rB|| |	| d} nq)|sQ||g ||g q	g g }
}t ||D ]\}}	t||	\}}|
| || q\dgt|
 }dgt| }dd |D }t|}t |
|D ]\}}	||	d d }|||< |	||< qt||\}}||||fS )z
    This function will group the render end2end bboxes in row.
    :param end2end_xywh_bboxes:
    :param no_match_end2end_indexes:
    :return:
    r   FTNc                 S   s   g | ]}|d  d qS r,   r   )r9   rN   r   r   r   r=   	  s    zsort_bbox.<locals>.<listcomp>r   )rK   r   r   rH   rQ   rJ   rL   rV   )end2end_xywh_bboxesno_match_end2end_indexesgroupsZbbox_groupsrL   Zend2end_xywh_bboxrE   flagrM   rN   Z
tmp_groupsZtmp_bbox_groupsrO   rP   rS   rT   ZysZ	sorted_ysr<   Zend2end_sorted_idx_listZend2end_sorted_bbox_listr   r   r   	sort_bbox   sJ   




r[   c                 C   sv   g }g }| D ]}|d }| | t|}| | qt|}t|}|d }	t|	}	|	}
t|	}|}||||
fS )a"  
    This function is use to convert end2end results and structure master results to
    List of xyxy bbox format and List of xywh bbox format
    :param end2end_result: bbox's format is xyxy
    :param structure_master_result: bbox's format is xywh
    :return: 4 kind list of bbox ()
    r
   )r   r   r   r   r   )end2end_resultstructure_master_resultZend2end_xyxy_listZend2end_xywh_listZend2end_itemZsrc_bboxZ	xywh_bboxend2end_xyxy_bboxesrW   Z
src_bboxesstructure_master_xyxy_bboxesstructure_master_xywh_bboxesr   r   r   get_bboxes_list  s   	


ra   c                 C   s   g }t | D ]@\}}t |D ]7\}}|d |d }}|d |d |d |d f\}	}
}}||f}|	|
f||ff}t||rE|||g qq|S )a!  
    Judge end2end Bbox's center point is inside structure master Bbox or not,
    if end2end Bbox's center is in structure master Bbox, get matching pair.
    :param end2end_xywh_bboxes:
    :param structure_master_xyxy_bboxes:
    :return: match pairs list, e.g. [[0,1], [1,2], ...]
    r   r   r   r   )	enumerater7   r   )rW   r_   Zmatch_pairs_listiZend2end_xywhjmaster_xyxy	x_end2end	y_end2endZ	x_master1Z	y_master1Z	x_master2Z	y_master2Zcenter_point_end2endZcorner_point_masterr   r   r   center_rule_match4  s   

	rh   c                 C   s   g }t || D ];\}}d}ddg}t|D ] \}}	t|}
t|	}t|
|}||kr5|||d< |d< |}q|d du r=q|| q|S )a  
    Use iou to find matching list.
    choose max iou value bbox as match pair.
    :param end2end_xyxy_bboxes:
    :param end2end_xyxy_indexes: original end2end indexes.
    :param structure_master_xyxy_bboxes:
    :return: match pairs list, e.g. [[0,1], [1,2], ...]
    r   Nr   )rK   rb   r-   r1   r   )r^   Zend2end_xyxy_indexesr_   Zmatch_pair_listZend2end_xyxy_indexZend2end_xyxyZmax_iouZ	max_matchrd   re   Zend2end_4xyZ
master_4xyr0   r   r   r   iou_rule_matchJ  s$   

ri   c                 C   s   g }t ||D ]H\}}tj}ddg}t | |D ]2\}	}
|
d |
d }}|d |d }}||f}||f}t||}||k rI|	||d< |d< |}q|| q|S )a  
    Get matching between no-match end2end bboxes and no-match master bboxes.
    Use min distance to match.
    This rule will only run (no-match end2end nums > 0) and (no-match master nums > 0)
    It will Return master_bboxes_nums match-pairs.
    :param end2end_indexes:
    :param end2end_bboxes:
    :param master_indexes:
    :param master_bboxes:
    :return: match_pairs list, e.g. [[0,1], [1,2], ...]
    r   r   )rK   r   infr6   r   )Zend2end_indexesZend2end_bboxesZmaster_indexesZmaster_bboxesZmin_match_listrd   Zmaster_bboxZmin_distanceZ	min_matchrc   Zend2end_bboxrf   rg   Zx_masterZy_masterZend2end_pointZmaster_pointdistr   r   r   distance_rule_matchh  s    
rl   c                 C   s>   t | | }g }t||D ]}| ||  }|||g q|S )z
    This function will create some virtual master bboxes,
    and get match with the no match end2end indexes.
    :param no_match_end2end_indexes:
    :param master_bbox_nums:
    :return:
    )r   r?   r   )rX   Zmaster_bbox_numsZend_numsextra_match_listrc   end2end_indexr   r   r   extra_match  s   ro   c                 C   sL   t  }| D ]}|d |d }}|| vr|g||< q|| | q|S )z
    Convert match_list to a dict, where key is master bbox's index, value is end2end bbox index.
    :param match_list:
    :return:
    r   r   )r"   keysr   )r@   
match_dictZ
match_pairrn   master_indexr   r   r   get_match_dict  s   rs   c                 C   s(   |  dd} |  dd} |  dd} | S )z
    deal successive space character for text
    1. Replace ' '*3 with '<space>' which is real space is text
    2. Remove ' ', which is split token, not true space
    3. Replace '<space>' with ' ', to get real text
    :param text:
    :return:
    z   z<space>  replacetextr   r   r   deal_successive_space  s   	rz   c                 C   sp   d}| D ]}| dr|d7 }q|t| kr6g }| D ]}|dddd}|| qd|| d gS | S )z
    convert ['<b>Local</b>', '<b>government</b>', '<b>unit</b>'] to ['<b>Local government unit</b>']
    PS: maybe style <i>Local</i> is also exist, too. it can be processed like this.
    :param text_list:
    :param break_token:
    :return:
    r   <b>r   ru   </b>)
startswithr   rw   r   r#   )	text_listbreak_tokencountry   Znew_text_listr   r   r   reduce_repeat_bb  s   
r   rt   c                    sJ   t  }|  D ]\}} fdd|D }t||}||}|||< q|S )Nc                    s   g | ]} | d  qS rx   r   )r9   rn   end2end_infor   r   r=     s    
z'get_match_text_dict.<locals>.<listcomp>)r"   itemsr   r#   )rq   r   r   match_text_dictrr   Zend2end_index_listr~   ry   r   r   r   get_match_text_dict  s   



r   c                 C   s>  g }d}| d dkr|  d | | dkrzs| | dkr|| |d  ds.| |d  drF	 d| ||d	 d  }|d
7 }| | nA| |d  dsX| |d  drp	 d| ||d
 d  }|d7 }| | n| | |  |d7 }n| | |  |d7 }W n	   td Y n| | dks| d |S )zg
    Merge the span style token (row span or col span).
    :param master_token_list:
    :return:
    r   </tbody><tdr   z	 colspan=z	 rowspan=ru   r   r+   r      zBreak in merge...)r   r}   r#   print)master_token_listZnew_master_token_listpointertmpr   r   r   merge_span_token  sT   


$r   c                 C   s   |  dd} |  dd} |  dd} |  dd} |  d	d
} |  dd} |  dd} |  dd} |  dd} |  dd} |  dd} | S )a  
    post process with <eb></eb>, <eb1></eb1>, ...
    emptyBboxTokenDict = {
        "[]": '<eb></eb>',
        "[' ']": '<eb1></eb1>',
        "['<b>', ' ', '</b>']": '<eb2></eb2>',
        "['\u2028', '\u2028']": '<eb3></eb3>',
        "['<sup>', ' ', '</sup>']": '<eb4></eb4>',
        "['<b>', '</b>']": '<eb5></eb5>',
        "['<i>', ' ', '</i>']": '<eb6></eb6>',
        "['<b>', '<i>', '</i>', '</b>']": '<eb7></eb7>',
        "['<b>', '<i>', ' ', '</i>', '</b>']": '<eb8></eb8>',
        "['<i>', '</i>']": '<eb9></eb9>',
        "['<b>', ' ', '\u2028', ' ', '\u2028', ' ', '</b>']": '<eb10></eb10>',
    }
    :param master_token:
    :return:
    z	<eb></eb>	<td></td>z<eb1></eb1>z
<td> </td>z<eb2></eb2>z<td><b> </b></td>z<eb3></eb3>u   <td>  </td>z<eb4></eb4>z<td><sup> </sup></td>z<eb5></eb5><td><b></b></td>z<eb6></eb6>z<td><i> </i></td>z<eb7></eb7>z<td><b><i></i></b></td>z<eb8></eb8>z<td><b><i> </i></b></td>z<eb9></eb9>z<td><i></i></td>z<eb10></eb10>u   <td><b>     </b></td>rv   )master_tokenr   r   r   deal_eb_token  s$   r   c                 C   s   t | } g }d}| D ]7}|dr8|t|d kr|d7 }q
|| vr)|d7 }q
|dd|| }|d7 }t|}|| q
d|S )z{
    Insert OCR text result to structure token.
    :param master_token_list:
    :param match_text_dict:
    :return:
    r   r   r   z><z>{}<ru   )	r   r}   r   rp   rw   r$   r   r   r#   )r   r   Zmerged_result_listZ
text_countr   r   r   r   insert_text_to_token/  s$   

r   c           
      C   s   d}t || }dd |D }d}g }|D ] }t ||}| }|dur0d|}	||	 q|d qt||D ]\}	}|	durJ| ||	} q;	 q;| S )z
    Deal with isolate span cases in this function.
    It causes by wrong prediction in structure recognition model.
    eg. predict <td rowspan="2"></td> to <td></td> rowspan="2"></b></td>.
    :param thead_part:
    :return:
    z<td></td> rowspan="(\d)+" colspan="(\d)+"></b></td>|<td></td> colspan="(\d)+" rowspan="(\d)+"></b></td>|<td></td> rowspan="(\d)+"></b></td>|<td></td> colspan="(\d)+"></b></td>c                 S      g | ]}|  qS r   rU   )r9   rc   r   r   r   r=   Y  r>   z%deal_isolate_span.<locals>.<listcomp>zc rowspan="(\d)+" colspan="(\d)+"| colspan="(\d)+" rowspan="(\d)+"| rowspan="(\d)+"| colspan="(\d)+"Nz<td{}></td>)refinditersearchrU   r$   r   rK   rw   )

thead_partZisolate_patternZisolate_iterZisolate_listspan_patternZcorrected_listZisolate_itemZ	span_partZspanStr_in_isolateItemZcorrected_itemr   r   r   deal_isolate_spanK  s"   	
r   c                 C   s   d}t || }dd |D }g }|D ]/}|ddks#|ddkr=|dddd}|dd	d
d}|| q|| qt||D ]
\}}| ||} qH| S )z
    Deal duplicate <b> or </b> after replace.
    Keep one <b></b> in a <td></td> token.
    :param thead_part:
    :return:
    z<td rowspan="(\d)+" colspan="(\d)+">(.+?)</td>|<td colspan="(\d)+" rowspan="(\d)+">(.+?)</td>|<td rowspan="(\d)+">(.+?)</td>|<td colspan="(\d)+">(.+?)</td>|<td>(.*?)</td>c                 S   r   r   r   )r9   tr   r   r   r=     r>   z%deal_duplicate_bb.<locals>.<listcomp>r{   r   r|   ru   <td><td><b></td>	</b></td>)r   r   r   rw   r   rK   )r   Z
td_patternZtd_iterZtd_listZnew_td_listZtd_itemZnew_td_itemr   r   r   deal_duplicate_bbt  s   r   c                 C   sF  d}t || du r| S t ||  }t|}d}t ||}dd |D }t|dkr0dnd}|sG|d	d
dddddd}nFg }|D ]}	||	dd qKt	||D ]
\}	}
||	|
}q\|dd}d}d}t 
|||}d}d}t 
|||}|d	d
dd}|dd}t|}t|}| ||} | S )z
    In our opinion, <b></b> always occurs in <thead></thead> text's context.
    This function will find out all tokens in <thead></thead> and insert <b></b> by manual.
    :param result_token:
    :return:
    z<thead>(.*?)</thead>Nzs<td rowspan="(\d)+" colspan="(\d)+">|<td colspan="(\d)+" rowspan="(\d)+">|<td rowspan="(\d)+">|<td colspan="(\d)+">c                 S   r   r   r   )r9   sr   r   r   r=     r>   zdeal_bb.<locals>.<listcomp>r   TFr   r   r   r   z<b><b>r{   z</b></b>r|   >z><b>z(<b>)+z(</b>)+r   r   )r   r   rU   copydeepcopyr   r   rw   r   rK   subr   r   )Zresult_tokenZthead_patternr   Zorigin_thead_partr   Z	span_iterZ	span_listZhas_span_in_headZreplaced_span_listspZrspZ
mb_patternZsingle_b_stringZmgb_patternZsingle_gb_stringr   r   r   deal_bb  sF   

r   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
Matcherc                 C   s,   || _ || _t|dd| _t|dd| _dS )a  
        This class process the end2end results and structure recognition results.
        :param end2end_file: end2end results predict by end2end inference.
        :param structure_master_file: structure recognition results predict by structure master inference.
        r   )r'   Z	structureN)end2end_filestructure_master_filer*   end2end_resultsstructure_master_results)selfr   r   r   r   r   __init__  s   zMatcher.__init__c                 C   s  t  }t| j D ]\}\}}g }|| jvrq
| j| }t||\}}}	}
t||
}|| t|t	|dd}t	|dkrN|| }t
|||
}|| t|t	|dd}t|t	|	dd}t	|dkrt	|dkr|| }|	| }t||||}|| t|t	|dd}t	|dkr|| }t||\}}}}t|t	|	}t|}|| n	t|}g }g }||||d}| ||}|||< q
|S )a2  
        Match process:
        pre-process : convert end2end and structure master results to xyxy, xywh ndnarray format.
        1. Use pseBbox is inside masterBbox judge rule
        2. Use iou between pseBbox and masterBbox rule
        3. Use min distance of center point rule
        :return:
        r   )rA   r   r8   )r@   match_list_add_extra_matchrS   sorted_bboxes_groups)r"   rb   r   r   r   ra   rh   extendrC   r   ri   rl   r[   ro   r   r   _format)r   match_resultsr<   	file_namer\   r@   r]   r^   rW   r`   r_   Zcenter_rule_match_listZcenter_no_match_end2end_indexsZcenter_no_match_end2end_xyxyZiou_rule_match_listZ!centerIou_no_match_end2end_indexsZ centerIou_no_match_master_indexsZcenterIou_no_match_end2end_xywhZcenterIou_no_match_master_xywhZdistance_match_listrX   Zno_match_end2end_xywhZend2end_sorted_indexes_listZend2end_sorted_bboxes_listrS   r   rm   r   Zmatch_result_dictr   r   r   match  s   	











zMatcher.matchc                 C   s   | j | }| j| }|d }|d }g }|D ]}dg}	t|}
t|
D ]}|	d q#|	d ||	 q|d}|d dkrK|d	d | n |d dkra|d || |d n
|| |d |d
| |S )z
        Extend the master token(insert virtual master token), and format matching result.
        :param match_result:
        :param file_name:
        :return:
        ry   rS   z<tr>r   z</tr>,r   r   Nmatched_master_token_list)r   r   r   r?   r   r   split
setdefault)r   Zmatch_resultr   r   Zmaster_infor   rS   Zvirtual_master_token_listZ
line_groupZtmp_listZ	item_nums_r   r   r   r   r   R  s.   







zMatcher._formatc                 C   sp   t  }d}t| D ]*\}\}}| j| }|d }|d }	t|	}
t|
||}t||}t|}|||< q|S )z
        Merge the OCR result into structure token to get final results.
        :param match_results:
        :return:
        rt   r   r   )r"   rb   r   r   rs   r   r   r   )r   r   merged_resultsr   r<   r   Z
match_infor   r   r@   rq   r   Zmerged_resultr   r   r   get_merge_result  s    

zMatcher.get_merge_resultN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r     s
    b1r   c                   @   s   e Zd Zdd ZdddZdS )TableMasterMatcherc                 C   s   d S rR   r   )r   r   r   r   r     s   zTableMasterMatcher.__init__r   c                 C   s   |g i}t ||D ]\}}tt||d d}|| | q	|| _|i i}	|\}
}d|
dd }
|
|	| d< ||	| d< |	| _|  }| 	|}|| }d| d	 }|S )
Nr   )r
   ry   r   r   ry   r
   z<html><body><table>z</table></body></html>)
rK   r"   r   r   r   r   r#   r   r   r   )r   Zstructure_resZdt_boxesZrec_resZimg_namer   Zdt_boxresr5   Zstructure_master_result_dictZpred_structuresZpred_bboxesr   r   Z	pred_htmlr   r   r   __call__  s&   
zTableMasterMatcher.__call__N)r   )r   r   r   r   r   r   r   r   r   r     s    r   )r   )r   )rt   )+__doc__r   r   Zcv2r%   r   r2   r   numpyr   Zshapely.geometryr   r   r   r   r   r*   r-   r1   r6   r7   rC   rH   rQ   rV   r[   ra   rh   ri   rl   ro   rs   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sT   


	
5
3$)$K =