o
    'j                     @   sD  d dl mZ d dl mZ d dl mZ d dl mZ d dlZd dlZd dlZd dl	m
Z
mZmZ d dlZd dlZd dlmZ d dlmZ d d	lmZ G d
d deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG d d! d!eZ G d"d# d#e Z!G d$d% d%eZ"G d&d' d'eZ#G d(d) d)eZ$G d*d+ d+eZ%G d,d- d-eZ&G d.d/ d/eZ'G d0d1 d1eZ(G d2d3 d3eZ)G d4d5 d5eZ*G d6d7 d7eZ+G d8d9 d9eZ,G d:d; d;eZ-G d<d= d=eZ.G d>d? d?eZ/dS )@    )absolute_import)division)print_function)unicode_literalsN)
LineStringPointPolygon)sample)
get_logger)order_by_tbyxc                   @      e Zd Zdd Zdd ZdS )ClsLabelEncodec                 K   s
   || _ d S N)
label_list)selfr   kwargs r   e/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddleocr/ppocr/data/imaug/label_ops.py__init__!   s   
zClsLabelEncode.__init__c                 C   s.   |d }|| j vrd S | j |}||d< |S Nlabel)r   indexr   datar   r   r   r   __call__$   s   
zClsLabelEncode.__call__N__name__
__module____qualname__r   r   r   r   r   r   r           r   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
DetLabelEncodec                 K      d S r   r   r   r   r   r   r   r   .      zDetLabelEncode.__init__c           
      C   s   |d }t |}t|}g g g }}}td|D ]'}|| d }|| d }	|| ||	 |	dv r<|d q|d qt|dkrJd S | |}tj|tjd}tj|tj	d}||d	< ||d
< ||d< |S )Nr   r   pointstranscription*z###TFZdtypepolystextsignore_tags)
jsonloadslenrangeappendexpand_points_numnparrayfloat32bool_)
r   r   r   nBoxboxestxtstxt_tagsbnoboxtxtr   r   r   r   1   s*   



zDetLabelEncode.__call__c                 C   s   t jddd}|jdd}|t | |d< |t | |d< t j|t |t |fdd}t jt |dd}|t | |d< |t | |d< |S )	N)      r4   r(      )Zaxisr   r>      )r2   zerossumZargminZargmaxdeletediffr3   )r   ZptsrectstmprD   r   r   r   order_points_clockwiseJ   s    z%DetLabelEncode.order_points_clockwisec                 C   sX   d}|D ]}t ||krt |}qg }|D ]}||d g|t |   }|| q|S )Nr   r.   r0   )r   r7   Zmax_points_numr;   Zex_boxesZex_boxr   r   r   r1   U   s   z DetLabelEncode.expand_points_numN)r   r   r   r   r   rH   r1   r   r   r   r   r    -   s
    r    c                   @   s0   e Zd ZdZ			d
ddZdd Zdd	 ZdS )BaseRecLabelEncode+ Convert between text-label and text-index NFc                 C   s   || _ d| _d| _|| _|d u r$t }|d d| _t| j}d| _n?g | _t|d"}|	 }|D ]}	|	
ddd	}	| j|	 q3W d    n1 sQw   Y  |r^| jd
 t| j}| |}i | _t|D ]	\}
}|
| j|< qo|| _d S )NsoseoszRThe character_dict_path is None, model can only recognize number and lower lettersZ$0123456789abcdefghijklmnopqrstuvwxyzTrbutf-8

 )max_text_lenbeg_strend_strlowerr
   warningZcharacter_strlistopen	readlinesdecodestripr0   add_special_chardict	enumerate	character)r   max_text_lengthcharacter_dict_pathuse_space_charrW   loggerdict_characterfinlineslineicharr   r   r   r   d   s8   



zBaseRecLabelEncode.__init__c                 C   s   |S r   r   r   rf   r   r   r   r^      r#   z#BaseRecLabelEncode.add_special_charc                 C   sj   t |dkst || jkrdS | jr| }g }|D ]}|| jvr"q|| j|  qt |dkr3dS |S )aV  convert text-label into text-index.
        input:
            text: text labels of each image. [batch_size]

        output:
            text: concatenated text index for CTCLoss.
                    [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
            length: length of each text. [batch_size]
        r   N)r.   rT   rW   r_   r0   r   textZ	text_listrk   r   r   r   encode   s   

zBaseRecLabelEncode.encodeNFF)r   r   r   __doc__r   r^   ro   r   r   r   r   rK   a   s    
#rK   c                       6   e Zd ZdZ		d
 fdd	Zdd Zdd	 Z  ZS )CTCLabelEncoderL   NFc                       t t| ||| d S r   )superrs   r   r   rb   rc   rd   r   	__class__r   r   r         
zCTCLabelEncode.__init__c                 C   s   |d }|  |}|d u rd S tt||d< |dg| jt|   }t||d< dgt| j }|D ]
}||  d7  < q5t||d< |S )Nr   lengthr   r?   Z	label_ace)ro   r2   r3   r.   rT   ra   )r   r   rn   r   xr   r   r   r      s   
zCTCLabelEncode.__call__c                 C      dg| }|S )Nblankr   rl   r   r   r   r^         
zCTCLabelEncode.add_special_charNFr   r   r   rq   r   r   r^   __classcell__r   r   rw   r   rs      s    rs   c                       s*   e Zd Z		d fdd	Zdd Z  ZS )E2ELabelEncodeTestNFc                    rt   r   )ru   r   r   rv   rw   r   r   r      ry   zE2ELabelEncodeTest.__init__c                 C   s(  dd l }t| j}|d }||}t|}g g g }}}td|D ]'}	||	 d }
||	 d }||
 || |dv rE|d q#|d q#tj|tjd}tj|tj	d}||d	< ||d
< g }|D ]#}|
 }| |}|d u ry d S ||g| jt|   }|| qgt||d< |S )Nr   r   r$   r%   r&   TFr(   r)   r+   r*   )r,   r.   r_   r-   r/   r0   r2   r3   r4   r5   rW   ro   rT   )r   r   r,   Zpadnumr   r6   r7   r8   r9   r:   r;   r<   Z
temp_textsrn   r   r   r   r      s8   




zE2ELabelEncodeTest.__call__r   r   r   r   r   r   r   r   r   rw   r   r      s
    r   c                   @   r   )E2ELabelEncodeTrainc                 K   r!   r   r   r"   r   r   r   r      r#   zE2ELabelEncodeTrain.__init__c                 C   s   dd l }|d }||}t|}g g g }}}td|D ]'}|| d }	|| d }
||	 ||
 |
dv r@|d q|d qtj|tjd}tj|tjd}||d	< ||d
< ||d< |S )Nr   r   r$   r%   r&   TFr(   r)   r*   r+   )	r,   r-   r.   r/   r0   r2   r3   r4   r5   )r   r   r,   r   r6   r7   r8   r9   r:   r;   r<   r   r   r   r      s&   


zE2ELabelEncodeTrain.__call__Nr   r   r   r   r   r      r   r   c                       sR   e Zd Z		d fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	  Z
S )KieLabelEncode
   Fc                    s   t t|   tddi| _t | _t|ddd}d}|D ]}| }	|| j|	< |d7 }qW d    n1 s8w   Y  t|d}
|
 }t|D ]\}}|d}|| j|< qKW d    n1 sdw   Y  || _	|| _
d S )N r   rrP   )encodingr?   rQ   )ru   r   r   r_   label2classid_maprZ   r]   r[   r`   normdirected)r   rc   
class_pathr   r   r   fridxri   rk   rg   rh   rw   r   r   r   
  s(   



zKieLabelEncode.__init__c                 C   s0  |ddddf |ddddf }}|ddddf |ddddf }}|| d t || d d}}|dddf d | | j }|dddf d | | j }	|dddf d | |dddf d | }
}|| t |
 }t ||	||
|gd}t ||||gdt j}||fS )	z)Compute relation between every two boxes.Nr   r?   r>   r=         rI   )r2   maximumr   Z
zeros_likestackconcatenateastyper4   )r   r7   Zx1sZy1sZx2sZy2swshsZdxsZdysZxhhsZxwhsZwhs	relationsbboxesr   r   r   compute_relation!  s   **"2zKieLabelEncode.compute_relationc                 C   sb   d}t dd |D }tt||ftj }t|D ]\}}t|||dt|f< q||fS )zPad text index to same length.,  c                 S   s   g | ]}t |qS r   )r.   ).0text_indr   r   r   
<listcomp>1      z3KieLabelEncode.pad_text_indices.<locals>.<listcomp>N)maxr2   Zonesr.   int32r`   r3   )r   	text_indsmax_lenrecoder_lenpadded_text_indsr   r   r   r   r   pad_text_indices.  s   zKieLabelEncode.pad_text_indicesc                 C   s  |d |d }}t |t j}| |\}}|dd}|durnt |t j}|dd}|durn|dddf }t |}|dddf |dddf kt j}| jr`||@ dkt j}t |d t ||gd}| 	|\}}	d}
t 
|
d	g}|j\}}||d|ddf< t 
|
|
d
g}||d|d|ddf< t 
|
|
g}||d|ddf< t 
|
|
g}||d|d|d f< t ||	g}t|d |||||dS )z7Convert bboxes, relations, texts and labels to ndarray.r$   r   labelsNedgesr?   rI   r   r=   r   image)r   r$   r   r*   r   tag)r2   r3   r   r   getr   r   Zfill_diagonalr   r   rA   shaper_   )r   	ann_infosr7   r   r   r   r   r   r   r   Zmax_numZtemp_bboxesh_Ztemp_relationsZtemp_padded_text_indsZtemp_labelsr   r   r   r   list_to_numpy7  sF   
(
zKieLabelEncode.list_to_numpyc                    s  t dksJ t dksJ fddtdD tdd D }|j\ }} fddtdD }tdd |D }t|}|d }|dkrUg d}	n|d	kr^g d
}	n|dkrgg d}	ng d}	fdd|	D }
fdd|	D }|
|fS )Nr=   c                    s   g | ]}t  | | qS r   )r   r   rj   )points_xpoints_yr   r   r   f  s    z4KieLabelEncode.convert_canonical.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r   )r{   yr   pr   r   r   r   h      c                    s"   g | ]}t | t gqS r   )r   r   r   )min_xmin_yr$   r   r   r   j  s    c                 S   s   g | ]}|j qS r   )rz   )r   ri   r   r   r   r   m      r   )r   r?   r>   r@   r?   )r?   r>   r@   r   r>   )r>   r@   r   r?   )r@   r   r?   r>   c                       g | ]} | qS r   r   r   )r   r   r   r   z  r   c                    r   r   r   )r   j)r   r   r   r   {  r   )r.   r/   r   Zboundsr2   r3   argsort)r   r   r   Zpolygonr   Zpoints_to_lefttopZ	distancesZsort_dist_idxZlefttop_idxZpoint_orderssorted_points_xsorted_points_yr   )r   r   r$   r   r   r   convert_canonicala  s*   



z KieLabelEncode.convert_canonicalc                 C   s   t |dksJ t |dksJ t|}t|}t|d }t|d }t|| }t|| }t||d tj }	t|	}
g g }}tdD ]}|||
|   |||
|   qO| 	||S )Nr=   g      ?g     f@)
r.   r2   r3   rB   Zarctan2pir   r/   r0   r   )r   r   r   r{   r   Zcenter_xZcenter_yZx_arrZy_arrZangleZsort_idxr   r   rj   r   r   r   sort_vertex  s   



zKieLabelEncode.sort_vertexc                    sb  dd l }|d }||}g g g g g f\}}}}}	|D ]}
|
d   fddtdD } fddtdD }||\}}g }t||D ]\}}|| || qF|| |
d }||
d  fd	d|D }|| d|
 v r|j|
d   nd
|
 v r||
d
  ntd|	|
	dd qt
|d ||||	|d}|S )Nr   r   r$   c                       g | ]} | d  qS r   r   r   r;   r   r   r     r   z+KieLabelEncode.__call__.<locals>.<listcomp>r=   c                    r   r?   r   r   r   r   r   r     r   r%   c                    s    g | ]}| j v r j | qS r   )r_   )r   cr   r   r   r     s     Zkey_clszLCannot found 'key_cls' in ann.keys(), please check your training annotation.edger   )r   r$   r*   r   r   r   )r,   r-   r/   r   zipr0   keysr   
ValueErrorr   r_   r   )r   r   r,   r   annotationsr7   r*   r   r   r   annZx_listZy_listZsorted_x_listZsorted_y_listZ
sorted_boxr{   r   rn   r   r   r   )r;   r   r   r     sF   




zKieLabelEncode.__call__)r   F)r   r   r   r   r   r   r   r   r   r   r   r   r   rw   r   r   	  s    	*r   c                       sF   e Zd ZdZ		d fdd	Zdd Zdd	 Zd
d Zdd Z  Z	S )AttnLabelEncoderL   NFc                    rt   r   )ru   r   r   rv   rw   r   r   r     ry   zAttnLabelEncode.__init__c                 C   $   d| _ d| _| j g| | jg }|S NrM   rN   rU   rV   rl   r   r   r   r^        z AttnLabelEncode.add_special_charc                 C   s   |d }|  |}|d u rd S t|| jkrd S tt||d< dg| t| jd g dg| jt| d   }t||d< |S Nr   rz   r   r?   r>   ro   r.   rT   r2   r3   ra   r   r   rn   r   r   r   r     s   
 zAttnLabelEncode.__call__c                 C      |  d}|  d}||gS Nbegendget_beg_end_flag_idxr   Zbeg_idxend_idxr   r   r   get_ignored_tokens     

z"AttnLabelEncode.get_ignored_tokensc                 C   H   |dkrt | j| j }|S |dkrt | j| j }|S J d| Nr   r   Fz)Unsupport type %s in get_beg_end_flag_idxr2   r3   r_   rU   rV   r   Z
beg_or_endr   r   r   r   r        z$AttnLabelEncode.get_beg_end_flag_idxr   
r   r   r   rq   r   r^   r   r   r   r   r   r   rw   r   r     s    r   c                       sN   e Zd ZdZ		d fdd	Zdd Zdd	 Zd
d Zdd Zdd Z	  Z
S )RFLLabelEncoderL   NFc                    rt   r   )ru   r   r   rv   rw   r   r   r     ry   zRFLLabelEncode.__init__c                 C   r   r   r   rl   r   r   r   r^     r   zRFLLabelEncode.add_special_charc                 C   s4   dgt | j }|D ]
}||  d7  < q
t|S )Ng        r?   )r.   ra   r2   r3   )r   rn   	cnt_labelZchar_r   r   r   
encode_cnt  s   
zRFLLabelEncode.encode_cntc                 C   s   |d }|  |}|d u rd S t|| jkrd S | |}tt||d< dg| t| jd g dg| jt| d   }t|| jkrHd S t||d< ||d< |S )Nr   rz   r   r?   r>   r   )ro   r.   rT   r   r2   r3   ra   )r   r   rn   r   r   r   r   r     s$   

 zRFLLabelEncode.__call__c                 C   r   r   r   r   r   r   r   r     r   z!RFLLabelEncode.get_ignored_tokensc                 C   r   r   r   r   r   r   r   r     r   z#RFLLabelEncode.get_beg_end_flag_idxr   )r   r   r   rq   r   r^   r   r   r   r   r   r   r   rw   r   r     s    r   c                       rr   )SEEDLabelEncoderL   NFc                    rt   r   )ru   r   r   rv   rw   r   r   r   &  ry   zSEEDLabelEncode.__init__c                 C   s*   d| _ d| _d| _|| j| j | jg }|S )NpaddingrN   unknown)r   rV   r   rl   r   r   r   r^   .  s   z SEEDLabelEncode.add_special_charc                 C   s   |d }|  |}|d u rd S t|| jkrd S tt|d |d< |t| jd g t| jd g| jt| d   }t||d< |S )Nr   r?   rz   r@   r>   r   r   r   r   r   r   7  s   
 zSEEDLabelEncode.__call__r   r   r   r   rq   r   r^   r   r   r   r   rw   r   r   #  s    	r   c                       sH   e Zd ZdZ			d fdd	Zdd Zd	d
 Zdd Zdd Z  Z	S )SRNLabelEncoderL      NFc                    rt   r   )ru   r   r   rv   rw   r   r   r   H  ry   zSRNLabelEncode.__init__c                 C   s   || j | jg }|S r   r   rl   r   r   r   r^   P  s   zSRNLabelEncode.add_special_charc                 C   sz   |d }|  |}t| j}|d u rd S t|| jkrd S tt||d< ||d g| jt|   }t||d< |S )Nr   rz   r?   )ro   r.   ra   rT   r2   r3   )r   r   rn   Zchar_numr   r   r   r   T  s   

zSRNLabelEncode.__call__c                 C   r   r   r   r   r   r   r   r   a  r   z!SRNLabelEncode.get_ignored_tokensc                 C   r   r   r   r   r   r   r   r   f  r   z#SRNLabelEncode.get_beg_end_flag_idx)r   NFr   r   r   rw   r   r   E  s    r   c                   @   sF   e Zd ZdZ				dddZedd Zdd	 Zd
d Zdd Z	dS )TableLabelEncoderL   Fr=   c                 K   sL  || _ d| _|| _|| _|| _g }t|d!}	|	 }
|
D ]}|ddd}|	| qW d    n1 s:w   Y  | jrTd|vrK|	d d|v rT|
d | |}i | _t|D ]	\}}|| j|< q`dd	 | j D | _|| _|| _| j| j | _| j| j | _| j| j | _g d
| _dddddddddddd| _d S )NFrO   rP   rQ   rR   	<td></td><td>c                 S   s   i | ]\}}||qS r   r   )r   kvr   r   r   
<dictcomp>  s    z-TableLabelEncode.__init__.<locals>.<dictcomp>)r   <td	<eb></eb>r   r   z<eb1></eb1>z<eb2></eb2>z<eb3></eb3>z<eb4></eb4>z<eb5></eb5>z<eb6></eb6>z<eb7></eb7>z<eb8></eb8>z<eb9></eb9>z<eb10></eb10>)z[]z[' ']z['<b>', ' ', '</b>']z['\u2028', '\u2028']z['<sup>', ' ', '</sup>']z['<b>', '</b>']z['<i>', ' ', '</i>']z['<b>', '<i>', '</i>', '</b>']z#['<b>', '<i>', ' ', '</i>', '</b>']z['<i>', '</i>']z2['<b>', ' ', '\u2028', ' ', '\u2028', ' ', '</b>'])rT   rW   learn_empty_boxmerge_no_span_structurereplace_empty_cell_tokenrZ   r[   r\   r]   r0   remover^   r_   r`   itemsidx2charra   loc_reg_numrU   pad_idx	start_idxrV   r   td_tokenempty_bbox_token_dict)r   rb   rc   r   r   r   r   r   rf   rg   rh   ri   rj   rk   r   r   r   r   t  sR   



zTableLabelEncode.__init__c                 C   s
   | j d S )Nr>   rT   r   r   r   r   _max_text_len  s   
zTableLabelEncode._max_text_lenc                 C   s  |d }|d }| j r| |}| jr| ||}g }|D ]}|dkr6d|v r1|d dkr1d| }|| q| |}|d u rBd S | jg| | jg }|| jg| j	t
|   }t|}||d< t
|| j	krkd S tj| j	| jftjd}tj| j	dftjd}d}t|D ]B\}	}| j| | jv rd	|| v rt
|| d
 dkr|| d	  }
tj|
tjdd}
|
||	< d||	< | jrd||	< |d7 }q||d< ||d< |S )Ncells	structurer   spanr   rS   r(   r?   bboxtokensrI   g      ?r   
bbox_masks)r   _merge_no_span_structurer   _replace_empty_cell_tokenr0   ro   r   r   r   r  r.   r2   r3   rA   r   r4   r`   r   r  copyZreshaper   )r   r   r  r  new_structuretokenr   r
  bbox_idxrj   r  r   r   r   r     s^   



zTableLabelEncode.__call__c                 C   sR   g }d}|t |k r'|| }|dkrd}|d7 }|| |d7 }|t |k s
|S )z
        This code is refer from:
        https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/data_preprocess.py
        r   r   r   r?   rJ   )r   r  r  rj   r  r   r   r   r    s   
z)TableLabelEncode._merge_no_span_structurec                 C   sf   d}g }|D ]*}|dv r+d||   vr!t|| d }| j| }|| |d7 }q|| q|S )z
        This fun code is refer from:
        https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/data_preprocess.py
        r   )r   r   r   r  r	  r?   )r   strr  r0   )r   Z
token_listr  r  Zadd_empty_bbox_token_listr  contentr   r   r   r    s   


z*TableLabelEncode._replace_empty_cell_tokenNFFFr=   )
r   r   r   rq   r   propertyr  r   r  r  r   r   r   r   r   q  s    
7
3r   c                       s>   e Zd ZdZ				d
 fdd	Zedd Zdd	 Z  ZS )TableMasterLabelEncoderL   Fr=   c                    sB   t t| j||||||fi | | j| j | _| j| j | _d S r   )ru   r  r   r_   pad_strr   unknown_strunknown_idx)r   rb   rc   r   r   r   r   r   rw   r   r   r   	  s   
zTableMasterLabelEncode.__init__c                 C   s   | j S r   r  r   r   r   r   r    s   z$TableMasterLabelEncode._max_text_lenc                 C   s8   d| _ d| _d| _d| _|}|| j| j | j| jg }|S )Nz<SOS><EOS><UKN><PAD>)rU   rV   r  r  rl   r   r   r   r^     s   z'TableMasterLabelEncode.add_special_charr  )	r   r   r   rq   r   r  r  r^   r   r   r   rw   r   r    s    
r  c                   @   s.   e Zd ZdddZdd Zdd Zdd	 Zd
S )TableBoxEncodexyxyc                 K   s   |dv sJ || _ || _d S )N)xywhr  xyxyxyxy)in_box_formatout_box_format)r   r   r!  r   r   r   r   r   (  s   
zTableBoxEncode.__init__c                 C   s   |d j d d \}}|d }| j| jkr/| jdkr/| jdkr%| |}n
| jdkr/| |}|d d dd df  |  < |d d dd df  |  < ||d< |S )	Nr   r>   r   r  r  r  r   r?   )r   r   r!  xyxyxyxy2xywh	xyxy2xywh)r   r   Z
img_heightZ	img_widthr   r   r   r   r   -  s   



zTableBoxEncode.__call__c                 C   s   t ttdg}td d dd df  |d d df< td d dd df  |d d df< td d dd df  |d d df  |d d df< td d dd df  |d d df  |d d df< |S )Nr=   r   r>   r?   r@   )r2   rA   r.   r   minr   )r   r7   
new_bboxesr   r   r   r"  <  s   &&66zTableBoxEncode.xyxyxyxy2xywhc                 C   s   t |}|d d df |d d df  d |d d df< |d d df |d d df  d |d d df< |d d df |d d df  |d d df< |d d df |d d df  |d d df< |S )Nr   r>   r?   r@   )r2   Z
empty_like)r   r   r%  r   r   r   r#  D  s   
00,,zTableBoxEncode.xyxy2xywhN)r  r  )r   r   r   r   r   r"  r#  r   r   r   r   r  '  s
    
r  c                       s>   e Zd ZdZ		d fdd	Zdd Zdd	 Zd
d Z  ZS )SARLabelEncoderL   NFc                    rt   r   )ru   r&  r   rv   rw   r   r   r   P  ry   zSARLabelEncode.__init__c                 C   f   d}d}d}||g }t |d | _||g }t |d | _t |d | _||g }t |d | _|S Nz	<BOS/EOS>r  r  r?   r.   r  r   r   padding_idxr   rf   Zbeg_end_strr  padding_strr   r   r   r^   X     


zSARLabelEncode.add_special_charc                    s   |d }  |}|d u rd S t| jd krd S tt||d<  jg|  jg } fddt jD }||d t|< t||d< |S )Nr   r?   rz   c                       g | ]} j qS r   r*  r   r   r   r   r   r   o  r   z+SARLabelEncode.__call__.<locals>.<listcomp>)ro   r.   rT   r2   r3   r   r   r/   r   r   rn   targetZpadded_textr   r   r   r   f  s   
zSARLabelEncode.__call__c                 C      | j gS r   r/  r   r   r   r   r   u     z!SARLabelEncode.get_ignored_tokensr   )	r   r   r   rq   r   r^   r   r   r   r   r   rw   r   r&  M  s    r&  c                       sH   e Zd ZdZ			d fdd	Zdd Zdd	 Zd
d Zdd Z  Z	S )SATRNLabelEncoderL   NFc                       t t| ||| || _d S r   )ru   r5  r   rW   r   rb   rc   rd   rW   r   rw   r   r   r   |     

zSATRNLabelEncode.__init__c                 C   r'  r(  r)  r+  r   r   r   r^     r-  z!SATRNLabelEncode.add_special_charc                 C   sF   | j r|  }g }|D ]}|| j|| j qt|dkr!d S |S Nr   )rW   r0   r_   r   r  r.   rm   r   r   r   ro     s   zSATRNLabelEncode.encodec                    s   |d }  |}|d u rd S tt||d<  jg|  jg } fddt jD }t| jkr=|d  j }n||d t|< t||d< |S )Nr   rz   c                    r.  r   r/  r0  r   r   r   r     r   z-SATRNLabelEncode.__call__.<locals>.<listcomp>)ro   r2   r3   r.   r   r   r/   rT   r1  r   r   r   r     s   
zSATRNLabelEncode.__call__c                 C   r3  r   r/  r   r   r   r   r     r4  z#SATRNLabelEncode.get_ignored_tokensrp   )
r   r   r   rq   r   r^   ro   r   r   r   r   r   rw   r   r5  y  s    

r5  c                       s8   e Zd Z	d
 fdd	Zdd Zdd Zdd	 Z  ZS )PRENLabelEncodeFc                    rt   r   )ru   r:  r   rv   rw   r   r   r     ry   zPRENLabelEncode.__init__c                 C   s0   d}d}d}|||g| }d| _ d| _d| _|S )Nr  r  z<UNK>r   r?   r>   )r*  r   r  )r   rf   r,  rV   r  r   r   r   r^     s   z PRENLabelEncode.add_special_charc                 C   s   t |dkst || jkrd S | jr| }g }|D ]}|| jvr(|| j q|| j|  q|| j t || jk rK|| jg| jt |  7 }|S r9  )r.   rT   rW   r_   r0   r  r   r*  rm   r   r   r   ro     s   
zPRENLabelEncode.encodec                 C   s0   |d }|  |}|d u rd S t||d< |S r   )ro   r2   r3   )r   r   rn   Zencoded_textr   r   r   r     s   
zPRENLabelEncode.__call__)F)r   r   r   r   r^   ro   r   r   r   r   rw   r   r:    s    r:  c                       sh   e Zd ZdZ							d fdd	Zdd	 Zd
d Zdd Zdd Zdd Z	dd Z
dd Z  ZS )VQATokenLabelEncodez*
    Label encode for NLP VQA methods
    F	LayoutXLMTNc	                    s   t t|   ddlm}
m}m} ddlm} |
dd|dd|ddd}|| _	|| }|d	 
|d
 | _||\| _}|| _|| _|| _|| _|| _| jdv sTJ d S )Nr   )LayoutXLMTokenizerLayoutLMTokenizerLayoutLMv2Tokenizer)load_vqa_bio_label_mapszlayoutxlm-base-uncased)classpretrained_modelzlayoutlm-base-uncasedzlayoutlmv2-base-uncased)r<  ZLayoutLMZ
LayoutLMv2rA  rB  )Ntb-yx)ru   r;  r   Zpaddlenlp.transformersr=  r>  r?  Zppocr.utils.utilityr@  contains_reZfrom_pretrained	tokenizerlabel2id_mapadd_special_ids
infer_mode
ocr_engineuse_textline_bbox_infoorder_method)r   r   rD  rG  	algorithmrJ  rK  rH  rI  r   r=  r>  r?  r@  Ztokenizer_dictZtokenizer_configZid2label_maprw   r   r   r     s2   
zVQATokenLabelEncode.__init__c                 C   s   |  }g }d}|\}}}	}
|	| t| }t|D ])\}}t|| }|||| |
g}||gt||  |t|d | 7 }q|S )Nr   r?   )splitr.   r`   extendtokenize)r   r  rn   rE  wordsZtoken_bboxesZcurr_word_idxx1y1x2y2Zunit_wr   wordZcurr_wZ	word_bboxr   r   r   
split_bbox  s   zVQATokenLabelEncode.split_bboxc                 C   s   g }g }t |D ]\}}t|d dkr|t| q||d  qt |D ]%\}}g }|d D ]}|d |v sA|d |v rBq3|| q3||| d< q)|S )z?
        find out the empty texts and remove the links
        r%   r   idlinkingr?   )r`   r.   r0   r  deepcopy)r   ocr_infoZnew_ocr_infoZempty_indexr   infonew_linklinkr   r   r   filter_empty_contents  s   z)VQATokenLabelEncode.filter_empty_contentsc                 C   s  |  |}tt|D ]}d|| vr | || d || d< q| jdkr*t|}| jo0| j }|r8| |}|d j	\}}}g }g }	g }
g }g }g }g }|rYg }i }i }t
 }t||d< |D ]}|d }t|dkrpqb|rt|dkr||d  qb|d	 ||d < |d
d |d D  | |d |d< | jj|dddd}| js|d dd |d< |d dd |d< |d dd |d< | jr|d gt|d  }n| |d |d | j}t|dkrqb| |||}| jr|dg d |g d | js|d	 }| ||}|rA|d | jd kr@t|||d < | }|t|
t|
t|d  | d n|t|
t|
t|d  dd |
|d  ||d  |	| || |t|
 | js||| qb|
|d< ||d< |	|d< dgt|
 |d< ||d< ||d< t| jj| jj| jjd|d< ||d< |r||d< ||d< ||d< ||d < |S )!Nr  r$   rC  r   rZ  r%   r   rW  r   c                 S   s   g | ]}t t|qS r   )tuplesorted)r   lr   r   r   r   \  r   z0VQATokenLabelEncode.__call__.<locals>.<listcomp>rX  FT)Zpad_to_max_seq_lenZreturn_attention_maskZreturn_token_type_ids	input_idsr?   rI   Ztoken_type_idsZattention_mask)r   r   r   r   O)startr   r   r   segment_offset_id)padding_sidepad_token_type_idpad_token_idZtokenizer_paramsentitiesr   id2labelempty_entityentity_id_to_index_map)_load_ocr_infor/   r.   trans_poly_to_bboxrK  r   rD  rH  r^  r   setr  rY  addrN  rE  ro   rG  rJ  rV  _smooth_boxinsertr0   _parse_labelrF  upperr_   rf  rg  rh  )r   r   rZ  r   Ztrain_reheightwidthr   Z
words_listZ	bbox_listZinput_ids_listZtoken_type_ids_listre  Zgt_label_listri  r   rj  rl  rk  r[  rn   
encode_resr  r   gt_labelr   r   r   r   .  s   










zVQATokenLabelEncode.__call__c                 C   sl   t tdd |D }t tdd |D }t tdd |D }t tdd |D }||||gS )Nc                 S      g | ]}|d  qS r   r   r   r   r   r   r     r   z:VQATokenLabelEncode.trans_poly_to_bbox.<locals>.<listcomp>c                 S   ry  r   r   r   r   r   r   r     r   c                 S   ry  r   r   r   r   r   r   r     r   c                 S   ry  r   r   r   r   r   r   r     r   )intr2   r$  r   )r   ZpolyrQ  rS  rR  rT  r   r   r   rn    s
   z&VQATokenLabelEncode.trans_poly_to_bboxc                 C   sn   | j r,| jj|d ddd }g }|D ]}||d d | |d |d d q|S |d }t|}|S )Nr   F)clsr   r?   )r%   r  r$   r   )rH  rI  Zocrr0   rn  r,   r-   )r   r   Z
ocr_resultrZ  resr[  Z	info_dictr   r   r   rm    s   


z"VQATokenLabelEncode._load_ocr_infoc                 C   s   t |}|d d df d | |d d df< |d d df d | |d d df< |d d df d | |d d df< |d d df d | |d d df< |d }|S )Nr   i  r>   r?   r@   int64)r2   r3   r   tolist)r   r   ru  rv  r   r   r   rq    s   
$$$$zVQATokenLabelEncode._smooth_boxc                 C   sr   g }|  dv r|dgt|d   |S || jd|    || jd|   gt|d d   |S )N)otherZothersignorer   rb  zb-zi-r?   )rW   rN  r.   r0   rF  rt  )r   r   rw  rx  r   r   r   rs    s   z VQATokenLabelEncode._parse_label)FFr<  TNFN)r   r   r   rq   r   rV  r^  r   rn  rm  rq  rs  r   r   r   rw   r   r;    s"    '}	r;  c                       s,   e Zd Z			d fdd	Zdd Z  ZS )MultiLabelEncodeNFc                    sp   t t| ||| t|||fi || _|| _|d u r)t|||fi || _d S t||||fi || _d S r   )	ru   r  r   rs   
ctc_encodegtc_encode_typer&  
gtc_encodeeval)r   rb   rc   rd   r  r   rw   r   r   r     s(   
zMultiLabelEncode.__init__c                 C   s   t |}t |}t }|dd |d< |d |d< | j|}| j|}|d u s/|d u r1d S |d |d< | jd urC|d |d< n|d |d< |d |d< |S )NZimg_pathr   r   Z	label_ctcZ	label_gtcZ	label_sarrz   )r  rY  r_   r   r  r   r  r  )r   r   Zdata_ctcZdata_gtcZdata_outZctcZgtcr   r   r   r     s   


zMultiLabelEncode.__call__)NFNr   r   r   rw   r   r    s    r  c                       rr   )NRTRLabelEncoderL   NFc                    rt   r   )ru   r  r   rv   rw   r   r   r     s   
zNRTRLabelEncode.__init__c                 C   s   |d }|  |}|d u rd S t|| jd krd S tt||d< |dd |d |dg| jt|   }t||d< |S )Nr   r?   rz   r   r>   r@   )ro   r.   rT   r2   r3   rr  r0   r   r   r   r   r   
  s   

zNRTRLabelEncode.__call__c                 C   s   g d| }|S )N)r}   z<unk><s></s>r   rl   r   r   r   r^        z NRTRLabelEncode.add_special_charr   r   r   r   rw   r   r    s    	r  c                       8   e Zd ZdZ			d fdd	Zdd Zd	d
 Z  ZS )ViTSTRLabelEncoderL   NFr   c                    r6  r   )ru   r  r   ignore_indexr   rb   rc   rd   r  r   rw   r   r   r         

zViTSTRLabelEncode.__init__c                 C   s   |d }|  |}|d u rd S t|| jkrd S tt||d< |d| j |d || jg| jd t|   }t||d< |S r   )ro   r.   rT   r2   r3   rr  r  r0   r   r   r   r   r   +  s   

zViTSTRLabelEncode.__call__c                 C   s   ddg| }|S )Nr  r  r   rl   r   r   r   r^   9  r  z"ViTSTRLabelEncode.add_special_char)NFr   r   r   r   rw   r   r    s    r  c                       r  )ABINetLabelEncoderL   NFd   c                    r6  r   )ru   r  r   r  r  rw   r   r   r   A  r  zABINetLabelEncode.__init__c                 C   s|   |d }|  |}|d u rd S t|| jkrd S tt||d< |d || jg| jd t|   }t||d< |S )Nr   rz   r   r?   )ro   r.   rT   r2   r3   r0   r  r   r   r   r   r   L  s   

zABINetLabelEncode.__call__c                 C   r|   )Nr  r   rl   r   r   r   r^   Y  r~   z"ABINetLabelEncode.add_special_char)NFr  r   r   r   rw   r   r  >  s    r  c                       s2   e Zd Z		d	 fdd	Zdd Zdd Z  ZS )
SRLabelEncodeNFc                    s   t t| ||| i | _t|d}| D ]}| }| \}}|| j|< qW d    n1 s3w   Y  d}	i | _t	t
|	D ]	}
|
| j|	|
 < qCd S )Nr   
0123456789)ru   r  r   dicrZ   r[   r]   rM  english_stroke_dictr/   r.   )r   rb   rc   rd   r   rg   ri   ra   sequenceZenglish_stroke_alphabetr   rw   r   r   r   _  s    zSRLabelEncode.__init__c                 C   s~   d}|D ]}|| j vrq|| j | 7 }q|d7 }|}t|}t| jd}t|d D ]}| j||  ||d < q-||fS )Nr   0r}  r?   )r  r.   r2   rA   rT   r   r/   r  )r   r   Zstroke_sequencera   rz   input_tensorr   r   r   r   ro   q  s   
zSRLabelEncode.encodec                 C   s6   |d }|  |\}}||d< ||d< |d u rd S |S )Nr   rz   r  )ro   )r   r   rn   rz   r  r   r   r   r     s   zSRLabelEncode.__call__r   r   r   r   r   ro   r   r   r   r   rw   r   r  ^  s    r  c                       r  )SPINLabelEncoderL   NFTc                    r6  r   )ru   r  r   rW   r7  rw   r   r   r     r8  zSPINLabelEncode.__init__c                 C   s$   d| _ d| _| j g| jg | }|S r   r   rl   r   r   r   r^     r   z SPINLabelEncode.add_special_charc                 C   s   |d }|  |}|d u rd S t|| jkrd S tt||d< dg| dg }dd t| jd D }||d t|< t||d< |S )Nr   rz   r   r?   c                 S   s   g | ]}d qS r   r   r0  r   r   r   r     s    z,SPINLabelEncode.__call__.<locals>.<listcomp>r>   )ro   r.   rT   r2   r3   r/   r1  r   r   r   r     s   
zSPINLabelEncode.__call__)NFTr   r   r   rw   r   r    s    
r  c                       s.   e Zd ZdZ		d fdd	Zdd Z  ZS )	VLLabelEncoderL   NFc                    s<   t t| ||| i | _t| jD ]	\}}|| j|< qd S r   )ru   r  r   r_   r`   ra   )r   rb   rc   rd   r   rj   rk   rw   r   r   r     s   zVLLabelEncode.__init__c           	      C   s  |d }t |}|dkrd S d}tt|}t||d }|| }||d kr.|d | }n|dkr9|dd  }n|d | ||d d   }||d< ||d< ||d< | |}|d u r^d S dd |D }tt ||d	< |dg| jt |   }t||d< | |}| |}|d u rg }nd
d |D }|d u rg }ndd |D }tt ||d< tt ||d< |dg| jt |   }|dg| jt |   }t||d< t||d< |S )Nr   r   r?   	label_res	label_subZlabel_idc                 S      g | ]}|d  qS r   r   r   r   r   r   r     r   z*VLLabelEncode.__call__.<locals>.<listcomp>rz   c                 S   r  r   r   r   r   r   r   r     r   c                 S   r  r   r   r   r   r   r   r     r   Z
length_resZ
length_sub)r.   rY   r/   r	   ro   r2   r3   rT   )	r   r   rn   Zlen_strZ
change_numorderZ	change_idr  r  r   r   r   r     sL   


zVLLabelEncode.__call__r   )r   r   r   rq   r   r   r   r   r   rw   r   r    s    r  c                   @   r   )CTLabelEncodec                 K   r!   r   r   r"   r   r   r   r     r#   zCTLabelEncode.__init__c           	      C   s   |d }t |}t|}g g }}td|D ]}|| d }t|}|| || d }|| qt|dkr=d S ||d< ||d< |S )Nr   r   r$   r%   r)   r*   )r,   r-   r.   r/   r2   r3   r0   )	r   r   r   r6   r7   r8   r:   r;   r<   r   r   r   r     s   



zCTLabelEncode.__call__Nr   r   r   r   r   r    r   r  c                       s4   e Zd Z			d
 fdd	Zdd Zdd	 Z  ZS )CANLabelEncoder  FTc                    s   t t| |||| d S r   )ru   r  r   )r   rc   rb   rd   rW   r   rw   r   r   r   	  s   
zCANLabelEncode.__init__c                 C   s@   g }|D ]}|| j vrq|| j| qt|dkrd S |S r9  )ra   r0   r_   r   r.   )r   Ztext_seqZtext_seq_encodedrn   r   r   r   ro     s   
zCANLabelEncode.encodec                 C   s<   |d }t |tr|  }|| j | ||d< |S r   )
isinstancer  r]   rM  r0   rV   ro   r   r   r   r   r     s   
zCANLabelEncode.__call__)r  FTr  r   r   rw   r   r    s    	
r  )0
__future__r   r   r   r   r  numpyr2   stringZshapely.geometryr   r   r   r,   randomr	   Zppocr.utils.loggingr
   Zppocr.data.imaug.vqa.augmentr   objectr   r    rK   rs   r   r   r   r   r   r   r   r   r  r  r&  r5  r:  r;  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   sV   4C* 5.8", !&,8/ x'! 0#=