o
    0jDV                  	   @  s  d dl mZ d dlZd dlZd dlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZmZmZmZmZmZ dd
lmZ ddlmZmZmZmZmZmZm Z m!Z! ddl"m#Z#m$Z$ g dZ%edrjd dl&Z&G dd de'Z(edG dd deeeeeZ)G dd de)Z*dS )    )annotationsN)Image	ImageDraw   )logging)class_requires_depsis_dep_available)SIMFANG_FONT   )BaseCVResult
BaseResult	HtmlMixin	JsonMixinMarkdownMixin	WordMixin	XlsxMixin)MarkdownConverter)build_handle_funcs_dictformat_centered_by_htmlformat_chart2html_tableformat_image_plainformat_image_scaled_by_htmlformat_table_centerformat_text_plainsimplify_table   )draw_box_txt_fineget_minarea_rect)Zfigure_titleZvision_footnoteimagecharttableheaderZheader_imagefooterZfooter_imagefootnote
aside_textzopencv-contrib-pythonc                   @  s:   e Zd ZdZ					ddddZdd	d
ZdddZdS )PaddleOCRVLBlockzPaddleOCRVL Block Class NreturnNonec                 C  s>   || _ ttt|| _|| _d| _|| _|| _|| _	|| _
dS )a  
        Initialize a PaddleOCRVLBlock object.

        Args:
            label (str): Label assigned to the block.
            bbox (list): Bounding box coordinates of the block.
            content (str, optional): Content of the block. Defaults to an empty string.
        N)labellistmapintbboxcontentr   polygon_pointsgroup_idglobal_block_idglobal_group_id)selfr)   r-   r.   r0   r/   r1   r2    r4   p/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/paddleocr_vl/result.py__init__D   s   
zPaddleOCRVLBlock.__init__strc                 C  "   d| j  d| j d| j d}|S z>
        Return a string representation of the block.
        z

#################
label:	z
bbox:	z

content:	z
#################r)   r-   r.   r3   _strr4   r4   r5   __str___      zPaddleOCRVLBlock.__str__c                 C  r8   r9   r:   r;   r4   r4   r5   __repr__f   r>   zPaddleOCRVLBlock.__repr__)r&   NNNNr'   r(   )r'   r7   )__name__
__module____qualname____doc__r6   r=   r?   r4   r4   r4   r5   r%   A   s    
r%   c                      s~   e Zd ZdZd fddZdddZd d
dZd!ddZd!ddZd!ddZ	d"ddZ
d!ddZd#d$ddZd$ddZ  ZS )%PaddleOCRVLResultzT
    PaddleOCRVLResult class for holding and formatting OCR/VL parsing results.
    r'   r(   c                   sj   t  | t|  t|  t|  t|  t|  | d dg }dd t	 | D | _
dS )z
        Initializes a new instance of the class with the specified data.

        Args:
            data: The input data for the parsing result.
        model_settingsmarkdown_ignore_labelsc                 S     g | ]}|qS r4   r4   ).0r)   r4   r4   r5   
<listcomp>   s    z.PaddleOCRVLResult.__init__.<locals>.<listcomp>N)superr6   r   r   r   r   r   getSKIP_ORDER_LABELScopyskip_order_labels)r3   datarG   	__class__r4   r5   r6   t   s   





zPaddleOCRVLResult.__init__r,   c                 C  s   | d }t |tr|d S |S )z:Return the page image width, unwrapping list if necessary.widthr   )
isinstancer*   )r3   wr4   r4   r5   _page_image_width   s   z#PaddleOCRVLResult._page_image_widthdict[str, np.ndarray]c                 C  s  i }| d }|d rKt | d tr!| d j D ]\}}|||< qt | d trKt| d D ]\}}t |trJ|j D ]\}}||| d| < q<q.| d d rt | d tra| d jd |d< t | d trt| d D ]\}}t |tr|jd |d| < qn| d	rt | d	 ts| dr| d	 d
 }| d	 d }	| d d dddddddf }
|
jdd \}}t	|
}t
j||dft
jdd }td t|}t}tt||	D ]\}\}}ztddtddtddf}t
|}t|dkr\dd | D }|j||d|d t|}tdt|dddf t|dddf   }t
|dddf |dddf< t
|dddf td| |dddf< ndd | D }|j||d t |trv|d }t||f|||j}t
|t
j !d}t"#||gd|d t"$||}W q   Y qt%t	|
|d}t&d |d |fd!}|'|dd||f |'t	||d|d |f ||d"< |S )#z
        Convert the parsing result to a dictionary of images.

        Returns:
            dict: Keys are names, values are numpy arrays (images).
        rF   use_doc_preprocessordoc_preprocessor_res_use_layout_detectionlayout_det_resresZlayout_det_res_spotting_resZ	rec_polysZ	rec_textsZ
output_imgNr   r   r
   )Zdtype   r   c                 S  s   g | ]\}}||fqS r4   r4   rI   xyr4   r4   r5   rJ      s    z-PaddleOCRVLResult._to_img.<locals>.<listcomp>   )outlinerS   fillg      ?      c                 S  s    g | ]\}}t |t |fqS r4   )r,   ra   r4   r4   r5   rJ      s     )rf   )r_   rg   r   TRGB)r`   r`   r`   Zspotting_res_img)(rT   r   Zimgitemsr*   	enumeraterL   shaper   Z	fromarraynpZonesZuint8randomseedr   ZDrawr	   ziprandintarraylentolistZpolygonr   r,   maxminmeantupler   pathZint32Zreshapecv2Z	polylinesZbitwise_andblendnewZpaste)r3   Zres_img_dictrF   keyvalueidxrY   Z
layout_resZboxesZtxtsr   hrU   Zimg_leftZ	img_rightZ	draw_leftZvis_fontboxtxtcolorZptsheightZbox_ptsZimg_right_textZimg_showr4   r4   r5   _to_img   s   


$






0".zPaddleOCRVLResult._to_imgdict[str, str]c                 C  d   i }|  dr0t| d dkr0tt| d D ]}| d | }|d }d| }|jd ||< q|S )z
        Converts the prediction to its corresponding HTML representation.

        Returns:
            dict: The str type HTML representation result.
        table_res_listr   table_region_idtable_pred)rL   rs   rangehtml)r3   Zres_html_dictsno	table_resr   r}   r4   r4   r5   _to_html      
zPaddleOCRVLResult._to_htmlc                 C  r   )z
        Converts the prediction HTML to an XLSX file path.

        Returns:
            dict: The str type XLSX representation result.
        r   r   r   r   r   )rL   rs   r   Zxlsx)r3   Zres_xlsx_dictr   r   r   r}   r4   r4   r5   _to_xlsx   r   zPaddleOCRVLResult._to_xlsxc                 O  s   i }| d |d< | d |d< | d |d< | d |d< | d |d< | d }||d< | d d rEt | d tr?| d jd	 |d< n| d |d< | d d
 rbt | d tr\| d jd	 |d< n| d |d< | d }dd |D }||d< tj|g|R i |S )aw  
        Converts the instance's attributes to a dictionary and then to a string.

        Args:
            *args: Additional positional arguments passed to the base class method.
            **kwargs: Additional keyword arguments passed to the base class method.

        Returns:
            dict: A dictionary with the instance's attributes converted to strings.
        
input_path
page_index
page_countrS   r   rF   rX   rY   r]   r[   r\   parsing_res_listc                 S  s   g | ]}|j |j|jd qS ))block_labelblock_content
block_bbox)r)   r.   r-   )rI   parsing_resr4   r4   r5   rJ     s    z-PaddleOCRVLResult._to_str.<locals>.<listcomp>)rT   r   r7   r   _to_str)r3   argskwargsrP   rF   r   r4   r4   r5   r      s,   zPaddleOCRVLResult._to_strTc           
        s   | d  dd| d  dd|   |r)dd } fdd} fdd}nd	d }fd
d}fdd}| d  ddrCtn|}| d  ddsO|}|rVdd }ndd }dd }t||||||d}| d  dg D ]}	||	d qp|S )z6Build label-to-handler mapping for content formatting.rF   use_ocr_for_image_blockFuse_seal_recognitionc                 S  s   t t| S N)r   r   blockr4   r4   r5   <lambda>.  s    z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>c                      t t|  d dS N)original_image_widthshow_ocr_content)Zcollapse_newlinesr   r   r   )r   r   r4   r5   r   1      c                   r   r   r   r   )r   r   r4   r5   r   9  r   c                 S     | j S r   r.   r   r4   r4   r5   r   B      c                      t |  dS N)r   r   r   )r   r4   r5   r   C      c                   r   r   r   r   )r   r4   r5   r   F  r   Zuse_chart_recognitionr[   c                 S  s   dt |  S N
)r   r   r4   r4   r5   r   T  s    c                 S  s   t d| j S r   )r   r.   r   r4   r4   r5   r   V  s    c                 S  r   r   r   r   r4   r4   r5   r   X  r   )Z	text_funcZ
image_funcZ
chart_funcZ
table_funcZformula_funcZ	seal_funcrG   N)rL   rV   r   r   pop)
r3   prettyZformat_text_funcZformat_image_funcZformat_seal_funcZformat_chart_funcZformat_table_funcZformat_formula_funchandle_funcs_dictr)   r4   )r   r   r   r5   _build_handle_funcs_dict%  sB   	
z*PaddleOCRVLResult._build_handle_funcs_dictc                 O  s  | dd}i }| d |d< | d |d< | d |d< | d |d< | d |d< | d }||d< | d d	dr<| jd
d}| d }g }d}	t|D ]\}
}|j}|| jvr[|	}|	d7 }	nd}|j|j|j|
||jdurm|jn|
d}t	|dr|j
dur|j
|d< t	|dr|jdur|j|d< |jdur|j|d< |r|jdur|j|d< | d d	dr||jr||j ||d< n|j|d< || qH||d< | drt| d trdd | d D |d< n| d |d< | d d r5t| d tr| d jd |d< n2t| d tr/g }| d D ]}t|tr#||jd  q|| q||d< n| d |d< | d d rt| d trN| d jd |d< n2t| d trzg }| d D ]}t|trn||jd  q\|| q\||d< n| d |d< tj|g|R i |S )aQ  
        Converts the object's data to a JSON dictionary.

        Args:
            *args: Positional arguments passed to the JsonMixin._to_json method.
            **kwargs: Keyword arguments passed to the JsonMixin._to_json method.

        Returns:
            dict: A dictionary containing the object's data in JSON format.
        Zkeep_imgFr   r   r   rS   r   rF   Zformat_block_contentTr   r   rg   N)r   r   r   Zblock_idZblock_orderr0   r1   r2   Zblock_polygon_pointsr   r   r^   c                 S  rH   r4   r4   )rI   r]   r4   r4   r5   rJ     s    z.PaddleOCRVLResult._to_json.<locals>.<listcomp>rX   rY   r]   r[   r\   )r   rL   r   rk   r)   rO   r.   r-   r0   hasattrr1   r2   r/   r   appendrT   r*   r   jsonr   _to_json)r3   r   r   Z	_keep_imgrP   rF   r   r   Zparsing_res_list_jsonZorder_indexr   r   r)   orderZres_dictrY   r]   r\   r4   r4   r5   r   f  s   













zPaddleOCRVLResult._to_jsonFdictc                 C  sB   | j |d}tj| d ||| d d}| d |d< | d |d< |S )a>  
        Save the parsing result to a Markdown file.

        Args:
            pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
            show_formula_number (bool): whether to show formula numbers.

        Returns:
            dict: Markdown information with text and images.
        r   r   imgs_in_doc)r   show_formula_numberr   r   r   )r   r   convert)r3   r   r   r   resultr4   r4   r5   _to_markdown  s   zPaddleOCRVLResult._to_markdownc           	      C  s   ddl m} ddlm} d|jddd|jddd|jd	d|jd	d|jd	d|jd	d|jd	d|jd	d
|jd	d	}|  }| dd}t	|t
rQ|d nt|pUd}|| d || dg d\}}|||| d |dS )a  Convert the parsing result to a Word-compatible dict.

        Returns:
            dict: {
                "word_blocks": List[Dict],       # Simplified list of content blocks
                "original_image_width": int,   # Pixel width of the source page
                "input_path": str,             # Original input file path
                "images": List[Dict]           # List of {"path": str, "img": PIL.Image}
            }
        r   )WD_ALIGN_PARAGRAPHr
   )build_word_blocks   T)sizealignindent
   )r   r   	   )	ZocrZvertical_textr$   ZspottingZinline_formulaZdisplay_formulaZreference_contentr.   r#   r   r   r   )extra_style_mapr   r   )word_blocksr   original_image_heightr   images)Zdocx.enum.textr   common.result.converterr   ZJUSTIFYZLEFTZCENTERrV   rL   rT   r*   r,   )	r3   r   r   r   r   Z
height_valr   r   r   r4   r4   r5   _to_word  sF   







zPaddleOCRVLResult._to_wordr@   )r'   r,   )r'   rW   )r'   r   )T)TF)r'   r   )rA   rB   rC   rD   r6   rV   r   r   r   r   r   r   r   r   __classcell__r4   r4   rQ   r5   rE   n   s    


O


)
AhrE   c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
PaddleOCRVLPagesResultc                 O     t d d S )Nz@The result of multi-pages don't support to save as image format!r   warningr3   r   r   r4   r4   r5   save_to_img$     z"PaddleOCRVLPagesResult.save_to_imgc                 O  r   )Nz?The result of multi-pages don't support to save as html format!r   r   r4   r4   r5   save_to_html*  r   z#PaddleOCRVLPagesResult.save_to_htmlc                 O  r   )Nz?The result of multi-pages don't support to save as xlsx format!r   r   r4   r4   r5   save_to_xlsx0  r   z#PaddleOCRVLPagesResult.save_to_xlsxc                 O  r   )Nz?The result of multi-pages don't support to save as word format!r   r   r4   r4   r5   save_to_word6  r   z#PaddleOCRVLPagesResult.save_to_wordN)rA   rB   rC   r   r   r   r   r4   r4   r4   r5   r   #  s
    r   )+
__future__r   rn   numpyrm   ZPILr   r   utilsr   Z
utils.depsr   r   Zutils.fontsr	   Zcommon.resultr   r   r   r   r   r   r   r   r   Z-common.result.converter.markdown_format_funcsr   r   r   r   r   r   r   r   Z
ocr.resultr   r   rM   rz   objectr%   rE   r   r4   r4   r4   r5   <module>   s*   $	(
-   7