o
    0j                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
 d dlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ej"edG dd de!Z#dS )    N)AnyDictListOptionalTupleUnion   )logging)pipeline_requires_extra)custom_open   )ImageBatchSampler)	ReadImage)	HPIConfigPaddlePredictorOption)	benchmark   )BaseChat)LayoutParsingResult   )PP_ChatOCR_PipelineZiec                -       s  e Zd ZdZdgZdddddddddedee d	ee d
eeeef  dee	 de
deeeeef ef  de
ddf fddZdeddfddZdeddfddZdeddfddZdedefddZ																				dedeeee ejeej f dee
 dee
 dee
 dee
 d eeeef  d!ee
 d"eeeeeef ef  d#ee d$ee d%ee d&ee d'ee d(ee d)ee d*ee d+ee d,ee d-ee d.ee d/ee def,d0d1Zd2ed3eddfd4d5Zd6edee fd7d8Zd9ee deeeef fd:d;Z	<	=		dfd2ed>ed?ed@e
dAedefdBdCZ	dgdDed3edAeddfdEdFZ dgd6edAedefdGdHZ!dIeeee f dee fdJdKZ"dLe#dMedIedNedOeddfdPdQZ$dAedRe
dDedIee dSed>edefdTdUZ%			<												dhdIeeee f d2ee dRe
dDed>edVedWedXedYedZed[ed\ed]ed^ed_ed`edAedef$dadbZ&didcddZ'  Z(S )jPP_ChatOCRv3_PipelinezPP-ChatOCR PipelinezPP-ChatOCRv3-docNFT)deviceengineengine_config	pp_optionuse_hpip
hpi_configinitial_predictorconfigr   r   r   r   r   r   r   returnc          
   	      s   t  jd||||||d|	 |d | _|| _|dd| _d| _d| _d| _|r9| 	| | 
| | | tdd| _tdd	| _d
| _dS )a  Initializes the PP-ChatOCRv3 pipeline.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
            initial_predictor (bool, optional): Whether to initialize the predictor.
                Defaults to `True`.
        )r   r   r   r   r   r   pipeline_nameuse_layout_parserTNr   )Z
batch_sizeZBGR)formati   )super__init__r!   r   getr"   layout_parsing_pipelinechat_bot	retrieverinintial_visual_predictorinintial_chat_predictorinintial_retriever_predictorr   Zbatch_samplerr   Z
img_readertable_structure_len_max)
selfr   r   r   r   r   r   r   r   kwargs	__class__r$   s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/pp_chatocr/pipeline_v3.pyr&   *   s.   





zPP_ChatOCRv3_Pipeline.__init__c                 C   s<   | dd| _| jr| di  dddi}| || _dS )a  
        Initializes the visual predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r"   TZSubPipelinesZLayoutParserZpipeline_config_errorz)config error for layout_parsing_pipeline!N)r'   r"   Zcreate_pipeliner(   )r/   r   Zlayout_parsing_configr$   r$   r3   r+   b   s   
z/PP_ChatOCRv3_Pipeline.inintial_visual_predictorc                 C   s2   ddl m} |di dddi}||| _dS )a   
        Initializes the retriever predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r   create_retriever
SubModulesZLLM_RetrieverZretriever_config_errorzconfig error for llm retriever!N) r5   r'   r*   )r/   r   r5   retriever_configr$   r$   r3   r-   v   s   
z2PP_ChatOCRv3_Pipeline.inintial_retriever_predictorc                 C   s   ddl m} |di dddi}||| _ddl m} |di di d	d
di}||| _|di di dd
di}||| _dS )a  
        Initializes the chat predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r   create_chat_botr6   ZLLM_ChatZchat_bot_config_errorzconfig error for llm chat bot!)create_prompt_engineeringZPromptEngneeringZKIE_CommonTextZpe_config_errorzconfig error for text_pe!Z	KIE_Tablezconfig error for table_pe!N)r7   r:   r'   r)   r;   text_petable_pe)r/   r   r:   chat_bot_configr;   Ztext_pe_configZtable_pe_configr$   r$   r3   r,      s0   





z-PP_ChatOCRv3_Pipeline.inintial_chat_predictorlayout_parsing_resultc                 C   s   i }|d }t t|D ],}|| d }|| d }|dv rqd| }||vr-|||< q||  d| 7  < q|d }g }	g }
|D ]}|
|d  d	|d
 d }|	| qCi }||d< |	|d< |
|d< |S )z
        Decodes the visual result from the layout parsing result.

        Args:
            layout_parsing_result (LayoutParsingResult): The result of layout parsing.

        Returns:
            dict: The decoded visual information.
        parsing_res_listZblock_labelZblock_content)tableZformulaz	words in z
 table_res_listZ	pred_html Ztable_ocr_predZ	rec_textsnormal_text_dicttable_text_listtable_html_list)rangelenappendjoin)r/   r?   rD   r@   ZpnolabelcontentkeyrB   rE   rF   Z	table_resZsingle_table_textvisual_infor$   r$   r3   decode_visual_result   s.   

z*PP_ChatOCRv3_Pipeline.decode_visual_resultinputuse_doc_orientation_classifyuse_doc_unwarpinguse_seal_recognitionuse_table_recognitionlayout_threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modetext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_threshc                 k   s    | j dkrtd ddiV  | jdu r td | | j | jj|fi d|d|d|d	|d
|d|d|d|	d|
d|d|d|d|d|d|d|d|d|d|d|D ]}| |}||d}|V  qddS )a&  
        This function takes an input image or a list of images and performs various visual
        prediction tasks such as document orientation classification, document unwarping,
        general OCR, seal recognition, and table recognition based on the provided flags.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
                                                                        numpy array of an image, or list of numpy arrays.
            use_doc_orientation_classify (bool): Flag to use document orientation classification.
            use_doc_unwarping (bool): Flag to use document unwarping.
            use_seal_recognition (bool): Flag to use seal recognition.
            use_table_recognition (bool): Flag to use table recognition.
            layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to `False`.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to `None`.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to `None`.
            text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
            text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
            text_det_thresh (Optional[float]): Threshold for text detection.
            text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
            text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
            text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
            seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
            seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
            seal_det_thresh (Optional[float]): Threshold for seal detection.
            seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
            seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
            seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: A dictionary containing the layout parsing result and visual information.
        Fz1The models for layout parser are not initialized.errorNzGThe layout parsing pipeline is not initialized, will initialize it now.rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   rb   r_   r`   ra   rc   rd   )r?   rN   )	r"   r	   re   r(   warningr+   r   predictrO   )r/   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   r0   r?   rN   Zvisual_predict_resr$   r$   r3   visual_predict   st   
?


	

z$PP_ChatOCRv3_Pipeline.visual_predictrN   	save_pathc                 C   s   t |ts	|g}n|}tj|}tj|st| t|d}|t	j
|ddd  W d   dS 1 s9w   Y  dS )a-  
        Save the visual info list to the specified file path.

        Args:
            visual_info (dict): The visual info result, which can be a single object or a list of objects.
            save_path (str): The file path to save the visual info list.

        Returns:
            None
        wFensure_ascii
N)
isinstancelistospathdirnameexistsmakedirsr   writejsondumps)r/   rN   ri   visual_info_list	directoryfoutr$   r$   r3   save_visual_info_list@  s   


z+PP_ChatOCRv3_Pipeline.save_visual_info_list	data_pathc                 C   sB   t |d}| }t|}W d   |S 1 sw   Y  |S )z
        Loads visual info list from a JSON file.

        Args:
            data_path (str): The path to the JSON file containing visual info.

        Returns:
            list[dict]: A list of dict objects parsed from the JSON file.
        rN)r   readlinerv   loads)r/   r|   findatarx   r$   r$   r3   load_visual_info_listX  s   

z+PP_ChatOCRv3_Pipeline.load_visual_info_listrx   c           
      C   st   g }g }g }|D ],}|d }|D ]}||  dd||< q|d }|d }	|| || ||	 q|||fS )aW  
        Merge visual info lists.

        Args:
            visual_info_list (list[dict]): A list of visual info results.

        Returns:
            tuple[list, list, list]: A tuple containing four lists, one for normal text dicts,
                                               one for table text lists, one for table HTML lists.
        rD   rm   r7   rE   rF   )replacerI   extend)
r/   rx   all_normal_text_listall_table_text_listall_table_html_listZsingle_visual_inforD   rM   rE   rF   r$   r$   r3   merge_visual_info_listg  s   


z,PP_ChatOCRv3_Pipeline.merge_visual_info_list  ,  min_characters
block_sizeflag_save_bytes_vectorr8   c                 C   sf  t |ts	|g}n|}|durddlm} ||}n| jdu r*td | | j | j}| 	|}	|	\}
}}i }g }t
|
D ]\}}| D ]\}}|| d| dg7 }qGq?t||D ]\}}t||| j krr|d| g7 }q]d|}d	|d
< t||krd	|d< |j|d< ||d< |j||d|d< |r||d |d< d|d
< |S d|d< ||d< |S )a  
        Build a vector representation from visual information.

        Args:
            visual_info (dict): The visual information input, can be a single instance or a list of instances.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.
            block_size (int): The size of each chunk to split the text into.
            flag_save_bytes_vector (bool): Whether to save the vector as bytes, defaults to `False`.
            retriever_config (dict): The configuration for the retriever, defaults to `None`.

        Returns:
            dict: A dictionary containing the vector info and a flag indicating if the text is too short.
        Nr   r4   8The retriever is not initialized,will initialize it now.   ：rm   u   table：r7   Fr   flag_too_short_text
model_namer   )r   vectorT)rn   ro   r7   r5   r*   r	   rf   r-   r   r   	enumerateitemsziprH   r.   rJ   r   Zgenerate_vector_databaseencode_vector_store_to_bytes)r/   rN   r   r   r   r8   rx   r5   r*   all_visual_infor   r   r   vector_info	all_itemsirD   typetext
table_html
table_textZall_text_strr$   r$   r3   build_vector  s\   






z"PP_ChatOCRv3_Pipeline.build_vectorr   c           	      C   s   t j|}t j|st | |d ur ddlm} ||}n| jd u r0t	d | 
| j | j}t|}|d sM|d sM||d |d< d|d< t|d}|tj|d	d
d  W d    d S 1 sjw   Y  d S )Nr   r4   r   r   r   r   Trj   Frk   rm   )rp   rq   rr   rs   rt   r7   r5   r*   r	   rf   r-   r   copydeepcopyr   r   ru   rv   rw   )	r/   r   ri   r8   ry   r5   r*   Zvector_info_datarz   r$   r$   r3   save_vector  s6   




z!PP_ChatOCRv3_Pipeline.save_vectorc                 C   s   d }|d urddl m} ||}n| jd u r!td | | j | j}t|dG}| }t	
|}d|vs?d|vs?d|vrOtd d	d
iW  d    S |d rh||d |d< d|d< W d    |S W d    |S 1 ssw   Y  |S )Nr   r4   r   r}   r   r   r   zInvalid vector info.re   z%Invalid vector info when load vector!F)r7   r5   r*   r	   rf   r-   r   openr~   rv   r   re   decode_vector_store_from_bytes)r/   r|   r8   r   r5   r*   r   r   r$   r$   r3   load_vector  s>   







z!PP_ChatOCRv3_Pipeline.load_vectorkey_listc                 C   sL   |dkrg S t |tr|S t |tr$tdd|}|ddd}|S g S )z
        Formats the key list.

        Args:
            key_list (str|list[str]): A string or a list of strings representing the keys.

        Returns:
            list[str]: A list of formatted keys.
        r7   z[\t\n\r\f\v]u   ，,)rn   ro   strresubr   split)r/   r   r$   r$   r3   
format_key  s   


z PP_ChatOCRv3_Pipeline.format_keyr)   promptfinal_resultsfailed_resultsc                 C   s   | |}|d }|d }|dur"d|vr|g|d< n|d | |du r2td||jf  dS ||}| D ]\}	}
|
|vrP|	|v rP||	 |
||	< q;dS )a  
        Generate and merge chat results into the final results dictionary.

        Args:
            prompt (str): The input prompt for the chat bot.
            key_list (list): A list of keys to track which results to merge.
            final_results (dict): The dictionary to store the final merged results.
            failed_results (list): A list of failed results to avoid merging.

        Returns:
            None
        rL   Zreasoning_contentNz.chat bot error: 
 [prompt:]
 %s
 [result:] %s
)Zgenerate_chat_resultsrI   r	   re   ZERROR_MASSAGEZfix_llm_result_formatr   remove)r/   r)   r   r   r   r   Z
llm_resultZllm_result_contentZllm_result_reasoning_contentrM   valuer$   r$   r3   generate_and_merge_chat_results'  s*   


z5PP_ChatOCRv3_Pipeline.generate_and_merge_chat_resultsuse_vector_retrievalr   c                 C   sD  |rq|durq|durddl m} ||}n| jdu r%td | | j | j}dd |D }	|d }
|d s`|d	 |jksLJ d
|d	  d|j d|d rU||
}
|j	|	|
d|d}|S t
|
dkrmd|
}|S d}|S g }t|D ]\}}| D ]\}}|| d| dg7 }qqwd|}t
||krtd |S )a}  
        Retrieve related normal text based on vector retrieval or all normal text list.

        Args:
            retriever_config (dict): Configuration for the retriever.
            use_vector_retrieval (bool): Whether to use vector retrieval.
            vector_info (dict): Dictionary containing vector information.
            key_list (list[str]): List of keys to generate question keys.
            all_normal_text_list (list): List of normal text.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.

        Returns:
            str: Related normal text.
        Nr   r4   r   c                 S   s   g | ]}| qS r$   r$   ).0rM   r$   r$   r3   
<listcomp>{  s    zAPP_ChatOCRv3_Pipeline.get_related_normal_text.<locals>.<listcomp>r   r   r   zThe vector model name (z+) does not match the retriever model name (z&). Please check your retriever config.r   2   )Ztopkr   r   r7   r   rm   zMThe input text content is too long, the large language model may truncate it.)r7   r5   r*   r	   rf   r-   r   r   r   Zsimilarity_retrievalrH   rJ   r   r   )r/   r8   r   r   r   r   r   r5   r*   Zquestion_key_listr   related_textr   r   rD   r   r   r$   r$   r3   get_related_normal_textU  sL   




z-PP_ChatOCRv3_Pipeline.get_related_normal_texttext_task_descriptiontext_output_formattext_rules_strtext_few_shot_demo_text_content!text_few_shot_demo_key_value_listtable_task_descriptiontable_output_formattable_rules_str table_few_shot_demo_text_content"table_few_shot_demo_key_value_listr>   c                  C   sn  |  |}t|dkrddiS t|ts|g}n|}| jdu r*td | | j |dur9ddl	m
} ||}n| j}| |}|\}}}i }g d}t|dkrt||D ]/\}}t||| j kr|fD ]}t|dkr| jj|||||||d	}| ||||| qgqWt|dkr| ||||||}t|dkr| jj||||||	|
d	}| ||||| d|iS )
a  
        Generates chat results based on the provided key list and visual information.

        Args:
            key_list (Union[str, list[str]]): A single key or a list of keys to extract information.
            visual_info (dict): The visual information result.
            use_vector_retrieval (bool): Whether to use vector retrieval.
            vector_info (dict): The vector information for retrieval.
            min_characters (int): The minimum number of characters required.
            text_task_description (str): The description of the text task.
            text_output_format (str): The output format for text results.
            text_rules_str (str): The rules for generating text results.
            text_few_shot_demo_text_content (str): The text content for few-shot demos.
            text_few_shot_demo_key_value_list (str): The key-value list for few-shot demos.
            table_task_description (str): The description of the table task.
            table_output_format (str): The output format for table results.
            table_rules_str (str): The rules for generating table results.
            table_few_shot_demo_text_content (str): The text content for table few-shot demos.
            table_few_shot_demo_key_value_list (str): The key-value list for table few-shot demos.
            chat_bot_config(dict): The parameters for LLM chatbot, including api_type, api_key... refer to config file for more details.
            retriever_config (dict): The parameters for LLM retriever, including api_type, api_key... refer to config file for more details.
        Returns:
            dict: A dictionary containing the chat results.
        r   Zchat_resu    Error:输入的key_list无效！Nz;The LLM chat bot is not initialized,will initialize it now.r   r9   )u   大模型调用失败u   未知u   未找到关键信息Noner7   )Ztask_descriptionZoutput_formatZ	rules_strZfew_shot_demo_text_contentZfew_shot_demo_key_value_list)r   rH   rn   ro   r)   r	   rf   r,   r   r7   r:   r   r   r.   r=   Zgenerate_promptr   r   r<   ) r/   r   rN   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r8   rx   r:   r)   r   r   r   r   r   r   r   r   Z
table_infor   r   r$   r$   r3   chat  s   
-





		
zPP_ChatOCRv3_Pipeline.chatc                 O   s   t d d S )NzPP-ChatOCRv3-doc Pipeline do not support to call `predict()` directly! Please invoke `visual_predict`, `build_vector`, `chat` sequentially to obtain the result.)r	   re   )r/   argsr0   r$   r$   r3   rg     s   zPP_ChatOCRv3_Pipeline.predict)NNNNNNNNNNNNNNNNNNNN)r   r   FN)N)TNr   NNNNNNNNNNNN)r    N))__name__
__module____qualname____doc__entitiesr   r   r   r   r   boolr   r   r&   dictr+   r-   r,   r   rO   r   npZndarrayfloatr   intrh   r{   r   ro   r   r   r   r   r   r   r   r   r   rg   __classcell__r$   r$   r1   r3   r   #   s   	
8)*	

h

M
!"!
.
F	

~r   )$r   rv   rp   r   typingr   r   r   r   r   r   numpyr   utilsr	   Z
utils.depsr
   Zutils.file_interfacer   Zcommon.batch_samplerr   Zcommon.readerr   modelsr   r   Zutils.benchmarkr   Zcomponents.chat_serverr   Zlayout_parsing.resultr   Zpipeline_baser   Ztime_methodsr   r$   r$   r$   r3   <module>   s&    