o
    0jS                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlZddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ edrrd dl%Z%ej&edG dd de$Z'dS )    N)AnyDictListOptionalTupleUnion   )logging)function_requires_depsis_dep_availablepipeline_requires_extra)custom_open   )ImageBatchSampler)	ReadImage)	HPIConfigPaddlePredictorOption)	benchmark   )BaseChat)LayoutParsingResult   )PP_ChatOCR_Pipelineopencv-contrib-pythonZiec                /       s  e Zd ZdZdgZdddddddddedee d	ee d
eeeef  dee	 de
deeeeef ef  de
ddf fddZdd ZdeddfddZdeddfddZdeddfddZdeddfddZdedefddZ																					dtdeeee ejeej f d ee
 d!ee
 d"ee
 d#ee
 d$ee
 d%eeeef  d&ee
 d'eeeeeef ef  d(ee d)ee d*ee d+ee d,ee d-ee d.ee d/ee d0ee d1ee d2ee d3ee d4ee def.d5d6Zd7ed8eddfd9d:Zd;edee fd<d=Zd>ee deeeeef fd?d@Z 	A	B		dud7edCedDedEe
dFedefdGdHZ!	dvdIed8edFeddfdJdKZ"dvd;edFedefdLdMZ#dNeeee f dee fdOdPZ$e%dQ	dvdeeejf dNeeee f defdRdSZ&dTe'dUedNedVedWeddfdXdYZ(dFedZe
dIedNee d[edCedefd\d]Z)dTe'dNee d^ed_edef
d`daZ*			A												b		dwdNeeee f d7edZe
dIedCedceddedeedfedgedhediedjedkedledmednedoedFedef(dpdqZ+dxdrdsZ,  Z-S )yPP_ChatOCRv4_PipelinezPP-ChatOCRv4 PipelinezPP-ChatOCRv4-docNFT)deviceengineengine_config	pp_optionuse_hpip
hpi_configinitial_predictorconfigr   r   r   r   r   r    r!   returnc          
   	      s   t  jd||||||d|	 |d | _|| _|dd| _|dd| _d| _d| _d| _	d| _
|rH| | | | | | | | tdd| _td	d
| _d| _dS )a  Initializes the PP-ChatOCRv4 pipeline.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
            initial_predictor (bool, optional): Whether to initialize the predictor.
                Defaults to `True`.
        )r   r   r   r   r   r    pipeline_nameuse_layout_parserTuse_mllm_predictNr   )Z
batch_sizeZBGR)formati   )super__init__r$   r"   getr%   r&   layout_parsing_pipelinechat_bot	retrievermllm_chat_botinintial_visual_predictorinintial_chat_predictorinintial_retriever_predictorinintial_mllm_predictorr   Zbatch_samplerr   
img_readertable_structure_len_max)
selfr"   r   r   r   r   r   r    r!   kwargs	__class__r(   s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/pp_chatocr/pipeline_v4.pyr*   2   s4   






zPP_ChatOCRv4_Pipeline.__init__c                 C   s   | j d ur| j   d S d S N)r,   close)r6   r(   r(   r:   r<   m   s   
zPP_ChatOCRv4_Pipeline.closec                 C   s<   | dd| _| jr| di  dddi}| || _dS )a  
        Initializes the visual predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r%   TZSubPipelinesZLayoutParserZpipeline_config_errorz)config error for layout_parsing_pipeline!N)r+   r%   Zcreate_pipeliner,   )r6   r"   Zlayout_parsing_configr(   r(   r:   r0   q   s   
z/PP_ChatOCRv4_Pipeline.inintial_visual_predictorc                 C   s2   ddl m} |di dddi}||| _dS )a   
        Initializes the retriever predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r   create_retriever
SubModulesZLLM_RetrieverZretriever_config_errorzconfig error for llm retriever!N) r>   r+   r.   )r6   r"   r>   retriever_configr(   r(   r:   r2      s   
z2PP_ChatOCRv4_Pipeline.inintial_retriever_predictorc                 C   s   ddl m} |di dddi}||| _ddl m} |di di d	d
di}||| _|di di dd
di}||| _dS )a  
        Initializes the chat predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r   create_chat_botr?   ZLLM_ChatZchat_bot_config_errorzconfig error for llm chat bot!)create_prompt_engineeringPromptEngneeringZKIE_CommonTextpe_config_errorzconfig error for text_pe!Z	KIE_Tablezconfig error for table_pe!N)r@   rC   r+   r-   rD   text_petable_pe)r6   r"   rC   chat_bot_configrD   Ztext_pe_configZtable_pe_configr(   r(   r:   r1      s0   





z-PP_ChatOCRv4_Pipeline.inintial_chat_predictorc                 C   st   ddl m}m} |dd| _| jr8|di dddi}||| _|di d	i d
ddi}||| _dS )a  
        Initializes the predictor with the given configuration.

        Args:
            config (dict): The configuration dictionary containing the necessary
                                parameters for initializing the predictor.
        Returns:
            None
        r   )rC   rD   r&   Tr?   Z	MLLM_Chatmllm_chat_bot_configzconfig error for mllm chat bot!rE   ZEnsemblerF   zconfig error for ensemble_pe!N)r@   rC   rD   r+   r&   r/   ensemble_pe)r6   r"   rC   rD   rJ   Zensemble_pe_configr(   r(   r:   r3      s"   



z-PP_ChatOCRv4_Pipeline.inintial_mllm_predictorlayout_parsing_resultc                 C   s   i }|d }t t|D ],}|| d }|| d }|dv rqd| }||vr-|||< q||  d| 7  < q|d }g }	g }
g }|D ]}|
|d  d	|d
 d }|	| ||d  qEi }||d< |	|d< |
|d< ||d< |S )z
        Decodes the visual result from the layout parsing result.

        Args:
            layout_parsing_result (LayoutParsingResult): The result of layout parsing.

        Returns:
            dict: The decoded visual information.
        parsing_res_listZblock_labelZblock_content)tableZformulaz	words in z
 table_res_listZ	pred_html Ztable_ocr_predZ	rec_textsZneighbor_textsnormal_text_dicttable_text_listtable_html_listtable_nei_text_list)rangelenappendjoin)r6   rL   rQ   rM   ZpnolabelcontentkeyrO   rR   rS   rT   Z	table_resZsingle_table_textvisual_infor(   r(   r:   decode_visual_result   s4   



z*PP_ChatOCRv4_Pipeline.decode_visual_resultinputuse_doc_orientation_classifyuse_doc_unwarpinguse_textline_orientationuse_seal_recognitionuse_table_recognitionlayout_threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modetext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_threshc                 k   s    | j dkrtd ddiV  | jdu r td | | j | jj|fi d|d|d|d	|d
|d|d|d|	d|
d|d|d|d|d|d|d|d|d|d|d|d|D ]}| |}||d}|V  qgdS )a  
        This function takes an input image or a list of images and performs various visual
        prediction tasks such as document orientation classification, document unwarping,
        general OCR, seal recognition, and table recognition based on the provided flags.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
                                                                        numpy array of an image, or list of numpy arrays.
            use_doc_orientation_classify (bool): Flag to use document orientation classification.
            use_doc_unwarping (bool): Flag to use document unwarping.
            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
            use_seal_recognition (bool): Flag to use seal recognition.
            use_table_recognition (bool): Flag to use table recognition.
            layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to `False`.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to `None`.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to `None`.
            text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
            text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
            text_det_thresh (Optional[float]): Threshold for text detection.
            text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
            text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
            text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
            seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
            seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
            seal_det_thresh (Optional[float]): Threshold for seal detection.
            seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
            seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
            seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: A dictionary containing the layout parsing result and visual information.
        Fz1The models for layout parser are not initialized.errorNzGThe layout parsing pipeline is not initialized, will initialize it now.r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rq   rn   ro   rp   rr   rs   )rL   r\   )	r%   r	   rt   r,   warningr0   r"   predictr]   )r6   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   r7   rL   r\   Zvisual_predict_resr(   r(   r:   visual_predict  sx   
@


	

z$PP_ChatOCRv4_Pipeline.visual_predictr\   	save_pathc                 C   s^   t |ts	|g}n|}t|d}|tj|ddd  W d   dS 1 s(w   Y  dS )a-  
        Save the visual info list to the specified file path.

        Args:
            visual_info (dict): The visual info result, which can be a single object or a list of objects.
            save_path (str): The file path to save the visual info list.

        Returns:
            None
        wFensure_ascii
N)
isinstancelistopenwritejsondumps)r6   r\   rx   visual_info_listfoutr(   r(   r:   save_visual_info_listr  s   

z+PP_ChatOCRv4_Pipeline.save_visual_info_list	data_pathc                 C   sB   t |d}| }t|}W d   |S 1 sw   Y  |S )z
        Loads visual info list from a JSON file.

        Args:
            data_path (str): The path to the JSON file containing visual info.

        Returns:
            list[dict]: A list of dict objects parsed from the JSON file.
        rN)r   readliner   loads)r6   r   findatar   r(   r(   r:   load_visual_info_list  s   

z+PP_ChatOCRv4_Pipeline.load_visual_info_listr   c                 C   s   g }g }g }g }|D ]5}|d }|D ]}||  dd||< q|d }	|d }
|d }|| ||	 ||
 || q
||||fS )a  
        Merge visual info lists.

        Args:
            visual_info_list (list[dict]): A list of visual info results.

        Returns:
            tuple[list, list, list, list]: A tuple containing four lists, one for normal text dicts,
                                               one for table text lists, one for table HTML lists.
                                               one for table neighbor texts.
        rQ   r|   r@   rR   rS   rT   )replacerW   extend)r6   r   all_normal_text_listall_table_text_listall_table_html_listall_table_nei_text_listZsingle_visual_inforQ   r[   rR   rS   rT   r(   r(   r:   merge_visual_info_list  s(   


z,PP_ChatOCRv4_Pipeline.merge_visual_info_list  ,  min_characters
block_sizeflag_save_bytes_vectorrA   c                 C   sr  t |ts	|g}n|}|durddlm} ||}n| jdu r*td | | j | j}| 	|}	|	\}
}}}i }g }t
|
D ]\}}| D ]\}}|| d| dg7 }qHq@t|||D ]\}}}t||| j krx|d| d| g7 }q_d	|}d
|d< t||krd
|d< |j|d< ||d< |j||d|d< |r||d |d< d|d< |S d|d< ||d< |S )a  
        Build a vector representation from visual information.

        Args:
            visual_info (dict): The visual information input, can be a single instance or a list of instances.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.
            block_size (int): The size of each chunk to split the text into.
            flag_save_bytes_vector (bool): Whether to save the vector as bytes, defaults to `False`.
            retriever_config (dict): The configuration for the retriever, defaults to `None`.

        Returns:
            dict: A dictionary containing the vector info and a flag indicating if the text is too short.
        Nr   r=   8The retriever is not initialized,will initialize it now.   ：r|   u   table：	r@   Fr   flag_too_short_text
model_namer   )r   vectorT)r}   r~   r@   r>   r.   r	   ru   r2   r"   r   	enumerateitemsziprV   r5   rX   r   Zgenerate_vector_databaseencode_vector_store_to_bytes)r6   r\   r   r   r   rA   r   r>   r.   all_visual_infor   r   r   r   vector_info	all_itemsirQ   typetext
table_html
table_texttable_nei_textZall_text_strr(   r(   r:   build_vector  sb   






z"PP_ChatOCRv4_Pipeline.build_vectorr   c           	      C   s   t j|}t j|st | |d ur ddlm} ||}n| jd u r0t	d | 
| j | j}t|}|d sM|d sM||d |d< d|d< t|d}|tj|d	d
d  W d    d S 1 sjw   Y  d S )Nr   r=   r   r   r   r   Try   Frz   r|   )ospathdirnameexistsmakedirsr@   r>   r.   r	   ru   r2   r"   copydeepcopyr   r   r   r   r   )	r6   r   rx   rA   	directoryr>   r.   Zvector_info_datar   r(   r(   r:   save_vector  s6   




z!PP_ChatOCRv4_Pipeline.save_vectorc                 C   s   d }|d urddl m} ||}n| jd u r!td | | j | j}t|dG}| }t	
|}d|vs?d|vs?d|vrOtd d	d
iW  d    S |d rh||d |d< d|d< W d    |S W d    |S 1 ssw   Y  |S )Nr   r=   r   r   r   r   r   zInvalid vector info.rt   z%Invalid vector info when load vector!F)r@   r>   r.   r	   ru   r2   r"   r   r   r   r   rt   decode_vector_store_from_bytes)r6   r   rA   r   r>   r.   r   r   r(   r(   r:   load_vector)  s>   







z!PP_ChatOCRv4_Pipeline.load_vectorkey_listc                 C   sZ   |dkrg S t |trdd |D }|S t |tr+tdd|}|ddd}|S g S )z
        Formats the key list.

        Args:
            key_list (str|list[str]): A string or a list of strings representing the keys.

        Returns:
            list[str]: A list of formatted keys.
        r@   c                 S   s   g | ]}| d dqS )    rP   )r   .0r[   r(   r(   r:   
<listcomp>X  s    z4PP_ChatOCRv4_Pipeline.format_key.<locals>.<listcomp>z[\t\n\r\f\v]u   ，,)r}   r~   strresubr   split)r6   r   r(   r(   r:   
format_keyJ  s   


z PP_ChatOCRv4_Pipeline.format_keyr   c                 C   sP  | j dkrtd ddiS | |}t|dkrddiS t|tr+td ddiS t|tr>|d	r>td
 ddiS | j	du rNt
d | | j |dur]ddlm} ||}n| j	}| |gD ]?}td|d  }t|d}i }	|D ]!}
t|
d }|j||dd }|du rddi    S ||	|
< q~d|	i  S dS )a  
        Generates MLLM results based on the provided key list and input image.

        Args:
            input (Union[str, np.ndarray]): Input image path, or numpy array of an image.
            key_list (Union[str, list[str]]): A single key or a list of keys to extract information.
            chat_bot_config (dict): The parameters for LLM chatbot, including api_type, api_key... refer to config file for more details.
        Returns:
            dict: A dictionary containing the chat results.
        FzMLLM prediction is disabled.Zmllm_resz"Error:MLLM prediction is disabled!r       Error:输入的key_list无效！z-Input is a list, but it's not supported here.z3Error:Input is a list, but it's not supported here!z.pdfz/MLMM prediction does not support PDF currently!z5Error:MLMM prediction does not support PDF currently!Nz<The MLLM chat bot is not initialized,will initialize it now.r   rB   z.jpgr   zutf-8u   
请用图片中完整出现的内容回答，可以是单词、短语或句子，针对问题回答尽可能详细和完整，并保持格式、单位、符号和标点都与图片中的文字内容完全一致。)promptimagerZ      大模型调用失败)r&   r	   rt   r   rV   r}   r~   r   endswithr/   ru   r3   r"   r@   rC   r4   cv2Zimencodetobytesbase64	b64encodedecodegenerate_chat_results)r6   r^   r   rJ   rC   r/   Zimage_arrayZimage_stringZimage_base64resultr[   r   Zmllm_chat_bot_resultr(   r(   r:   	mllm_predb  sP   








zPP_ChatOCRv4_Pipeline.mllm_predr-   r   final_resultsfailed_resultsc                 C   s   | |}|d }|d }|dur"d|vr|g|d< n|d | |du r2td||jf  dS ||}| D ]\}	}
|
|vrP|	|v rP||	 |
||	< q;dS )a  
        Generate and merge chat results into the final results dictionary.

        Args:
            prompt (str): The input prompt for the chat bot.
            key_list (list): A list of keys to track which results to merge.
            final_results (dict): The dictionary to store the final merged results.
            failed_results (list): A list of failed results to avoid merging.

        Returns:
            None
        rZ   reasoning_contentNz.chat bot error: 
 [prompt:]
 %s
 [result:] %s
)r   rW   r	   rt   ZERROR_MASSAGEfix_llm_result_formatr   remove)r6   r-   r   r   r   r   
llm_resultllm_result_contentllm_result_reasoning_contentr[   valuer(   r(   r:   generate_and_merge_chat_results  s*   


z5PP_ChatOCRv4_Pipeline.generate_and_merge_chat_resultsuse_vector_retrievalr   c                 C   sD  |rq|durq|durddl m} ||}n| jdu r%td | | j | j}dd |D }	|d }
|d s`|d	 |jksLJ d
|d	  d|j d|d rU||
}
|j	|	|
d|d}|S t
|
dkrmd|
}|S d}|S g }t|D ]\}}| D ]\}}|| d| dg7 }qqwd|}t
||krtd |S )a}  
        Retrieve related normal text based on vector retrieval or all normal text list.

        Args:
            retriever_config (dict): Configuration for the retriever.
            use_vector_retrieval (bool): Whether to use vector retrieval.
            vector_info (dict): Dictionary containing vector information.
            key_list (list[str]): List of keys to generate question keys.
            all_normal_text_list (list): List of normal text.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.

        Returns:
            str: Related normal text.
        Nr   r=   r   c                 S   s   g | ]}| qS r(   r(   r   r(   r(   r:   r     s    zAPP_ChatOCRv4_Pipeline.get_related_normal_text.<locals>.<listcomp>r   r   r   zThe vector model name (z+) does not match the retriever model name (z&). Please check your retriever config.r   2   )Ztopkr   r   r@   r   r|   zMThe input text content is too long, the large language model may truncate it.)r@   r>   r.   r	   ru   r2   r"   r   r   Zsimilarity_retrievalrV   rX   r   r   )r6   rA   r   r   r   r   r   r>   r.   Zquestion_key_listr   related_textr   r   rQ   r   r   r(   r(   r:   get_related_normal_text  sL   




z-PP_ChatOCRv4_Pipeline.get_related_normal_textocr_llm_predict_dictmllm_predict_dictc                 C   s  i }|D ]}d}d}d}	||v r|| }||v r|| }	|dkrl|	dkrl| j |||	}
||
}|d }|d }|durNd|vrG|g|d< n|d | |durW||}||v ri|| }d|v rf|	}n|}n|}n||v ru|| }n||v r}|| }|dkr|||< q|S )a  
        Ensemble OCR_LLM and LMM predictions based on given key list.

        Args:
            key_list (list[str]): List of keys to retrieve predictions.
            ocr_llm_predict_dict (dict): Dictionary containing OCR LLM predictions.
            mllm_predict_dict (dict): Dictionary containing mLLM predictions.

        Returns:
            dict: A dictionary with final predictions.
        r@   rZ   r   NB)rK   generate_promptr   rW   r   )r6   r-   r   r   r   final_predict_dictr[   rv   Zocr_llm_predictZmllm_predictr   r   r   r   tmpr(   r(   r:   ensemble_ocr_llm_mllm  sR   


z+PP_ChatOCRv4_Pipeline.ensemble_ocr_llm_mllmintegrationtext_task_descriptiontext_output_formattext_rules_strtext_few_shot_demo_text_content!text_few_shot_demo_key_value_listtable_task_descriptiontable_output_formattable_rules_str table_few_shot_demo_text_content"table_few_shot_demo_key_value_listmllm_predict_infomllm_integration_strategyrI   c           &      C   s  |  |}| }t|dkrddiS t|ts|g}n|}| jdu r.td | | j	 |dur=ddl
m} ||}n| j}| |}|\}}}}i }g d}t|dkr}| ||||||}t|dkr}| jj||||||	|
d	} | || ||| t|dkrt|||D ]<\}!}"}#t|!|| j kr|!fD ]*}$t|dkrt|#dkr|$d
 |# }$| jj|$||||||d	} | || ||| qq| jr|dkr|dur|dkr| ||||}%d|%iS |dkr|}%d|%iS dd| diS |}%d|%iS )a  
        Generates chat results based on the provided key list and visual information.

        Args:
            key_list (Union[str, list[str]]): A single key or a list of keys to extract information.
            visual_info (dict): The visual information result.
            use_vector_retrieval (bool): Whether to use vector retrieval.
            vector_info (dict): The vector information for retrieval.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.
            text_task_description (str): The description of the text task.
            text_output_format (str): The output format for text results.
            text_rules_str (str): The rules for generating text results.
            text_few_shot_demo_text_content (str): The text content for few-shot demos.
            text_few_shot_demo_key_value_list (str): The key-value list for few-shot demos.
            table_task_description (str): The description of the table task.
            table_output_format (str): The output format for table results.
            table_rules_str (str): The rules for generating table results.
            table_few_shot_demo_text_content (str): The text content for table few-shot demos.
            table_few_shot_demo_key_value_list (str): The key-value list for table few-shot demos.
            mllm_predict_dict (dict): The dictionary of mLLM predicts.
            mllm_integration_strategy (str): The integration strategy of mLLM and LLM, defaults to "integration", options are "integration", "llm_only" and "mllm_only".
            chat_bot_config (dict): The parameters for LLM chatbot, including api_type, api_key... refer to config file for more details.
            retriever_config (dict): The parameters for LLM retriever, including api_type, api_key... refer to config file for more details.
        Returns:
            dict: A dictionary containing the chat results.
        r   Zchat_resr   Nz;The LLM chat bot is not initialized,will initialize it now.r   rB   )r   u   未知u   未找到关键信息Noner@   )Ztask_descriptionZoutput_formatZ	rules_strZfew_shot_demo_text_contentZfew_shot_demo_key_value_listu   
 表格周围文字：Zllm_onlyr   Z	mllm_onlyz,Error:Unsupported mllm_integration_strategy z9, only support 'integration', 'llm_only' and 'mllm_only'!)r   r   rV   r}   r~   r-   r	   ru   r1   r"   r@   rC   r   r   rG   r   r   r   r5   rH   r&   r   )&r6   r   r\   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rI   rA   Zkey_list_orir   rC   r-   r   r   r   r   r   r   r   r   r   r   r   r   Z
table_infor   r(   r(   r:   chatR  s   
1



		



	zPP_ChatOCRv4_Pipeline.chatc                 O   s   t d d S )NzPP-ChatOCRv4-doc Pipeline do not support to call `predict()` directly! Please invoke `visual_predict`, `build_vector`, `chat` sequentially to obtain the result.)r	   rt   )r6   argsr7   r(   r(   r:   rv     s   zPP_ChatOCRv4_Pipeline.predict)NNNNNNNNNNNNNNNNNNNNN)r   r   FNr;   )TNr   NNNNNNNNNNNr   NN)r#   N).__name__
__module____qualname____doc__entitiesr   r   r   r   r   boolr   r   r*   r<   dictr0   r2   r1   r3   r   r]   r   npZndarrayfloatr   intrw   r   r   r~   r   r   r   r   r   r
   r   r   r   r   r   r   rv   __classcell__r(   r(   r8   r:   r   +   s   	
;),	

k
'
P
!"!>
.
B
E	

 r   )(r   r   r   r   r   typingr   r   r   r   r   r   numpyr   utilsr	   Z
utils.depsr
   r   r   Zutils.file_interfacer   Zcommon.batch_samplerr   Zcommon.readerr   modelsr   r   Zutils.benchmarkr   Zcomponents.chat_serverr   Zlayout_parsing.resultr   Zpipeline_baser   r   Ztime_methodsr   r(   r(   r(   r:   <module>   s,    