o
    *j                     @   s   d dl mZmZmZmZ d dlZd dlmZmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ e ZejejddG dd deZ dS )    )AnyDictOptionalUnionN)AutoModelForCausalLM
get_logger)	PipelinesPreprocessors)Model)
OutputKeys)Pipeline)	PIPELINES)VisualQuestionAnsweringPipeline)Preprocessor
load_image)Fields
FrameworksTaskszovis-vl)module_namec                   @   s   e Zd Z					ddeeef dedededed	ee fd
dZ	de
eef fddZde
eef de
eef fddZde
eef de
eef fddZdS )VisionChatPipelineNgpuTmodelpreprocessorconfig_filedeviceauto_collatetrust_remote_codec           
      K   s~   || _ tj| _d| _|| _|dtj}|dd}	|dkr dn|| _t	j
|||	|d| j| _| j | _| j | _d S )NTtorch_dtypemultimodal_max_lengthi    r   cuda)r   r   r   )Zdevice_namer   torchZ	frameworkZ_model_prepareZ_auto_collategetZfloat16r   r   Zfrom_pretrainedtor   Zget_text_tokenizertext_tokenizerZget_visual_tokenizervisual_tokenizer)
selfr   r   r   r   r   r   kwargsr   r    r'   r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py__init__   s"   	zVisionChatPipeline.__init__inputsc           
      C   s   |d }|d }t |}d| }| j||g\}}}t|| jj}	|dj| jj	d}|	dj| jj	d}	|j| j
j| j
j	dg}|||	dS )Ntextimagez<image>
r   )r   )dtyper   )	input_idspixel_valuesattention_mask)r   r   Zpreprocess_inputsr    ner#   pad_token_idZ	unsqueezer"   r   r$   r-   )
r%   r*   r+   Zimage_path_or_urlr,   query_r.   r/   r0   r'   r'   r(   
preprocess1   s*   


zVisionChatPipeline.preprocessreturnc                 K   s   |d }|d }|d }| dd}| dd}| dd }| d	d }	| d
d }
| dd }t , t||||	|
|| jjj| jjdd	}| jj	|f||d|d }W d    d|iS 1 scw   Y  d|iS )Nr.   r/   r0   max_new_tokensi   	do_sampleFtop_ptop_ktemperaturerepetition_penaltyT)	r7   r8   r9   r:   r;   r<   eos_token_idr2   Z	use_cache)r/   r0   r   
output_ids)
r!   r    Zinference_modedictr   Zgeneration_configr=   r#   r2   generate)r%   r*   Zforward_paramsr.   r/   r0   r7   r8   r9   r:   r;   r<   Z
gen_kwargsr>   r'   r'   r(   forwardH   sF   


zVisionChatPipeline.forwardc                 C   s"   |d }| j j|dd}tj|iS )Nr>   T)Zskip_special_tokens)r#   decoder   ZTEXT)r%   r*   r>   outputr'   r'   r(   postprocessf   s
   
zVisionChatPipeline.postprocess)NNr   TN)__name__
__module____qualname__r   r
   strr   boolr   r)   r   r   r5   rA   rD   r'   r'   r'   r(   r      s0    



&r   )!typingr   r   r   r   r    Z
modelscoper   r   Zmodelscope.metainfor   r	   Zmodelscope.models.baser
   Zmodelscope.outputsr   Zmodelscope.pipelines.baser   Zmodelscope.pipelines.builderr   ZCmodelscope.pipelines.multi_modal.visual_question_answering_pipeliner   Zmodelscope.preprocessorsr   r   Zmodelscope.utils.constantr   r   r   loggerZregister_moduleZvisual_question_answeringr   r'   r'   r'   r(   <module>   s     