o
    *j                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z  e  Z!ej"ej#ej$dG dd deZ%dS )    N)AnyDict)	Pipelines)Model)LengthAdaptiveTokenizerinit_transform_dict	load_dataload_frames_from_video)
OutputKeys)InputPipeline)	PIPELINES)Config)	ModelFileTasks)
get_logger)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	d
eeef deeef fddZ
  ZS )VopRetrievalSEPipelinemodelc                    s.  t  jdd|i| t|| j| _td || _	t
t|tj| _t| jjjd | _td tt|d dd}t| jj|| _td d	|v rett|d
| j| _n+d|v rutt|d| j| _nd|v rtt|d| j| _ntt|d| j| _td dS )a   Card VopRetrievalSE Pipeline.

        Examples:
        >>>
        >>>   from modelscope.pipelines import pipeline
        >>>   vop_pipeline = pipeline(Tasks.vop_retrieval,
        >>>            model='damo/cv_vit-b32_retrieval_vop_bias')
        >>>
        >>>   # IF DO TEXT-TO-VIDEO:
        >>>   input_text = 'a squid is talking'
        >>>   result = vop_pipeline(input_text)
        >>>   result:
        >>>   {'output_data': array([['video8916']], dtype='<U9'),'mode': 't2v'}
        >>>
        >>>   # IF DO VIDEO-TO-TEXT:
        >>>   input_video = 'video10.mp4'
        >>>   result = vop_pipeline(input_video)
        >>>   result:
        >>>   {'output_data': array([['assorted people are shown holding cute pets']], dtype='<U163'), 'mode': 'v2t'}
        >>>
        r   zload model doneZ	clip_testzload transform donezbpe_simple_vocab_16e6.txt.gzzutf-8
zload tokenizer doneZvop_biaszBias_msrvtt9k_features.pklZvop_partialzPartial_msrvtt9k_features.pklZvop_projzProj_msrvtt9k_features.pklzVoP_msrvtt9k_features.pklzload database doneN )super__init__r   Zfrom_pretrainedtodevicer   loggerinfo	local_pthr   	from_fileospjoinr   ZCONFIGURATIONcfgr   
hyperparamZ	input_resimg_transformgzipopenreaddecodesplitr   	tokenizerr   database)selfr   kwargsZbpe_path	__class__r   r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/cv/vop_retrieval_se_pipeline.pyr      sF   


zVopRetrievalSEPipeline.__init__inputreturnc           	         s   t |trfd|v r@g }|fD ]!}t j|}t| jjj jjj	\}} 
|}|| qtj|ddj jdd}d}n/ j|dddd}t |tjrX|j jdd}n fd	d
| D }d}n	tdt| ||d}|S )Nz.mp4r   )dimTZnon_blockingv2tpt)Zreturn_tensorspaddingZ
truncationc                    s"   i | ]\}}||j  jd dqS )Tr3   )r   r   ).0keyvalr+   r   r/   
<dictcomp>k   s    z5VopRetrievalSEPipeline.preprocess.<locals>.<dictcomp>t2vz input should be a str,  but got )
input_datamode)
isinstancestrr   r    r   r	   r!   r"   Z
num_framesZvideo_sample_typer#   appendtorchstackr   r   r)   ZTensoritems	TypeErrortype)	r+   r0   Zpreprocess_paramsqueryZ
video_pathZimgsZidxsr>   resultr   r:   r/   
preprocessV   s@   





z!VopRetrievalSEPipeline.preprocessc                 K   s   | j \}}}}t h |d dkr8| j|d }||j }tj|| jjjddd 	 
 }	t||	 }
n+|d dkrc| j|d }||j }tj|| jjjddd 	 
 }	t||	 }
|
|d d}|W  d    S 1 svw   Y  d S )	Nr>   r<   r=   )kr2      r4   )Zoutput_datar>   )r*   rB   Zno_gradr   Zget_text_featuresTZtopkr!   r"   cpunumpynparrayZget_video_features)r+   r0   Zforward_paramsZtext_embedsZvid_embeds_pooledZvid_idsZtextsZquery_featsZscoreZretrieval_idxsresresultsr   r   r/   forwardv   s>   




$zVopRetrievalSEPipeline.forwardinputsc                 K   s   |S )Nr   )r+   rU   Zpost_paramsr   r   r/   postprocess   s   z"VopRetrievalSEPipeline.postprocess)__name__
__module____qualname__r@   r   r   r   r   rI   rT   rV   __classcell__r   r   r-   r/   r      s    9 


r   )&r$   Zos.pathpathr   typingr   r   rO   rP   rB   Zmodelscope.metainfor   Zmodelscope.modelsr   Z"modelscope.models.cv.vop_retrievalr   r   r   r	   Zmodelscope.outputsr
   Zmodelscope.pipelines.baser   r   Zmodelscope.pipelines.builderr   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   r   Zmodelscope.utils.loggerr   r   Zregister_moduleZvop_retrievalZvop_retrieval_ser   r   r   r   r/   <module>   s&   