o
    *j                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ e Zejeje	jdG dd deZ dS )    N)AnyDict)	Pipelines)BaseVideoModel)
OutputKeys)InputPipeline)	PIPELINES)ReadVideoData)Config)	ModelFileTasks)
get_logger)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	e
 dddZdeeef deeef fddZ  ZS )HICOSSLVideoEmbeddingPipelinemodelc                    s   t  jdd|i| t| jtj}td|  t| jtj	}td|  t
|| _t| jd| j| _| j  | jjtj|| jddd dd	 td
 dS )z
        use `model` to create a hicossl video embedding pipeline for prediction
        Args:
            model: model id on modelscope hub.
        r   zloading model from zloading config from )cfgT)Zmap_locationZweights_onlyZmodel_stateF)strictzload model doneN )super__init__ospjoinr   r   ZTORCH_MODEL_FILEloggerinfoZCONFIGURATIONr   	from_filer   r   todeviceinfer_modelevalZload_state_dicttorchload)selfr   kwargsZ
model_pathZconfig_path	__class__r   y/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/cv/hicossl_video_embedding_pipeline.pyr      s$   
z&HICOSSLVideoEmbeddingPipeline.__init__inputreturnc                 C   sB   t |trt| j|dd| j}n	tdt| d|i}|S )N   )Znum_temporal_views_overridez input should be a str,  but got 
video_data)
isinstancestrr
   r   r   r   	TypeErrortype)r"   r'   Zvideo_input_dataresultr   r   r&   
preprocess.   s   
z(HICOSSLVideoEmbeddingPipeline.preprocessc                 C   s"   |  |d }tj|j  iS )Nr*   )perform_inferencer   ZVIDEO_EMBEDDINGdatacpunumpy)r"   r'   featurer   r   r&   forward8   s   z%HICOSSLVideoEmbeddingPipeline.forward   c              	   C   s`   t |d| }g }t|D ]}|| ||| |d |  d  qtj|dd}|S )a1   Perform feature extracting for a given video
        Args:
            model (BaseVideoModel): video model with loadded state dict.
            max_bsz (int): the maximum batch size, limited by GPU memory.
        Returns:
            pred (Tensor): the extracted features for input video clips.
        r   r)   )dim)mathceilsizerangeappendr   r    cat)r"   r2   Zmax_bszZiter_numZ
preds_listipredr   r   r&   r1   <   s   	 z/HICOSSLVideoEmbeddingPipeline.perform_inferenceinputsc                 C   s   |S )Nr   )r"   rA   r   r   r&   postprocessM   s   z)HICOSSLVideoEmbeddingPipeline.postprocess)r7   )__name__
__module____qualname__r,   r   r   r   r   r0   r6   r    Zno_gradr1   rB   __classcell__r   r   r$   r&   r      s    "
*r   )!r9   Zos.pathpathr   typingr   r   r    Zmodelscope.metainfor   Z'modelscope.models.cv.action_recognitionr   Zmodelscope.outputsr   Zmodelscope.pipelines.baser   r   Zmodelscope.pipelines.builderr	   Zmodelscope.preprocessorsr
   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   r   Zmodelscope.utils.loggerr   r   Zregister_moduleZvideo_embeddingZhicossl_video_embeddingr   r   r   r   r&   <module>   s$   