o
    *j                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ ddlmZmZmZmZmZ dd	lmZ e	jejejd
G dd deZdS )    N)Models)
TorchModel)MODELS)Config)	ModelFileTasks   )BboxRegressorQ2VRankerStage1Q2VRankerStage2V2QRankerStage1V2QRankerStage2)SwinTransformerV2_1D)module_namec                       sJ   e Zd ZdZdef fddZdd Zdd Z										dd
dZ  Z	S )SOONeta  
        The implementation of 'Scanning Only Once: An End-to-end Framework for Fast Temporal Grounding
        in Long Videos'. The model is dynamically initialized with the following parts:
            - q2v_stage1: calculate qv_ctx_score.
            - v2q_stage1: calculate vq_ctx_score.
            - q2v_stage2: calculate qv_ctn_score.
            - v2q_stage2: calculate vq_ctn_score.
            - regressor: predict the offset of bounding box for each candidate anchor.
    	model_dirc           
         s  t    tj|tj}t|j	| _
| j
j}| j
j}| j
j}| j
j| _| j
j| _|| _t|||dg| dg| dg| dddddtjddd	g| d
| _t||| _t||| _| jrjt|||| _t||| _t|| j| _tj|d}tj|ddd }	| j |	dd dS )zl
            Initialize SOONet Model

            Args:
                model_dir: model id or path
              @   g       @Tg        g?Fr   )Z
patch_sizeZin_chansZ	embed_dimZdepthsZ	num_headsZwindow_sizeZ	mlp_ratioZqkv_biasZ	drop_rateZattn_drop_rateZdrop_path_rateZ
norm_layerZ
patch_normZuse_checkpointZpretrained_window_sizesz"SOONet_MAD_VIT-B-32_4Scale_10C.pthcpu)Zmap_locationmodel)strictN)!super__init__ospathjoinr   ZCONFIGURATIONr   	from_fileZhyperparamsconfignscales
hidden_dimsnippet_lengthenable_stage2stage2_topkr   nnZ	LayerNormvideo_encoderr
   
q2v_stage1r   Z
v2q_stage1r   
q2v_stage2r   Z
v2q_stage2r	   	regressortorchloadZload_state_dict)
selfr   argskwargsZconfig_pathr   r    r!   Z
model_pathZ
state_dict	__class__ k/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/soonet/model.pyr      sN   


zSOONet.__init__c                 K   s&   | j r| jdi |S | jdi |S )Nr0   )Ztrainingforward_trainforward_testr+   r-   r0   r0   r1   forwardO   s   zSOONet.forwardc                 K   s   t )N)NotImplementedErrorr4   r0   r0   r1   r2   U   s   zSOONet.forward_trainNc                 K   s  |}|  |ddd}| ||}	| jrt }
t }t }t }t| jD ]n}tj|	| ddd\}}tt	tt
|ddd| jf     \}}||j}|
| |t|| d| || }||d  }t||| d|}t||| d|}|| || q&tj|dd}tj|dd}| |||
|	\}}}|}nd}|	}|}|}| |||}ttj|dd}||||fS )a  
            Obtain matching scores and bbox bias of the top-k candidate anchors, with
            pre-extracted query features and video features as input.

            Args:
                query_feats: the pre-extracted text features.
                video_feats: the pre-extracted video features.
                start_ts: the start timestamps of pre-defined multi-scale anchors.
                end_ts: the end timestamps of pre-defined multi-scale anchors.
                scale_boundaries: the begin and end anchor index for each scale in start_ts and end_ts.

            Returns:
                [final_scores, bbox_bias, starts, ends]
        r   r   r   T)dimZ
descendingN)r7   )r%   Zpermuter&   r"   listranger   r)   sortZ
LongTensorsetr#   flattenr   numpytolisttoZdeviceappendZindex_selectcatr'   r(   Zsigmoid)r+   Zquery_featsZvideo_featsZstart_tsZend_tsZscale_boundariesr-   Z	sent_featZ	ctx_featsZqv_ctx_scoresZhit_indicesZstartsZendsZfiltered_ctx_featsi_indicesZscale_firstZ
scale_lastZfiltered_startZfiltered_endZqv_merge_scoresZqv_ctn_scoresZ	ctn_featsZ	bbox_biasZfinal_scoresr0   r0   r1   r3   X   sb   




zSOONet.forward_test)NNNNN)
__name__
__module____qualname____doc__strr   r5   r2   r3   __classcell__r0   r0   r.   r1   r      s    
0r   )r   r)   Ztorch.nnr$   Zmodelscope.metainfor   Z'modelscope.models.base.base_torch_modelr   Zmodelscope.models.builderr   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   r   blocksr	   r
   r   r   r   Zswin_transformerr   Zregister_moduleZvideo_temporal_groundingZsoonetr   r0   r0   r0   r1   <module>   s   