o
    *j                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlmZ d dlm  m	Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ dd	lmZmZ ejejejd
G dd deZdS )    N)AnyDict)Models)
TorchModel)MODELS)
OutputKeys)	ModelFileTasks   )CLIPProbingModel)module_namec                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )StructuredProbingModelz
    The implementation of 'Structured Model Probing: Empowering
        Efficient Adaptation by Structured Regularization'.
    c                    sz   t t|   tj|d}t|}|d d | _|d d | _	t
dd|d d| _t| j| j	| _| j|d	  d
S )zc
        Initialize a probing model.
        Args:
            model_dir: model id or path
        zfood101-clip-vitl14-full.ptZ	meta_infofeature_sizenum_classesZCLIP_ViTL14_FP16TZbackbone_model_state_dict)Zuse_pretrainZ
state_dictZprobing_model_state_dictN)superr   __init__ospathjointorchloadr   r   r   backboner   probing_modelZload_state_dict)selfZ	model_dirargskwargsZ
model_file	__class__ o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/cv/image_probing_model/model.pyr      s   
zStructuredProbingModel.__init__c           	      C   s   g }t ddD ]}|d| |d| |d| q|d | | }g }|D ]}| || d}|| q2tj|dd	}| |	 }|S )
ze
        Forward Function of SMP.
        Args:
            x: the input images (B, 3, H, W)
        r      zlayer_{}_pre_attnzlayer_{}_attnzlayer_{}_mlpZ
pre_logitsi   r
   dim)
rangeappendformatr   Zhalfaggregate_tokenr   catr   float)	r   xkeysidxfeaturesZfeatures_aggiZaggregated_featureZoutputsr   r   r    forward.   s   
zStructuredProbingModel.forwardc                 C   s   t |jdkrA|j\}}}||krd}n
|| }t|| }|dkr:t|d}tjj||d|}tj|dd}ntj|dd}tjj	j
|dd}|S )z
        Aggregating features from tokens.
        Args:
            output: the output of intermidiant features
                from a ViT model
            target_size: target aggregated feature size
           r   )r      r
   )Zkernel_sizeZstrider
   )Z	start_dimr"   )lenshapeintr   ZpermutennZ	AvgPool1dflattenmean
functional	normalize)r   outputZtarget_size_Zn_tokenZchannelsZ	pool_sizeZn_groupsr   r   r    r'   D   s"   z&StructuredProbingModel.aggregate_token)__name__
__module____qualname____doc__r   r/   r'   __classcell__r   r   r   r    r      s
    r   )r   typingr   r   jsonr   Ztorch.nnr5   Ztorch.nn.functionalr8   FZmodelscope.metainfor   Z'modelscope.models.base.base_torch_modelr   Zmodelscope.models.builderr   Zmodelscope.outputsr   Zmodelscope.utils.constantr   r	   r   r   r   Zregister_moduleZimage_classificationZimage_probing_modelr   r   r   r   r    <module>   s    