o
    )j                     @   sn  d dl Z d dlmZ dgZdZdededefddZd	d
dddd dddedededededede	fddZ
ddde	fddZddde	fddZddde	fddZddde	fddZdd d!dd"d#ed$ed%ede	fd&d'ZG d(d) d)ejZd	d!dd*ded%ede	fd+d,Zd!dd-d%ede	fd.d/Zddde	fd0d1Zddde	fd2d3Zddde	fd4d5ZdS )6    Ntorchz%https://dl.fbaipublicfiles.com/dinov2	arch_name
patch_sizereturnc                 C   s"   |  ddd d }d| | S )N_    Zdinov2_)replace)r   r   Zcompact_arch_name r
   l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/cv/anydoor/dinov2/hubconf.py_make_dinov2_model_name   s   r   	vit_largei     g      ?ZmlpT)r   img_sizer   init_values	ffn_layerblock_chunks
pretrainedr   r   r   r   r   c                 K   sP   ddl m} t| |}	t|||||d}
|
jdi | |j|  di |
}|S )N   )vision_transformer)r   r   r   r   r   r
   )Zdinov2.modelsr   r   dictupdate__dict__)r   r   r   r   r   r   r   kwargsZvitsr   Z
vit_kwargsmodelr
   r
   r   _make_dinov2_model   s   
r   )r   c                 K      t dd| d|S )zP
    DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset.
    	vit_smallr   r   Nr
   r   r   r   r
   r
   r   dinov2_vits142   
   r!   c                 K   r   )zC
    DINOv2 ViT-B/14 model pretrained on the LVD-142M dataset.
    vit_baser   Nr
   r   r    r
   r
   r   dinov2_vitb14:   r"   r$   c                 K   r   )zP
    DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset.
    r   r   Nr
   r   r    r
   r
   r   dinov2_vitl14B   r"   r%   c                 K      t ddd| d|S )zP
    DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset.
    
vit_giant2swiglufusedr   r   r   Nr
   r   r    r
   r
   r   dinov2_vitg14J   s   r*   i   r   
model_name	embed_dimlayersr   r,   r-   r.   c           	      K   s   |dv sJ d| t d| | d}|r>|dkrt|nd}td|  d|  d| d	 }tjj|d
d}|j|dd |S )N)r   r   Unsupported number of layers: r   i  r   r   /Z_linearz	_head.pthcpu)Zmap_locationF)strict)nnZLinearstr_DINOV2_BASE_URLr   ZhubZload_state_dict_from_urlZload_state_dict)	r,   r-   r.   r   r   linear_headZ
layers_strurlZ
state_dictr
   r
   r   _make_dinov2_linear_headU   s   r8   c                       s<   e Zd Zdddejdejdef fddZdd	 Z  ZS )
_LinearClassifierWrapperr   )r.   backboner6   r.   c                   s    t    || _|| _|| _d S )N)super__init__r:   r6   r.   )selfr:   r6   r.   	__class__r
   r   r<   l   s   

z!_LinearClassifierWrapper.__init__c              	   C   s   | j dkr$| j|}|d d}|d d}t||dg}nG| j dkrc| jj|ddd}t|d d d|d d d|d d d|d	 d d|d	 d ddg}nJ d| j  | |S )Nr   Zx_norm_clstokenr   Zx_norm_patchtokensr   T)nZreturn_class_token      Fr/   )	r.   r:   Zforward_featuresZsqueezer   catmeanZget_intermediate_layersr6   )r=   xZ	cls_tokenZpatch_tokensZlinear_inputr
   r
   r   forwardv   s   

0&
z _LinearClassifierWrapper.forward)	__name__
__module____qualname__r3   Moduleintr<   rF   __classcell__r
   r
   r>   r   r9   j   s    
r9   r   r.   r   c           	      K   sH   t d| |d|}|j}|j}t| |}t||||d}t|||dS )Nr   r+   )r:   r6   r.   r
   )r   r-   r   r   r8   r9   )	r   r.   r   r   r:   r-   r   r,   r6   r
   r
   r   _make_dinov2_linear_classifier   s"   
rN   )r.   r   c                 K   s   t dd| |d|S )z
    Linear classifier (1 or 4 layers) on top of a DINOv2 ViT-S/14 backbone (optionally)
    pretrained on the LVD-142M dataset and trained on ImageNet-1k.
    r   rM   Nr
   rN   )r.   r   r   r
   r
   r   dinov2_vits14_lc   s
   rP   c                 K   r   )z
    Linear classifier (1 or 4 layers) on top of a DINOv2 ViT-B/14 backbone (optionally)
    pretrained on the LVD-142M dataset and trained on ImageNet-1k.
    r#   r   Nr
   rO   r    r
   r
   r   dinov2_vitb14_lc   
   rQ   c                 K   r   )z
    Linear classifier (1 or 4 layers) on top of a DINOv2 ViT-L/14 backbone (optionally)
    pretrained on the LVD-142M dataset and trained on ImageNet-1k.
    r   r   Nr
   rO   r    r
   r
   r   dinov2_vitl14_lc   rR   rS   c                 K   r&   )z
    Linear classifier (1 or 4 layers) on top of a DINOv2 ViT-g/14 backbone (optionally)
    pretrained on the LVD-142M dataset and trained on ImageNet-1k.
    r'   r(   r)   Nr
   rO   r    r
   r
   r   dinov2_vitg14_lc   s   rT   )r   Ztorch.nnr3   dependenciesr5   r4   rK   r   floatboolr   r!   r$   r%   r*   r8   rJ   r9   rN   rP   rQ   rS   rT   r
   r
   r
   r   <module>   sv   

 
			