o
    *j/                     @   s&  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dl	Z
d dlZd dlm  mZ d dlmZ d dlmZ d dlmZ G dd dejZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deZdefddZej rdnddfdedeeejf fddZdS )    N)OrderedDict)TupleUnion)nn)tqdm)
TorchModelc                       s$   e Zd Zdejf fddZ  ZS )	LayerNormxc                    s$   |j }t |tj}||S N)dtypesuperforwardtypetorchZfloat32)selfr	   	orig_typeret	__class__ l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/cv/vop_retrieval/backbone.pyr      s   
zLayerNorm.forward)__name__
__module____qualname__r   Tensorr   __classcell__r   r   r   r   r      s    r   c                   @   s   e Zd ZdejfddZdS )	QuickGELUr	   c                 C   s   |t d|  S )NgZd;?)r   Zsigmoidr   r	   r   r   r   r          zQuickGELU.forwardN)r   r   r   r   r   r   r   r   r   r   r      s    r   c                       sP   e Zd Z	ddededejf fddZdejfdd	Zdejfd
dZ  Z	S )ResidualAttentionBlockNd_modeln_head	attn_maskc              
      sr   t    t||| _t|| _ttdt	||d fdt
 fdt	|d |fg| _t|| _|| _d S )Nc_fc   Zgeluc_proj)r   __init__r   ZMultiheadAttentionattnr   ln_1
Sequentialr   ZLinearr   mlpln_2r"   )r   r    r!   r"   r   r   r   r&   &   s   



zResidualAttentionBlock.__init__r	   c                 C   s>   | j d ur| j j|j|jdnd | _ | j|||d| j dd S )Nr   deviceF)Zneed_weightsr"   r   )r"   tor   r-   r'   r   r   r   r   	attention4   s   
z ResidualAttentionBlock.attentionc                 C   s,   ||  | | }|| | | }|S r
   )r/   r(   r*   r+   r   r   r   r   r   ;   s   zResidualAttentionBlock.forwardr
   )
r   r   r   intr   r   r&   r/   r   r   r   r   r   r   r   $   s    r   c                	       sD   e Zd Z	ddedededejf fddZdejfd	d
Z  ZS )TransformerNwidthlayersheadsr"   c                    s<   t    | _|| _tj fddt|D  | _d S )Nc                    s   g | ]}t  qS r   )r   ).0_r"   r4   r2   r   r   
<listcomp>K   s    
z(Transformer.__init__.<locals>.<listcomp>)r   r&   r2   r3   r   r)   range	resblocks)r   r2   r3   r4   r"   r   r7   r   r&   C   s   
zTransformer.__init__r	   c                 C   s
   |  |S r
   )r:   r   r   r   r   r   P   s   
zTransformer.forwardr
   )	r   r   r   r0   r   r   r&   r   r   r   r   r   r   r1   A   s    r1   c                       sF   e Zd Zdedededededef fddZd	ejfd
dZ  ZS )VisualTransformerinput_resolution
patch_sizer2   r3   r4   
output_dimc                    s   t    || _|| _tjd|||dd| _|d }t|t	| | _
t|t	|| d d | | _t|| _t|||| _t|| _t|t	|| | _d S )N   F)Zin_channelsZout_channelsZkernel_sizeZstrideZbias            )r   r&   r<   r>   r   ZConv2dconv1	Parameterr   Zrandnclass_embeddingpositional_embeddingr   ln_prer1   transformerln_postproj)r   r<   r=   r2   r3   r4   r>   scaler   r   r   r&   V   s&   




zVisualTransformer.__init__r	   c                 C   s   |  |}||jd |jd d}|ddd}| j|j}tj|jd d|jd |j|j	d}|| }tj
||gdd}|| j|j }| |}|ddd}| |}|ddd}| |d d dd d f }| jd urx|| j }|S )Nr   rB   rA   r,   dim)rC   ZreshapeshapepermuterE   r.   r   r   Zzerosr-   catrF   rG   rH   rI   rJ   )r   r	   Zx_1Zx_2r   r   r   r   m   s$   




zVisualTransformer.forward)	r   r   r   r0   r&   r   r   r   r   r   r   r   r   r;   T   s    r;   c                       s   e Zd Zdededeeeeeef ef dededededed	ed
ef fddZdd Zdd Ze	dd Z
dd ZdddZdd Z  ZS )CLIP	embed_dimimage_resolutionvision_layersvision_widthvision_patch_sizecontext_length
vocab_sizetransformer_widthtransformer_headstransformer_layersc                    s   t    || _|d }t||||||d| _t||
|	|  d| _|| _t	
||| _t	t| j|| _t|| _t	t||| _t	tg td | _|   d S )N@   )r<   r=   r2   r3   r4   r>   )r2   r3   r4   r"   g$I$I,@)r   r&   rX   r;   visualr1   build_attention_maskrH   rY   r   Z	Embeddingtoken_embeddingrD   r   emptyrF   r   ln_finaltext_projectiononesnploglogit_scaleinitialize_parameters)r   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   Zvision_headsr   r   r   r&      s8   


zCLIP.__init__c                 C   s   t jj| jjdd t jj| jdd | jjd d| jj d  }| jjd }d| jj d }| jj	D ]-}t jj|j
j|d t jj|j
jj|d t jj|jjj|d t jj|jjj|d q3| jd urut jj| j| jjd d d S d S )Ng{Gz?)Zstdg{Gz?r@   rA   )r   initZnormal_r`   weightrF   rH   r2   r3   r:   r'   Zin_proj_weightZout_projr*   r#   r%   rc   )r   Zproj_stdZattn_stdZfc_stdblockr   r   r   rh      s"   


zCLIP.initialize_parametersc                 C   s,   t | j| j}|td |d |S )Nz-infrB   )r   ra   rX   Zfill_floatZtriu_)r   maskr   r   r   r_      s   
zCLIP.build_attention_maskc                 C   s   | j jjjS r
   )r^   rC   rj   r   )r   r   r   r   r      s   z
CLIP.dtypec                 C   s   |  || jS r
   )r^   r   r   )r   imager   r   r   encode_image   r   zCLIP.encode_imageFc                 C   s   |  || j}|| j| j }|ddd}| |}|ddd}| || j}|r5|| j S |t	|j
d |jddf | j }|S )NrB   r   rA   rL   rM   )r`   r   r   rF   rP   rH   rb   rc   r   ZarangerO   Zargmax)r   textZreturn_all_tokensr	   r   r   r   encode_text   s   


zCLIP.encode_textc                 C   sj   |  |}| |}||jddd }||jddd }| j }|| |  }|| |  }||fS )NrL   T)rN   Zkeepdim)ro   rq   Znormrg   expt)r   rn   rp   Zimage_featuresZtext_featuresrg   Zlogits_per_imageZlogits_per_textr   r   r   r      s   


zCLIP.forward)F)r   r   r   r0   r   r   r&   rh   r_   propertyr   ro   rq   r   r   r   r   r   r   rR      s2    
$

rR   
state_dictc                    sh  d v }|r1 d j d }tdd   D } d j d }t d j d d d	 }|| }n6 fd
ddD }t|} d j d }t d j d d d	 }d }|d d  d j d kscJ |d } d j d }	 d j d }
 d j d } d j d }|d }ttdd  D }t|	|||||
||||
}dD ]	}| v r |= q|  | S )Nzvisual.projzvisual.conv1.weightr   c                 S   s$   g | ]}| d r|dr|qS )zvisual.z.attn.in_proj_weight)
startswithendswithr5   kr   r   r   r8      s    zbuild_model.<locals>.<listcomp>rL   zvisual.positional_embeddingrB   g      ?c                    s&   g | ] t t fd dD qS )c                 3   s.    | ]}| d   r|dd V  qdS )zvisual.layer.rA   Nrv   splitrx   br   r   	<genexpr>   s    
z)build_model.<locals>.<listcomp>.<genexpr>)lenset)r5   ru   r}   r   r8      s    )rB   rA   r?   r$   zvisual.layer1.0.conv1.weightz$visual.attnpool.positional_embeddingrA       rc   rF   ztoken_embedding.weightzln_final.weightr]   c                 s   s(    | ]}| d r|dd V  qdS )ztransformer.resblocksrz   rA   Nr{   rx   r   r   r   r     s    
zbuild_model.<locals>.<genexpr>)r<   rX   rY   )	rO   r   keysroundtupler   rR   Zload_state_dicteval)ru   ZvitrV   rU   rW   Z	grid_sizerT   countsZoutput_widthrS   rX   rY   rZ   r[   r\   modelkeyr   r   r   build_model   sd   




r   cudacpuTnamer-   c                    sn  d}| }zt jj||r ndd }d }W n ty4   |r*td| d d}t j|ddd}Y nw |sNt|p=| 	 }t
 dkrL|  |S t jj fdd	g d
}dd |jdD d fdd}|| ||j ||j t
 dkrt jjdd	 g d
}t|jd d }	|	 fdd}
||
 |
|j |
|j |  |S )NFr   )map_locationzFile z6 is not a JIT archive. Loading as a state dict insteadT)r   Zweights_onlyc                      s   t g t  S r
   )r   rd   r.   r-   r   )r-   r   r   <lambda>6  s    zload_clip.<locals>.<lambda>)Zexample_inputsc                 S   s   g | ]
}d t |v r|qS )ZDevice)repr)r5   nr   r   r   r8   7  s
    zload_clip.<locals>.<listcomp>prim::ConstantrL   c                    st   t | dr	| jgng }t | dr|| jj |D ]}|dD ]}d| v r6t|d dr6|  q qd S )Ngraphforward1r   valuer   )	hasattrr   appendr   findAllNodesZattributeNamesstrrv   copyAttributes)modulegraphsr   node)device_noder   r   patch_device<  s   

zload_clip.<locals>.patch_devicec                   S   s   t g  S r
   )r   rd   rl   r   r   r   r   r   M  s    aten::torB   c                    s   t | dr	| jgng }t | dr|| jj |D ](}|dD ] }t| }dD ]}||  d dkr?||    q*q qd S )Nr   r   r   )rB   rA   r      )	r   r   r   r   r   listinputsr   r   )r   r   r   r   r   i)
float_noder   r   patch_floatQ  s   
zload_clip.<locals>.patch_float)r   jitloadr   RuntimeErrorwarningswarnr   ru   r.   r   rl   tracer   r   applyro   rq   r   ZfindNoder   r   )r   r-   r   Z
model_pathr   ru   Zdevice_holderr   Zfloat_holderZfloat_inputr   r   )r-   r   r   r   	load_clip  s`   

	






r   ) hashlibosurllibr   collectionsr   typingr   r   numpyre   r   Ztorch.nn.functionalr   Z
functionalFr   Z'modelscope.models.base.base_torch_modelr   r   r   r   r1   r;   rR   dictr   r   Zis_availabler   r-   r   r   r   r   r   <module>   s4   1b7