o
    *j                     @   sX   d dl Zd dlZd dlZd dlmZ d dlmZ G dd dej	Z
G dd dej	ZdS )    Nc                       sh   e Zd ZdZddgZ						d fd	d
	Zdd Zdd Zdd Zdde	j
fddZdd Z  ZS )FrozenOpenCLIPEmbedder8
    Uses the OpenCLIP transformer encoder for text
    lastpenultimateViT-H-14laion2b_s32b_b79kcudaM   Tc           	         s   t    || jv sJ tj|td|d\}}}|`|| _|| _|| _	|r+| 
  || _| jdkr8d| _d S | jdkrBd| _d S t )Ncpudevice
pretrainedr   r   r      )super__init__LAYERS	open_clipcreate_model_and_transformstorchr   Zvisualmodel
max_lengthfreezelayer	layer_idxNotImplementedError)	selfarchr   r   r   r   r   r   _	__class__ q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/videocomposer/clip.pyr      s"   




zFrozenOpenCLIPEmbedder.__init__c                 C   $   | j  | _ |  D ]}d|_q
d S NFr   eval
parametersZrequires_gradr   paramr    r    r!   r   *      zFrozenOpenCLIPEmbedder.freezec                 C   s    t |}| || j}|S N)r   tokenizeencode_with_transformertor   )r   texttokenszr    r    r!   forward/   s   
zFrozenOpenCLIPEmbedder.forwardc                 C   V   | j |}|| j j }|ddd}| j|| j jd}|ddd}| j |}|S Nr   r      	attn_maskr   Ztoken_embeddingZpositional_embeddingZpermutetext_transformer_forwardr6   Zln_finalr   r.   xr    r    r!   r,   4      z.FrozenOpenCLIPEmbedder.encode_with_transformerNr:   c                 C   h   t | jjjD ]*\}}|t| jjj| j kr |S | jjjr+tj	 s+t
|||}q|||d}q|S Nr5   	enumerater   transformerZ	resblockslenr   Zgrad_checkpointingr   ZjitZis_scripting
checkpointr   r:   r6   irr    r    r!   r8   =      z/FrozenOpenCLIPEmbedder.text_transformer_forwardc                 C      | |S r*   r    r   r.   r    r    r!   encodeH      zFrozenOpenCLIPEmbedder.encode)r   r   r   r	   Tr   r*   __name__
__module____qualname____doc__r   r   r   r1   r,   r   ZTensorr8   rI   __classcell__r    r    r   r!   r   
   s    	r   c                       sj   e Zd ZdZddgZ								d fd
d	Zdd Zdd Zdd Zdde	j
fddZdd Z  ZS )FrozenOpenCLIPVisualEmbedderr   r   r   r   r   r   r	   T   rS      c                    s   t    || jv sJ tj|td|d\}}	}
|`|| _t	j
|t	jdd }|
t |d| _|
| _|| _|| _|rD|   || _| jdkrQd| _d S | jdkr[d| _d S t )	Nr
   r   )Zdtype   r   r   r   r   )r   r   r   r   r   r   r   r@   r   npZonesZuint8TZ
ToPILImageZ	unsqueezeZblack_image
preprocessr   r   r   r   r   )r   r   r   r   r   r   r   Zinput_shaper   r   rX   Z
data_whiter   r    r!   r   R   s(   




z%FrozenOpenCLIPVisualEmbedder.__init__c                 C   r"   r#   r$   r'   r    r    r!   r   p   r)   z#FrozenOpenCLIPVisualEmbedder.freezec                 C   s   | j || j}|S r*   )r   Zencode_imager-   r   )r   imager0   r    r    r!   r1   u   s   z$FrozenOpenCLIPVisualEmbedder.forwardc                 C   r2   r3   r7   r9   r    r    r!   r,   z   r;   z4FrozenOpenCLIPVisualEmbedder.encode_with_transformerNr:   c                 C   r<   r=   r>   rC   r    r    r!   r8      rF   z5FrozenOpenCLIPVisualEmbedder.text_transformer_forwardc                 C   rG   r*   r    rH   r    r    r!   rI      rJ   z#FrozenOpenCLIPVisualEmbedder.encode)r   r   r   r	   Tr   rR   r*   rK   r    r    r   r!   rQ   L   s     	rQ   )numpyrV   r   r   Ztorch.nnnnZtorchvision.transformsZ
transformsrW   Moduler   rQ   r    r    r    r!   <module>   s   B