o
    *j[                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlm  m	Z	 d dl
mZ d dlm  mZ d dlmZ G dd dejZedkrdZedddd	ed
 Zejedddgejd Zejeddgejd Ze  e	jdd2 edD ]ZeeeZqzej  e Z edD ]ZeeeZqej  e Z!W d   n1 sw   Y  e"de!e  d d  d dS dS )    N)FlashAttentionc                       s8   e Zd Z				d	 fdd	Zdd Zd
ddZ  ZS )FlashAttentionBlockN   c                    s   |r|| n|}|| }|| |ksJ t t|   || _|| _|| _|| _t|d| _	t
d|| _t
||d d| _|d urLt
||d | _t
||d| _| jdkrg| jd dkrgtd d	d
| _t
j| jj d S )Ng      п                   r           )Zsoftmax_scaleZattention_dropout)superr   __init__dimcontext_dim	num_headshead_dimmathpowscalennZ	GroupNormnormConv2dto_qkvLinear
context_kvprojr   
flash_attninitZzeros_weight)selfr   r   r   r   
batch_size	__class__ v/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/videocomposer/mha_flash.pyr      s&   zFlashAttentionBlock.__init__c                 C   s|   t |tjr|jjjddd |jd ur|jj  d S d S t |tjr:|jjjddd |jd ur<|jj  d S d S d S )Nr   g333333?)meanZstd)	
isinstancer   r   r   dataZnormal_ZbiasZzero_r   )r   moduler#   r#   r$   _init_weight1   s   

z FlashAttentionBlock._init_weightc                 C   s  |}g |  | j| jR \}}}}}}	| |}| |||d |	|| jddd\}
}}|durx| ||d|d |		ddddjddd\}}t
j||gdd}t
j||gdd}t
j|||	dg|
j|
jd	}t
j|
|gdd}
t
j|
||gdd}|j}|	dddd|dd||	  }| |\}}|| |dur|dddd
ddddf }|	dddd||||}| |}|| S )zGx:       [B, C, H, W].
            context: [B, L, C] or None.
        r   r   )r   Nr   r   r   )dtypedevice)sizer   r   r   r   viewchunkr   ZreshapeZpermutetorchcatZzerosr+   r,   Zhalf
contiguousr   tor   )r   xcontextidentitybchwndqkvZckZcvZcqZqkvZorigin_dtypeout_r#   r#   r$   forward;   s@   &
.

 
zFlashAttentionBlock.forward)NNNr   )N)__name__
__module____qualname__r   r)   rC   __classcell__r#   r#   r!   r$   r      s    
r   __main__r
   i   i   @   )r   r   r   r   r    r   )r+   r   T)enabled   
   zAverage cost time i  z ms)#r   osrandomtimenumpynpr1   Ztorch.cuda.ampcudaampZtorch.nnr   Ztorch.nn.functionalZ
functionalFZflash_attn.flash_attentionr   Moduler   rD   r    Z	flash_netZrandnZfloat32r5   r6   evalZautocastrangeiyZsynchronizes1s2printr#   r#   r#   r$   <module>   sJ   O


 