o
    *jf                     @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlm  mZ d dl	m	Z	 ddl
mZmZmZmZ dZdd Zd	d
 Z	 dZdZdZdZd Ze Ze Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z 			d(ddZ!d d! Z"d"d# Z#G d$d% d%Z$G d&d' d'Z%dS ))    N)time)tqdm   )deviceget_optimal_devicetest_for_nanstorch_gcFc                  C   s\   t j r*t jtjd } | dkrd}|S | dkrd}|S | dkr&d}|S d}|S d	}|S )
N   >  i   .  i   @  i   i  i   torchcudais_availableZget_device_propertiesr   total_memory)r   ZENCODER_TILE_SIZE r   {/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.pyget_recommend_encoder_tile_size   s&   
	r   c                  C   sl   t j r2t jtjd } | dkrd}|S | dkrd}|S | dkr&d}|S | dkr.d	}|S d
}|S d
}|S )Nr	   i0u     r
      r      r   `   @   r   )r   ZDECODER_TILE_SIZEr   r   r   get_recommend_decoder_tile_size#   s,   
	r   zglobal constTc                 C   s   t j| ddS )NT)Zinplace)Fsiluxr   r   r   inplace_nonlinearityA   s   r   c                 C   s   |j \}}}}||||| dd}d }d }|j \}}	}
| ||	|}| |}|d u r1|}n| jr9| |}| |}| |}| 	|}| 	|}| 	|}| 
|||}t||}| |}| jd |}| jd |}|dd||||}|S )Nr      r   )shapeviewZ	transposeZprepare_attention_maskZto_qZ
norm_crossZnorm_encoder_hidden_statesZto_kZto_vZhead_to_batch_dimZget_attention_scoresr   ZbmmZbatch_to_head_dimZto_outreshape)selfZh_Z
batch_sizeZchannelheightwidthZhidden_statesZattention_maskZencoder_hidden_statesZsequence_length_querykeyvalueZattention_probsr   r   r   attn_forward_newI   sB   






r-   c                 C   sL   |  dtj f |  d|jf |  d|fddf |  dd g d S )N	store_respre_normZattnc                 S   s
   t || S N)r-   )r   netr   r   r   <lambda>u   s   
 zattn2task.<locals>.<lambda>add_res)appendr   nnIdentityZ
group_norm)
task_queuer1   r   r   r   	attn2taskr   s   r8   c                 C   s   |j |jkr2tr|jr| d|jf n(| d|jf n|jr)| d|jf n| d|jf n
| dtj	
 f | d|jf | dtf | d|jf | d|jf | dtf | d|jf | ddg dS )z
    Turn a ResNetBlock into a sequence of tasks and append to the task queue

    @param queue: the target task queue
    @param block: ResNetBlock

    r.   r/   r   conv1conv2r3   N)Zin_channelsZout_channelssd_flagZuse_conv_shortcutr4   Zconv_shortcutZnin_shortcutZuse_in_shortcutr   r5   r6   Znorm1r   r9   Znorm2r:   )queueblockr   r   r   resblock2tasky   s    r>   c           
      C   s  |r^t r-t| |jj t| |jj t| |jj tt|j	}|j
d }d}|j}d}nCt| |jjd  t| |jjd  t| |jjd  tt|j}d}t|jd }|j}d}nt|j	}|j
}|j	d }|j}d}|D ]?}t|D ]}	t rt| || j|	  qxt| || j|	  qx||krt r| |t|| |f qr| ||| jd f qr|st rt| |jj t| |jj t| |jj dS t| |jjd  t| |jjd  t| |jjd  dS dS )z
    Build the sampling part of a task queue
    @param task_queue: the target task queue
    @param net: the network
    @param is_decoder: currently building decoder or encoder
    r   r   Zupsample   
upsamplers
downsampleN)r;   r>   midZblock_1r8   Zattn_1Zblock_2reversedrangeZnum_resolutionsZnum_res_blocksupZ	mid_blockZresnetsZ
attentionslenZ	up_blocksZdownr=   r4   getattrr@   )
r7   r1   
is_decoderZresolution_iterZ	block_ids	conditionmodule	func_nameZi_levelZi_blockr   r   r   build_sampling   sX   


rL   c                 C   s   g }| d| jf t|| | |rtsd| _d| _|r| jsNtr*| d| jf n| d| jf | dtf | d| j	f |rN| jrN| dt
jf |S )z
    Build a single task queue for the encoder or decoder
    @param net: the VAE decoder or encoder network
    @param is_decoder: currently building decoder or encoder
    @return: the task queue
    conv_inFr/   r   conv_outtanh)r4   rM   rL   r;   Zgive_pre_endZtanh_outZnorm_outZconv_norm_outr   rN   r   rO   )r1   rH   r7   r   r   r   build_task_queue   s   

rP   c                 C   s   dd | D S )zr
    Clone a task queue
    @param task_queue: the task queue to be cloned
    @return: the cloned task queue
    c                 S   s   g | ]	}d d |D qS )c                 S   s   g | ]}|qS r   r   ).0itemr   r   r   
<listcomp>   s    z/clone_task_queue.<locals>.<listcomp>.<listcomp>r   )rQ   taskr   r   r   rS      s    z$clone_task_queue.<locals>.<listcomp>r   )r7   r   r   r   clone_task_queue   s   rU   c           
      C   sF   | j \}}}}|| }| || |||}tj|ddd\}}	||	fS )z=
    Get mean and var for group norm (optimized version)
    r   r    r?   F)dimZunbiased)r#   r%   r   Zvar_mean)
input
num_groupsbchwchannel_in_groupinput_reshapedvarmeanr   r   r   get_var_mean   s
   rb   ư>c              
   C   s   | j \}}}	}
|| }| || ||	|
}tj|||dddd|d}||||	|
}|dur<||dddd|j }|durN||dddd|j }|S )a  
    Custom group norm with fixed mean and var

    @param input: input tensor
    @param num_groups: number of groups. by default, num_groups = 32
    @param mean: mean, must be pre-calculated by get_var_mean
    @param var: var, must be pre-calculated by get_var_mean
    @param weight: weight, should be fetched from the original group norm
    @param bias: bias, should be fetched from the original group norm
    @param eps: epsilon, by default, eps = 1e-6 to match the original group norm

    @return: normalized tensor
    NFr   )weightbiasZtrainingZmomentumepsr   r!   )r#   r%   r   Z
batch_normr$   todtype)rX   rY   ra   r`   rd   re   rf   rZ   r[   r\   r]   r^   r_   outr   r   r   custom_group_norm  s&   
rj   c                    sn    fdd|D fddt dD }| dddd|d | d|d  |d | d|d	  f S )
z
    Crop the valid region from the tile
    @param x: input tile
    @param input_bbox: original input bounding box
    @param target_bbox: output bounding box
    @param scale: scale factor
    @return: cropped tile
    c                    s    g | ]} r
|d  n|d  qS    r   rQ   irH   r   r   rS   7  s     z%crop_valid_region.<locals>.<listcomp>c                    s   g | ]
}|  |  qS r   r   rm   )padded_bboxtarget_bboxr   r   rS   8  s       Nr    r?   r   r   )rD   size)r   
input_bboxrq   rH   marginr   )rH   rp   rq   r   crop_valid_region.  s
   	&rv   c                    s    fdd}|S )Nc                     s   t  }tj rtjt t  t   | i |}t  t  tj rHtj	td }tjt t
dt  | dd|dd |S t
dt  | dd |S )Nr	   z[Tiled VAE]: Done in z.3fzs, max VRAM alloc z MBs)r   r   r   r   Zreset_peak_memory_statsr   r   gccollectZmax_memory_allocatedprint)argskwargstsretZvramfnr   r   wrapperB  s"   

zperfcount.<locals>.wrapperr   )r   r   r   r   r   	perfcount@  s   r   c                   @   s0   e Zd Zdd Zdd Zdd Zedd Zd	S )
GroupNormParamc                 C   s"   g | _ g | _g | _d | _d | _d S r0   )var_list	mean_list
pixel_listrd   re   r&   r   r   r   __init__a  s
   
zGroupNormParam.__init__c                 C   s   t |d\}}|jtjkr|  r| }t |d\}}| j| | j	| | j
|jd |jd   t|drG|j| _|j| _d S d | _d | _d S )N    r    r?   rd   )rb   rh   r   float16isinfanyfloatr   r4   r   r   r#   hasattrrd   re   )r&   tilelayerr`   ra   	fp32_tiler   r   r   add_tileh  s   

zGroupNormParam.add_tilec                    s   t jdkr	dS tjtj tj}tjjtjt	d| }|
d}tj| ddtj | dd  fddS )zm
        summarize the mean and var and return a function
        that apply group norm on each tile
        r   N)rh   r   r   )rW   c                    s   t | d jjS )Nr   )rj   rd   re   r   ra   r&   r`   r   r   r2     s    z(GroupNormParam.summary.<locals>.<lambda>)rF   r   r   Zvstackr   sumr   ZtensorZfloat32r   Z	unsqueeze)r&   Ztotal_pixelsZpixelsr   r   r   summaryy  s   


zGroupNormParam.summaryc                 C   s   t | d\}}|jtjkr7|  r7|  }t |d\}}|jjdkr7t	|dd}|
tj}|
tj}t|drC|j}|j}nd}d}||||fdd}|S )	zF
        create a function from a single tile without summary
        r   Zmpsr   i`  rd   Nc                 S   s   t | d||||dS )Nr   rc   )rj   )r   ra   r`   rd   re   r   r   r   group_norm_func  s   z1GroupNormParam.from_tile.<locals>.group_norm_func)rb   rh   r   r   r   r   r   r   typeclamprg   r   rd   re   )r   Znormr`   ra   r   rd   re   r   r   r   r   	from_tile  s   
zGroupNormParam.from_tileN)__name__
__module____qualname__r   r   r   staticmethodr   r   r   r   r   r   _  s    r   c                   @   sT   e Zd Z	dddZdd Zdd Zdd	 Ze d
d Z	e
e dd ZdS )VAEHookFc                 C   s\   || _ || _|| _|r| p|o|| _|o| | _|| _|r dnd| _tdddk| _	d S )N   r   ZMODELSCOPE_VAE_EMPTY_CACHE01)
r1   	tile_sizerH   	fast_mode	color_fixto_gpupadosgetenvenable_cuda_empty_cache)r&   r1   r   rH   Zfast_decoderZfast_encoderr   r   r   r   r   r     s   
zVAEHook.__init__c              	   C   s
  |j \}}}}t| j j}z_| jrt n|}| jr!| j| t||| j	d | j
 krQtd | j||W | j| tj rO| jrPtj  S S S | ||W | j| tj rm| jrntj  S S S | j| tj r| jrtj  w w w )Nr    z<[Tiled VAE]: the input size is tiny and unnecessary to tile.)r#   nextr1   
parametersr   r   r   rg   maxr   r   rz   Zoriginal_forwardr   r   r   r   empty_cachevae_tile_forward)r&   r   BCHWZoriginal_deviceZtarget_devicer   r   r   __call__  s6   zVAEHook.__call__c                 C   sL   d}|dkr$|| }|dkr|S || | }||kr|S |d }|dks|S )z7
        Get the best tile size for GPU memory
        r   r    r   r   )r&   Z
lowerboundZ
upperbounddivider	remainder	candidater   r   r   get_best_tile_size  s   zVAEHook.get_best_tile_sizec                    s  g g }} j } j}t|d|  | }t|d|  | }t|d}t|d}t|d|  | }	t|d|  | }
 |	|}	 |
|}
td| d| d||  dd|
 d|	 d| d|  t|D ]}t|D ]}|||
  t||d |
  ||||	  t||d |	  |g}|d	 |kr|d	 nd	|d || k r|d n||d |kr|d nd	|d
 || k r|d
 n|g} fdd|D }|	| |	td	|d	 | t||d | td	|d | t||d
 | g qtqn||fS )z
        Tool function to split the image into tiles
        @param h: height of the image
        @param w: width of the image
        @return: tile_input_bboxes, tile_output_bboxes
        r    r   z[Tiled VAE]: split to r   =z tiles.zOptimal tile size z, original tile size r   r?   c                    s"   g | ]} j r|d  n|d  qS rk   ro   )rQ   r   r   r   r   rS   	  s    z'VAEHook.split_tiles.<locals>.<listcomp>)
r   r   mathceilr   r   rz   rD   minr4   )r&   r\   r]   Ztile_input_bboxesZtile_output_bboxesr   r   Znum_height_tilesZnum_width_tilesZreal_tile_heightZreal_tile_widthrn   jrt   Zoutput_bboxr   r   r   split_tiles  sN   






zVAEHook.split_tilesc                 C   s  |j }|}t|d }|dkr'|| d dkr'|d8 }|dkr'|| d dks|dks3|| d dkr7tdt|d D ]}|| }|d dkrct||d }	d|	f||< ||kr^ dS |	|}n||d dkr|d }
|
|k r||
 d dkr|
d7 }
|
|k r||
 d dksy|
|krq=|d |||
 d< nF|d dkr||d |7 }d |d< n2| jr|d d	kr||k rt||d D ]}|| d dkrd
|| d f||< q dS |d |}zt|d W q= t	y } zt
| d W Y d }~ dS d }~ww td)Nr   r   r/   z%No group norm found in the task queue
apply_normTr.   r3   rA   store_res_cpuvaez;. Nan detected in fast mode estimation. Fast mode disabled.FzShould not reach here)r   rF   
ValueErrorrD   r   r   rg   r   r   	Exceptionrz   
IndexError)r&   zr7   r   r   r   Zlast_idrn   rT   r   task_idr   er   r   r   estimate_group_norm  sX   

zVAEHook.estimate_group_normc           $   	      s  j }| j}| j}| j} jd jd jd }}}j|_tdj d| d| j  | 	||\}	}
fdd|	D }t
|}d}t|| | jrÈ||t|| }tj|d	d
}td|jd  d|jd  d tjg ddd\}}tj|g ddd\}}|| | | | }tj|  d}t }| j||| jdr| ~tj r| jrtj   fddt|D }d}dt  tdd |D }t |d|rdnd d}d}||k rpt! }|rt|nt"t|}|D ]D}|| du rq|| |}|| }t#|D ]\}} | d dkr8|$|| d  d||<  nl| d dksF| d dkr~| d |}!| d dksW| js[|!% }!t|d t
|D ]}"||" d dkrwd|!f||"<  nqdd||< n| d dkr|| d |7 }d||< n
| d |}d||< |&d qd d |D ||< t'|d! t
|| dkrd||< |d7 }|du rtj(||jd |r|d" n|d" |r|d" n|d" f|d#}t)||	| |
| ||dddd|
| d |
| d |
| d |
| d f< ~n&||d kr$|r$d$}|||< n|dkr3|s3d}|||< n|% ||< tj rH| jrHtj  q|* }#|#durlt|D ]}|| durj|| +dd%|#f qW||k s|,  ||S )&z
        Decode a latent vector z into an image in a tiled manner.
        @param z: latent vector
        @return: image
        r   r    r?   z[Tiled VAE]: input_size: z, tile_size: z, padding: c              	      s@   g | ]} d d d d |d |d |d |d f   qS )Nr    r?   r   r   )cpu)rQ   rZ   )r   r   r   rS   ^  s   @ z,VAEHook.vae_tile_forward.<locals>.<listcomp>znearest-exact)scale_factormodezD[Tiled VAE]: Fast mode enabled, estimating group norm parameters on z x z imagerV   T)rW   Zkeepdim)r   r   )r   c                    s   g | ]}t  qS r   )rU   )rQ   r)   )single_task_queuer   r   rS     s    Nc                 s   s    | ]}t |V  qd S r0   )rF   )rQ   qr   r   r   	<genexpr>  s    z+VAEHook.vae_tile_forward.<locals>.<genexpr>z[Tiled VAE]: Executing ZDecoderZEncoder)totaldescr/   r   r.   r   r3   c                 S   s   g | ]}|d ur|qS r0   r   )rQ   tr   r   r   rS     s    r   rl   )r   Fr   )-r   r1   r   rH   detachr#   Zlast_z_shaperz   r   r   rF   rP   r   rg   r   r   Zinterpolater   Zstd_meanr   r   rU   r   r   r   r   r   r   rD   rx   ry   r   r   r   rC   	enumerater   r   updater   Zzerosrv   r   insertclose)$r&   r   r   r1   r   rH   Nr'   r(   Z	in_bboxesZ
out_bboxesZtilesZ	num_tilesZnum_completedr   Zdownsampled_zZstd_oldZmean_oldZstd_newZmean_newZestimate_task_queueZtask_queuesresultZtotal_tasksZpbarforwardZgroup_norm_paramindicesrn   r   r7   Ztask_idxrT   resZadd_idxr   r   )r   r   r   r   D  s  "













$




O
zVAEHook.vae_tile_forwardN)F)r   r   r   r   r   r   r   r   Zno_gradr   r   r   r   r   r   r   r     s    	
7
,r   )NNrc   )&rx   r   r   r   r   Ztorch.nn.functionalr5   Z
functionalr   r   Zdevicesr   r   r   r   r;   r   r   ZDEFAULT_ENABLEDZDEFAULT_MOVE_TO_GPUZDEFAULT_FAST_ENCODERZDEFAULT_FAST_DECODERZDEFAULT_COLOR_FIXZDEFAULT_ENCODER_TILE_SIZEZDEFAULT_DECODER_TILE_SIZEr   r-   r8   r>   rL   rP   rU   rb   rj   rv   r   r   r   r   r   r   r   <module>   sF   )9	
,H