o
    *j.H                     @   s   d dl Zd dlZd dlmZ d dlm  mZ dgZdd Z	dddZ
G dd	 d	eZG d
d dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejjZdS )    NAutoencoderKLc                 C   s   | t |  S N)torchZsigmoid)x r   z/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/video_synthesis/autoencoder.pynonlinearity   s   r       c                 C   s   t jj|| dddS )Ngư>T)
num_groupsZnum_channelsepsZaffine)r   nnZ	GroupNorm)in_channelsr
   r   r   r   	Normalize   s   r   c                   @   s@   e Zd ZdddZdd ZdddZg d	fd
dZdd ZdS )DiagonalGaussianDistributionFc                 C   s   || _ tj|ddd\| _| _t| jdd| _|| _td| j | _t| j| _	| jr@t
| jj| j jd | _	| _d S d S )N      dimg      >g      4@      ?device)
parametersr   chunkmeanlogvarclampdeterministicexpstdvarZ
zeros_liketor   )selfr   r   r   r   r   __init__   s   z%DiagonalGaussianDistribution.__init__c                 C   s*   | j | jt| j jj| jjd  }|S )Nr   )r   r   r   randnshaper    r   r   r!   r   r   r   r   sample$   s   
z#DiagonalGaussianDistribution.sampleNc                 C   s   | j r	tdgS |d u r%dtjt| jd| j d | j g dd S dtjt| j|j d|j | j|j  d | j |j g dd S )N        r   r         ?r   r      r   )r   r   Tensorsumpowr   r   r   )r!   otherr   r   r   kl)   s&   
zDiagonalGaussianDistribution.klr)   c                 C   sR   | j r	tdgS tdtj }dtj|| j t|| j	 d| j
  |d S )Nr'          @r   r   r   )r   r   r+   nplogpir,   r   r-   r   r   )r!   r&   dimsZlogtwopir   r   r   nll7   s   z DiagonalGaussianDistribution.nllc                 C   s   | j S r   )r   r!   r   r   r   mode@   s   z!DiagonalGaussianDistribution.mode)Fr   )__name__
__module____qualname__r"   r&   r/   r5   r7   r   r   r   r   r      s    

	r   c                       s.   e Zd Zdddd fdd
Zdd Z  ZS )	ResnetBlockNFi   )out_channelsconv_shortcuttemb_channelsc                   s   t    || _|d u r|n|}|| _|| _t|| _tjj	||dddd| _
|dkr3tj||| _t|| _tj|| _tjj	||dddd| _| j| jkrp| jrbtjj	||dddd| _d S tjj	||dddd| _d S d S )Nr*   r   kernel_sizestridepaddingr   )superr"   r   r<   use_conv_shortcutr   norm1r   r   Conv2dconv1ZLinear	temb_projnorm2ZDropoutdropoutconv2r=   nin_shortcut)r!   r   r<   r=   rJ   r>   	__class__r   r   r"   F   sB   




zResnetBlock.__init__c                 C   s   |}|  |}t|}| |}|d ur'|| t|d d d d d d f  }| |}t|}| |}| |}| j| jkrQ| j	rL| 
|}|| S | |}|| S r   )rE   r   rG   rH   rI   rJ   rK   r   r<   rD   r=   rL   )r!   r   tembhr   r   r   forwardl   s    

&




zResnetBlock.forwardr8   r9   r:   r"   rQ   __classcell__r   r   rM   r   r;   D   s    &r;   c                       $   e Zd Z fddZdd Z  ZS )	AttnBlockc                    s~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S )Nr   r   r?   )rC   r"   r   r   normr   r   rF   qkvproj_out)r!   r   rM   r   r   r"      s   





zAttnBlock.__init__c                 C   s   |}|  |}| |}| |}| |}|j\}}}}	|||||	 }|ddd}|||||	 }t||}
|
t	|d  }
tj
jj|
dd}
|||||	 }|
ddd}
t||
}|||||	}| |}|| S )Nr   r   r   g      r   )rV   rW   rX   rY   r$   Zreshapepermuter   Zbmmintr   
functionalZsoftmaxrZ   )r!   r   Zh_rW   rX   rY   bcrP   wZw_r   r   r   rQ      s(   




zAttnBlock.forwardrR   r   r   rM   r   rU      s    rU   c                       rT   )Upsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr*   r   r?   rC   r"   	with_convr   r   rF   convr!   r   rc   rM   r   r   r"      s   

zUpsample.__init__c                 C   s(   t jjj|ddd}| jr| |}|S )Nr0   nearest)Zscale_factorr7   )r   r   r]   Zinterpolaterc   rd   r%   r   r   r   rQ      s   
zUpsample.forwardrR   r   r   rM   r   ra      s    ra   c                       rT   )
Downsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr*   r   r   r?   rb   re   rM   r   r   r"      s   

zDownsample.__init__c                 C   sF   | j rd}tjjj||ddd}| |}|S tjjj|ddd}|S )N)r   r   r   r   Zconstantr   )r7   valuer   )r@   rA   )rc   r   r   r]   padrd   Z
avg_pool2d)r!   r   ri   r   r   r   rQ      s   
zDownsample.forwardrR   r   r   rM   r   rg      s    rg   c                       s0   e Zd Zddddd fdd
Zdd Z  ZS )	Encoderr   r         r'   T)ch_multrJ   resamp_with_convdouble_zc       
      
      s  t    || _d| _t|| _|| _|| _|| _t	j
j|| jdddd| _|}dt| }|| _t
 | _t| jD ]X}t
 }t
 }|||  }|||  }t| jD ]}|t||| j|d |}||v rq|t| qVt
 }||_||_|| jd krt|||_|d }| j| q;t
 | _t||| j|d| j_t|| j_t||| j|d| j_t|| _t	j
j||
rd|	 n|	dddd| _ d S )Nr   r*   r   r?   )r   r   r<   r>   rJ   r   )!rC   r"   chtemb_chlennum_resolutionsnum_res_blocks
resolutionr   r   r   rF   conv_intuple
in_ch_mult
ModuleListdownrangeappendr;   rU   Moduleblockattnrg   
downsamplemidblock_1attn_1block_2r   norm_outconv_out)r!   rr   rn   rv   attn_resolutionsrJ   ro   r   rw   
z_channelsrp   Zignore_kwargscurr_resrz   i_levelr   r   block_in	block_outi_blockr|   rM   r   r   r"      sz   






zEncoder.__init__c                 C   s   d }|  |g}t| jD ]D}t| jD ](}| j| j| |d |}t| j| jdkr7| j| j| |}|| q|| jd krQ|| j| 	|d  q|d }| j
||}| j
|}| j
||}| |}t|}| |}|S )Nr   r   )rx   r}   ru   rv   r|   r   rt   r   r~   r   r   r   r   r   r   r   r   )r!   r   rO   hsr   r   rP   r   r   r   rQ   !  s&   

zEncoder.forwardrR   r   r   rM   r   rj      s    Krj   c                       s2   e Zd Zdddddd fdd
Zdd	 Z  ZS )
Decoderrk   r'   TF)rn   rJ   ro   give_pre_endtanh_outc             
      s  t    || _d| _t|| _|| _|	| _|| _|| _	|| _
||| jd   }|	d| jd   }d|
||f| _tjj|
|dddd| _t | _t||| j|d| j_t|| j_t||| j|d| j_t | _tt| jD ]R}t }t }|||  }t| jd D ]}|t||| j|d |}||v r|t| qt }||_||_|dkrt|||_|d }| j d| qqt!|| _"tjj||dddd| _#d S )Nr   r   r   r*   r?   rq   )$rC   r"   rr   rs   rt   ru   rv   rw   r   r   r   Zz_shaper   r   rF   rx   r   r   r;   r   rU   r   r   r{   upreversedr}   r~   r   r   ra   upsampleinsertr   r   r   )r!   rr   Zout_chrn   rv   r   rJ   ro   r   rw   r   r   r   Zignorekwargsr   r   r   r   r   r   r   r   rM   r   r   r"   ?  st   








zDecoder.__init__c                 C   s   |j | _d }| |}| j||}| j|}| j||}tt| j	D ]7}t| j
d D ]!}| j| j| ||}t| j| jdkrP| j| j| |}q/|dkr]| j| |}q&| jrc|S | |}t|}| |}| jryt|}|S )Nr   r   )r$   Zlast_z_shaperx   r   r   r   r   r   r}   ru   rv   r   r   rt   r   r   r   r   r   r   r   r   tanh)r!   zrO   rP   r   r   r   r   r   rQ     s.   



zDecoder.forwardrR   r   r   rM   r   r   =  s    Mr   c                       s~   e Zd Z						d fdd	Zdd Zdd	 Zd
d Zdd ZdddZdd Z	dd Z
e dddZdd Z  ZS )r   NimageFc	           	   	      s   t    || _|| _tdi || _tdi || _|d s!J tj	
d|d  d| d| _tj	
||d d| _|| _|d urVt|tksJJ | dtd|dd |d ur]|| _|d u| _|d urm| | d S d S )Nrp   r   r   r   colorizer*   r   )rC   r"   learn_logvar	image_keyrj   encoderr   decoderr   r   rF   
quant_convpost_quant_conv	embed_dimtyper\   register_bufferr#   monitoruse_emainit_from_ckpt)	r!   Zddconfigr   Z	ckpt_pathr   Zcolorize_nlabelsr   Z	ema_decayr   rM   r   r   r"     s0   
	
zAutoencoderKL.__init__c                 C   sr   t j|ddd }t| }dd l}| }|D ]}|ddkr/|dd }|| ||< q| j|dd	 d S )
Ncpu)Zmap_locationZ
state_dictr   Zfirst_stage_modelzfirst_stage_model.r   T)strict)	r   loadlistkeyscollectionsOrderedDictfindsplitZload_state_dict)r!   pathsdr   r   Zsd_newrX   Zk_newr   r   r   r     s   zAutoencoderKL.init_from_ckptc                 O   s   | j r
| |  d S d S r   )r   Z	model_ema)r!   argskwargsr   r   r   on_train_batch_end  s   z AutoencoderKL.on_train_batch_endc                 C   s    |  |}| |}t|}|S r   )r   r   r   )r!   r   rP   Zmoments	posteriorr   r   r   encode  s   

zAutoencoderKL.encodec                 C   s   |  |}| |}|S r   )r   r   )r!   r   decr   r   r   decode  s   

zAutoencoderKL.decodeTc                 C   s2   |  |}|r| }n| }| |}||fS r   )r   r&   r7   r   )r!   inputZsample_posteriorr   r   r   r   r   r   rQ     s   


zAutoencoderKL.forwardc                 C   s@   || }t |jdkr|d }|ddddjtjd }|S )Nr*   ).Nr   r   r   )Zmemory_format)rt   r$   r[   r    r   Zcontiguous_formatfloat)r!   batchrX   r   r   r   r   	get_input  s   
zAutoencoderKL.get_inputc                 C   s
   | j jjS r   )r   r   weightr6   r   r   r   get_last_layer  s   
zAutoencoderKL.get_last_layerc                 K   s  t  }| || j}|| j}|s| |\}}|jd dkr2|jd dks(J | |}| |}| t	|
 |d< ||d< |sG| jr|  3 | |\}	}
|jd dkrg|	jd dksbJ | |	}	| t	|

 |d< |	|d< W d    n1 sw   Y  ||d< |S )Nr   r*   ZsamplesZreconstructionsZsamples_emaZreconstructions_emaZinputs)dictr   r   r    r   r$   to_rgbr   r   Z
randn_liker&   r   Z	ema_scope)r!   r   Zonly_inputsZlog_emar   r2   r   Zxrecr   Zxrec_emaZposterior_emar   r   r   
log_images  s0   





	zAutoencoderKL.log_imagesc              	   C   st   | j dksJ t| ds| dtd|jd dd| tj|| j	d}d||
   | |
   d }|S )NZsegmentationr   r*   r   )r   r0   r(   )r   hasattrr   r   r#   r$   r    FZconv2dr   minmaxr%   r   r   r   r     s   
$zAutoencoderKL.to_rgb)Nr   NNNF)T)FF)r8   r9   r:   r"   r   r   r   r   rQ   r   r   r   Zno_gradr   r   rS   r   r   rM   r   r     s$     
	c                       sB   e Zd Zdd fdd
Zdd Zdd Zd	d
 Zdd Z  ZS )IdentityFirstStageFvq_interfacec                   s   || _ t   d S r   )r   rC   r"   )r!   r   r   r   rM   r   r   r"   )  s   zIdentityFirstStage.__init__c                 O      |S r   r   r!   r   r   r   r   r   r   r   -     zIdentityFirstStage.encodec                 O   r   r   r   r   r   r   r   r   0  r   zIdentityFirstStage.decodec                 O   s   | j r
|d g dfS |S )N)NNNr   r   r   r   r   quantize3  s   zIdentityFirstStage.quantizec                 O   r   r   r   r   r   r   r   rQ   8  r   zIdentityFirstStage.forward)	r8   r9   r:   r"   r   r   r   rQ   rS   r   r   rM   r   r   '  s    r   )r	   )numpyr1   r   Ztorch.nnr   Ztorch.nn.functionalr]   r   __all__r   r   objectr   r   r;   rU   ra   rg   rj   r   r   r   r   r   r   r   <module>   s    
-?,irx