o
    *jYS                     @   s  d dl Zd dlZd dlmZ d dlm  mZ dgZdd Z	dddZ
G dd	 d	eZG d
d dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejjZdS )    NAutoencoderKLc                 C   s   | t |  S N)torchZsigmoid)x r   x/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/videocomposer/autoencoder.pynonlinearity   s   r       c                 C   s   t jj|| dddS )Ngư>T)
num_groupsZnum_channelsepsZaffine)r   nnZ	GroupNorm)in_channelsr
   r   r   r   	Normalize   s   r   c                   @   s@   e Zd ZdddZdd ZdddZg d	fd
dZdd ZdS )DiagonalGaussianDistributionFc                 C   s   || _ tj|ddd\| _| _t| jdd| _|| _td| j | _t| j| _	| jr@t
| jj| j jd | _	| _d S d S )N      dimg      >g      4@      ?device)
parametersr   chunkmeanlogvarclampdeterministicexpstdvarZ
zeros_liketor   )selfr   r   r   r   r   __init__   s   z%DiagonalGaussianDistribution.__init__c                 C   s*   | j | jt| j jj| jjd  }|S )Nr   )r   r   r   randnshaper    r   r   r!   r   r   r   r   sample"   s   
z#DiagonalGaussianDistribution.sampleNc                 C   s   | j r	tdgS |d u r%dtjt| jd| j d | j g dd S dtjt| j|j d|j | j|j  d | j |j g dd S )N        r   r         ?r   r      r   )r   r   Tensorsumpowr   r   r   )r!   otherr   r   r   kl'   s&   
zDiagonalGaussianDistribution.klr)   c                 C   sR   | j r	tdgS tdtj }dtj|| j t|| j	 d| j
  |d S )Nr'          @r   r   r   )r   r   r+   nplogpir,   r   r-   r   r   )r!   r&   dimsZlogtwopir   r   r   nll5   s   z DiagonalGaussianDistribution.nllc                 C   s   | j S r   )r   r!   r   r   r   mode>   s   z!DiagonalGaussianDistribution.mode)Fr   )__name__
__module____qualname__r"   r&   r/   r5   r7   r   r   r   r   r      s    

	r   c                       $   e Zd Z fddZdd Z  ZS )
Downsamplec                    6   t    || _| jrtjj||dddd| _d S d S Nr*   r   r   kernel_sizestridepaddingsuperr"   	with_convr   r   Conv2dconvr!   r   rE   	__class__r   r   r"   D      

Downsample.__init__c                 C   F   | j rd}tjjj||ddd}| |}|S tjjj|ddd}|S N)r   r   r   r   Zconstantr   )r7   valuer   )r@   rA   rE   r   r   
functionalpadrG   Z
avg_pool2dr!   r   rR   r   r   r   forwardL      
Downsample.forwardr8   r9   r:   r"   rT   __classcell__r   r   rI   r   r<   B       r<   c                       s.   e Zd Zdddd fdd
Zdd Z  ZS )	ResnetBlockNFi   )out_channelsconv_shortcuttemb_channelsc                   s   t    || _|d u r|n|}|| _|| _t|| _tjj	||dddd| _
|dkr3tj||| _t|| _tj|| _tjj	||dddd| _| j| jkrp| jrbtjj	||dddd| _d S tjj	||dddd| _d S d S )Nr*   r   r?   r   )rD   r"   r   r[   use_conv_shortcutr   norm1r   r   rF   conv1ZLinear	temb_projnorm2ZDropoutdropoutconv2r\   nin_shortcut)r!   r   r[   r\   rc   r]   rI   r   r   r"   X   sB   




zResnetBlock.__init__c                 C   s   |}|  |}t|}| |}|d ur'|| t|d d d d d d f  }| |}t|}| |}| |}| j| jkrQ| j	rL| 
|}|| S | |}|| S r   )r_   r   r`   ra   rb   rc   rd   r   r[   r^   r\   re   )r!   r   tembhr   r   r   rT   ~   s    

&




zResnetBlock.forwardrW   r   r   rI   r   rZ   V   s    &rZ   c                       r;   )	AttnBlockc                    ~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S Nr   r   r?   rD   r"   r   r   normr   r   rF   qkvproj_outr!   r   rI   r   r   r"         





AttnBlock.__init__c                 C      |}|  |}| |}| |}| |}|j\}}}}	|||||	 }|ddd}|||||	 }t||}
|
t	|d  }
tj
jj|
dd}
|||||	 }|
ddd}
t||
}|||||	}| |}|| S Nr   r   r   g      r   rl   rm   rn   ro   r$   Zreshapepermuter   Zbmmintr   rQ   Zsoftmaxrp   r!   r   Zh_rm   rn   ro   bcrg   wZw_r   r   r   rT      (   




AttnBlock.forwardrW   r   r   rI   r   rh          rh   c                       r;   )rh   c                    ri   rj   rk   rq   rI   r   r   r"      rr   rs   c                 C   rt   ru   rv   ry   r   r   r   rT      r}   r~   rW   r   r   rI   r   rh      r   c                       r;   )Upsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr*   r   r?   rC   rH   rI   r   r   r"      s   

zUpsample.__init__c                 C   s(   t jjj|ddd}| jr| |}|S )Nr0   nearest)Zscale_factorr7   )r   r   rQ   ZinterpolaterE   rG   r%   r   r   r   rT      s   
zUpsample.forwardrW   r   r   rI   r   r      s    r   c                       r;   )r<   c                    r=   r>   rC   rH   rI   r   r   r"      rK   rL   c                 C   rM   rN   rP   rS   r   r   r   rT     rU   rV   rW   r   r   rI   r   r<      rY   c                       s4   e Zd Zddddddd fdd
Zd	d
 Z  ZS )Encoderr   r         r'   TFvanilla)ch_multrc   resamp_with_convdouble_zuse_linear_attn	attn_typec             
      s  t    || _d| _t|| _|| _|	| _|| _t	j
j|| jdddd| _|	}dt| }|| _t
 | _t| jD ]X}t
 }t
 }|||  }|||  }t| jD ]}|t||| j|d |}||v rq|t| qVt
 }||_||_|| jd krt|||_|d }| j| q;t
 | _t||| j|d| j_t|| j_t||| j|d| j_t|| _t	j
j||rd|
 n|
dddd| _ d S )Nr   r*   r   r?   )r   r   r[   r]   rc   r   )!rD   r"   chtemb_chlennum_resolutionsnum_res_blocks
resolutionr   r   r   rF   conv_intuple
in_ch_mult
ModuleListdownrangeappendrZ   rh   Moduleblockattnr<   
downsamplemidblock_1attn_1block_2r   norm_outconv_out)r!   r   out_chr   r   attn_resolutionsrc   r   r   r   
z_channelsr   r   r   Zignore_kwargscurr_resr   i_levelr   r   block_in	block_outi_blockr   rI   r   r   r"     sz   






zEncoder.__init__c                 C   s   d }|  |g}t| jD ]D}t| jD ](}| j| j| |d |}t| j| jdkr7| j| j| |}|| q|| jd krQ|| j| 	|d  q|d }| j
||}| j
|}| j
||}| |}t|}| |}|S )Nr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r!   r   rf   hsr   r   rg   r   r   r   rT   b  s&   

zEncoder.forwardrW   r   r   rI   r   r     s    Nr   c                       s6   e Zd Zdddddddd fdd
Zd	d
 Z  ZS )Decoderr   r'   TFr   )r   rc   r   give_pre_endtanh_outr   r   c             
      s  t    || _d| _t|| _|| _|	| _|| _|| _	|| _
||| jd   }|	d| jd   }d|
||f| _td| jt| j tjj|
|dddd| _t | _t||| j|d| j_t|| j_t||| j|d| j_t | _tt| jD ]R}t }t }|||  }t| jd D ]}|t||| j|d |}||v r|t| qt }||_ ||_!|dkrt"|||_#|d }| j$d| q~t%|| _&tjj||dddd| _'d S )Nr   r   r   z+Working with z of shape {} = {} dimensions.r*   r?   r   )(rD   r"   r   r   r   r   r   r   r   r   r   Zz_shapeprintformatr1   prodr   r   rF   r   r   r   rZ   r   rh   r   r   r   upreversedr   r   r   r   r   upsampleinsertr   r   r   )r!   r   r   r   r   r   rc   r   r   r   r   r   r   r   r   Zignorekwargsr   r   r   r   r   r   r   r   rI   r   r   r"     sz   








zDecoder.__init__c                 C   s   |j | _d }| |}| j||}| j|}| j||}tt| j	D ]7}t| j
d D ]!}| j| j| ||}t| j| jdkrP| j| j| |}q/|dkr]| j| |}q&| jrc|S | |}t|}| |}| jryt|}|S )Nr   r   )r$   Zlast_z_shaper   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   tanh)r!   zrf   rg   r   r   r   r   r   rT     s.   



zDecoder.forwardrW   r   r   rI   r   r   ~  s    Qr   c                       s   e Zd Zdg dddddf fdd	Ze fddZe fdd	Zd
d Zdd Zdd Z	dddZ
dd Zdd Ze dddZdd Z  ZS )r   NimageFc
           
   	      s   t    |	| _|| _tdi || _tdi || _|d s!J tj	
d|d  d| d| _tj	
||d d| _|| _|d urVt|tksJJ | dtd|dd |d ur]|| _|d u| _|d uro| j||d d S d S )	Nr   r   r   r   colorizer*   )ignore_keysr   )rD   r"   learn_logvar	image_keyr   encoderr   decoderr   r   rF   
quant_convpost_quant_conv	embed_dimtyperx   register_bufferr#   monitoruse_emainit_from_ckpt)
r!   Zddconfigr   Z	ckpt_pathr   r   Zcolorize_nlabelsr   Z	ema_decayr   rI   r   r   r"     s0   


zAutoencoderKL.__init__c           
      C   s   t j|ddd }t| }|D ]
}t||| j qdd l}| }|D ]}|ddkr<|	dd }	|| ||	< q&| j
|dd	 td
|  d S )NcpuZmap_location
state_dictr   first_stage_modelzfirst_stage_model.r   TstrictRestored from )r   loadlistkeysr   r$   collectionsOrderedDictfindsplitload_state_dict)
r!   pathr   sdr   keyr   Zsd_newrn   Zk_newr   r   r   r     s   zAutoencoderKL.init_from_ckptc                 C   st   t j|ddd }t| }t |D ]}|D ]}||r(td| ||= qq| j|dd td|  d S )Nr   r   r   z Deleting key {} from state_dict.Fr   r   )	r   r   r   r   r   
startswithr   r   r   )r!   r   r   r   r   rn   ikr   r   r   init_from_ckpt2#  s   
zAutoencoderKL.init_from_ckpt2c                 O   s   | j r
| |  d S d S r   )r   Z	model_ema)r!   argskwargsr   r   r   on_train_batch_end0  s   z AutoencoderKL.on_train_batch_endc                 C   s    |  |}| |}t|}|S r   )r   r   r   )r!   r   rg   Zmoments	posteriorr   r   r   encode4  s   

zAutoencoderKL.encodec                 C   s   |  |}| |}|S r   )r   r   )r!   r   decr   r   r   decode:  s   

zAutoencoderKL.decodeTc                 C   s2   |  |}|r| }n| }| |}||fS r   )r   r&   r7   r   )r!   inputZsample_posteriorr   r   r   r   r   r   rT   ?  s   


zAutoencoderKL.forwardc                 C   s@   || }t |jdkr|d }|ddddjtjd }|S )Nr*   ).Nr   r   r   )Zmemory_format)r   r$   rw   r    r   Zcontiguous_formatfloat)r!   batchrn   r   r   r   r   	get_inputH  s   
zAutoencoderKL.get_inputc                 C   s
   | j jjS r   )r   r   weightr6   r   r   r   get_last_layerP  s   
zAutoencoderKL.get_last_layerc                 K   s  t  }| || j}|| j}|s| |\}}|jd dkr2|jd dks(J | |}| |}| t	|
 |d< ||d< |sG| jr|  3 | |\}	}
|jd dkrg|	jd dksbJ | |	}	| t	|

 |d< |	|d< W d    n1 sw   Y  ||d< |S )Nr   r*   ZsamplesZreconstructionsZsamples_emaZreconstructions_emaZinputs)dictr   r   r    r   r$   to_rgbr   r   Z
randn_liker&   r   Z	ema_scope)r!   r   Zonly_inputsZlog_emar   r2   r   Zxrecr   Zxrec_emaZposterior_emar   r   r   
log_imagesS  s0   





	zAutoencoderKL.log_imagesc              	   C   st   | j dksJ t| ds| dtd|jd dd| tj|| j	d}d||
   | |
   d }|S )NZsegmentationr   r*   r   )r   r0   r(   )r   hasattrr   r   r#   r$   r    FZconv2dr   minmaxr%   r   r   r   r   n  s   
$zAutoencoderKL.to_rgb)T)FF)r8   r9   r:   r"   r   r   r   r   r   r   rT   r   r   r   Zno_gradr   r   rX   r   r   rI   r   r     s(    
	c                       sB   e Zd Zdd fdd
Zdd Zdd Zd	d
 Zdd Z  ZS )IdentityFirstStageFvq_interfacec                   s   || _ t   d S r   )r   rD   r"   )r!   r   r   r   rI   r   r   r"   z  s   zIdentityFirstStage.__init__c                 O      |S r   r   r!   r   r   r   r   r   r   r   ~     zIdentityFirstStage.encodec                 O   r   r   r   r   r   r   r   r     r   zIdentityFirstStage.decodec                 O   s   | j r
|d g dfS |S )N)NNNr   r   r   r   r   quantize  s   zIdentityFirstStage.quantizec                 O   r   r   r   r   r   r   r   rT     r   zIdentityFirstStage.forward)	r8   r9   r:   r"   r   r   r   rT   rX   r   r   rI   r   r   x  s    r   )r	   )numpyr1   r   Ztorch.nnr   Ztorch.nn.functionalrQ   r   __all__r   r   objectr   r   r<   rZ   rh   r   r   r   r   r   r   r   r   r   <module>   s&   
-?,,lv 