o
    *Îjo8  ã                   @   sT  d dl mZ d dlmZmZ d dlZd dlZd dl	Z	d dl
mZ d dlm  mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d d	lmZ d d
lm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) e)ƒ Z*dgZ+				ddd„Z,G dd„ de-ƒZ.G dd„ dej/ƒZ0ej1e%j2ej3dG dd„ deƒƒZ4dS )é    N)ÚAnyÚDict)ÚModels)ÚModel)ÚMODELS)ÚGaussianDiffusionÚbeta_schedule)Ú
BertConfigÚ	BertModel)ÚFullTokenizer)ÚDiffusionGenerator)ÚSuperResUNet256)ÚSuperResUNet1024)Ú	ModelFileÚTasks)Úcreate_device)Ú
get_loggerÚ DiffusionForTextToImageSynthesiséè  Úfixed_smallc                 C   s   t | |||ƒ}t||d}|S )N)Úvar_type)r   r   )ZscheduleZnum_timestepsZ	init_betaZ	last_betar   ZbetasÚ	diffusion© r   ún/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/multi_modal/diffusion/model.pyÚmake_diffusion"   s   r   c                   @   s   e Zd Zddd„Zdd„ ZdS )Ú	Tokenizeré@   c                 C   s   || _ || _t|dd| _d S )NT)Ú
vocab_fileZdo_lower_case)r   Úseq_lenr   Ú	tokenizer)Úselfr   r   r   r   r   Ú__init__.   s
   ÿzTokenizer.__init__c                 C   sö   | j  |¡}dg|d | jd …  dg }| j  |¡}dgt|ƒ }dgt|ƒ }|dg| jt|ƒ  7 }|dg| jt|ƒ  7 }|dg| jt|ƒ  7 }t|ƒt|ƒ  krdt|ƒ  krd| jksgJ ‚ J ‚t |¡}t |¡}t |¡}|||fS )Nz[CLS]é   z[SEP]é   r   )r   Útokenizer   Zconvert_tokens_to_idsÚlenÚtorchZ
LongTensor)r    ÚtextÚtokensÚ	input_idsZ
input_maskZsegment_idsr   r   r   Ú__call__4   s"   
ÿÿ



zTokenizer.__call__N)r   )Ú__name__Ú
__module__Ú__qualname__r!   r*   r   r   r   r   r   ,   s    
r   c                       s$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚDiffusionModelc                    sŠ   t t| ƒ ¡  t td |¡dd¡}|d }tt 	|¡ƒ| _
|d }tdi |¤Ž| _|d }tdi |¤Ž| _|d }tdi |¤Ž| _d S )	Nz{}/model_config.jsonúutf-8©ÚencodingÚtext_configÚgenerator_configÚupsampler_256_configÚupsampler_1024_configr   )Úsuperr.   r!   ÚjsonÚloadÚopenÚformatr
   r	   Ú	from_dictÚtext_encoderr   Úunet_generatorr   Úunet_upsampler_256r   Úunet_upsampler_1024)r    Ú	model_dirZmodel_configr2   r3   r4   r5   ©Ú	__class__r   r   r!   L   s   ÿzDiffusionModel.__init__c           	   	   C   s\   | j |||d\}}|d }|  |||||¡}|  |||t |¡|||¡}|  |t|¡}|S )N©r)   Útoken_type_idsÚattention_maskéÿÿÿÿ)r<   r=   r>   r&   Ú
zeros_liker?   Út)	r    ÚnoiseÚ	timestepsr)   rD   rE   ÚcontextÚyÚxr   r   r   Úforwardb   s   
ý
þzDiffusionModel.forward)r+   r,   r-   r!   rN   Ú__classcell__r   r   rA   r   r.   J   s    r.   )Úmodule_namec                       sŒ   e Zd Zd‡ fdd„	Zdeeef deeef fdd„Zdeeef deeef fd	d
„Ze	 
¡ deeef deeef fdd„ƒZ‡  ZS )r   Úgpuc                    s  t j ¡ rdnd}tƒ jd||dœ|¤Ž t|d}t  t |t	j
¡d¡}| |¡ | ¡  ¡  t|ƒ| _| | j¡ |j| _|j| _|j| _|j| _|› dt	j› }t|dd| _t td |¡d	d
¡}tdi |d ¤Ž| _tdi |d ¤Ž| _tdi |d ¤Ž| _d S )NrQ   Úcpu)r@   Údevice)r@   ú/r   )r   r   z{}/diffusion_config.jsonr/   r0   r3   r4   r5   r   )r&   ÚcudaZis_availabler6   r!   r.   r8   ÚospÚjoinr   ZTORCH_MODEL_BIN_FILEZload_state_dictÚevalÚtor   rS   r<   r=   r>   r?   Z
VOCAB_FILEr   r   r7   r9   r:   r   Údiffusion_generatorÚdiffusion_upsampler_256Údiffusion_upsampler_1024)r    r@   rS   ÚkwargsZdiffusion_modelZpretrained_paramsZ
vocab_pathZdiffusion_paramsrA   r   r   r!   u   s>   
ÿ

þÿÿÿÿz)DiffusionForTextToImageSynthesis.__init__ÚinputÚreturnc           	   	      s
  t ‡ fdd„dD ƒƒstdˆ  ¡ › ƒ‚|  ˆ d ¡\}}}| | j¡ d¡}| | j¡ d¡}| | j¡ d¡}| j|||d\}}|d }|  t	t
|||¡}|  t	t
|t t
¡|||¡}|  |t|¡}| dd	¡ d	¡ d
¡}| d¡ d	dd¡ ¡  ¡  tj¡}|S )Nc                    s   g | ]}|ˆ v ‘qS r   r   )Ú.0Úkey©r^   r   r   Ú
<listcomp>˜   s    z<DiffusionForTextToImageSynthesis.forward.<locals>.<listcomp>)r'   rI   rJ   z@input should contains "text", "noise", and "timesteps", but got r'   r   rC   rF   r#   ç     à_@r"   )ÚallÚ
ValueErrorÚkeysr   rY   rS   Ú	unsqueezer<   r=   rI   rJ   r>   r&   rG   r?   rH   ÚclampÚaddÚmulÚsqueezeÚpermuterR   ÚnumpyÚastypeÚnpÚuint8)	r    r^   r)   rD   rE   rK   rL   rM   Úimgr   rb   r   rN   —   s2   ÿ
ÿ
ý
þ$z(DiffusionForTextToImageSynthesis.forwardÚinputsc                 C   s   |S )Nr   )r    rs   r   r   r   Úpostprocess¯   s   z,DiffusionForTextToImageSynthesis.postprocessc           	      C   s  d|vrt d| ¡ › ƒ‚|  |d ¡\}}}| | j¡ d¡}| | j¡ d¡}| | j¡ d¡}| j|||d\}}|d }| dd¡}|dkr+| jj	t
 dd	d
d
¡ | j¡| j|||dœt
 |¡t
 |¡|dœg| dd¡| dd¡| dd¡d	dddd
}| dd¡s‘tj|dddd}| jj	t
 |¡| j|t
 d¡ | j¡|||dœ|t
 d¡ | j¡t
 |¡t
 |¡t
 |¡dœg| dd¡| dd¡| dd¡d	dddd
}| dd¡såtj|dddd}| jj	t
 |¡| j|t
 d¡ | j¡|||dœ|t
 d¡ | j¡t
 |¡t
 |¡t
 |¡dœg| dd¡| dd¡| dd¡d	ddd d
}nÆ|dkrí| jjt
 dd	d
d
¡ | j¡| j|||dœt
 |¡t
 |¡|dœg| dd¡| dd¡| d d!¡| d"d#¡d$}| dd¡sxtj|dddd}| jjt
 |¡| j|t
 d¡ | j¡|||dœ|t
 d¡ | j¡t
 |¡t
 |¡t
 |¡dœg| dd¡| dd¡| d%d&¡| d'd#¡d$}| dd¡sÎtj|dddd}| jjt
 |¡| jd(|i| d)d¡| d*d¡| d+d#¡d,}nt d-ƒ‚| dd¡ d¡ d.¡ d¡ dd/d¡ ¡  ¡  tj ¡}|S )0Nr'   z%input should contain "text", but got r   rC   rF   Úsolverz
dpm-solverr#   é   r   )rL   rK   ÚmaskZgenerator_percentileg×£p=
×ï?Zgenerator_guide_scaleg      @Údpm_solver_timestepsé   ZlogSNRZ
singlestepgO@aÃÓï?)
rI   ÚmodelÚmodel_kwargsÚ
percentileÚguide_scalerx   ÚorderZ	skip_typeÚmethodZt_startÚdebugFg      @Zbilinear)Zscale_factorÚmodeZalign_corners)ÚlxÚltrL   rK   rw   Zupsampler_256_percentileZupsampler_256_guide_scaleé
   ZddimZgenerator_ddim_timestepséú   Zgenerator_ddim_etag        )rI   rz   r{   r|   r}   Úddim_timestepsÚetaZupsampler_256_ddim_timestepsé2   Zupsampler_256_ddim_etaÚconcatZupsampler_1024_percentileZupsampler_1024_ddim_timestepsZupsampler_1024_ddim_eta)rI   rz   r{   r|   r†   r‡   z6currently only supports "ddim" and "dpm-solve" solversrd   r"   )!rf   rg   r   rY   rS   rh   r<   ÚgetrZ   Zdpm_solver_sample_loopr&   Zrandnr=   rG   ÚFZinterpolater[   Z
randn_liker>   Zzerosr\   Zddim_sample_loopr?   ri   rj   rk   rl   rm   rR   rn   ro   rp   rq   )	r    r^   r)   rD   rE   rK   rL   ru   rr   r   r   r   Úgenerate²   sT  ÿ
ÿ
ý
ýýü
	

îüûûú


êüûûú


ê
ýýü
	


ñüûûú



íü


úÿÿÿz)DiffusionForTextToImageSynthesis.generate)rQ   )r+   r,   r-   r!   r   Ústrr   rN   rt   r&   Zno_gradrŒ   rO   r   r   rA   r   r   q   s    """,)r   NNr   )5Zos.pathÚpathrV   Útypingr   r   r7   rn   rp   r&   Ztorch.nnÚnnZtorch.nn.functionalZ
functionalr‹   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.builderr   Z1modelscope.models.multi_modal.diffusion.diffusionr   r   Z2modelscope.models.multi_modal.diffusion.structbertr	   r
   Z1modelscope.models.multi_modal.diffusion.tokenizerr   Z6modelscope.models.multi_modal.diffusion.unet_generatorr   Z:modelscope.models.multi_modal.diffusion.unet_upsampler_256r   Z;modelscope.models.multi_modal.diffusion.unet_upsampler_1024r   Zmodelscope.utils.constantr   r   Zmodelscope.utils.devicer   Zmodelscope.utils.loggerr   ÚloggerÚ__all__r   Úobjectr   ÚModuler.   Zregister_moduleZtext_to_image_synthesisr   r   r   r   r   r   Ú<module>   s@   
ü
'ÿ