o
    *j{7                     @   s  d dl Zd dlmZmZmZmZ d dlZd dl	Z	d dl
m  mZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& e& Z'dgZ(ej)e$j*ej*dG dd de Z+G dd deZ,dS )    N)AnyDictOptionalUnion)DDIMSchedulerStableDiffusionPipeline)Image)
transforms)tqdm)	Pipelines)MutualSelfAttentionControl#register_attention_editor_diffusers)
OutputKeys)	PIPELINES)DiffusersPipeline)	LoadImage)Tasks)
get_loggerImageEditingPipeline)module_namec                       s   e Zd Zedf fdd	Zdeeef deeef fddZdeeef deeef fdd	Zdeeef deeef fd
dZ	  Z
S )r   Nc                    s   t  jd||d| |dtj}t|dttj rdnd| _	t
d tjtj|ddd	}tjtj|d||d
d| j	| _dS )a    MasaCtrl Image Editing Pipeline.

        Examples:

        >>> import cv2
        >>> from modelscope.pipelines import pipeline
        >>> from modelscope.utils.constant import Tasks

        >>> prompts = [
        >>>     "",                           # source prompt
        >>>     "a photo of a running corgi"  # target prompt
        >>> ]
        >>> output_image_path = './result.png'
        >>> img = 'https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/public/ModelScope/test/images/corgi.jpg'
        >>> input = {'img': img, 'prompts': prompts}
        >>>
        >>> pipe = pipeline(
        >>>     Tasks.image_editing,
        >>>     model='damo/cv_masactrl_image-editing')
        >>>
        >>> output = pipe(input)['output_img']
        >>> cv2.imwrite(output_image_path, output)
        >>> print('pipeline: the output image path is {}'.format(output_image_path))
        )modelpreprocessortorch_dtypedevicecudacpuz load image editing pipeline donezstable-diffusion-v1-4	scheduler)Z	subfolderT)r   r   Zuse_safetensorsN )super__init__gettorchZfloat32getattrr   r   Zis_available_deviceloggerinfor   Zfrom_pretrainedospathjoin_MasaCtrlPipelinetopipeline)selfr   r   kwargsr   r   	__class__r   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/cv/image_editing_pipeline.pyr   !   s&   
zImageEditingPipeline.__init__inputreturnc                 C   s\   t |d}tt tdgdgg}||d}t	|d}|
| j|d< |S )Nimg      ?r   )   r5   )r   Zconvert_to_imgr    r	   ZComposeZToTensorZ	Normalize	unsqueezeFZinterpolater*   r#   )r,   r1   r3   Ztest_transformsr   r   r0   
preprocessJ   s   zImageEditingPipeline.preprocessc           	      C   s   t |tstdt| |d}| jj|d|d dddd\}}|t|d	d	d	}d
\}}t	||}t
| j| | j|||dddd	d  }d|iS )Nz/Expected the input to be a dictionary, but got promptsr3   r         @2   T)guidance_scalenum_inference_stepsreturn_intermediates)   
   r<   )latentsr<   output_tensor)
isinstancedict
ValueErrortyper    r+   invertexpandlenr   r   )	r,   r1   r9   Z
start_codelatents_listZSTEPZLAYERZeditoroutputr   r   r0   forwardT   s0   




zImageEditingPipeline.forwardc                 C   sL   |d  dd  ddd d}tj|d d d d d d df iS )NrC   r            uint8r?   )Zsqueezer   permutenumpyastyper   Z
OUTPUT_IMG)r,   r1   Z
output_imgr   r   r0   postprocesso   s   
"z ImageEditingPipeline.postprocess)__name__
__module____qualname__strr   r   r   r8   rM   rU   __classcell__r   r   r.   r0   r      s
    ")"
*c                	   @   s   e Zd Z		ddejdedejfddZ		ddejdedejd	efd
dZe	 dd Z
e	 dddZe	 											dddZe	 				ddejfddZdS ) r)   r   Fmodel_outputtimestepxc                 C   s   |rt d| |}t|| jjj| jj  d}|dkr!| jj| n| jj}| jj| }d| }	||	d |  |d  }
d| d | }|d |
 | }||
fS )zL
        Inverse sampling for DDIM Inversion
        x_t -> x_(t+1)
        z
timestep: i  r   rO   r4   )printminr   confignum_train_timestepsr=   alphas_cumprodfinal_alpha_cumprod)r,   r[   r\   r]   etaverbose	next_stepalpha_prod_tZalpha_prod_t_nextbeta_prod_tpred_x0pred_dirZx_nextr   r   r0   rf   w   s*   

z_MasaCtrlPipeline.next_step        rd   c                 C   s   || j jj| j j  }| j j| }|dkr| j j| n| j j}d| }	||	d |  |d  }
d| d | }|d |
 | }||
fS )za
        predict the sample the next step in the denoise process.
        x_t -> x_(t-1)
        r   rO   r4   )r   r`   ra   r=   rb   rc   )r,   r[   r\   r]   rd   re   Zprev_timesteprg   Zalpha_prod_t_prevrh   ri   rj   Zx_prevr   r   r0   step   s   z_MasaCtrlPipeline.stepc                 C   sj   | j }t|tu r&t|}t| d d }|ddd	d
|}| j|d j}|d }|S )Ng     _@rO   rP   r   Zlatent_distg{P?)_execution_devicerG   r   nparrayr!   Z
from_numpyfloatrR   r6   r*   vaeencodemean)r,   imageDEVICErB   r   r   r0   image2latent   s   
z_MasaCtrlPipeline.image2latentptc                 C   s   d|   }| j|d }|dkr4|d d dd}| dddd d }|d	 tj	}|S |d
krB|d d dd}|S )Ng!ޅ@samplern   rP   r4   r   rO      rN   rw   )
detachrq   decodeclampr   rR   rS   rT   rn   rQ   )r,   rB   return_typert   r   r   r0   latent2image   s   z_MasaCtrlPipeline.latent2imagerO   r5   r;   r:   Nc           "         s   j }t|trt|}nt|tr|dkr|g| } j|dddd} |j|d }t	d|j
 | jj|d |d f}|d u rMtj||d	}n|j
|ks[J d
|j
 d|dkr|
rd|
}nd} j|g| dddd} |j|d }tj||gdd}t	d|j
  j| |g}|g}tt jjddD ]z\}}|d ur|d|  }|d\}}t||g}|dkrt|gd }n|}|	d urt|	tr|d\}}t|	| j|j
 |g} j|||dj}|dkr	|jddd\}}||||   } |||\}} || ||  q j|dd}!|r> fdd|D } fdd|D }|!||fS |!S )NrO   
max_lengthM   rw   paddingr   Zreturn_tensorsr   input text embeddings :   )r   z!The shape of input latent tensor z  should equal to predefined one.      ? dimlatents shape: zDDIM Samplerdescr?   rP   Zencoder_hidden_statesr}   c                       g | ]	} j |d dqS rw   r   r~   .0r3   r,   r   r0   
<listcomp>      z._MasaCtrlPipeline.__call__.<locals>.<listcomp>c                    r   r   r   r   r   r   r0   r      r   )rm   rD   listrJ   rY   	tokenizertext_encoder	input_idsr*   r^   shapeunetZin_channelsr!   Zrandncatr   set_timesteps	enumerater
   	timestepschunkrI   rx   rl   appendr~   )"r,   prompt
batch_sizeheightwidthr=   r<   rd   rB   ZunconditioningZ
neg_promptZref_intermediate_latentsr>   kwdsru   
text_inputtext_embeddingsZlatents_shapeZuc_textunconditional_inputunconditional_embeddingsrK   pred_x0_listitZlatents_ref_Zlatents_curmodel_inputs
noise_prednoise_pred_unconnoise_pred_conri   rt   r   r   r0   __call__   s   









z_MasaCtrlPipeline.__call__rt   c                 K   s  | j }|jd }	t|tr|	dkr|t|ddd}nt|tr*|	dkr*|g|	 }| j|dddd}
| |
j	
|d }td|j | |}|}|d	kro| jd
g|	 dddd}| |j	
|d }tj||gdd}td|j | j| tdt| jj |g}|g}ttt| jjddD ]D\}}|d	krt|gd }n|}| j|||dj}|d	kr|jddd\}}||||   }| |||\}}|| || q|r||fS ||fS )zT
        invert a real image into noise map with determinisc DDIM inversion
        r   rO   r?   r   r   rw   r   r   r   r   r   r   zValid timesteps: zDDIM Inversionr   rP   r   )rm   r   rD   r   rI   rJ   rY   r   r   r   r*   r^   rv   r!   r   r   r   reversedr   r   r
   r   rx   r   rf   r   )r,   rt   r   r=   r<   rd   r>   r   ru   r   r   r   rB   Zstart_latentsr   r   rK   r   r   r   r   r   r   r   ri   r   r   r0   rH   '  sz   







z_MasaCtrlPipeline.invert)r   F)rk   F)rw   )rO   r5   r5   r;   r:   rk   NNNNF)r;   r:   rk   F)rV   rW   rX   r!   ZFloatTensorintrf   rp   rl   Zno_gradrv   r~   r   ZTensorrH   r   r   r   r0   r)   u   s\    
 

er)   )-Zos.pathr&   typingr   r   r   r   rS   rn   r!   Ztorch.nn.functionalnnZ
functionalr7   Z	diffusersr   r   ZPILr   Ztorchvisionr	   r
   Zmodelscope.metainfor   Z"modelscope.models.cv.image_editingr   r   Zmodelscope.outputsr   Zmodelscope.pipelines.builderr   ZEmodelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeliner   Zmodelscope.preprocessorsr   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   r$   __all__Zregister_moduleZimage_editingr   r)   r   r   r   r0   <module>   s0   V