From e63dc20c4833d4a45d2d086f637a894cac83bd34 Mon Sep 17 00:00:00 2001
From: Johannes Stelzer
Date: Thu, 16 Nov 2023 13:57:11 +0100
Subject: [PATCH] diffusers update fix

---
 diffusers_holder.py    | 51 ++++++++++++++++++++++++++++++++------
 example1_standard.py   | 56 ------------------------------------------
 example2_multitrans.py |  2 +-
 3 files changed, 44 insertions(+), 65 deletions(-)

diff --git a/diffusers_holder.py b/diffusers_holder.py
index 8bc2599..e6dee73 100644
--- a/diffusers_holder.py
+++ b/diffusers_holder.py
@@ -294,20 +294,55 @@ class DiffusersHolder():
         # 6. Prepare extra step kwargs. usedummy generator
         extra_step_kwargs = self.pipe.prepare_extra_step_kwargs(generator, eta)  # dummy
 
+        # 7. Prepare added time ids & embeddings
+        # add_text_embeds = pooled_prompt_embeds
+        # add_time_ids = self.pipe._get_add_time_ids(
+        #     original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+        # )
+
+        # if do_classifier_free_guidance:
+        #     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+        #     add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+        #     add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+        # prompt_embeds = prompt_embeds.to(self.device)
+        # add_text_embeds = add_text_embeds.to(self.device)
+        # add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
+
         # 7. Prepare added time ids & embeddings
         add_text_embeds = pooled_prompt_embeds
+        if self.pipe.text_encoder_2 is None:
+            text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+        else:
+            text_encoder_projection_dim = self.pipe.text_encoder_2.config.projection_dim
+
         add_time_ids = self.pipe._get_add_time_ids(
-            original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+            original_size,
+            crops_coords_top_left,
+            target_size,
+            dtype=prompt_embeds.dtype,
+            text_encoder_projection_dim=text_encoder_projection_dim,
         )
-
-        if do_classifier_free_guidance:
-            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
-            add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
-            add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
-
+        # if negative_original_size is not None and negative_target_size is not None:
+        #     negative_add_time_ids = self.pipe._get_add_time_ids(
+        #         negative_original_size,
+        #         negative_crops_coords_top_left,
+        #         negative_target_size,
+        #         dtype=prompt_embeds.dtype,
+        #         text_encoder_projection_dim=text_encoder_projection_dim,
+        #     )
+        # else:
+        negative_add_time_ids = add_time_ids
+
+        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
         prompt_embeds = prompt_embeds.to(self.device)
         add_text_embeds = add_text_embeds.to(self.device)
         add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
+
+
 
         # 8. Denoising loop
         for i, t in enumerate(timesteps):
@@ -508,7 +543,7 @@ if __name__ == "__main__":
 #%%
     pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
     pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-    pipe.to('cuda:1') # xxx
+    pipe.to('cuda') # xxx
 
 #%%
     self = DiffusersHolder(pipe)
diff --git a/example1_standard.py b/example1_standard.py
index a258cc1..e69de29 100644
--- a/example1_standard.py
+++ b/example1_standard.py
@@ -1,56 +0,0 @@
-# Copyright 2022 Lunar Ring. All rights reserved.
-# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-torch.backends.cudnn.benchmark = False
-torch.set_grad_enabled(False)
-import warnings
-warnings.filterwarnings('ignore')
-import warnings
-from latent_blending import LatentBlending
-from diffusers_holder import DiffusersHolder
-from diffusers import DiffusionPipeline
-
-# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
-pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
-pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda')
-dh = DiffusersHolder(pipe)
-# %% Next let's set up all parameters
-depth_strength = 0.55 # Specifies how deep (in terms of diffusion iterations the first branching happens)
-t_compute_max_allowed = 60 # Determines the quality of the transition in terms of compute time you grant it
-num_inference_steps = 50
-size_output = (1024, 768)
-
-prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
-prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
-
-fp_movie = 'movie_example1.mp4'
-duration_transition = 12 # In seconds
-
-# Spawn latent blending
-lb = LatentBlending(dh)
-lb.set_prompt1(prompt1)
-lb.set_prompt2(prompt2)
-lb.set_dimensions(size_output)
-
-# Run latent blending
-lb.run_transition(
-    depth_strength=depth_strength,
-    num_inference_steps=num_inference_steps,
-    t_compute_max_allowed=t_compute_max_allowed)
-
-# Save movie
-lb.write_movie_transition(fp_movie, duration_transition)
diff --git a/example2_multitrans.py b/example2_multitrans.py
index ef492f9..ce985cc 100644
--- a/example2_multitrans.py
+++ b/example2_multitrans.py
@@ -28,7 +28,7 @@ from huggingface_hub import hf_hub_download
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
 pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
 pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda:1')
+pipe.to('cuda')
 dh = DiffusersHolder(pipe)
 
 # %% Let's setup the multi transition
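
Note (not part of the patch): the breaking change this commit adapts to is that newer
diffusers releases require a text_encoder_projection_dim argument when building the
SDXL micro-conditioning ids via _get_add_time_ids(), and the negative time ids are
concatenated explicitly. Below is a minimal sketch of the adapted call in isolation,
assuming a recent diffusers release with the changed signature; the encode_prompt
usage and all sizes are illustrative, not taken from the repository.

    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    pipe.to('cuda')

    # SDXL's encode_prompt returns (prompt_embeds, negative_prompt_embeds,
    # pooled_prompt_embeds, negative_pooled_prompt_embeds).
    prompt_embeds, neg_embeds, pooled, neg_pooled = pipe.encode_prompt(
        prompt="surreal alien landscape", do_classifier_free_guidance=True
    )

    # The projection dim must now be passed explicitly; it comes from the second
    # text encoder when present, otherwise from the pooled embedding width.
    if pipe.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled.shape[-1])
    else:
        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim

    add_time_ids = pipe._get_add_time_ids(
        (1024, 1024),  # original_size
        (0, 0),        # crops_coords_top_left
        (1024, 1024),  # target_size
        dtype=prompt_embeds.dtype,
        text_encoder_projection_dim=text_encoder_projection_dim,
    )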