diffusers update fix

This commit is contained in:
Johannes Stelzer 2023-11-16 13:57:11 +01:00
parent 448078c8ae
commit e63dc20c48
3 changed files with 44 additions and 65 deletions

View File

@ -295,20 +295,55 @@ class DiffusersHolder():
extra_step_kwargs = self.pipe.prepare_extra_step_kwargs(generator, eta) # dummy
# 7. Prepare added time ids & embeddings
add_text_embeds = pooled_prompt_embeds
add_time_ids = self.pipe._get_add_time_ids(
original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
)
# add_text_embeds = pooled_prompt_embeds
# add_time_ids = self.pipe._get_add_time_ids(
# original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
# )
# if do_classifier_free_guidance:
# prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
# add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
# add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
# prompt_embeds = prompt_embeds.to(self.device)
# add_text_embeds = add_text_embeds.to(self.device)
# add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
# 7. Prepare added time ids & embeddings
add_text_embeds = pooled_prompt_embeds
if self.pipe.text_encoder_2 is None:
text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
else:
text_encoder_projection_dim = self.pipe.text_encoder_2.config.projection_dim
add_time_ids = self.pipe._get_add_time_ids(
original_size,
crops_coords_top_left,
target_size,
dtype=prompt_embeds.dtype,
text_encoder_projection_dim=text_encoder_projection_dim,
)
# if negative_original_size is not None and negative_target_size is not None:
# negative_add_time_ids = self.pipe._get_add_time_ids(
# negative_original_size,
# negative_crops_coords_top_left,
# negative_target_size,
# dtype=prompt_embeds.dtype,
# text_encoder_projection_dim=text_encoder_projection_dim,
# )
# else:
negative_add_time_ids = add_time_ids
if do_classifier_free_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
prompt_embeds = prompt_embeds.to(self.device)
add_text_embeds = add_text_embeds.to(self.device)
add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
# 8. Denoising loop
for i, t in enumerate(timesteps):
# Set the right starting latents
@ -508,7 +543,7 @@ if __name__ == "__main__":
#%%
pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
pipe.to('cuda:1') # xxx
pipe.to('cuda') # xxx
#%%
self = DiffusersHolder(pipe)

View File

@ -1,56 +0,0 @@
# Copyright 2022 Lunar Ring. All rights reserved.
# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
torch.backends.cudnn.benchmark = False
torch.set_grad_enabled(False)
import warnings
warnings.filterwarnings('ignore')
import warnings
from latent_blending import LatentBlending
from diffusers_holder import DiffusersHolder
from diffusers import DiffusionPipeline
# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
pipe.to('cuda')
dh = DiffusersHolder(pipe)
# %% Next let's set up all parameters
depth_strength = 0.55 # Specifies how deep (in terms of diffusion iterations the first branching happens)
t_compute_max_allowed = 60 # Determines the quality of the transition in terms of compute time you grant it
num_inference_steps = 50
size_output = (1024, 768)
prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
fp_movie = 'movie_example1.mp4'
duration_transition = 12 # In seconds
# Spawn latent blending
lb = LatentBlending(dh)
lb.set_prompt1(prompt1)
lb.set_prompt2(prompt2)
lb.set_dimensions(size_output)
# Run latent blending
lb.run_transition(
depth_strength=depth_strength,
num_inference_steps=num_inference_steps,
t_compute_max_allowed=t_compute_max_allowed)
# Save movie
lb.write_movie_transition(fp_movie, duration_transition)

View File

@ -28,7 +28,7 @@ from huggingface_hub import hf_hub_download
# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
pipe.to('cuda:1')
pipe.to('cuda')
dh = DiffusersHolder(pipe)
# %% Let's setup the multi transition