diffusers update fix

2023-11-16 13:57:11 +01:00
parent 448078c8ae
commit e63dc20c48
3 changed files with 44 additions and 65 deletions
--- a/diffusers_holder.py
+++ b/diffusers_holder.py
@@ -295,20 +295,55 @@ class DiffusersHolder():
        extra_step_kwargs = self.pipe.prepare_extra_step_kwargs(generator, eta)  # dummy
        # 7. Prepare added time ids & embeddings
-        add_text_embeds = pooled_prompt_embeds
+        # add_text_embeds = pooled_prompt_embeds
-        add_time_ids = self.pipe._get_add_time_ids(
+        # add_time_ids = self.pipe._get_add_time_ids(
-            original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+        #     original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
-        )
+        # )
        # if do_classifier_free_guidance:
        #     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        #     add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        #     add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
        # prompt_embeds = prompt_embeds.to(self.device)
        # add_text_embeds = add_text_embeds.to(self.device)
        # add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
        # 7. Prepare added time ids & embeddings
        add_text_embeds = pooled_prompt_embeds
        if self.pipe.text_encoder_2 is None:
            text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
        else:
            text_encoder_projection_dim = self.pipe.text_encoder_2.config.projection_dim
        add_time_ids = self.pipe._get_add_time_ids(
            original_size,
            crops_coords_top_left,
            target_size,
            dtype=prompt_embeds.dtype,
            text_encoder_projection_dim=text_encoder_projection_dim,
        )
        # if negative_original_size is not None and negative_target_size is not None:
        #     negative_add_time_ids = self.pipe._get_add_time_ids(
        #         negative_original_size,
        #         negative_crops_coords_top_left,
        #         negative_target_size,
        #         dtype=prompt_embeds.dtype,
        #         text_encoder_projection_dim=text_encoder_projection_dim,
        #     )
        # else:
        negative_add_time_ids = add_time_ids
        if do_classifier_free_guidance:
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
-            add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
        prompt_embeds = prompt_embeds.to(self.device)
        add_text_embeds = add_text_embeds.to(self.device)
        add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
        # 8. Denoising loop
        for i, t in enumerate(timesteps):
            # Set the right starting latents
@@ -508,7 +543,7 @@ if __name__ == "__main__":
    #%% 
    pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
    pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-    pipe.to('cuda:1')    # xxx
+    pipe.to('cuda')    # xxx
    #%%
    self = DiffusersHolder(pipe)
--- a/example1_standard.py
+++ b/example1_standard.py
@@ -1,56 +0,0 @@
 # Copyright 2022 Lunar Ring. All rights reserved.
 # Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
 torch.backends.cudnn.benchmark = False
 torch.set_grad_enabled(False)
 import warnings
 warnings.filterwarnings('ignore')
 import warnings
 from latent_blending import LatentBlending
 from diffusers_holder import DiffusersHolder
 from diffusers import DiffusionPipeline
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
 pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
 pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
 pipe.to('cuda')
 dh = DiffusersHolder(pipe)
 # %% Next let's set up all parameters
 depth_strength = 0.55  # Specifies how deep (in terms of diffusion iterations) the first branching happens
 t_compute_max_allowed = 60  # Determines the quality of the transition in terms of compute time you grant it
 num_inference_steps = 50
 size_output = (1024, 768)
 prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
 prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
 fp_movie = 'movie_example1.mp4'
 duration_transition = 12  # In seconds
 # Spawn latent blending
 lb = LatentBlending(dh)
 lb.set_prompt1(prompt1)
 lb.set_prompt2(prompt2)
 lb.set_dimensions(size_output)
 # Run latent blending
 lb.run_transition(
    depth_strength=depth_strength,
    num_inference_steps=num_inference_steps,
    t_compute_max_allowed=t_compute_max_allowed)
 # Save movie
 lb.write_movie_transition(fp_movie, duration_transition)
--- a/example2_multitrans.py
+++ b/example2_multitrans.py
@@ -28,7 +28,7 @@ from huggingface_hub import hf_hub_download
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
 pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
 pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda:1')
+pipe.to('cuda')
 dh = DiffusersHolder(pipe)
 # %% Let's set up the multi transition