Fix DiffusersHolder for updated diffusers API: pass text_encoder_projection_dim to _get_add_time_ids

2023-11-16 13:57:11 +01:00
parent 448078c8ae
commit e63dc20c48
3 changed files with 44 additions and 65 deletions
--- a/diffusers_holder.py
+++ b/diffusers_holder.py
@@ -295,20 +295,55 @@ class DiffusersHolder():
        extra_step_kwargs = self.pipe.prepare_extra_step_kwargs(generator, eta)  # dummy

        # 7. Prepare added time ids & embeddings
-        add_text_embeds = pooled_prompt_embeds
-        add_time_ids = self.pipe._get_add_time_ids(
-            original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
-        )
+        # add_text_embeds = pooled_prompt_embeds
+        # add_time_ids = self.pipe._get_add_time_ids(
+        #     original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+        # )
+
+        # if do_classifier_free_guidance:
+        #     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+        #     add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+        #     add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+        # prompt_embeds = prompt_embeds.to(self.device)
+        # add_text_embeds = add_text_embeds.to(self.device)
+        # add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
+        
+        # 7. Prepare added time ids & embeddings
+        add_text_embeds = pooled_prompt_embeds
+        if self.pipe.text_encoder_2 is None:
+            text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+        else:
+            text_encoder_projection_dim = self.pipe.text_encoder_2.config.projection_dim
+        
+        add_time_ids = self.pipe._get_add_time_ids(
+            original_size,
+            crops_coords_top_left,
+            target_size,
+            dtype=prompt_embeds.dtype,
+            text_encoder_projection_dim=text_encoder_projection_dim,
+        )
+        # if negative_original_size is not None and negative_target_size is not None:
+        #     negative_add_time_ids = self.pipe._get_add_time_ids(
+        #         negative_original_size,
+        #         negative_crops_coords_top_left,
+        #         negative_target_size,
+        #         dtype=prompt_embeds.dtype,
+        #         text_encoder_projection_dim=text_encoder_projection_dim,
+        #     )
+        # else:
+        negative_add_time_ids = add_time_ids
        
-        if do_classifier_free_guidance:
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
-            add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
        
        prompt_embeds = prompt_embeds.to(self.device)
        add_text_embeds = add_text_embeds.to(self.device)
        add_time_ids = add_time_ids.to(self.device).repeat(batch_size * num_images_per_prompt, 1)
        
+        
+
        # 8. Denoising loop
        for i, t in enumerate(timesteps):
            # Set the right starting latents
@@ -508,7 +543,7 @@ if __name__ == "__main__":
    #%% 
    pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
    pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-    pipe.to('cuda:1')    # xxx
+    pipe.to('cuda')    # xxx
    
    #%%
    self = DiffusersHolder(pipe)
--- a/example1_standard.py
+++ b/example1_standard.py
@@ -1,56 +0,0 @@
-# Copyright 2022 Lunar Ring. All rights reserved.
-# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-torch.backends.cudnn.benchmark = False
-torch.set_grad_enabled(False)
-import warnings
-warnings.filterwarnings('ignore')
-import warnings
-from latent_blending import LatentBlending
-from diffusers_holder import DiffusersHolder
-from diffusers import DiffusionPipeline
-
-# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
-pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
-pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda')
-dh = DiffusersHolder(pipe)
-# %% Next let's set up all parameters
-depth_strength = 0.55  # Specifies how deep (in terms of diffusion iterations the first branching happens)
-t_compute_max_allowed = 60  # Determines the quality of the transition in terms of compute time you grant it
-num_inference_steps = 50
-size_output = (1024, 768)
-
-prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
-prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
-
-fp_movie = 'movie_example1.mp4'
-duration_transition = 12  # In seconds
-
-# Spawn latent blending
-lb = LatentBlending(dh)
-lb.set_prompt1(prompt1)
-lb.set_prompt2(prompt2)
-lb.set_dimensions(size_output)
-
-# Run latent blending
-lb.run_transition(
-    depth_strength=depth_strength,
-    num_inference_steps=num_inference_steps,
-    t_compute_max_allowed=t_compute_max_allowed)
-
-# Save movie
-lb.write_movie_transition(fp_movie, duration_transition)
--- a/example2_multitrans.py
+++ b/example2_multitrans.py
@@ -28,7 +28,7 @@ from huggingface_hub import hf_hub_download
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
 pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
 pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda:1')
+pipe.to('cuda')
 dh = DiffusersHolder(pipe)

 # %% Let's setup the multi transition