From 6189391fbc19aa4caeb598f5f07d7085cb5977ac Mon Sep 17 00:00:00 2001
From: Johannes Stelzer
Date: Sat, 6 Jan 2024 18:16:36 +0100
Subject: [PATCH] ok version in latent_blending.py

---
 diffusers_holder.py  |  4 +++
 example1_standard.py | 14 ++++++----
 latent_blending.py   | 66 ++++++++++++++++++++++++++++++++++++++++++--
 movie_util.py        |  1 -
 4 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/diffusers_holder.py b/diffusers_holder.py
index b848d4c..e0d81de 100644
--- a/diffusers_holder.py
+++ b/diffusers_holder.py
@@ -773,6 +773,10 @@ if __name__ == "__main__":
     self.run_diffusion_sd_xl(text_embeddings_mix, latents_start_mixed, idx_start=idx_start, return_image=True)
 
+    #%%
+    fract = 0.8
+    latentsmix = interpolate_spherical(latents1[-1], latents2[-1], fract)
+    self.latent2image(latentsmix)

diff --git a/example1_standard.py b/example1_standard.py
index 125ce61..eac5bfb 100644
--- a/example1_standard.py
+++ b/example1_standard.py
@@ -18,21 +18,24 @@ import warnings
 from latent_blending import LatentBlending
 from diffusers_holder import DiffusersHolder
 from diffusers import DiffusionPipeline
+
+from diffusers import AutoPipelineForText2Image
 warnings.filterwarnings('ignore')
 torch.set_grad_enabled(False)
 torch.backends.cudnn.benchmark = False
 
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
-pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
-pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda')
+pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
+pipe.to("cuda")
+
 dh = DiffusersHolder(pipe)
 
 # %% Next let's set up all parameters
 depth_strength = 0.55  # Specifies how deep (in terms of diffusion iterations the first branching happens)
-t_compute_max_allowed = 60  # Determines the quality of the transition in terms of compute time you grant it
-num_inference_steps = 30
+t_compute_max_allowed = 10  # Determines the quality of the transition in terms of compute time you grant it
+num_inference_steps = 4
 size_output = (1024, 1024)
+
 prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
 prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
 negative_prompt = "blurry, ugly, pale"  # Optional
@@ -46,6 +49,7 @@ lb.set_prompt1(prompt1)
 lb.set_prompt2(prompt2)
 lb.set_dimensions(size_output)
 lb.set_negative_prompt(negative_prompt)
+lb.set_guidance_scale(0)
 
 # Run latent blending
 lb.run_transition(

diff --git a/latent_blending.py b/latent_blending.py
index 21846e1..63a7746 100644
--- a/latent_blending.py
+++ b/latent_blending.py
@@ -797,10 +797,10 @@ class LatentBlending():
             Used to determine the optimal point of insertion to create smooth transitions.
             High values indicate low similarity.
         """
-        tensorA = torch.from_numpy(imgA).float().cuda(self.device)
+        tensorA = torch.from_numpy(np.asarray(imgA)).float().cuda(self.device)
         tensorA = 2 * tensorA / 255.0 - 1
         tensorA = tensorA.permute([2, 0, 1]).unsqueeze(0)
-        tensorB = torch.from_numpy(imgB).float().cuda(self.device)
+        tensorB = torch.from_numpy(np.asarray(imgB)).float().cuda(self.device)
         tensorB = 2 * tensorB / 255.0 - 1
         tensorB = tensorB.permute([2, 0, 1]).unsqueeze(0)
         lploss = self.lpips(tensorA, tensorB)
@@ -831,3 +831,65 @@ class LatentBlending():
             b_parent1 = tmp
 
         return b_parent1, b_parent2
+
+
+if __name__ == "__main__":
+
+    # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
+    from diffusers_holder import DiffusersHolder
+    from diffusers import DiffusionPipeline
+    from diffusers import AutoencoderTiny
+    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
+    pipe.to("cuda")
+    # pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
+    # pipe.vae = pipe.vae.cuda()
+
+    dh = DiffusersHolder(pipe)
+    # %% Next let's set up all parameters
+    depth_strength = 0.5  # Specifies how deep (in terms of diffusion iterations the first branching happens)
+    t_compute_max_allowed = 3  # Determines the quality of the transition in terms of compute time you grant it
+    num_inference_steps = 4
+    size_output = (512, 512)
+
+
+    prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
+    prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
+    negative_prompt = "blurry, ugly, pale"  # Optional
+
+    fp_movie = 'movie_example1.mp4'
+    duration_transition = 12  # In seconds
+
+    # Spawn latent blending
+    lb = LatentBlending(dh)
+    lb.set_prompt1(prompt1)
+    lb.set_prompt2(prompt2)
+    lb.set_dimensions(size_output)
+    lb.set_negative_prompt(negative_prompt)
+    lb.set_guidance_scale(0)
+
+    lb.branch1_crossfeed_power = 0.3
+    lb.branch1_crossfeed_range = 0.6
+    lb.branch1_crossfeed_decay = 0.99
+
+    lb.parental_crossfeed_power = 0.8
+    lb.parental_crossfeed_power_decay = 1.0
+    lb.parental_crossfeed_range = 1.0
+
+    # Run latent blending
+    lb.run_transition(
+        depth_strength=depth_strength,
+        num_inference_steps=num_inference_steps,
+        t_compute_max_allowed=t_compute_max_allowed)
+
+
+    # Save movie
+    lb.write_movie_transition(fp_movie, duration_transition)
+
+    #%%
+
+    """
+    checkout sizes
+    checkout good tree for num inference steps
+    checkout that good nmb inference step given
+
+    """
\ No newline at end of file

diff --git a/movie_util.py b/movie_util.py
index e6e0c6a..eb7e157 100644
--- a/movie_util.py
+++ b/movie_util.py
@@ -262,7 +262,6 @@ def add_subtitles_to_video(
 
 
 
-
 class MovieReader():
     r"""
     Class to read in a movie.
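
Note for reviewers (not part of the patch): the new cell added to diffusers_holder.py blends two denoised latent stacks with interpolate_spherical before decoding. That helper lives elsewhere in latent_blending.py; the sketch below is a hypothetical, self-contained version of spherical linear interpolation (slerp) over latent tensors, written only to illustrate what the patched call is doing. The function name and argument order mirror the call in the hunk, but the body is an assumption, not the repository's implementation.

# Hypothetical sketch of spherical latent interpolation (slerp); assumed, not
# copied from latent_blending.py.
import torch


def interpolate_spherical(p0: torch.Tensor, p1: torch.Tensor, fract: float) -> torch.Tensor:
    # Measure the angle between the two latents, treated as flat vectors.
    eps = 1e-8
    a = p0.flatten().float()
    b = p1.flatten().float()
    dot = torch.clamp(torch.dot(a / (a.norm() + eps), b / (b.norm() + eps)), -1.0, 1.0)
    omega = torch.arccos(dot)
    sin_omega = torch.sin(omega) + eps
    # Weight each endpoint so the blend follows the great circle between them,
    # which preserves the latent norm better than a plain linear mix.
    w0 = torch.sin((1.0 - fract) * omega) / sin_omega
    w1 = torch.sin(fract * omega) / sin_omega
    return w0 * p0 + w1 * p1


# Usage analogous to the patched cell (fract=0.8 weights the blend toward the second latent):
# latentsmix = interpolate_spherical(latents1[-1], latents2[-1], 0.8)
# image = self.latent2image(latentsmix)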