From 4ce995a023a103843a5e6452034dfda1aeceb751 Mon Sep 17 00:00:00 2001
From: Johannes Stelzer <stelzer@lunar-ring.ai>
Date: Mon, 20 Feb 2023 11:26:04 +0100
Subject: [PATCH] cleanup

---
 latent_blending.py         | 46 +++++++++++++++++++++++++++++---------
 stable_diffusion_holder.py | 14 +++---------
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/latent_blending.py b/latent_blending.py
index 3537d4d..4352212 100644
--- a/latent_blending.py
+++ b/latent_blending.py
@@ -101,7 +101,6 @@ class LatentBlending():
         self.text_embedding2 = None
         self.image1_lowres = None
         self.image2_lowres = None
-        self.stop_diffusion = False
         self.negative_prompt = None
         self.num_inference_steps = self.sdh.num_inference_steps
         self.noise_level_upscaling = 20
@@ -117,7 +116,6 @@ class LatentBlending():
         self.parental_crossfeed_range = 0.8
         self.parental_crossfeed_power_decay = 0.8    
         
-        self.branch1_insertion_completed = False
         self.set_guidance_scale(guidance_scale)
         self.init_mode()
         self.multi_transition_img_first = None
@@ -355,7 +353,7 @@ class LatentBlending():
         if return_image:
             return self.sdh.latent2image(list_latents2[-1])
         else:
-            return list_latents2
+            return list_latents2            
 
 
     def compute_latents_mix(self, fract_mixing, b_parent1, b_parent2, idx_injection):    
@@ -454,7 +452,6 @@ class LatentBlending():
             
             if stop_criterion == "t_compute_max_allowed" and t_compute > t_compute_max_allowed:
                 stop_criterion_reached = True
-                # FIXME: also undersample here... but how... maybe drop them iteratively?
             elif stop_criterion == "nmb_max_branches" and np.sum(list_nmb_stems) >= nmb_max_branches:
                 stop_criterion_reached = True
                 if is_first_iteration:
@@ -527,16 +524,21 @@ class LatentBlending():
         self.tree_idx_injection.insert(b_parent1+1, idx_injection)
             
     
-    def get_spatial_mask_template(self):    
+    def get_spatial_mask_template(self):   
+        r"""
+        Experimental helper function to get a spatial mask template. 
+        """
         shape_latents = [self.sdh.C, self.sdh.height // self.sdh.f, self.sdh.width // self.sdh.f]
         C, H, W = shape_latents
         return np.ones((H, W))
     
     def set_spatial_mask(self, img_mask):
         r"""
-        Helper function to #FIXME
+        Experimental helper function to set a spatial mask. 
+        The mask forces latents to be overwritten.
         Args:
-            seed: int
+            img_mask: 
+                mask image [0,1]. You can get a template using get_spatial_mask_template
             
         """
         
@@ -591,7 +593,8 @@ class LatentBlending():
         Depending on the mode, the correct one will be executed.
         
         Args:
-            list_conditionings: List of all conditionings for the diffusion model.
+            list_conditionings: list
+                List of all conditionings for the diffusion model.
             latents_start: torch.FloatTensor 
                 Latents that are used for injection
             idx_start: int
@@ -640,10 +643,33 @@ class LatentBlending():
             num_inference_steps: int = 100,
             nmb_max_branches_highres: int = 5,
             nmb_max_branches_lowres: int = 6,
-            fixed_seeds: Optional[List[int]] = None,
             duration_single_segment = 3,
+            fixed_seeds: Optional[List[int]] = None,
             ):
-        #FIXME
+        r"""
+        Runs upscaling with the x4 model. Requires that you run a transition before with a low-res model and save the results using write_imgs_transition.
+        
+        Args:
+            dp_img: str
+                Path to the low-res transition path (as saved in write_imgs_transition)
+            depth_strength:
+                Determines how deep the first injection will happen. 
+                Deeper injections will cause (unwanted) formation of new structures,
+                more shallow values will go into alpha-blendy land.
+            num_inference_steps:
+                Number of diffusion steps. Higher values will take more compute time.
+            nmb_max_branches_highres: int
+                Number of final branches of the upscaling transition pass. Note this is the number
+                of branches between each pair of low-res images.
+            nmb_max_branches_lowres: int
+                Number of input low-res images, subsampling all transition images written in the low-res pass.
+                Setting this number lower (e.g. 6) will decrease the compute time but not affect the results too much.
+            duration_single_segment: float
+                The duration of each high-res movie segment. You will have nmb_max_branches_lowres-1 segments in total.
+            fixed_seeds: Optional[List[int)]:
+                You can supply two seeds that are used for the first and second keyframe (prompt1 and prompt2).
+                Otherwise random seeds will be taken.
+        """
         fp_yml = os.path.join(dp_img, "lowres.yaml")
         fp_movie = os.path.join(dp_img, "movie_highres.mp4")
         fps = 24
diff --git a/stable_diffusion_holder.py b/stable_diffusion_holder.py
index fcd720d..d16b5a7 100644
--- a/stable_diffusion_holder.py
+++ b/stable_diffusion_holder.py
@@ -265,7 +265,9 @@ class StableDiffusionHolder:
             idx_start: int
                 Index of the diffusion process start and where the latents_for_injection are injected
             mixing_coeff:
-                # FIXME spatial_mask
+                mixing coefficients for latent blending
+            spatial_mask:
+                experimental feature for enforcing pixels from list_latents_mixing
             return_image: Optional[bool]
                 Optionally return image directly
             
@@ -352,15 +354,6 @@ class StableDiffusionHolder:
         ):
         r"""
         Diffusion upscaling version. 
-        # FIXME
-        Args:
-            ??
-            latents_for_injection: torch.FloatTensor 
-                Latents that are used for injection
-            idx_start: int
-                Index of the diffusion process start and where the latents_for_injection are injected
-            return_image: Optional[bool]
-                Optionally return image directly
         """
  
         # Asserts
@@ -376,7 +369,6 @@ class StableDiffusionHolder:
             assert len(list_latents_mixing) == self.num_inference_steps
         
         precision_scope = autocast if self.precision == "autocast" else nullcontext
-        generator = torch.Generator(device=self.device).manual_seed(int(self.seed))
         
         h = uc_full['c_concat'][0].shape[2]        
         w = uc_full['c_concat'][0].shape[3]