upscaling model
parent 94f5211e5f
commit bc36077678

@@ -1,4 +1,4 @@
-Latent blending enables the creation of super-smooth video transitions between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method involves specific mixing of intermediate latent representations to create a seamless transition – with users having the option to choose full customization or preset options.
+Latent blending enables lightning-fast video transitions with incredible smoothness between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method involves specific mixing of intermediate latent representations to create a seamless transition – with users having the option to fully customize the transition and run high-resolution upscaling.
 # Quickstart
 ```python
@@ -28,6 +28,10 @@ To run a transition between two prompts where you want some part of the image to
 ## Example 3: Multi transition
 To run multiple transitions between K prompts, resulting in a stitched video, run `example3_multitrans.py`.
+
+## Example 4: High-resolution with upscaling
+![](example4.jpg)
+You can run a high-res transition using the x4 upscaling model in a two-stage procedure; see `example4_upscaling.py` and the sketch below.
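A minimal sketch of the two-stage flow, condensed from `example4_upscaling.py` (the prompts, checkpoint paths, and branching parameters referenced here are omitted; the full example script defines working values for all of them):

```python
# Stage 1: render the low-res transition with the v2-1 base model.
sdh = StableDiffusionHolder(fp_ckpt_lores, fp_config_lores)
lb = LatentBlending(sdh)
lb.set_prompt1(prompt1)
lb.set_prompt2(prompt2)
lb.run_upscaling_step1(dp_img, depth_strength_lores, num_inference_steps_lores, nmb_branches_final_lores, fixed_seeds)

# Stage 2: reload with the x4 upscaler checkpoint and refine the frames saved in dp_img.
sdh = StableDiffusionHolder(fp_ckpt_hires, fp_config_hires)
lb = LatentBlending(sdh)
lb.run_upscaling_step2(dp_img, depth_strength_hires, num_inference_steps_hires, nmb_branches_final_hires)
```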

 # Customization

 ## Most relevant parameters
Binary file not shown (new image, 213 KiB).

@@ -0,0 +1,67 @@
# Copyright 2022 Lunar Ring. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os, sys
import warnings

import numpy as np
import torch
from tqdm.auto import tqdm
from PIL import Image
from typing import Callable, List, Optional, Union

from movie_util import MovieSaver
from latent_blending import LatentBlending, add_frames_linear_interp
from stable_diffusion_holder import StableDiffusionHolder

torch.backends.cudnn.benchmark = False
warnings.filterwarnings('ignore')
torch.set_grad_enabled(False)

#%% Define vars for low-resolution pass
dp_img = "upscaling_blending"  # the results will be saved in this folder
prompt1 = "photo of mount vesuvius erupting a terrifying pyroclastic ash cloud"
prompt2 = "photo of the inside of a building full of ash, fire, death, destruction, explosions"
fixed_seeds = [5054613, 1168652]
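
# Low-res pass settings: frame size, number of diffusion steps, and the number
# of final branches (assumption: nmb_branches_final_lores sets how many low-res
# keyframes are rendered before upscaling).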
width = 512
height = 384
num_inference_steps_lores = 40
nmb_branches_final_lores = 10
depth_strength_lores = 0.5

device = "cuda"
fp_ckpt_lores = "../stable_diffusion_models/ckpt/v2-1_512-ema-pruned.ckpt"
fp_config_lores = 'configs/v2-inference.yaml'

#%% Define vars for high-resolution pass
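# The x4 upscaler gets more inference steps than the low-res pass; extra steps
# here trade rendering time for detail in the upscaled frames.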
fp_ckpt_hires = "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
fp_config_hires = 'configs/x4-upscaling.yaml'
depth_strength_hires = 0.65
num_inference_steps_hires = 100
nmb_branches_final_hires = 6

#%% Run low-res pass
sdh = StableDiffusionHolder(fp_ckpt_lores, fp_config_lores, device)
lb = LatentBlending(sdh)
lb.set_prompt1(prompt1)
lb.set_prompt2(prompt2)
lb.set_width(width)
lb.set_height(height)
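# Step 1 renders the low-res transition and writes the frames into dp_img,
# which the high-res pass below reads back for upscaling.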
lb.run_upscaling_step1(dp_img, depth_strength_lores, num_inference_steps_lores, nmb_branches_final_lores, fixed_seeds)

#%% Run high-res pass
sdh = StableDiffusionHolder(fp_ckpt_hires, fp_config_hires)
lb = LatentBlending(sdh)
lb.run_upscaling_step2(dp_img, depth_strength_hires, num_inference_steps_hires, nmb_branches_final_hires)
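
# Assumption: step 2 saves the upscaled frames alongside the low-res ones in
# dp_img; from there they can be stitched into a movie (e.g. with the
# MovieSaver helper imported above).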

@@ -614,10 +614,10 @@ class LatentBlending():
     def run_upscaling_step1(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step1',
             depth_strength: float = 0.65,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
-            overwrite_folder: bool = False,
         ):
         r"""
         Runs the low-resolution pass (step 1) of the two-stage upscaling procedure.

@@ -644,20 +644,19 @@ class LatentBlending():
         fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))

         # Run latent blending
-        self.load_branching_profile(quality='upscaling_step1', depth_strength=depth_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
         imgs_transition = self.run_transition(fixed_seeds=fixed_seeds)

         self.write_imgs_transition(dp_img, imgs_transition)

         print(f"run_upscaling_step1: completed! {dp_img}")

     def run_upscaling_step2(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step2',
-            depth_strength: float = 0.65,
+            depth_strength: float = 0.6,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
         ):

@@ -683,12 +682,7 @@ class LatentBlending():
         text_embeddingA = self.sdh.get_text_embedding(prompt1)
         text_embeddingB = self.sdh.get_text_embedding(prompt2)

-        self.load_branching_profile(quality='upscaling_step2', depth_strength=depth_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)

-        # list_nmb_branches = [2, 3, 4]
-        # list_injection_strength = [0.0, 0.6, 0.95]
-        # num_inference_steps = 100
-        # self.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)

         duration_single_trans = 3
         list_fract_mixing = np.linspace(0, 1, nmb_images_lowres-1)