diff --git a/README.md b/README.md
index 3a0c731..91f489a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-Latent blending enables the creation of super-smooth video transitions between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method involves specific mixing of intermediate latent representations to create a seamless transition – with users having the option to choose full customization or preset options.
+Latent blending enables fast, smooth video transitions between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method mixes intermediate latent representations in a specific way to create a seamless transition; users can fully customize the transition and run high-resolution upscaling.
 
 # Quickstart
 ```python
@@ -28,6 +28,26 @@ To run a transition between two prompts where you want some part of the image to
 ## Example 3: Multi transition
 To run multiple transitions between K prompts, resulting in a stitched video, run `example3_multitrans.py`
 
+## Example 4: High-resolution with upscaling
+![](example4.jpg)
+You can run a high-res transition using the x4 upscaling model in a two-stage procedure; see `example4_upscaling.py` for the full script.
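+A minimal sketch of the two stages, using the variable names from that script:
+```python
+# Stage 1: compute the low-res transition with the regular v2-1 checkpoint
+sdh = StableDiffusionHolder(fp_ckpt_lores, fp_config_lores, device)
+lb = LatentBlending(sdh)
+lb.set_prompt1(prompt1)
+lb.set_prompt2(prompt2)
+lb.set_width(width)
+lb.set_height(height)
+lb.run_upscaling_step1(dp_img, depth_strength_lores, num_inference_steps_lores, nmb_branches_final_lores, fixed_seeds)
+
+# Stage 2: upscale the stored low-res transition images with the x4 model
+sdh = StableDiffusionHolder(fp_ckpt_hires, fp_config_hires)
+lb = LatentBlending(sdh)
+lb.run_upscaling_step2(dp_img, depth_strength_hires, num_inference_steps_hires, nmb_branches_final_hires)
+```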
+
 
 # Customization
 ## Most relevant parameters
diff --git a/example4.jpg b/example4.jpg
new file mode 100644
index 0000000..b422312
Binary files /dev/null and b/example4.jpg differ
diff --git a/example4_upscaling.py b/example4_upscaling.py
new file mode 100644
index 0000000..a91caf6
--- /dev/null
+++ b/example4_upscaling.py
@@ -0,0 +1,66 @@
+# Copyright 2022 Lunar Ring. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, sys
+import torch
+torch.backends.cudnn.benchmark = False
+import numpy as np
+import warnings
+warnings.filterwarnings('ignore')
+from tqdm.auto import tqdm
+from PIL import Image
+from movie_util import MovieSaver
+from typing import Callable, List, Optional, Union
+from latent_blending import LatentBlending, add_frames_linear_interp
+from stable_diffusion_holder import StableDiffusionHolder
+torch.set_grad_enabled(False)
+
+#%% Define vars for low-resolution pass
+dp_img = "upscaling_blending"  # the results will be saved in this folder
+prompt1 = "photo of mount vesuvius erupting a terrifying pyroclastic ash cloud"
+prompt2 = "photo of the inside of a building full of ash, fire, death, destruction, explosions"
+fixed_seeds = [5054613, 1168652]
+
+width = 512
+height = 384
+num_inference_steps_lores = 40
+nmb_branches_final_lores = 10
+depth_strength_lores = 0.5
+
+device = "cuda"
+fp_ckpt_lores = "../stable_diffusion_models/ckpt/v2-1_512-ema-pruned.ckpt"
+fp_config_lores = 'configs/v2-inference.yaml'
+
+#%% Define vars for high-resolution pass
+fp_ckpt_hires = "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
+fp_config_hires = 'configs/x4-upscaling.yaml'
+depth_strength_hires = 0.65
+num_inference_steps_hires = 100
+nmb_branches_final_hires = 6
+
+#%% Run low-res pass
+sdh = StableDiffusionHolder(fp_ckpt_lores, fp_config_lores, device)
+lb = LatentBlending(sdh)
+lb.set_prompt1(prompt1)
+lb.set_prompt2(prompt2)
+lb.set_width(width)
+lb.set_height(height)
+lb.run_upscaling_step1(dp_img, depth_strength_lores, num_inference_steps_lores, nmb_branches_final_lores, fixed_seeds)
+
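+# The high-res pass reads the low-res transition images that the first stage
+# stored in dp_img and upscales each of them with the x4 upscaling model.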
+#%% Run high-res pass
+sdh = StableDiffusionHolder(fp_ckpt_hires, fp_config_hires)
+lb = LatentBlending(sdh)
+lb.run_upscaling_step2(dp_img, depth_strength_hires, num_inference_steps_hires, nmb_branches_final_hires)
\ No newline at end of file
diff --git a/latent_blending.py b/latent_blending.py
index 37e5205..5894a4f 100644
--- a/latent_blending.py
+++ b/latent_blending.py
@@ -614,10 +614,10 @@ class LatentBlending():
     def run_upscaling_step1(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step1',
             depth_strength: float = 0.65,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
-            overwrite_folder: bool = False,
             ):
         r"""
         Runs the first stage of the upscaling: computes a low-resolution transition and writes it to dp_img.
@@ -644,11 +644,9 @@
             fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))
 
         # Run latent blending
-        self.load_branching_profile(quality='upscaling_step1', depth_strength=depth_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
         imgs_transition = self.run_transition(fixed_seeds=fixed_seeds)
-
         self.write_imgs_transition(dp_img, imgs_transition)
-
         print(f"run_upscaling_step1: completed! {dp_img}")
 
 
@@ -656,8 +654,9 @@
 
     def run_upscaling_step2(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step2',
-            depth_strength: float = 0.6,
+            depth_strength: float = 0.65,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
             ):
@@ -683,12 +682,7 @@
         text_embeddingA = self.sdh.get_text_embedding(prompt1)
         text_embeddingB = self.sdh.get_text_embedding(prompt2)
 
-        self.load_branching_profile(quality='upscaling_step2', depth_strength=depth_strength)
-
-        # list_nmb_branches = [2, 3, 4]
-        # list_injection_strength = [0.0, 0.6, 0.95]
-        # num_inference_steps = 100
-        # self.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
 
         duration_single_trans = 3
         list_fract_mixing = np.linspace(0, 1, nmb_images_lowres-1)