upscaling model

Johannes Stelzer 2023-01-09 10:59:00 +01:00
parent 94f5211e5f
commit bc36077678
4 changed files with 79 additions and 14 deletions

README.md
@@ -1,4 +1,4 @@
-Latent blending enables the creation of super-smooth video transitions between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method involves specific mixing of intermediate latent representations to create a seamless transition with users having the option to choose full customization or preset options.
+Latent blending enables lightning-fast video transitions with incredible smoothness between prompts. Powered by [stable diffusion 2.1](https://stability.ai/blog/stablediffusion2-1-release7-dec-2022), this method involves specific mixing of intermediate latent representations to create a seamless transition, with users having the option to fully customize the transition and run high-resolution upscaling.
 # Quickstart
 ```python
@@ -28,6 +28,10 @@ To run a transition between two prompts where you want some part of the image to
 ## Example 3: Multi transition
 To run multiple transitions between K prompts, resulting in a stitched video, run `example3_multitrans.py`
+## Example 4: High-resolution with upscaling
+![](example4.jpg)
+You can run a high-res transition using the x4 upscaling model in a two-stage procedure; see `example4_upscaling.py`
 # Customization
 ## Most relevant parameters
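
The Quickstart snippet itself is cut off by the diff context above. For orientation, here is a minimal sketch of the basic transition API, assembled only from calls that appear elsewhere in this commit; the prompts, seeds, and output folder are placeholder values, and the checkpoint paths assume the layout used in `example4_upscaling.py`:

```python
from latent_blending import LatentBlending
from stable_diffusion_holder import StableDiffusionHolder

# Placeholder paths: point these at your local checkpoint and config.
sdh = StableDiffusionHolder("../stable_diffusion_models/ckpt/v2-1_512-ema-pruned.ckpt",
                            "configs/v2-inference.yaml", "cuda")
lb = LatentBlending(sdh)
lb.set_prompt1("photo of a desert at sunrise")   # placeholder prompt
lb.set_prompt2("photo of a desert at night")     # placeholder prompt
# The upscaling path in this commit configures branching explicitly first;
# positional arguments: depth_strength, num_inference_steps, nmb_branches_final.
lb.autosetup_branching(0.65, 30, 10)
imgs_transition = lb.run_transition(fixed_seeds=[5054613, 1168652])
lb.write_imgs_transition("my_transition", imgs_transition)  # frames saved to this folder
```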

example4.jpg Normal file (binary, 213 KiB; not shown)

example4_upscaling.py Normal file

@@ -0,0 +1,67 @@
# Copyright 2022 Lunar Ring. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import torch
torch.backends.cudnn.benchmark = False
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from tqdm.auto import tqdm
from PIL import Image
from movie_util import MovieSaver
from typing import Callable, List, Optional, Union
from latent_blending import LatentBlending, add_frames_linear_interp
from stable_diffusion_holder import StableDiffusionHolder
torch.set_grad_enabled(False)
#%% Define vars for low-resolution pass
dp_img = "upscaling_blending"  # the results will be saved in this folder
prompt1 = "photo of mount vesuvius erupting a terrifying pyroclastic ash cloud"
prompt2 = "photo of the inside of a building full of ash, fire, death, destruction, explosions"
fixed_seeds = [5054613, 1168652]
width = 512   # low-res width; the x4 upscaler later brings this to 2048
height = 384  # low-res height; upscaled to 1536
num_inference_steps_lores = 40
nmb_branches_final_lores = 10  # number of frames rendered for the low-res transition
depth_strength_lores = 0.5     # how deep into the diffusion process the branching happens
device = "cuda"
fp_ckpt_lores = "../stable_diffusion_models/ckpt/v2-1_512-ema-pruned.ckpt"
fp_config_lores = 'configs/v2-inference.yaml'
#%% Define vars for high-resolution pass
fp_ckpt_hires = "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
fp_config_hires = 'configs/x4-upscaling.yaml'
depth_strength_hires = 0.65
num_inference_steps_hires = 100
nmb_branches_final_hires = 6
#%% Run low-res pass
sdh = StableDiffusionHolder(fp_ckpt_lores, fp_config_lores, device)
lb = LatentBlending(sdh)
lb.set_prompt1(prompt1)
lb.set_prompt2(prompt2)
lb.set_width(width)
lb.set_height(height)
# Renders the low-res transition frames and writes them into dp_img.
lb.run_upscaling_step1(dp_img, depth_strength_lores, num_inference_steps_lores, nmb_branches_final_lores, fixed_seeds)
#%% Run high-res pass
sdh = StableDiffusionHolder(fp_ckpt_hires, fp_config_hires, device)
lb = LatentBlending(sdh)
# Loads the low-res frames from dp_img and upscales each transition segment 4x.
lb.run_upscaling_step2(dp_img, depth_strength_hires, num_inference_steps_hires, nmb_branches_final_hires)
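
The two stages communicate through the `dp_img` folder: step 1 renders the low-resolution transition and writes its frames there, and step 2 loads those frames and re-renders them at 4x with the upscaler checkpoint. If you want manual control over the branching schedule instead of `autosetup_branching`, the comments removed from `latent_blending.py` in this commit point to a lower-level `setup_branching` call. Below is a sketch using exactly the values from those removed lines; treat it as a sketch, not API documentation:

```python
# Manual branching schedule, mirroring the commented-out example that this
# commit removes from latent_blending.py.
num_inference_steps = 100
list_nmb_branches = [2, 3, 4]                 # branches spawned at each injection level
list_injection_strength = [0.0, 0.6, 0.95]    # where in the denoising each level branches off
lb.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)
imgs_transition = lb.run_transition(fixed_seeds=fixed_seeds)
```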

latent_blending.py
@@ -614,10 +614,10 @@ class LatentBlending():
     def run_upscaling_step1(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step1',
             depth_strength: float = 0.65,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
-            overwrite_folder: bool = False,
         ):
         r"""
         Runs the first, low-resolution stage of the two-stage upscaling transition.
@@ -644,11 +644,9 @@ class LatentBlending():
             fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))
         # Run latent blending
-        self.load_branching_profile(quality='upscaling_step1', depth_strength=depth_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
         imgs_transition = self.run_transition(fixed_seeds=fixed_seeds)
         self.write_imgs_transition(dp_img, imgs_transition)
         print(f"run_upscaling_step1: completed! {dp_img}")
@@ -656,8 +654,9 @@ class LatentBlending():
     def run_upscaling_step2(
             self,
             dp_img: str,
-            quality: str = 'upscaling_step2',
-            depth_strength: float = 0.6,
+            depth_strength: float = 0.65,
+            num_inference_steps: int = 30,
+            nmb_branches_final: int = 10,
             fixed_seeds: Optional[List[int]] = None,
         ):
@@ -683,12 +682,7 @@ class LatentBlending():
         text_embeddingA = self.sdh.get_text_embedding(prompt1)
         text_embeddingB = self.sdh.get_text_embedding(prompt2)
-        self.load_branching_profile(quality='upscaling_step2', depth_strength=depth_strength)
-        # list_nmb_branches = [2, 3, 4]
-        # list_injection_strength = [0.0, 0.6, 0.95]
-        # num_inference_steps = 100
-        # self.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)
+        self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
         duration_single_trans = 3
         list_fract_mixing = np.linspace(0, 1, nmb_images_lowres-1)
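
For callers migrating from the removed preset API, the substitution in both methods is mechanical; the two calls below are taken verbatim from the hunks above, with the old preset name replaced by explicit numeric control:

```python
# Before this commit: branching chosen via a named preset.
self.load_branching_profile(quality='upscaling_step2', depth_strength=depth_strength)

# After this commit: depth, step count, and final branch count passed explicitly.
self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
```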