Compare commits

..

2 Commits

Author SHA1 Message Date
DGX 9f9512fa48 movie import fix 2024-02-21 12:43:17 +00:00
DGX 359ef99eaf movie engine fix 2024-02-21 12:42:17 +00:00
9 changed files with 483 additions and 424 deletions

1
.gitignore vendored
View File

@ -7,7 +7,6 @@ __pycache__/
*.so *.so
# Distribution / packaging # Distribution / packaging
*.json
.Python .Python
build/ build/
develop-eggs/ develop-eggs/

View File

@ -1,51 +0,0 @@
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Configure environment
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_PREFER_BINARY=1 \
    CUDA_HOME=/usr/local/cuda-12.1 \
    TORCH_CUDA_ARCH_LIST="8.6"

# Redirect shell: make /bin/sh point at bash for the RUN steps below
RUN rm /bin/sh && ln -s /bin/bash /bin/sh

# Install prereqs
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git-lfs \
        ffmpeg \
        libgl1-mesa-dev \
        libglib2.0-0 \
        git \
        python3-dev \
        python3-pip \
        # Lunar Tools prereqs
        libasound2-dev \
        libportaudio2 \
    # apt-get (not apt) is the interface intended for non-interactive use
    && apt-get clean && rm -rf /var/lib/apt/lists/* \
    && ln -s /usr/bin/python3 /usr/bin/python

# Persist the CUDA paths for interactive bash sessions.
# All three exports must land in the same file, /etc/bash.bashrc.
RUN echo "export PATH=/usr/local/cuda/bin:$PATH" >> /etc/bash.bashrc \
    && echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> /etc/bash.bashrc \
    && echo "export CUDA_HOME=/usr/local/cuda-12.1" >> /etc/bash.bashrc

# Install Python packages: Basic, then CUDA-compatible, then custom.
# Version specifiers containing '>' MUST be quoted: unquoted, the shell treats
# `xformers>=0.0.22` as "install xformers, redirect stdout to a file named
# '=0.0.22'", silently dropping the version constraint.
RUN pip3 install \
        wheel \
        ninja && \
    pip3 install \
        torch==2.1.0 \
        torchvision==0.16.0 \
        "xformers>=0.0.22" \
        "triton>=2.1.0" \
        --index-url https://download.pytorch.org/whl/cu121 && \
    pip3 install git+https://github.com/lunarring/latentblending \
        git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast

# Optionally store weights in image
# RUN mkdir -p /root/.cache/torch/hub/checkpoints/ && curl -o /root/.cache/torch/hub/checkpoints//alexnet-owt-7be5be79.pth https://download.pytorch.org/models/alexnet-owt-7be5be79.pth
# RUN git lfs install && git clone https://huggingface.co/stabilityai/sdxl-turbo /sdxl-turbo

# Clone base repo because why not
RUN git clone https://github.com/lunarring/latentblending.git

View File

@ -35,16 +35,11 @@ be = BlendingEngine(pipe, do_compile=True)
``` ```
## Gradio UI ## Gradio UI
We can launch a user-interface version with: Coming soon again :)
```commandline
python latentblending/gradio_ui.py
```
With the UI, you can iteratively generate your desired keyframes, and then render the movie with latent blending at the end.
## Example 1: Simple transition ## Example 1: Simple transition
![](example1.jpg) ![](example1.jpg)
To run a simple transition between two prompts, see `examples/single_trans.py`, or [check this volcano eruption ](https://youtu.be/O_2fpWHdnm4). To run a simple transition between two prompts, see `examples/single_trans.py`
## Example 2: Multi transition ## Example 2: Multi transition
To run multiple transitions between K prompts, resulting in a stitched video, see `examples/multi_trans.py`. To run multiple transitions between K prompts, resulting in a stitched video, see `examples/multi_trans.py`.
@ -140,6 +135,7 @@ With latent blending, we can create transitions that appear to defy the laws of
# Coming soon... # Coming soon...
- [ ] MacOS support - [ ] MacOS support
- [ ] Gradio interface
- [ ] Huggingface Space - [ ] Huggingface Space
- [ ] Controlnet - [ ] Controlnet
- [ ] IP-Adapter - [ ] IP-Adapter

View File

@ -1,75 +0,0 @@
import json
import warnings

import numpy as np
import torch
from diffusers import AutoPipelineForText2Image
from lunar_tools import concatenate_movies

from latentblending.blending_engine import BlendingEngine

torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = False
warnings.filterwarnings('ignore')

# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
# pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pretrained_model_name_or_path = "stabilityai/sdxl-turbo"
pipe = AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
pipe.to('cuda')

be = BlendingEngine(pipe, do_compile=False)

fp_movie = 'test.mp4'
fp_json = "movie_240221_1520.json"
duration_single_trans = 10

# Load the JSON data from the file.
# Layout read below: entry 0 holds the settings, every following entry is one keyframe.
with open(fp_json, 'r', encoding='utf-8') as file:
    data = json.load(file)

# A transition needs two keyframes; fail early with a clear message instead of
# reaching concatenate_movies() with an empty list of parts.
if len(data) < 3:
    raise ValueError(
        f"{fp_json} must contain a settings entry followed by at least two keyframes, "
        f"found {len(data)} entries")

# Set up width, height, num_inference steps
settings = data[0]
width = settings["width"]
height = settings["height"]
num_inference_steps = settings["num_inference_steps"]

be.set_dimensions((width, height))
be.set_num_inference_steps(num_inference_steps)

# Extract prompts, negative prompts, and seeds from the data
keyframes = data[1:]  # Skip the first item as it contains settings
list_prompts = [item["prompt"] for item in keyframes]
list_negative_prompts = [item["negative_prompt"] for item in keyframes]
list_seeds = [item["seed"] for item in keyframes]

list_movie_parts = []
for i in range(len(list_prompts) - 1):
    # For a multi transition we can save some computation time and recycle the latents
    if i == 0:
        be.set_prompt1(list_prompts[i])
        be.set_negative_prompt(list_negative_prompts[i])
        be.set_prompt2(list_prompts[i + 1])
        recycle_img1 = False
    else:
        be.swap_forward()
        # NOTE(review): the first transition uses negative prompt [i], later ones
        # use [i + 1], so entry 1 is never applied -- confirm this is intended.
        be.set_negative_prompt(list_negative_prompts[i + 1])
        be.set_prompt2(list_prompts[i + 1])
        recycle_img1 = True

    fp_movie_part = f"tmp_part_{str(i).zfill(3)}.mp4"
    # One seed per endpoint of this transition
    fixed_seeds = list_seeds[i:i + 2]

    # Run latent blending
    be.run_transition(
        recycle_img1=recycle_img1,
        fixed_seeds=fixed_seeds)

    # Save movie
    be.write_movie_transition(fp_movie_part, duration_single_trans)
    list_movie_parts.append(fp_movie_part)

# Finally, concatenate the result
concatenate_movies(fp_movie, list_movie_parts)
print(f"DONE! MOVIE SAVED IN {fp_movie}")

View File

@ -1,7 +1,7 @@
import torch import torch
import warnings import warnings
from diffusers import AutoPipelineForText2Image from diffusers import AutoPipelineForText2Image
from lunar_tools import concatenate_movies from latentblending.movie_util import concatenate_movies
from latentblending.blending_engine import BlendingEngine from latentblending.blending_engine import BlendingEngine
import numpy as np import numpy as np
torch.set_grad_enabled(False) torch.set_grad_enabled(False)
@ -23,6 +23,9 @@ be.set_dimensions((1024, 1024))
nmb_prompts = 20 nmb_prompts = 20
# Specify a list of prompts below # Specify a list of prompts below
#%% #%%

View File

@ -1,3 +1,4 @@
from .blending_engine import BlendingEngine from .blending_engine import BlendingEngine
from .diffusers_holder import DiffusersHolder from .diffusers_holder import DiffusersHolder
from .movie_util import MovieSaver
from .utils import interpolate_spherical, add_frames_linear_interp, interpolate_linear, get_spacing, get_time, yml_load, yml_save from .utils import interpolate_spherical, add_frames_linear_interp, interpolate_linear, get_spacing, get_time, yml_load, yml_save

View File

@ -288,7 +288,7 @@ class BlendingEngine():
if t_compute_max_allowed is None and nmb_max_branches is None: if t_compute_max_allowed is None and nmb_max_branches is None:
t_compute_max_allowed = 20 t_compute_max_allowed = 20
elif t_compute_max_allowed is not None and nmb_max_branches is not None: elif t_compute_max_allowed is not None and nmb_max_branches is not None:
raise ValueErorr("Either specify t_compute_max_allowed or nmb_max_branches") raise ValueError("Either specify t_compute_max_allowed or nmb_max_branches")
self.list_idx_injection, self.list_nmb_stems = self.get_time_based_branching(depth_strength, t_compute_max_allowed, nmb_max_branches) self.list_idx_injection, self.list_nmb_stems = self.get_time_based_branching(depth_strength, t_compute_max_allowed, nmb_max_branches)
@ -680,6 +680,7 @@ class BlendingEngine():
img_leaf = Image.fromarray(img) img_leaf = Image.fromarray(img)
img_leaf.save(os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg")) img_leaf.save(os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg"))
fp_yml = os.path.join(dp_img, "lowres.yaml") fp_yml = os.path.join(dp_img, "lowres.yaml")
self.save_statedict(fp_yml)
def write_movie_transition(self, fp_movie, duration_transition, fps=30): def write_movie_transition(self, fp_movie, duration_transition, fps=30):
r""" r"""
@ -727,6 +728,35 @@ class BlendingEngine():
pass pass
return state_dict return state_dict
def randomize_seed(self):
    r"""
    Draw a fresh random seed and apply it through set_seed.
    """
    # Upper bound is exclusive: the drawn value lies in [0, 999999999)
    self.set_seed(np.random.randint(999999999))
def set_seed(self, seed: int):
    r"""
    Set the seed for a fresh start.

    Args:
        seed: int
            Value stored on this object and mirrored onto ``self.dh``.
    """
    # Chained assignment binds left to right: self.seed first, then self.dh.seed
    self.seed = self.dh.seed = seed
def set_width(self, width):
    r"""
    Set the width of the resulting image.

    The value is stored on this object and mirrored onto ``self.dh``.

    Args:
        width: int
            Image width; needs to be divisible by 64.

    Raises:
        AssertionError: if ``width`` is not divisible by 64. The exception type
            and message are unchanged, but it is raised explicitly so the check
            is not stripped when Python runs with ``-O``.
    """
    if np.mod(width, 64) != 0:
        raise AssertionError("set_width: value needs to be divisible by 64")
    self.width = width
    self.dh.width = width
def set_height(self, height):
    r"""
    Set the height of the resulting image.

    The value is stored on this object and mirrored onto ``self.dh``.

    Args:
        height: int
            Image height; needs to be divisible by 64.

    Raises:
        AssertionError: if ``height`` is not divisible by 64. The exception type
            and message are unchanged, but it is raised explicitly so the check
            is not stripped when Python runs with ``-O``.
    """
    if np.mod(height, 64) != 0:
        raise AssertionError("set_height: value needs to be divisible by 64")
    self.height = height
    self.dh.height = height
def swap_forward(self): def swap_forward(self):
r""" r"""

View File

@ -1,3 +1,18 @@
# Copyright 2022 Lunar Ring. All rights reserved.
# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os import os
import torch import torch
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False
@ -5,340 +20,481 @@ torch.set_grad_enabled(False)
import numpy as np import numpy as np
import warnings import warnings
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
import warnings
from tqdm.auto import tqdm from tqdm.auto import tqdm
from PIL import Image from PIL import Image
from movie_util import MovieSaver, concatenate_movies
from latent_blending import LatentBlending
from stable_diffusion_holder import StableDiffusionHolder
import gradio as gr import gradio as gr
from dotenv import find_dotenv, load_dotenv
import shutil import shutil
import uuid import uuid
from diffusers import AutoPipelineForText2Image from utils import get_time, add_frames_linear_interp
from latentblending.blending_engine import BlendingEngine from huggingface_hub import hf_hub_download
import datetime
import tempfile
import json
from lunar_tools import concatenate_movies
import argparse
"""
TODO
- time per segment
- init phase (model, res, nmb iter)
- recycle existing movies
- hf spaces integration
"""
class MultiUserRouter(): class BlendingFrontend():
def __init__( def __init__(
self, self,
do_compile=False sdh,
): share=False):
self.user_blendingvariableholder = {}
self.do_compile = do_compile
self.list_models = ["stabilityai/sdxl-turbo", "stabilityai/stable-diffusion-xl-base-1.0"]
self.init_models()
def init_models(self):
self.dict_blendingengines = {}
for m in self.list_models:
pipe = AutoPipelineForText2Image.from_pretrained(m, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
be = BlendingEngine(pipe, do_compile=self.do_compile)
self.dict_blendingengines[m] = be
def register_new_user(self, model, width, height):
user_id = str(uuid.uuid4().hex.upper()[0:8])
be = self.dict_blendingengines[model]
be.set_dimensions((width, height))
self.user_blendingvariableholder[user_id] = BlendingVariableHolder(be)
return user_id
def user_overflow_protection(self):
pass
def preview_img_selected(self, user_id, data: gr.SelectData, button):
return self.user_blendingvariableholder[user_id].preview_img_selected(data, button)
def movie_img_selected(self, user_id, data: gr.SelectData, button):
return self.user_blendingvariableholder[user_id].movie_img_selected(data, button)
def compute_imgs(self, user_id, prompt, negative_prompt):
return self.user_blendingvariableholder[user_id].compute_imgs(prompt, negative_prompt)
def get_list_images_movie(self, user_id):
return self.user_blendingvariableholder[user_id].get_list_images_movie()
def init_new_movie(self, user_id):
return self.user_blendingvariableholder[user_id].init_new_movie()
def write_json(self, user_id):
return self.user_blendingvariableholder[user_id].write_json()
def add_image_to_video(self, user_id):
return self.user_blendingvariableholder[user_id].add_image_to_video()
def img_movie_delete(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_delete()
def img_movie_later(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_later()
def img_movie_earlier(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_earlier()
def generate_movie(self, user_id, t_per_segment):
return self.user_blendingvariableholder[user_id].generate_movie(t_per_segment)
#%% BlendingVariableHolder Class
class BlendingVariableHolder():
def __init__(
self,
be):
r""" r"""
Gradio Helper Class to collect UI data and start latent blending. Gradio Helper Class to collect UI data and start latent blending.
Args: Args:
be: sdh:
Blendingengine StableDiffusionHolder
share: bool share: bool
Set true to get a shareable gradio link (e.g. for running a remote server) Set true to get a shareable gradio link (e.g. for running a remote server)
""" """
self.be = be self.share = share
# UI Defaults # UI Defaults
self.num_inference_steps = 30
self.depth_strength = 0.25
self.seed1 = 420 self.seed1 = 420
self.seed2 = 420 self.seed2 = 420
self.prompt1 = "" self.prompt1 = ""
self.prompt2 = "" self.prompt2 = ""
self.negative_prompt = "" self.negative_prompt = ""
self.nmb_preview_images = 4 self.fps = 30
self.duration_video = 8
self.t_compute_max_allowed = 10
self.lb = LatentBlending(sdh)
self.lb.sdh.num_inference_steps = self.num_inference_steps
self.init_parameters_from_lb()
self.init_save_dir()
# Vars # Vars
self.prompt = None self.list_fp_imgs_current = []
self.negative_prompt = None self.recycle_img1 = False
self.list_seeds = [] self.recycle_img2 = False
self.idx_movie = 0 self.list_all_segments = []
self.list_seeds = [] self.dp_session = ""
self.list_images_preview = [] self.user_id = None
self.data = []
self.idx_img_preview_selected = None
self.idx_img_movie_selected = None
self.jpg_quality = 80
self.fp_movie = ''
def preview_img_selected(self, data: gr.SelectData, button): def init_parameters_from_lb(self):
self.idx_img_preview_selected = data.index r"""
print(f"preview image {self.idx_img_preview_selected} selected, seed {self.list_seeds[self.idx_img_preview_selected]}") Automatically init parameters from latentblending instance
"""
self.height = self.lb.sdh.height
self.width = self.lb.sdh.width
self.guidance_scale = self.lb.guidance_scale
self.guidance_scale_mid_damper = self.lb.guidance_scale_mid_damper
self.mid_compression_scaler = self.lb.mid_compression_scaler
self.branch1_crossfeed_power = self.lb.branch1_crossfeed_power
self.branch1_crossfeed_range = self.lb.branch1_crossfeed_range
self.branch1_crossfeed_decay = self.lb.branch1_crossfeed_decay
self.parental_crossfeed_power = self.lb.parental_crossfeed_power
self.parental_crossfeed_range = self.lb.parental_crossfeed_range
self.parental_crossfeed_power_decay = self.lb.parental_crossfeed_power_decay
def movie_img_selected(self, data: gr.SelectData, button): def init_save_dir(self):
self.idx_img_movie_selected = data.index r"""
print(f"movie image {self.idx_img_movie_selected} selected") Initializes the directory where stuff is being saved.
You can specify this directory in a ".env" file in your latentblending root, setting
DIR_OUT='/path/to/saving'
"""
load_dotenv(find_dotenv(), verbose=False)
self.dp_out = os.getenv("DIR_OUT")
if self.dp_out is None:
self.dp_out = ""
self.dp_imgs = os.path.join(self.dp_out, "imgs")
os.makedirs(self.dp_imgs, exist_ok=True)
self.dp_movies = os.path.join(self.dp_out, "movies")
os.makedirs(self.dp_movies, exist_ok=True)
self.save_empty_image()
def compute_imgs(self, prompt, negative_prompt): def save_empty_image(self):
self.prompt = prompt r"""
self.negative_prompt = negative_prompt Saves an empty/black dummy image.
self.be.set_prompt1(prompt) """
self.be.set_prompt2(prompt) self.fp_img_empty = os.path.join(self.dp_imgs, 'empty.jpg')
self.be.set_negative_prompt(negative_prompt) Image.fromarray(np.zeros((self.height, self.width, 3), dtype=np.uint8)).save(self.fp_img_empty, quality=5)
self.list_seeds = []
self.list_images_preview = []
self.idx_img_preview_selected = None
for i in range(self.nmb_preview_images):
seed = np.random.randint(0, np.iinfo(np.int32).max)
self.be.seed1 = seed
self.list_seeds.append(seed)
img = self.be.compute_latents1(return_image=True)
fn_img_tmp = f"image_{uuid.uuid4()}.jpg"
temp_img_path = os.path.join(tempfile.gettempdir(), fn_img_tmp)
img.save(temp_img_path)
img.save(temp_img_path, quality=self.jpg_quality, optimize=True)
self.list_images_preview.append(temp_img_path)
return self.list_images_preview
def randomize_seed1(self):
r"""
Randomizes the first seed
"""
seed = np.random.randint(0, 10000000)
self.seed1 = int(seed)
print(f"randomize_seed1: new seed = {self.seed1}")
return seed
def get_list_images_movie(self): def randomize_seed2(self):
return [entry["preview_image"] for entry in self.data] r"""
Randomizes the second seed
"""
seed = np.random.randint(0, 10000000)
self.seed2 = int(seed)
print(f"randomize_seed2: new seed = {self.seed2}")
return seed
def setup_lb(self, list_ui_vals):
r"""
Sets all parameters from the UI. Since gradio does not support to pass dictionaries,
we have to instead pass keys (list_ui_keys, global) and values (list_ui_vals)
"""
# Collect latent blending variables
self.lb.set_width(list_ui_vals[list_ui_keys.index('width')])
self.lb.set_height(list_ui_vals[list_ui_keys.index('height')])
self.lb.set_prompt1(list_ui_vals[list_ui_keys.index('prompt1')])
self.lb.set_prompt2(list_ui_vals[list_ui_keys.index('prompt2')])
self.lb.set_negative_prompt(list_ui_vals[list_ui_keys.index('negative_prompt')])
self.lb.guidance_scale = list_ui_vals[list_ui_keys.index('guidance_scale')]
self.lb.guidance_scale_mid_damper = list_ui_vals[list_ui_keys.index('guidance_scale_mid_damper')]
self.t_compute_max_allowed = list_ui_vals[list_ui_keys.index('duration_compute')]
self.lb.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.lb.sdh.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.duration_video = list_ui_vals[list_ui_keys.index('duration_video')]
self.lb.seed1 = list_ui_vals[list_ui_keys.index('seed1')]
self.lb.seed2 = list_ui_vals[list_ui_keys.index('seed2')]
self.lb.branch1_crossfeed_power = list_ui_vals[list_ui_keys.index('branch1_crossfeed_power')]
self.lb.branch1_crossfeed_range = list_ui_vals[list_ui_keys.index('branch1_crossfeed_range')]
self.lb.branch1_crossfeed_decay = list_ui_vals[list_ui_keys.index('branch1_crossfeed_decay')]
self.lb.parental_crossfeed_power = list_ui_vals[list_ui_keys.index('parental_crossfeed_power')]
self.lb.parental_crossfeed_range = list_ui_vals[list_ui_keys.index('parental_crossfeed_range')]
self.lb.parental_crossfeed_power_decay = list_ui_vals[list_ui_keys.index('parental_crossfeed_power_decay')]
self.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.depth_strength = list_ui_vals[list_ui_keys.index('depth_strength')]
def init_new_movie(self): if len(list_ui_vals[list_ui_keys.index('user_id')]) > 1:
current_time = datetime.datetime.now() self.user_id = list_ui_vals[list_ui_keys.index('user_id')]
self.fp_movie = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".mp4"
self.fp_json = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".json"
def write_json(self):
# Write the data list to a JSON file
data_copy = self.data.copy()
data_copy.insert(0, {"settings": "sdxl", "width": self.be.dh.width_img, "height": self.be.dh.height_img, "num_inference_steps": self.be.dh.num_inference_steps})
with open(self.fp_json, 'w') as f:
json.dump(data_copy, f, indent=4)
def add_image_to_video(self):
if self.prompt is None:
print("Cannot take because no prompt was set!")
return self.get_list_images_movie()
if self.idx_movie == 0:
self.init_new_movie()
self.data.append({"iteration": self.idx_movie,
"seed": self.list_seeds[self.idx_img_preview_selected],
"prompt": self.prompt,
"negative_prompt": self.negative_prompt,
"preview_image": self.list_images_preview[self.idx_img_preview_selected]
})
self.write_json()
self.idx_movie += 1
return self.get_list_images_movie()
def img_movie_delete(self):
if self.idx_img_movie_selected is not None and 0 <= self.idx_img_movie_selected < len(self.data)+1:
del self.data[self.idx_img_movie_selected]
self.idx_img_movie_selected = None
else: else:
print(f"Invalid movie image index for deletion: {self.idx_img_movie_selected}") # generate new user id
return self.get_list_images_movie() self.user_id = uuid.uuid4().hex
print(f"made new user_id: {self.user_id} at {get_time('second')}")
def img_movie_later(self): def save_latents(self, fp_latents, list_latents):
if self.idx_img_movie_selected is not None and self.idx_img_movie_selected < len(self.data): r"""
# Swap the selected image with the next one Saves a latent trajectory on disk, in npy format.
self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected + 1] = \ """
self.data[self.idx_img_movie_selected+1], self.data[self.idx_img_movie_selected] list_latents_cpu = [l.cpu().numpy() for l in list_latents]
self.idx_img_movie_selected = None np.save(fp_latents, list_latents_cpu)
def load_latents(self, fp_latents):
r"""
Loads a latent trajectory from disk, converts to torch tensor.
"""
list_latents_cpu = np.load(fp_latents)
list_latents = [torch.from_numpy(l).to(self.lb.device) for l in list_latents_cpu]
return list_latents
def compute_img1(self, *args):
r"""
Computes the first transition image and returns it for display.
Sets all other transition images and last image to empty (as they are obsolete with this operation)
"""
list_ui_vals = args
self.setup_lb(list_ui_vals)
fp_img1 = os.path.join(self.dp_imgs, f"img1_{self.user_id}")
img1 = Image.fromarray(self.lb.compute_latents1(return_image=True))
img1.save(fp_img1 + ".jpg")
self.save_latents(fp_img1 + ".npy", self.lb.tree_latents[0])
self.recycle_img1 = True
self.recycle_img2 = False
return [fp_img1 + ".jpg", self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.user_id]
def compute_img2(self, *args):
r"""
Computes the last transition image and returns it for display.
Sets all other transition images to empty (as they are obsolete with this operation)
"""
if not os.path.isfile(os.path.join(self.dp_imgs, f"img1_{self.user_id}.jpg")): # don't do anything
return [self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.user_id]
list_ui_vals = args
self.setup_lb(list_ui_vals)
self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
fp_img2 = os.path.join(self.dp_imgs, f"img2_{self.user_id}")
img2 = Image.fromarray(self.lb.compute_latents2(return_image=True))
img2.save(fp_img2 + '.jpg')
self.save_latents(fp_img2 + ".npy", self.lb.tree_latents[-1])
self.recycle_img2 = True
# fixme save seeds. change filenames?
return [self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, fp_img2 + ".jpg", self.user_id]
def compute_transition(self, *args):
r"""
Computes transition images and movie.
"""
list_ui_vals = args
self.setup_lb(list_ui_vals)
print("STARTING TRANSITION...")
fixed_seeds = [self.seed1, self.seed2]
# Inject loaded latents (other user interference)
self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
self.lb.tree_latents[-1] = self.load_latents(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"))
imgs_transition = self.lb.run_transition(
recycle_img1=self.recycle_img1,
recycle_img2=self.recycle_img2,
num_inference_steps=self.num_inference_steps,
depth_strength=self.depth_strength,
t_compute_max_allowed=self.t_compute_max_allowed,
fixed_seeds=fixed_seeds)
print(f"Latent Blending pass finished ({get_time('second')}). Resulted in {len(imgs_transition)} images")
# Subselect three preview images
idx_img_prev = np.round(np.linspace(0, len(imgs_transition) - 1, 5)[1:-1]).astype(np.int32)
list_imgs_preview = []
for j in idx_img_prev:
list_imgs_preview.append(Image.fromarray(imgs_transition[j]))
# Save the preview imgs as jpgs on disk so we are not sending umcompressed data around
current_timestamp = get_time('second')
self.list_fp_imgs_current = []
for i in range(len(list_imgs_preview)):
fp_img = os.path.join(self.dp_imgs, f"img_preview_{i}_{current_timestamp}.jpg")
list_imgs_preview[i].save(fp_img)
self.list_fp_imgs_current.append(fp_img)
# Insert cheap frames for the movie
imgs_transition_ext = add_frames_linear_interp(imgs_transition, self.duration_video, self.fps)
# Save as movie
self.fp_movie = self.get_fp_video_last()
if os.path.isfile(self.fp_movie):
os.remove(self.fp_movie)
ms = MovieSaver(self.fp_movie, fps=self.fps)
for img in tqdm(imgs_transition_ext):
ms.write_frame(img)
ms.finalize()
print("DONE SAVING MOVIE! SENDING BACK...")
# Assemble Output, updating the preview images and le movie
list_return = self.list_fp_imgs_current + [self.fp_movie]
return list_return
def stack_forward(self, prompt2, seed2):
r"""
Allows to generate multi-segment movies. Sets last image -> first image with all
relevant parameters.
"""
# Save preview images, prompts and seeds into dictionary for stacking
if len(self.list_all_segments) == 0:
timestamp_session = get_time('second')
self.dp_session = os.path.join(self.dp_out, f"session_{timestamp_session}")
os.makedirs(self.dp_session)
idx_segment = len(self.list_all_segments)
dp_segment = os.path.join(self.dp_session, f"segment_{str(idx_segment).zfill(3)}")
self.list_all_segments.append(dp_segment)
self.lb.write_imgs_transition(dp_segment)
fp_movie_last = self.get_fp_video_last()
fp_movie_next = self.get_fp_video_next()
shutil.copyfile(fp_movie_last, fp_movie_next)
self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
self.lb.tree_latents[-1] = self.load_latents(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"))
self.lb.swap_forward()
shutil.copyfile(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"), os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
fp_multi = self.multi_concat()
list_out = [fp_multi]
list_out.extend([os.path.join(self.dp_imgs, f"img2_{self.user_id}.jpg")])
list_out.extend([self.fp_img_empty] * 4)
list_out.append(gr.update(interactive=False, value=prompt2))
list_out.append(gr.update(interactive=False, value=seed2))
list_out.append("")
list_out.append(np.random.randint(0, 10000000))
print(f"stack_forward: fp_multi {fp_multi}")
return list_out
def multi_concat(self):
r"""
Concatentates all stacked segments into one long movie.
"""
list_fp_movies = self.get_fp_video_all()
# Concatenate movies and save
fp_final = os.path.join(self.dp_session, f"concat_{self.user_id}.mp4")
concatenate_movies(fp_final, list_fp_movies)
return fp_final
def get_fp_video_all(self):
r"""
Collects all stacked movie segments.
"""
list_all = os.listdir(self.dp_movies)
str_beg = f"movie_{self.user_id}_"
list_user = [l for l in list_all if str_beg in l]
list_user.sort()
list_user = [os.path.join(self.dp_movies, l) for l in list_user]
return list_user
def get_fp_video_next(self):
r"""
Gets the filepath of the next movie segment.
"""
list_videos = self.get_fp_video_all()
if len(list_videos) == 0:
idx_next = 0
else: else:
print("Cannot move the image later in the sequence.") idx_next = len(list_videos)
return self.get_list_images_movie() fp_video_next = os.path.join(self.dp_movies, f"movie_{self.user_id}_{str(idx_next).zfill(3)}.mp4")
return fp_video_next
def img_movie_earlier(self): def get_fp_video_last(self):
if self.idx_img_movie_selected is not None and self.idx_img_movie_selected > 0: r"""
# Swap the selected image with the previous one Gets the current video that was saved.
self.data[self.idx_img_movie_selected-1], self.data[self.idx_img_movie_selected] = \ """
self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected-1] fp_video_last = os.path.join(self.dp_movies, f"last_{self.user_id}.mp4")
self.idx_img_movie_selected = None return fp_video_last
else:
print("Cannot move the image earlier in the sequence.")
return self.get_list_images_movie()
def generate_movie(self, t_per_segment=10):
    r"""
    Renders one transition per pair of consecutive entries in self.data
    and concatenates all transitions into self.fp_movie.

    Args:
        t_per_segment: passed to write_movie_transition for every segment.

    Returns the value of self.fp_movie.
    """
    print("starting movie gen")
    # One (prompt, negative prompt, seed) triple per entry of self.data
    keyframes = [(entry["prompt"], entry["negative_prompt"], entry["seed"]) for entry in self.data]

    segment_paths = []
    for seg_idx, (start, end) in enumerate(zip(keyframes, keyframes[1:])):
        is_first = seg_idx == 0
        if is_first:
            self.be.set_prompt1(start[0])
            self.be.set_negative_prompt(start[1])
        else:
            # For a multi transition we can save some computation time and recycle the latents
            self.be.swap_forward()
            # NOTE(review): the first segment uses the negative prompt of its start
            # entry, later segments use the one of their end entry -- confirm intended.
            self.be.set_negative_prompt(end[1])
        self.be.set_prompt2(end[0])

        segment_path = f"tmp_part_{str(seg_idx).zfill(3)}.mp4"
        # Run latent blending; img1 is only recycled after the first segment
        self.be.run_transition(
            recycle_img1=not is_first,
            fixed_seeds=[start[2], end[2]])
        # Save movie
        self.be.write_movie_transition(segment_path, t_per_segment)
        segment_paths.append(segment_path)

    # Finally, concatenate the result
    concatenate_movies(self.fp_movie, segment_paths)
    print(f"DONE! MOVIE SAVED IN {self.fp_movie}")
    return self.fp_movie
#%% Runtime engine
if __name__ == "__main__": if __name__ == "__main__":
# fp_ckpt = hf_hub_download(repo_id="stabilityai/stable-diffusion-2-1-base", filename="v2-1_512-ema-pruned.ckpt")
fp_ckpt = hf_hub_download(repo_id="stabilityai/stable-diffusion-2-1", filename="v2-1_768-ema-pruned.ckpt")
bf = BlendingFrontend(StableDiffusionHolder(fp_ckpt))
# self = BlendingFrontend(None)
# Change Parameters below
def _parse_bool_flag(value):
    r"""
    Converts a command line string into a bool.

    argparse's type=bool calls bool() on the raw string, so every non-empty
    value (including "False" and "0") would be True. Here the usual
    "off" spellings map to False; any other value stays True as before.
    """
    return str(value).strip().lower() not in ("", "0", "false", "f", "no", "n", "off")


parser = argparse.ArgumentParser(description="Latent Blending GUI")
parser.add_argument("--do_compile", type=_parse_bool_flag, default=False)
parser.add_argument("--nmb_preview_images", type=int, default=4)
parser.add_argument("--server_name", type=str, default=None)
try:
    args = parser.parse_args()
    nmb_preview_images = args.nmb_preview_images
    do_compile = args.do_compile
    server_name = args.server_name
except SystemExit:
    # If the script is run in an interactive environment (like Jupyter), parse_args might fail.
    # Fall back to the same defaults the parser declares.
    nmb_preview_images = 4
    do_compile = False  # compile SD pipes with sdfast
    server_name = None
mur = MultiUserRouter(do_compile=do_compile)
with gr.Blocks() as demo: with gr.Blocks() as demo:
with gr.Accordion("Setup", open=True) as accordion_setup: gr.HTML("""<h1>Latent Blending</h1>
# New user registration, model selection, ... <p>Create butter-smooth transitions between prompts, powered by stable diffusion</p>
with gr.Row(): <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
model = gr.Dropdown(mur.list_models, value=mur.list_models[0], label="model") <br/>
width = gr.Slider(256, 2048, 512, step=128, label='width', interactive=True) <a href="https://huggingface.co/spaces/lunarring/latentblending?duplicate=true">
height = gr.Slider(256, 2048, 512, step=128, label='height', interactive=True) <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
user_id = gr.Textbox(label="user id (filled automatically)", interactive=False) </p>""")
b_start_session = gr.Button('start session', variant='primary')
with gr.Accordion("Latent Blending (expand with arrow on right side after you clicked 'start session')", open=False) as accordion_latentblending: with gr.Row():
with gr.Row(): prompt1 = gr.Textbox(label="prompt 1")
prompt = gr.Textbox(label="prompt") prompt2 = gr.Textbox(label="prompt 2")
negative_prompt = gr.Textbox(label="negative prompt")
b_compute = gr.Button('generate preview images', variant='primary')
b_select = gr.Button('add selected image to video', variant='primary')
with gr.Row(): with gr.Row():
gallery_preview = gr.Gallery( duration_compute = gr.Slider(10, 25, bf.t_compute_max_allowed, step=1, label='waiting time', interactive=True)
label="Generated images", show_label=False, elem_id="gallery" duration_video = gr.Slider(1, 100, bf.duration_video, step=0.1, label='video duration', interactive=True)
, columns=[nmb_preview_images], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False) height = gr.Slider(256, 1024, bf.height, step=128, label='height', interactive=True)
width = gr.Slider(256, 1024, bf.width, step=128, label='width', interactive=True)
with gr.Accordion("Advanced Settings (click to expand)", open=False):
with gr.Row(): with gr.Accordion("Diffusion settings", open=True):
gr.Markdown("Your movie contains the following images (see below)") with gr.Row():
with gr.Row(): num_inference_steps = gr.Slider(5, 100, bf.num_inference_steps, step=1, label='num_inference_steps', interactive=True)
gallery_movie = gr.Gallery( guidance_scale = gr.Slider(1, 25, bf.guidance_scale, step=0.1, label='guidance_scale', interactive=True)
label="Generated images", show_label=False, elem_id="gallery" negative_prompt = gr.Textbox(label="negative prompt")
, columns=[20], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
with gr.Accordion("Seed control: adjust seeds for first and last images", open=True):
with gr.Row():
b_newseed1 = gr.Button("randomize seed 1", variant='secondary')
seed1 = gr.Number(bf.seed1, label="seed 1", interactive=True)
seed2 = gr.Number(bf.seed2, label="seed 2", interactive=True)
b_newseed2 = gr.Button("randomize seed 2", variant='secondary')
with gr.Row(): with gr.Accordion("Last image crossfeeding.", open=True):
b_delete = gr.Button('delete selected image') with gr.Row():
b_move_earlier = gr.Button('move image to earlier time') branch1_crossfeed_power = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_power, step=0.01, label='branch1 crossfeed power', interactive=True)
b_move_later = gr.Button('move image to later time') branch1_crossfeed_range = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_range, step=0.01, label='branch1 crossfeed range', interactive=True)
branch1_crossfeed_decay = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_decay, step=0.01, label='branch1 crossfeed decay', interactive=True)
with gr.Row(): with gr.Accordion("Transition settings", open=True):
b_generate_movie = gr.Button('generate movie', variant='primary') with gr.Row():
t_per_segment = gr.Slider(1, 30, 10, step=0.1, label='time per segment', interactive=True) parental_crossfeed_power = gr.Slider(0.0, 1.0, bf.parental_crossfeed_power, step=0.01, label='parental crossfeed power', interactive=True)
parental_crossfeed_range = gr.Slider(0.0, 1.0, bf.parental_crossfeed_range, step=0.01, label='parental crossfeed range', interactive=True)
parental_crossfeed_power_decay = gr.Slider(0.0, 1.0, bf.parental_crossfeed_power_decay, step=0.01, label='parental crossfeed decay', interactive=True)
with gr.Row():
depth_strength = gr.Slider(0.01, 0.99, bf.depth_strength, step=0.01, label='depth_strength', interactive=True)
guidance_scale_mid_damper = gr.Slider(0.01, 2.0, bf.guidance_scale_mid_damper, step=0.01, label='guidance_scale_mid_damper', interactive=True)
with gr.Row(): with gr.Row():
movie = gr.Video() b_compute1 = gr.Button('step1: compute first image', variant='primary')
b_compute2 = gr.Button('step2: compute last image', variant='primary')
b_compute_transition = gr.Button('step3: compute transition', variant='primary')
# bindings with gr.Row():
b_start_session.click(mur.register_new_user, inputs=[model, width, height], outputs=user_id) img1 = gr.Image(label="1/5")
b_compute.click(mur.compute_imgs, inputs=[user_id, prompt, negative_prompt], outputs=gallery_preview) img2 = gr.Image(label="2/5", show_progress=False)
b_select.click(mur.add_image_to_video, user_id, gallery_movie) img3 = gr.Image(label="3/5", show_progress=False)
gallery_preview.select(mur.preview_img_selected, user_id, None) img4 = gr.Image(label="4/5", show_progress=False)
gallery_movie.select(mur.movie_img_selected, user_id, None) img5 = gr.Image(label="5/5")
b_delete.click(mur.img_movie_delete, user_id, gallery_movie)
b_move_earlier.click(mur.img_movie_earlier, user_id, gallery_movie)
b_move_later.click(mur.img_movie_later, user_id, gallery_movie)
b_generate_movie.click(mur.generate_movie, [user_id, t_per_segment], movie)
with gr.Row():
vid_single = gr.Video(label="current single trans")
vid_multi = gr.Video(label="concatented multi trans")
if server_name is None: with gr.Row():
demo.launch(share=False, inbrowser=True, inline=False) b_stackforward = gr.Button('append last movie segment (left) to multi movie (right)', variant='primary')
else:
demo.launch(share=False, inbrowser=True, inline=False, server_name=server_name) with gr.Row():
gr.Markdown(
"""
# Parameters
## Main
- waiting time: set your waiting time for the transition. high values = better quality
- video duration: seconds per segment
- height/width: in pixels
## Diffusion settings
- num_inference_steps: number of diffusion steps
- guidance_scale: latent blending seems to prefer lower values here
- negative prompt: enter negative prompt here, applied for all images
## Last image crossfeeding
- branch1_crossfeed_power: Controls the level of cross-feeding between the first and last image branch. For preserving structures.
- branch1_crossfeed_range: Sets the duration of active crossfeed during development. High values enforce strong structural similarity.
- branch1_crossfeed_decay: Sets decay for branch1_crossfeed_power. Lower values make the decay stronger across the range.
## Transition settings
- parental_crossfeed_power: Similar to branch1_crossfeed_power, however applied for the images withinin the transition.
- parental_crossfeed_range: Similar to branch1_crossfeed_range, however applied for the images withinin the transition.
- parental_crossfeed_power_decay: Similar to branch1_crossfeed_decay, however applied for the images withinin the transition.
- depth_strength: Determines when the blending process will begin in terms of diffusion steps. Low values more inventive but can cause motion.
- guidance_scale_mid_damper: Decreases the guidance scale in the middle of a transition.
""")
with gr.Row():
user_id = gr.Textbox(label="user id", interactive=False)
# Collect all UI elements in one ordered mapping to easily pass them as inputs in gradio.
# The insertion order defines the order of list_ui_keys / list_ui_vals below.
dict_ui_elem = {
    "prompt1": prompt1,
    "negative_prompt": negative_prompt,
    "prompt2": prompt2,
    "duration_compute": duration_compute,
    "duration_video": duration_video,
    "height": height,
    "width": width,
    "depth_strength": depth_strength,
    "branch1_crossfeed_power": branch1_crossfeed_power,
    "branch1_crossfeed_range": branch1_crossfeed_range,
    "branch1_crossfeed_decay": branch1_crossfeed_decay,
    "num_inference_steps": num_inference_steps,
    "guidance_scale": guidance_scale,
    "guidance_scale_mid_damper": guidance_scale_mid_damper,
    "seed1": seed1,
    "seed2": seed2,
    "parental_crossfeed_range": parental_crossfeed_range,
    "parental_crossfeed_power": parental_crossfeed_power,
    "parental_crossfeed_power_decay": parental_crossfeed_power_decay,
    "user_id": user_id,
}
# Convert to parallel lists, as gradio doesn't seem to accept dicts
list_ui_keys = list(dict_ui_elem)
list_ui_vals = list(dict_ui_elem.values())
bf.list_ui_keys = list_ui_keys
# Seed buttons: the handler result is written into the matching seed field
b_newseed1.click(bf.randomize_seed1, outputs=seed1)
b_newseed2.click(bf.randomize_seed2, outputs=seed2)
# The three compute steps all receive the complete list of UI values
b_compute1.click(bf.compute_img1, inputs=list_ui_vals, outputs=[img1, img2, img3, img4, img5, user_id])
b_compute2.click(bf.compute_img2, inputs=list_ui_vals, outputs=[img2, img3, img4, img5, user_id])
b_compute_transition.click(bf.compute_transition,
                           inputs=list_ui_vals,
                           outputs=[img2, img3, img4, vid_single])
# stack_forward returns ten values: the multi movie, five image slots, the
# updates for prompt 1 / seed 1, an empty string for prompt 2 and finally a
# freshly drawn random integer. The last one needs seed2 as its output slot,
# otherwise it has no component to land in.
b_stackforward.click(bf.stack_forward,
                     inputs=[prompt2, seed2],
                     outputs=[vid_multi, img1, img2, img3, img4, img5, prompt1, seed1, prompt2, seed2])
demo.launch(share=bf.share, inbrowser=True, inline=False)