Compare commits

...

14 Commits

Author SHA1 Message Date
DGX fd5916a598 new gradio interface 2024-03-29 14:44:23 +00:00
DGX 02d9405d54 tood list upgrade 2024-03-27 22:11:00 +00:00
DGX 1950844705 functional gallery for movie frames 2024-03-27 22:07:28 +00:00
DGX 2a2886157f more powerful UI 2024-03-27 21:23:21 +00:00
Johannes Stelzer ac56d0e2c0 Update README.md 2024-03-19 11:39:48 +00:00
Johannes Stelzer 42bc353cb1 moved examples 2024-03-19 11:28:19 +00:00
Johannes Stelzer 49c0a5585f Merge pull request #16 from JimothyJohn/dev (Add Dockerfile) 2024-03-19 11:24:57 +00:00
JimothyJohn c10f1dd334 Add Dockerfile 2024-03-16 15:39:13 -05:00
DGX 3de2021542 simple gradio interface for saving jsons 2024-02-21 15:22:34 +00:00
DGX 02ca854f43 import fix 2024-02-21 15:22:06 +00:00
DGX 8c89cd3a25 cleanup 2024-02-21 13:49:27 +00:00
DGX 2775f538c9 import fix 2024-02-21 12:48:00 +00:00
DGX d7d750f615 import fix 2024-02-21 12:46:29 +00:00
Johannes Stelzer 50a7084627 Merge pull request #14 from lunarring/lunar_tools (Lunar tools) 2024-02-06 12:45:54 +00:00
9 changed files with 423 additions and 483 deletions

1
.gitignore vendored
View File

@@ -7,6 +7,7 @@ __pycache__/
 *.so
 # Distribution / packaging
+*.json
 .Python
 build/
 develop-eggs/

51
Dockerfile Normal file
View File

@@ -0,0 +1,51 @@
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
# Configure environment
ENV DEBIAN_FRONTEND=noninteractive \
PIP_PREFER_BINARY=1 \
CUDA_HOME=/usr/local/cuda-12.1 \
TORCH_CUDA_ARCH_LIST="8.6"
# Redirect shell
RUN rm /bin/sh && ln -s /bin/bash /bin/sh
# Install prereqs
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
git-lfs \
ffmpeg \
libgl1-mesa-dev \
libglib2.0-0 \
git \
python3-dev \
python3-pip \
# Lunar Tools prereqs
libasound2-dev \
libportaudio2 \
&& apt clean && rm -rf /var/lib/apt/lists/* \
&& ln -s /usr/bin/python3 /usr/bin/python
# Set symbolic links
RUN echo "export PATH=/usr/local/cuda/bin:$PATH" >> /etc/bash.bashrc \
&& echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> /etc/bash. bashrc \
&& echo "export CUDA_HOME=/usr/local/cuda-12.1" >> /etc/bash.bashrc
# Install Python packages: Basic, then CUDA-compatible, then custom
RUN pip3 install \
wheel \
ninja && \
pip3 install \
torch==2.1.0 \
torchvision==0.16.0 \
xformers>=0.0.22 \
triton>=2.1.0 \
--index-url https://download.pytorch.org/whl/cu121 && \
pip3 install git+https://github.com/lunarring/latentblending \
git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast
# Optionally store weights in image
# RUN mkdir -p /root/.cache/torch/hub/checkpoints/ && curl -o /root/.cache/torch/hub/checkpoints//alexnet-owt-7be5be79.pth https://download.pytorch.org/models/alexnet-owt-7be5be79.pth
# RUN git lfs install && git clone https://huggingface.co/stabilityai/sdxl-turbo /sdxl-turbo
# Clone base repo because why not
RUN git clone https://github.com/lunarring/latentblending.git
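
For convenience, here is a minimal sketch of how the image defined above might be built and smoke-tested. It assumes Docker plus the NVIDIA container toolkit are installed; the image tag `latentblending:cuda12.1` is an arbitrary placeholder, not something defined by the repository.

```python
import subprocess

# Build the image from the Dockerfile in the current directory; the tag is a placeholder.
subprocess.run(["docker", "build", "-t", "latentblending:cuda12.1", "."], check=True)

# Quick smoke test: run the container with GPU access and confirm that torch sees the GPU.
subprocess.run([
    "docker", "run", "--rm", "--gpus", "all",
    "latentblending:cuda12.1",
    "python", "-c", "import torch; print(torch.cuda.is_available())",
], check=True)
```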

View File

@@ -35,11 +35,16 @@ be = BlendingEngine(pipe, do_compile=True)
 ```
 ## Gradio UI
-Coming soon again :)
+We can launch the user-interface version with:
+```commandline
+python latentblending/gradio_ui.py
+```
+With the UI, you can iteratively generate your desired keyframes and then render the movie with latent blending at the end.
 ## Example 1: Simple transition
 ![](example1.jpg)
-To run a simple transition between two prompts, see `examples/single_trans.py`
+To run a simple transition between two prompts, see `examples/single_trans.py`, or [check this volcano eruption](https://youtu.be/O_2fpWHdnm4).
 ## Example 2: Multi transition
 To run multiple transitions between K prompts, resulting in a stitched video, see `examples/multi_trans.py`.
@@ -135,7 +140,6 @@ With latent blending, we can create transitions that appear to defy the laws of
 # Coming soon...
 - [ ] MacOS support
-- [ ] Gradio interface
 - [ ] Huggingface Space
 - [ ] Controlnet
 - [ ] IP-Adapter
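
The simple transition referenced in the README can be reproduced with a short script. The following is a minimal sketch based on the `BlendingEngine` calls visible elsewhere in this diff (`set_prompt1`/`set_prompt2`, `set_negative_prompt`, `run_transition`, `write_movie_transition`); the prompts, seeds and output filename are placeholders, and `examples/single_trans.py` remains the authoritative example.

```python
import torch
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine

torch.set_grad_enabled(False)

# Wrap an SDXL-turbo pipeline in the blending engine (do_compile=True additionally needs stable-fast).
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
be = BlendingEngine(pipe, do_compile=False)

# Placeholder prompts for the two keyframes of the transition.
be.set_prompt1("photo of a house in winter")
be.set_prompt2("photo of the same house in summer")
be.set_negative_prompt("blurry, low quality")

# Render the blend and write it out as a 10 second movie segment.
be.run_transition(fixed_seeds=[420, 421])
be.write_movie_transition("single_transition.mp4", duration_transition=10)
```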

View File

@@ -1,7 +1,7 @@
 import torch
 import warnings
 from diffusers import AutoPipelineForText2Image
-from latentblending.movie_util import concatenate_movies
+from lunar_tools import concatenate_movies
 from latentblending.blending_engine import BlendingEngine
 import numpy as np
 torch.set_grad_enabled(False)
@@ -23,9 +23,6 @@ be.set_dimensions((1024, 1024))
 nmb_prompts = 20
 # Specify a list of prompts below
 #%%

View File

@@ -0,0 +1,75 @@
import torch
import warnings
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine
from lunar_tools import concatenate_movies
import numpy as np
torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = False
warnings.filterwarnings('ignore')
import json

# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
# pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pretrained_model_name_or_path = "stabilityai/sdxl-turbo"

pipe = AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
pipe.to('cuda')
be = BlendingEngine(pipe, do_compile=False)

fp_movie = f'test.mp4'
fp_json = "movie_240221_1520.json"
duration_single_trans = 10

# Load the JSON data from the file
with open(fp_json, 'r') as file:
    data = json.load(file)

# Set up width, height, num_inference_steps
width = data[0]["width"]
height = data[0]["height"]
num_inference_steps = data[0]["num_inference_steps"]
be.set_dimensions((width, height))
be.set_num_inference_steps(num_inference_steps)

# Initialize lists for prompts, negative prompts, and seeds
list_prompts = []
list_negative_prompts = []
list_seeds = []

# Extract prompts, negative prompts, and seeds from the data
for item in data[1:]:  # Skip the first item as it contains settings
    list_prompts.append(item["prompt"])
    list_negative_prompts.append(item["negative_prompt"])
    list_seeds.append(item["seed"])

list_movie_parts = []
for i in range(len(list_prompts) - 1):
    # For a multi transition we can save some computation time and recycle the latents
    if i == 0:
        be.set_prompt1(list_prompts[i])
        be.set_negative_prompt(list_negative_prompts[i])
        be.set_prompt2(list_prompts[i + 1])
        recycle_img1 = False
    else:
        be.swap_forward()
        be.set_negative_prompt(list_negative_prompts[i+1])
        be.set_prompt2(list_prompts[i + 1])
        recycle_img1 = True

    fp_movie_part = f"tmp_part_{str(i).zfill(3)}.mp4"
    fixed_seeds = list_seeds[i:i + 2]

    # Run latent blending
    be.run_transition(
        recycle_img1=recycle_img1,
        fixed_seeds=fixed_seeds)

    # Save movie
    be.write_movie_transition(fp_movie_part, duration_single_trans)
    list_movie_parts.append(fp_movie_part)

# Finally, concatenate the result
concatenate_movies(fp_movie, list_movie_parts)
print(f"DONE! MOVIE SAVED IN {fp_movie}")

View File

@@ -1,4 +1,3 @@
 from .blending_engine import BlendingEngine
 from .diffusers_holder import DiffusersHolder
-from .movie_util import MovieSaver
 from .utils import interpolate_spherical, add_frames_linear_interp, interpolate_linear, get_spacing, get_time, yml_load, yml_save

View File

@@ -5,7 +5,6 @@ import warnings
 import time
 from tqdm.auto import tqdm
 from PIL import Image
-from latentblending.movie_util import MovieSaver
 from typing import List, Optional
 import lpips
 import platform
@@ -681,7 +680,6 @@ class BlendingEngine():
             img_leaf = Image.fromarray(img)
             img_leaf.save(os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg"))
         fp_yml = os.path.join(dp_img, "lowres.yaml")
-        self.save_statedict(fp_yml)

     def write_movie_transition(self, fp_movie, duration_transition, fps=30):
         r"""
@@ -729,35 +727,6 @@ class BlendingEngine():
             pass
         return state_dict

-    def randomize_seed(self):
-        r"""
-        Set a random seed for a fresh start.
-        """
-        seed = np.random.randint(999999999)
-        self.set_seed(seed)
-
-    def set_seed(self, seed: int):
-        r"""
-        Set the seed for a fresh start.
-        """
-        self.seed = seed
-        self.dh.seed = seed
-
-    def set_width(self, width):
-        r"""
-        Set the width of the resulting image.
-        """
-        assert np.mod(width, 64) == 0, "set_width: value needs to be divisible by 64"
-        self.width = width
-        self.dh.width = width
-
-    def set_height(self, height):
-        r"""
-        Set the height of the resulting image.
-        """
-        assert np.mod(height, 64) == 0, "set_height: value needs to be divisible by 64"
-        self.height = height
-        self.dh.height = height
-
     def swap_forward(self):
         r"""

View File

@@ -1,18 +1,3 @@
-# Copyright 2022 Lunar Ring. All rights reserved.
-# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import os
 import torch
 torch.backends.cudnn.benchmark = False
@@ -20,481 +5,340 @@ torch.set_grad_enabled(False)
[Removed: the old single-user implementation, i.e. the imports of movie_util (MovieSaver, concatenate_movies), latent_blending.LatentBlending, stable_diffusion_holder.StableDiffusionHolder, dotenv, utils (get_time, add_frames_linear_interp) and huggingface_hub.hf_hub_download; the BlendingFrontend class with its init_parameters_from_lb, init_save_dir, save_empty_image, randomize_seed1/randomize_seed2, setup_lb, compute_img1, compute_img2, compute_transition, stack_forward, multi_concat and get_fp_video_* helpers; and the old single-user Gradio layout with its crossfeed/diffusion parameter documentation and bindings. It is replaced by the multi-user interface below.]

The rewritten file:

import os
import torch
torch.backends.cudnn.benchmark = False
torch.set_grad_enabled(False)
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from tqdm.auto import tqdm
from PIL import Image
import gradio as gr
import shutil
import uuid
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine
import datetime
import tempfile
import json
from lunar_tools import concatenate_movies
import argparse

"""
TODO
- time per segment
- init phase (model, res, nmb iter)
- recycle existing movies
- hf spaces integration
"""


class MultiUserRouter():
    def __init__(
            self,
            do_compile=False
            ):
        self.user_blendingvariableholder = {}
        self.do_compile = do_compile
        self.list_models = ["stabilityai/sdxl-turbo", "stabilityai/stable-diffusion-xl-base-1.0"]
        self.init_models()

    def init_models(self):
        self.dict_blendingengines = {}
        for m in self.list_models:
            pipe = AutoPipelineForText2Image.from_pretrained(m, torch_dtype=torch.float16, variant="fp16")
            pipe.to("cuda")
            be = BlendingEngine(pipe, do_compile=self.do_compile)
            self.dict_blendingengines[m] = be

    def register_new_user(self, model, width, height):
        user_id = str(uuid.uuid4().hex.upper()[0:8])
        be = self.dict_blendingengines[model]
        be.set_dimensions((width, height))
        self.user_blendingvariableholder[user_id] = BlendingVariableHolder(be)
        return user_id

    def user_overflow_protection(self):
        pass

    def preview_img_selected(self, user_id, data: gr.SelectData, button):
        return self.user_blendingvariableholder[user_id].preview_img_selected(data, button)

    def movie_img_selected(self, user_id, data: gr.SelectData, button):
        return self.user_blendingvariableholder[user_id].movie_img_selected(data, button)

    def compute_imgs(self, user_id, prompt, negative_prompt):
        return self.user_blendingvariableholder[user_id].compute_imgs(prompt, negative_prompt)

    def get_list_images_movie(self, user_id):
        return self.user_blendingvariableholder[user_id].get_list_images_movie()

    def init_new_movie(self, user_id):
        return self.user_blendingvariableholder[user_id].init_new_movie()

    def write_json(self, user_id):
        return self.user_blendingvariableholder[user_id].write_json()

    def add_image_to_video(self, user_id):
        return self.user_blendingvariableholder[user_id].add_image_to_video()

    def img_movie_delete(self, user_id):
        return self.user_blendingvariableholder[user_id].img_movie_delete()

    def img_movie_later(self, user_id):
        return self.user_blendingvariableholder[user_id].img_movie_later()

    def img_movie_earlier(self, user_id):
        return self.user_blendingvariableholder[user_id].img_movie_earlier()

    def generate_movie(self, user_id, t_per_segment):
        return self.user_blendingvariableholder[user_id].generate_movie(t_per_segment)


#%% BlendingVariableHolder Class
class BlendingVariableHolder():
    def __init__(
            self,
            be):
        r"""
        Gradio Helper Class to collect UI data and start latent blending.
        Args:
            be:
                Blendingengine
            share: bool
                Set true to get a shareable gradio link (e.g. for running a remote server)
        """
        self.be = be

        # UI Defaults
        self.seed1 = 420
        self.seed2 = 420
        self.prompt1 = ""
        self.prompt2 = ""
        self.negative_prompt = ""
        self.nmb_preview_images = 4

        # Vars
        self.prompt = None
        self.negative_prompt = None
        self.list_seeds = []
        self.idx_movie = 0
        self.list_seeds = []
        self.list_images_preview = []
        self.data = []
        self.idx_img_preview_selected = None
        self.idx_img_movie_selected = None
        self.jpg_quality = 80
        self.fp_movie = ''

    def preview_img_selected(self, data: gr.SelectData, button):
        self.idx_img_preview_selected = data.index
        print(f"preview image {self.idx_img_preview_selected} selected, seed {self.list_seeds[self.idx_img_preview_selected]}")

    def movie_img_selected(self, data: gr.SelectData, button):
        self.idx_img_movie_selected = data.index
        print(f"movie image {self.idx_img_movie_selected} selected")

    def compute_imgs(self, prompt, negative_prompt):
        self.prompt = prompt
        self.negative_prompt = negative_prompt
        self.be.set_prompt1(prompt)
        self.be.set_prompt2(prompt)
        self.be.set_negative_prompt(negative_prompt)
        self.list_seeds = []
        self.list_images_preview = []
        self.idx_img_preview_selected = None
        for i in range(self.nmb_preview_images):
            seed = np.random.randint(0, np.iinfo(np.int32).max)
            self.be.seed1 = seed
            self.list_seeds.append(seed)
            img = self.be.compute_latents1(return_image=True)
            fn_img_tmp = f"image_{uuid.uuid4()}.jpg"
            temp_img_path = os.path.join(tempfile.gettempdir(), fn_img_tmp)
            img.save(temp_img_path)
            img.save(temp_img_path, quality=self.jpg_quality, optimize=True)
            self.list_images_preview.append(temp_img_path)
        return self.list_images_preview

    def get_list_images_movie(self):
        return [entry["preview_image"] for entry in self.data]

    def init_new_movie(self):
        current_time = datetime.datetime.now()
        self.fp_movie = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".mp4"
        self.fp_json = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".json"

    def write_json(self):
        # Write the data list to a JSON file
        data_copy = self.data.copy()
        data_copy.insert(0, {"settings": "sdxl", "width": self.be.dh.width_img, "height": self.be.dh.height_img, "num_inference_steps": self.be.dh.num_inference_steps})
        with open(self.fp_json, 'w') as f:
            json.dump(data_copy, f, indent=4)

    def add_image_to_video(self):
        if self.prompt is None:
            print("Cannot take because no prompt was set!")
            return self.get_list_images_movie()
        if self.idx_movie == 0:
            self.init_new_movie()

        self.data.append({"iteration": self.idx_movie,
                          "seed": self.list_seeds[self.idx_img_preview_selected],
                          "prompt": self.prompt,
                          "negative_prompt": self.negative_prompt,
                          "preview_image": self.list_images_preview[self.idx_img_preview_selected]
                          })
        self.write_json()
        self.idx_movie += 1
        return self.get_list_images_movie()

    def img_movie_delete(self):
        if self.idx_img_movie_selected is not None and 0 <= self.idx_img_movie_selected < len(self.data)+1:
            del self.data[self.idx_img_movie_selected]
            self.idx_img_movie_selected = None
        else:
            print(f"Invalid movie image index for deletion: {self.idx_img_movie_selected}")
        return self.get_list_images_movie()

    def img_movie_later(self):
        if self.idx_img_movie_selected is not None and self.idx_img_movie_selected < len(self.data):
            # Swap the selected image with the next one
            self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected + 1] = \
                self.data[self.idx_img_movie_selected+1], self.data[self.idx_img_movie_selected]
            self.idx_img_movie_selected = None
        else:
            print("Cannot move the image later in the sequence.")
        return self.get_list_images_movie()

    def img_movie_earlier(self):
        if self.idx_img_movie_selected is not None and self.idx_img_movie_selected > 0:
            # Swap the selected image with the previous one
            self.data[self.idx_img_movie_selected-1], self.data[self.idx_img_movie_selected] = \
                self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected-1]
            self.idx_img_movie_selected = None
        else:
            print("Cannot move the image earlier in the sequence.")
        return self.get_list_images_movie()

    def generate_movie(self, t_per_segment=10):
        print("starting movie gen")
        list_prompts = []
        list_negative_prompts = []
        list_seeds = []

        # Extract prompts, negative prompts, and seeds from the data
        for item in self.data:
            list_prompts.append(item["prompt"])
            list_negative_prompts.append(item["negative_prompt"])
            list_seeds.append(item["seed"])

        list_movie_parts = []
        for i in range(len(list_prompts) - 1):
            # For a multi transition we can save some computation time and recycle the latents
            if i == 0:
                self.be.set_prompt1(list_prompts[i])
                self.be.set_negative_prompt(list_negative_prompts[i])
                self.be.set_prompt2(list_prompts[i + 1])
                recycle_img1 = False
            else:
                self.be.swap_forward()
                self.be.set_negative_prompt(list_negative_prompts[i+1])
                self.be.set_prompt2(list_prompts[i + 1])
                recycle_img1 = True

            fp_movie_part = f"tmp_part_{str(i).zfill(3)}.mp4"
            fixed_seeds = list_seeds[i:i + 2]

            # Run latent blending
            self.be.run_transition(
                recycle_img1=recycle_img1,
                fixed_seeds=fixed_seeds)

            # Save movie
            self.be.write_movie_transition(fp_movie_part, t_per_segment)
            list_movie_parts.append(fp_movie_part)

        # Finally, concatenate the result
        concatenate_movies(self.fp_movie, list_movie_parts)
        print(f"DONE! MOVIE SAVED IN {self.fp_movie}")
        return self.fp_movie


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Latent Blending GUI")
    parser.add_argument("--do_compile", type=bool, default=False)
    parser.add_argument("--nmb_preview_images", type=int, default=4)
    parser.add_argument("--server_name", type=str, default=None)
    try:
        args = parser.parse_args()
        nmb_preview_images = args.nmb_preview_images
        do_compile = args.do_compile
        server_name = args.server_name
    except SystemExit:
        # If the script is run in an interactive environment (like Jupyter), parse_args might fail.
        nmb_preview_images = 4
        do_compile = False  # compile SD pipes with sdfast
        server_name = None

    mur = MultiUserRouter(do_compile=do_compile)

    with gr.Blocks() as demo:
        with gr.Accordion("Setup", open=True) as accordion_setup:
            # New user registration, model selection, ...
            with gr.Row():
                model = gr.Dropdown(mur.list_models, value=mur.list_models[0], label="model")
                width = gr.Slider(256, 2048, 512, step=128, label='width', interactive=True)
                height = gr.Slider(256, 2048, 512, step=128, label='height', interactive=True)
                user_id = gr.Textbox(label="user id (filled automatically)", interactive=False)
                b_start_session = gr.Button('start session', variant='primary')

        with gr.Accordion("Latent Blending (expand with arrow on right side after you clicked 'start session')", open=False) as accordion_latentblending:
            with gr.Row():
                prompt = gr.Textbox(label="prompt")
                negative_prompt = gr.Textbox(label="negative prompt")
                b_compute = gr.Button('generate preview images', variant='primary')
                b_select = gr.Button('add selected image to video', variant='primary')
            with gr.Row():
                gallery_preview = gr.Gallery(
                    label="Generated images", show_label=False, elem_id="gallery",
                    columns=[nmb_preview_images], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
            with gr.Row():
                gr.Markdown("Your movie contains the following images (see below)")
            with gr.Row():
                gallery_movie = gr.Gallery(
                    label="Generated images", show_label=False, elem_id="gallery",
                    columns=[20], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
            with gr.Row():
                b_delete = gr.Button('delete selected image')
                b_move_earlier = gr.Button('move image to earlier time')
                b_move_later = gr.Button('move image to later time')
            with gr.Row():
                b_generate_movie = gr.Button('generate movie', variant='primary')
                t_per_segment = gr.Slider(1, 30, 10, step=0.1, label='time per segment', interactive=True)
            with gr.Row():
                movie = gr.Video()

        # bindings
        b_start_session.click(mur.register_new_user, inputs=[model, width, height], outputs=user_id)
        b_compute.click(mur.compute_imgs, inputs=[user_id, prompt, negative_prompt], outputs=gallery_preview)
        b_select.click(mur.add_image_to_video, user_id, gallery_movie)
        gallery_preview.select(mur.preview_img_selected, user_id, None)
        gallery_movie.select(mur.movie_img_selected, user_id, None)
        b_delete.click(mur.img_movie_delete, user_id, gallery_movie)
        b_move_earlier.click(mur.img_movie_earlier, user_id, gallery_movie)
        b_move_later.click(mur.img_movie_later, user_id, gallery_movie)
        b_generate_movie.click(mur.generate_movie, [user_id, t_per_segment], movie)

    if server_name is None:
        demo.launch(share=False, inbrowser=True, inline=False)
    else:
        demo.launch(share=False, inbrowser=True, inline=False, server_name=server_name)
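
For reference, the flow the new interface wires together (register a session, preview candidate keyframes, add the selected one, render) can also be driven headlessly. This is a minimal sketch: the module path `latentblending.gradio_ui` is assumed from the README's launch command, the prompts are placeholders, and setting `idx_img_preview_selected` directly stands in for the gallery click that would normally call `preview_img_selected()`.

```python
from latentblending.gradio_ui import MultiUserRouter  # module path assumed from the README

mur = MultiUserRouter(do_compile=False)
user_id = mur.register_new_user("stabilityai/sdxl-turbo", 512, 512)
vh = mur.user_blendingvariableholder[user_id]

for prompt in ["photo of a beach at sunrise", "photo of the same beach in a storm"]:
    vh.compute_imgs(prompt, "blurry")   # generates nmb_preview_images candidate keyframes
    vh.idx_img_preview_selected = 0     # normally set by clicking a preview image in the gallery
    vh.add_image_to_video()             # appends the keyframe and updates the JSON on disk

fp_movie = vh.generate_movie(t_per_segment=10)
print(fp_movie)
```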