Compare commits

..

35 Commits

Author SHA1 Message Date
DGX fd5916a598 new gradio interface 2024-03-29 14:44:23 +00:00
DGX 02d9405d54 tood list upgrade 2024-03-27 22:11:00 +00:00
DGX 1950844705 functional gallery for movie frames 2024-03-27 22:07:28 +00:00
DGX 2a2886157f more powerful UI 2024-03-27 21:23:21 +00:00
Johannes Stelzer ac56d0e2c0 Update README.md 2024-03-19 11:39:48 +00:00
Johannes Stelzer 42bc353cb1 moved examples 2024-03-19 11:28:19 +00:00
Johannes Stelzer 49c0a5585f Merge pull request #16 from JimothyJohn/dev: Add Dockerfile 2024-03-19 11:24:57 +00:00
JimothyJohn c10f1dd334 Add Dockerfile 2024-03-16 15:39:13 -05:00
DGX 3de2021542 simple gradio interface for saving jsons 2024-02-21 15:22:34 +00:00
DGX 02ca854f43 import fix 2024-02-21 15:22:06 +00:00
DGX 8c89cd3a25 cleanup 2024-02-21 13:49:27 +00:00
DGX 2775f538c9 import fix 2024-02-21 12:48:00 +00:00
DGX d7d750f615 import fix 2024-02-21 12:46:29 +00:00
Johannes Stelzer 50a7084627 Merge pull request #14 from lunarring/lunar_tools: Lunar tools 2024-02-06 12:45:54 +00:00
DGX 37fc1cf05f removed ffmpeg 2024-02-06 12:45:07 +00:00
DGX 0d44404903 removed dependency 2024-02-06 12:36:41 +00:00
DGX 5ea7981a9c random seeds 2024-02-06 12:01:42 +00:00
Johannes Stelzer 179a42b9bf Update README.md 2024-02-05 14:07:34 +00:00
DGX b9ed277055 pretrained path 2024-02-01 13:26:12 +00:00
DGX 896ba0c768 missing numpy import 2024-02-01 13:25:15 +00:00
Johannes Stelzer f72cc12fb3 Update README.md 2024-01-31 16:22:38 +00:00
Johannes Stelzer 01a960c48d diffusersholder automatically spawned in blendingengine 2024-01-31 11:12:47 +00:00
Johannes Stelzer 646a3c757e Update README.md 2024-01-26 11:52:04 +00:00
Johannes Stelzer 47e72ed76f Update README.md 2024-01-10 10:00:33 +01:00
DGX 4b235b874e compile flag with sfast 2024-01-10 08:58:30 +00:00
DGX 1775c9a90a Merge branch 'main' of github.com:lunarring/latentblending 2024-01-10 08:47:44 +00:00
DGX a0f35f2a41 moved examples 2024-01-10 08:47:35 +00:00
Johannes Stelzer f5965154ba trailing comma 2024-01-09 21:30:37 +01:00
Johannes Stelzer b83d3ee0a0 lpips darwin 2024-01-09 21:21:23 +01:00
Johannes Stelzer 4501d80044 Update README.md 2024-01-09 21:16:10 +01:00
Johannes Stelzer 6e138c54a2 Update README.md 2024-01-09 21:12:14 +01:00
Johannes Stelzer 1ba4b578a0 Update README.md 2024-01-09 21:11:40 +01:00
DGX 4042a098b0 accelerate 2024-01-09 20:10:12 +00:00
DGX 9d5b545c1a accelerate 2024-01-09 20:09:16 +00:00
Johannes Stelzer f1a1b47923 Merge pull request #13 from lunarring/package: Package 2024-01-09 21:08:37 +01:00
12 changed files with 530 additions and 854 deletions

.gitignore (1 change)

@@ -7,6 +7,7 @@ __pycache__/
*.so
# Distribution / packaging
+*.json
.Python
build/
develop-eggs/

Dockerfile (new file, 51 lines)

@@ -0,0 +1,51 @@
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
# Configure environment
ENV DEBIAN_FRONTEND=noninteractive \
PIP_PREFER_BINARY=1 \
CUDA_HOME=/usr/local/cuda-12.1 \
TORCH_CUDA_ARCH_LIST="8.6"
# Redirect shell
RUN rm /bin/sh && ln -s /bin/bash /bin/sh
# Install prereqs
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
git-lfs \
ffmpeg \
libgl1-mesa-dev \
libglib2.0-0 \
git \
python3-dev \
python3-pip \
# Lunar Tools prereqs
libasound2-dev \
libportaudio2 \
&& apt clean && rm -rf /var/lib/apt/lists/* \
&& ln -s /usr/bin/python3 /usr/bin/python
# Set symbolic links
RUN echo "export PATH=/usr/local/cuda/bin:$PATH" >> /etc/bash.bashrc \
&& echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> /etc/bash. bashrc \
&& echo "export CUDA_HOME=/usr/local/cuda-12.1" >> /etc/bash.bashrc
# Install Python packages: Basic, then CUDA-compatible, then custom
RUN pip3 install \
wheel \
ninja && \
pip3 install \
torch==2.1.0 \
torchvision==0.16.0 \
xformers>=0.0.22 \
triton>=2.1.0 \
--index-url https://download.pytorch.org/whl/cu121 && \
pip3 install git+https://github.com/lunarring/latentblending \
git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast
# Optionally store weights in image
# RUN mkdir -p /root/.cache/torch/hub/checkpoints/ && curl -o /root/.cache/torch/hub/checkpoints//alexnet-owt-7be5be79.pth https://download.pytorch.org/models/alexnet-owt-7be5be79.pth
# RUN git lfs install && git clone https://huggingface.co/stabilityai/sdxl-turbo /sdxl-turbo
# Clone base repo because why not
RUN git clone https://github.com/lunarring/latentblending.git

README.md

@@ -2,32 +2,53 @@
Latent blending enables video transitions with incredible smoothness between prompts, computed within seconds. Powered by [stable diffusion XL](https://stability.ai/stable-diffusion), this method involves specific mixing of intermediate latent representations to create a seamless transition with users having the option to fully customize the transition directly in high-resolution. The new version also supports SDXL Turbo, allowing to generate transitions faster than they are typically played back!
```python
+import torch
+from diffusers import AutoPipelineForText2Image
+from latentblending.blending_engine import BlendingEngine
+from latentblending.diffusers_holder import DiffusersHolder
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16").to("cuda")
-dh = DiffusersHolder(pipe)
-lb = LatentBlending(dh)
-lb.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
-lb.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
-lb.set_negative_prompt("blurry, ugly, pale")
+be = BlendingEngine(pipe)
+be.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
+be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
+be.set_negative_prompt("blurry, ugly, pale")
# Run latent blending
-lb.run_transition()
+be.run_transition()
# Save movie
-lb.write_movie_transition('movie_example1.mp4', duration_transition=12)
+be.write_movie_transition('movie_example1.mp4', duration_transition=12)
```
+# Installation
+```commandline
+pip install git+https://github.com/lunarring/latentblending
+```
+# Extra speedup with stable_fast compile
+Install https://github.com/chengzeyi/stable-fast
+Then enable pipe compilation by setting *do_compile=True*
+```python
+be = BlendingEngine(pipe, do_compile=True)
+```
## Gradio UI
-Coming soon again :)
+We can launch the user-interface version with:
+```commandline
+python latentblending/gradio_ui.py
+```
+With the UI, you can iteratively generate your desired keyframes, and then render the movie with latent blending at the end.
## Example 1: Simple transition
![](example1.jpg)
-To run a simple transition between two prompts, run `example1_standard.py`
+To run a simple transition between two prompts, see `examples/single_trans.py`, or [check this volcano eruption](https://youtu.be/O_2fpWHdnm4).
## Example 2: Multi transition
-To run multiple transition between K prompts, resulting in a stitched video, run `example2_multitrans.py`.
+To run multiple transitions between K prompts, resulting in a stitched video, see `examples/multi_trans.py`.
-[View a longer example video here.](https://vimeo.com/789052336/80dcb545b2)
+[View a longer example video here.](https://youtu.be/RLF-yW5dR_Q)
# Customization
@@ -35,19 +56,19 @@ To run multiple transition between K prompts, resulting in a stitched video, run
### Change the height/width
```python
size_output = (1024, 768)
-lb.set_dimensions(size_output)
+be.set_dimensions(size_output)
```
### Change the number of diffusion steps (set_num_inference_steps)
```python
-lb.set_num_inference_steps(50)
+be.set_num_inference_steps(50)
```
For SDXL this is set as default=30, for SDXL Turbo a value of 4 is taken.
### Change the guidance scale
```python
-lb.set_guidance_scale(3.0)
+be.set_guidance_scale(3.0)
```
For SDXL this is set as default=4.0, for SDXL Turbo a value of 0 is taken.
@@ -55,7 +76,7 @@ For SDXL this is set as default=4.0, for SDXL Turbo a value of 0 is taken.
```python
depth_strength = 0.5
nmb_max_branches = 15
-lb.set_branching(depth_strength=depth_strength, t_compute_max_allowed=None, nmb_max_branches=None)
+be.set_branching(depth_strength=depth_strength, t_compute_max_allowed=None, nmb_max_branches=None)
```
* depth_strength: The strength of the diffusion iterations determines when the blending process will begin. A value close to zero results in more creative and intricate outcomes, while a value closer to one indicates a simpler alpha blending. However, low values may also bring about the introduction of additional objects and motion.
* t_compute_max_allowed: maximum time allowed for computation. Higher values give better results but take longer. Either provide t_compute_max_allowed or nmb_max_branches. Does not work for SDXL Turbo.
@@ -66,7 +87,7 @@ You can find the [most relevant parameters here.](parameters.md)
### Change guidance scale
```python
-lb.set_guidance_scale(5.0)
+be.set_guidance_scale(5.0)
```
### Crossfeeding to the last image.
@@ -76,7 +97,7 @@ Cross-feeding latents is a key feature of latent blending. Here, you can set how
crossfeed_power = 0.5 # 50% of the latents in the last branch are copied from branch1
crossfeed_range = 0.7 # The crossfeed is active until 70% of num_iteration, then switched off
crossfeed_decay = 0.2 # The power of the crossfeed decreases over diffusion iterations, here it would be 0.5*0.2=0.1 in the end of the range.
-lb.set_branch1_crossfeed(crossfeed_power, crossfeed_range, crossfeed_decay)
+be.set_branch1_crossfeed(crossfeed_power, crossfeed_range, crossfeed_decay)
```
### Crossfeeding to all transition images
@@ -86,16 +107,10 @@ Here, you can set how much the parent branches influence the mixed one. In the a
crossfeed_power = 0.5 # 50% of the latents in the last branch are copied from the parents
crossfeed_range = 0.7 # The crossfeed is active until 70% of num_iteration, then switched off
crossfeed_decay = 0.2 # The power of the crossfeed decreases over diffusion iterations, here it would be 0.5*0.2=0.1 in the end of the range.
-lb.set_parental_crossfeed(crossfeed_power, crossfeed_range, crossfeed_decay)
+be.set_parental_crossfeed(crossfeed_power, crossfeed_range, crossfeed_decay)
```
-# Installation
-#### Packages
-```commandline
-pip install -r requirements.txt
-```
# How does latent blending work?
## Method
![](animation.gif)
@@ -104,9 +119,9 @@ In the figure above, a diffusion tree is illustrated. The diffusion steps are re
The concrete parameters for the transition above would be:
```
-lb.set_branch1_crossfeed(crossfeed_power=0.8, crossfeed_range=0.6, crossfeed_decay=0.4)
-lb.set_parental_crossfeed(crossfeed_power=0.8, crossfeed_range=0.8, crossfeed_decay=0.2)
-imgs_transition = lb.run_transition(num_inference_steps=10, depth_strength=0.2, nmb_max_branches=7)
+be.set_branch1_crossfeed(crossfeed_power=0.8, crossfeed_range=0.6, crossfeed_decay=0.4)
+be.set_parental_crossfeed(crossfeed_power=0.8, crossfeed_range=0.8, crossfeed_decay=0.2)
+imgs_transition = be.run_transition(num_inference_steps=10, depth_strength=0.2, nmb_max_branches=7)
```
## Perceptual aspects
@@ -124,7 +139,7 @@ With latent blending, we can create transitions that appear to defy the laws of
* Inpaint support dropped (as it only makes sense for a single transition)
# Coming soon...
-- [ ] Gradio interface
+- [ ] MacOS support
- [ ] Huggingface Space
- [ ] Controlnet
- [ ] IP-Adapter
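Pulling the snippets from the updated README together, the new BlendingEngine workflow looks roughly as follows; a minimal end-to-end sketch in which the model choice and all parameter values are illustrative rather than library defaults:

```python
# Minimal sketch combining the README snippets above; values are illustrative.
import torch
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine

pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

be = BlendingEngine(pipe)            # pass do_compile=True if stable-fast is installed
be.set_dimensions((1024, 768))       # output resolution (width, height)
be.set_num_inference_steps(4)        # README: SDXL Turbo uses 4 by default
be.set_guidance_scale(0.0)           # README: SDXL Turbo uses 0 by default
be.set_branching(depth_strength=0.5, t_compute_max_allowed=None, nmb_max_branches=15)

be.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
be.set_negative_prompt("blurry, ugly, pale")

be.run_transition()
be.write_movie_transition('movie_example1.mp4', duration_transition=12)
```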

examples/multi_trans.py

@@ -1,33 +1,39 @@
import torch
import warnings
-from blending_engine import BlendingEngine
-from diffusers_holder import DiffusersHolder
from diffusers import AutoPipelineForText2Image
-from movie_util import concatenate_movies
+from lunar_tools import concatenate_movies
+from latentblending.blending_engine import BlendingEngine
+import numpy as np
torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = False
warnings.filterwarnings('ignore')
# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
-pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
-pipe.to('cuda')
-dh = DiffusersHolder(pipe)
+pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
+# pretrained_model_name_or_path = "stabilityai/sdxl-turbo"
+pipe = AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
+pipe.to('cuda')
+be = BlendingEngine(pipe, do_compile=True)
+be.set_negative_prompt("blurry, pale, low-res, lofi")
# %% Let's setup the multi transition
fps = 30
duration_single_trans = 10
+be.set_dimensions((1024, 1024))
+nmb_prompts = 20
# Specify a list of prompts below
+#%%
list_prompts = []
-list_prompts.append("Photo of a house, high detail")
-list_prompts.append("Photo of an elephant in african savannah")
-list_prompts.append("photo of a house, high detail")
+list_prompts.append("high resolution ultra 8K image with lake and forest")
+list_prompts.append("strange and alien desolate lanscapes 8K")
+list_prompts.append("ultra high res psychedelic skyscraper city landscape 8K unreal engine")
+#%%
+fp_movie = f'surreal_nmb{len(list_prompts)}.mp4'
-# You can optionally specify the seeds
-list_seeds = [95437579, 33259350, 956051013]
-fp_movie = 'movie_example2.mp4'
-be = BlendingEngine(dh)
+# Specify the seeds
+list_seeds = np.random.randint(0, np.iinfo(np.int32).max, len(list_prompts))
list_movie_parts = []
for i in range(len(list_prompts) - 1):

New example script (75 lines)

@@ -0,0 +1,75 @@
import torch
import warnings
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine
from lunar_tools import concatenate_movies
import numpy as np
torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = False
warnings.filterwarnings('ignore')
import json
# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
# pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pretrained_model_name_or_path = "stabilityai/sdxl-turbo"
pipe = AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
pipe.to('cuda')
be = BlendingEngine(pipe, do_compile=False)
fp_movie = f'test.mp4'
fp_json = "movie_240221_1520.json"
duration_single_trans = 10
# Load the JSON data from the file
with open(fp_json, 'r') as file:
data = json.load(file)
# Set up width, height, num_inference steps
width = data[0]["width"]
height = data[0]["height"]
num_inference_steps = data[0]["num_inference_steps"]
be.set_dimensions((width, height))
be.set_num_inference_steps(num_inference_steps)
# Initialize lists for prompts, negative prompts, and seeds
list_prompts = []
list_negative_prompts = []
list_seeds = []
# Extract prompts, negative prompts, and seeds from the data
for item in data[1:]: # Skip the first item as it contains settings
list_prompts.append(item["prompt"])
list_negative_prompts.append(item["negative_prompt"])
list_seeds.append(item["seed"])
list_movie_parts = []
for i in range(len(list_prompts) - 1):
# For a multi transition we can save some computation time and recycle the latents
if i == 0:
be.set_prompt1(list_prompts[i])
be.set_negative_prompt(list_negative_prompts[i])
be.set_prompt2(list_prompts[i + 1])
recycle_img1 = False
else:
be.swap_forward()
be.set_negative_prompt(list_negative_prompts[i+1])
be.set_prompt2(list_prompts[i + 1])
recycle_img1 = True
fp_movie_part = f"tmp_part_{str(i).zfill(3)}.mp4"
fixed_seeds = list_seeds[i:i + 2]
# Run latent blending
be.run_transition(
recycle_img1=recycle_img1,
fixed_seeds=fixed_seeds)
# Save movie
be.write_movie_transition(fp_movie_part, duration_single_trans)
list_movie_parts.append(fp_movie_part)
# Finally, concatente the result
concatenate_movies(fp_movie, list_movie_parts)
print(f"DONE! MOVIE SAVED IN {fp_movie}")

examples/single_trans.py

@@ -1,8 +1,7 @@
import torch
import warnings
-from blending_engine import BlendingEngine
-from diffusers_holder import DiffusersHolder
from diffusers import AutoPipelineForText2Image
+from latentblending.blending_engine import BlendingEngine
warnings.filterwarnings('ignore')
torch.set_grad_enabled(False)
@@ -12,9 +11,7 @@ torch.backends.cudnn.benchmark = False
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
-dh = DiffusersHolder(pipe)
-be = BlendingEngine(dh)
+be = BlendingEngine(pipe)
be.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
be.set_negative_prompt("blurry, ugly, pale")

latentblending/__init__.py

@@ -1,4 +1,3 @@
from .blending_engine import BlendingEngine
from .diffusers_holder import DiffusersHolder
-from .movie_util import MovieSaver
from .utils import interpolate_spherical, add_frames_linear_interp, interpolate_linear, get_spacing, get_time, yml_load, yml_save
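With the trimmed `__init__.py`, MovieSaver is no longer re-exported from the package (it now comes from lunar_tools); a minimal import check, assuming the package is installed via pip as shown in the README:

```python
# Names re-exported by latentblending/__init__.py after this change.
from latentblending import BlendingEngine, DiffusersHolder
from latentblending import interpolate_spherical, interpolate_linear, get_time

print(BlendingEngine.__name__, DiffusersHolder.__name__)
```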

latentblending/blending_engine.py

@@ -5,10 +5,12 @@ import warnings
import time
from tqdm.auto import tqdm
from PIL import Image
-from latentblending.movie_util import MovieSaver
from typing import List, Optional
import lpips
-from latentblending.utils import interpolate_spherical, interpolate_linear, add_frames_linear_interp, yml_load, yml_save
+import platform
+from latentblending.diffusers_holder import DiffusersHolder
+from latentblending.utils import interpolate_spherical, interpolate_linear, add_frames_linear_interp
+from lunar_tools import MovieSaver, fill_up_frames_linear_interpolation
warnings.filterwarnings('ignore')
torch.backends.cudnn.benchmark = False
torch.set_grad_enabled(False)
@@ -17,12 +19,15 @@ torch.set_grad_enabled(False)
class BlendingEngine():
def __init__(
self,
-dh: None,
+pipe: None,
+do_compile: bool = False,
guidance_scale_mid_damper: float = 0.5,
mid_compression_scaler: float = 1.2):
r"""
Initializes the latent blending class.
Args:
+pipe: diffusers pipeline (SDXL)
+do_compile: compile pipeline for faster inference using stable fast
guidance_scale_mid_damper: float = 0.5
Reduces the guidance scale towards the middle of the transition.
A value of 0.5 would decrease the guidance_scale towards the middle linearly by 0.5.
@@ -35,7 +40,8 @@ class BlendingEngine():
and guidance_scale_mid_damper <= 1.0, \
f"guidance_scale_mid_damper neees to be in interval (0,1], you provided {guidance_scale_mid_damper}"
-self.dh = dh
+self.dh = DiffusersHolder(pipe)
self.device = self.dh.device
self.set_dimensions()
@@ -64,6 +70,9 @@ class BlendingEngine():
self.multi_transition_img_first = None
self.multi_transition_img_last = None
self.dt_unet_step = 0
+if platform.system() == "Darwin":
+self.lpips = lpips.LPIPS(net='alex')
+else:
self.lpips = lpips.LPIPS(net='alex').cuda(self.device)
self.set_prompt1("")
@@ -76,13 +85,23 @@ class BlendingEngine():
self.benchmark_speed()
self.set_branching()
+if do_compile:
+print("starting compilation")
+from sfast.compilers.diffusion_pipeline_compiler import (compile, CompilationConfig)
+self.dh.pipe.enable_xformers_memory_efficient_attention()
+config = CompilationConfig.Default()
+config.enable_xformers = True
+config.enable_triton = True
+config.enable_cuda_graph = True
+self.dh.pipe = compile(self.dh.pipe, config)
def benchmark_speed(self):
"""
Measures the time per diffusion step and for the vae decoding
"""
+print("starting speed benchmark...")
text_embeddings = self.dh.get_text_embedding("test")
latents_start = self.dh.get_noise(np.random.randint(111111))
# warmup
@@ -96,6 +115,7 @@ class BlendingEngine():
t0 = time.time()
img = self.dh.latent2image(list_latents[-1])
self.dt_vae = time.time() - t0
+print(f"time per unet iteration: {self.dt_unet_step} time for vae: {self.dt_vae}")
def set_dimensions(self, size_output=None):
r"""
@@ -660,7 +680,6 @@ class BlendingEngine():
img_leaf = Image.fromarray(img)
img_leaf.save(os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg"))
fp_yml = os.path.join(dp_img, "lowres.yaml")
-self.save_statedict(fp_yml)
def write_movie_transition(self, fp_movie, duration_transition, fps=30):
r"""
@@ -676,7 +695,7 @@ class BlendingEngine():
"""
# Let's get more cheap frames via linear interpolation (duration_transition*fps frames)
-imgs_transition_ext = add_frames_linear_interp(self.tree_final_imgs, duration_transition, fps)
+imgs_transition_ext = fill_up_frames_linear_interpolation(self.tree_final_imgs, duration_transition, fps)
# Save as MP4
if os.path.isfile(fp_movie):
@@ -686,12 +705,6 @@ class BlendingEngine():
ms.write_frame(img)
ms.finalize()
-def save_statedict(self, fp_yml):
-# Dump everything relevant into yaml
-imgs_transition = self.tree_final_imgs
-state_dict = self.get_state_dict()
-state_dict['nmb_images'] = len(imgs_transition)
-yml_save(fp_yml, state_dict)
def get_state_dict(self):
state_dict = {}
@@ -714,35 +727,6 @@ class BlendingEngine():
pass
return state_dict
-def randomize_seed(self):
-r"""
-Set a random seed for a fresh start.
-"""
-seed = np.random.randint(999999999)
-self.set_seed(seed)
-def set_seed(self, seed: int):
-r"""
-Set a the seed for a fresh start.
-"""
-self.seed = seed
-self.dh.seed = seed
-def set_width(self, width):
-r"""
-Set the width of the resulting image.
-"""
-assert np.mod(width, 64) == 0, "set_width: value needs to be divisible by 64"
-self.width = width
-self.dh.width = width
-def set_height(self, height):
-r"""
-Set the height of the resulting image.
-"""
-assert np.mod(height, 64) == 0, "set_height: value needs to be divisible by 64"
-self.height = height
-self.dh.height = height
def swap_forward(self):
r"""
@@ -813,14 +797,18 @@ if __name__ == "__main__":
from diffusers import AutoencoderTiny
# pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
pretrained_model_name_or_path = "stabilityai/sdxl-turbo"
-pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
+pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path)
+# pipe.to("mps")
pipe.to("cuda")
-pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
-pipe.vae = pipe.vae.cuda()
+# pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
+# pipe.vae = pipe.vae.cuda()
dh = DiffusersHolder(pipe)
+xxx
# %% Next let's set up all parameters
prompt1 = "photo of underwater landscape, fish, und the sea, incredible detail, high resolution"
prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
@@ -829,19 +817,20 @@ if __name__ == "__main__":
duration_transition = 12 # In seconds
# Spawn latent blending
-lb = LatentBlending(dh)
-lb.set_prompt1(prompt1)
-lb.set_prompt2(prompt2)
-lb.set_negative_prompt(negative_prompt)
+be = BlendingEngine(dh)
+be.set_prompt1(prompt1)
+be.set_prompt2(prompt2)
+be.set_negative_prompt(negative_prompt)
# Run latent blending
t0 = time.time()
-lb.run_transition(fixed_seeds=[420, 421])
+be.run_transition(fixed_seeds=[420, 421])
dt = time.time() - t0
+print(f"dt = {dt}")
# Save movie
fp_movie = f'test.mp4'
-lb.write_movie_transition(fp_movie, duration_transition)
+be.write_movie_transition(fp_movie, duration_transition)
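The new benchmark printout exposes the measured per-UNet-step time and VAE decode time. As a rough illustration of how such timings relate to the t_compute_max_allowed budget described in the README, here is a back-of-the-envelope sketch; it is not the library's internal set_branching() logic:

```python
# Rough estimate: how many transition images fit into a compute budget, given the
# timings printed by benchmark_speed(). Illustrative only; BlendingEngine's real
# branching schedule may differ.
def estimate_nmb_branches(t_compute_max_allowed, dt_unet_step, dt_vae, num_inference_steps):
    t_per_image = num_inference_steps * dt_unet_step + dt_vae   # diffusion steps + VAE decode
    return max(2, int(t_compute_max_allowed // t_per_image))    # at least the two endpoint images

# Example with made-up timings: 10 s budget, 25 ms per UNet step, 0.1 s decode, 4 steps (SDXL Turbo).
print(estimate_nmb_branches(t_compute_max_allowed=10.0, dt_unet_step=0.025, dt_vae=0.1, num_inference_steps=4))
```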

latentblending/gradio_ui.py

@@ -1,18 +1,3 @@
# Copyright 2022 Lunar Ring. All rights reserved.
# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch
torch.backends.cudnn.benchmark = False
@@ -20,481 +5,340 @@ torch.set_grad_enabled(False)
import numpy as np
import warnings
warnings.filterwarnings('ignore')
-import warnings
from tqdm.auto import tqdm
from PIL import Image
-from movie_util import MovieSaver, concatenate_movies
-from latent_blending import LatentBlending
-from stable_diffusion_holder import StableDiffusionHolder
import gradio as gr
-from dotenv import find_dotenv, load_dotenv
import shutil
import uuid
-from utils import get_time, add_frames_linear_interp
-from huggingface_hub import hf_hub_download
+from diffusers import AutoPipelineForText2Image
+from latentblending.blending_engine import BlendingEngine
+import datetime
+import tempfile
+import json
+from lunar_tools import concatenate_movies
+import argparse
+"""
+TODO
+- time per segment
+- init phase (model, res, nmb iter)
+- recycle existing movies
+- hf spaces integration
+"""
-class BlendingFrontend():
+class MultiUserRouter():
def __init__(
self,
-sdh,
-share=False):
+do_compile=False
+):
self.user_blendingvariableholder = {}
self.do_compile = do_compile
self.list_models = ["stabilityai/sdxl-turbo", "stabilityai/stable-diffusion-xl-base-1.0"]
self.init_models()
def init_models(self):
self.dict_blendingengines = {}
for m in self.list_models:
pipe = AutoPipelineForText2Image.from_pretrained(m, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
be = BlendingEngine(pipe, do_compile=self.do_compile)
self.dict_blendingengines[m] = be
def register_new_user(self, model, width, height):
user_id = str(uuid.uuid4().hex.upper()[0:8])
be = self.dict_blendingengines[model]
be.set_dimensions((width, height))
self.user_blendingvariableholder[user_id] = BlendingVariableHolder(be)
return user_id
def user_overflow_protection(self):
pass
def preview_img_selected(self, user_id, data: gr.SelectData, button):
return self.user_blendingvariableholder[user_id].preview_img_selected(data, button)
def movie_img_selected(self, user_id, data: gr.SelectData, button):
return self.user_blendingvariableholder[user_id].movie_img_selected(data, button)
def compute_imgs(self, user_id, prompt, negative_prompt):
return self.user_blendingvariableholder[user_id].compute_imgs(prompt, negative_prompt)
def get_list_images_movie(self, user_id):
return self.user_blendingvariableholder[user_id].get_list_images_movie()
def init_new_movie(self, user_id):
return self.user_blendingvariableholder[user_id].init_new_movie()
def write_json(self, user_id):
return self.user_blendingvariableholder[user_id].write_json()
def add_image_to_video(self, user_id):
return self.user_blendingvariableholder[user_id].add_image_to_video()
def img_movie_delete(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_delete()
def img_movie_later(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_later()
def img_movie_earlier(self, user_id):
return self.user_blendingvariableholder[user_id].img_movie_earlier()
def generate_movie(self, user_id, t_per_segment):
return self.user_blendingvariableholder[user_id].generate_movie(t_per_segment)
#%% BlendingVariableHolder Class
class BlendingVariableHolder():
def __init__(
self,
be):
r""" r"""
Gradio Helper Class to collect UI data and start latent blending. Gradio Helper Class to collect UI data and start latent blending.
Args: Args:
sdh: be:
StableDiffusionHolder Blendingengine
share: bool share: bool
Set true to get a shareable gradio link (e.g. for running a remote server) Set true to get a shareable gradio link (e.g. for running a remote server)
""" """
self.share = share self.be = be
# UI Defaults # UI Defaults
self.num_inference_steps = 30
self.depth_strength = 0.25
self.seed1 = 420 self.seed1 = 420
self.seed2 = 420 self.seed2 = 420
self.prompt1 = "" self.prompt1 = ""
self.prompt2 = "" self.prompt2 = ""
self.negative_prompt = "" self.negative_prompt = ""
self.fps = 30 self.nmb_preview_images = 4
self.duration_video = 8
self.t_compute_max_allowed = 10
self.lb = LatentBlending(sdh)
self.lb.sdh.num_inference_steps = self.num_inference_steps
self.init_parameters_from_lb()
self.init_save_dir()
# Vars # Vars
self.list_fp_imgs_current = [] self.prompt = None
self.recycle_img1 = False self.negative_prompt = None
self.recycle_img2 = False self.list_seeds = []
self.list_all_segments = [] self.idx_movie = 0
self.dp_session = "" self.list_seeds = []
self.user_id = None self.list_images_preview = []
self.data = []
self.idx_img_preview_selected = None
self.idx_img_movie_selected = None
self.jpg_quality = 80
self.fp_movie = ''
-def init_parameters_from_lb(self):
-r"""
-Automatically init parameters from latentblending instance
+def preview_img_selected(self, data: gr.SelectData, button):
+self.idx_img_preview_selected = data.index
+print(f"preview image {self.idx_img_preview_selected} selected, seed {self.list_seeds[self.idx_img_preview_selected]}")
"""
self.height = self.lb.sdh.height
self.width = self.lb.sdh.width
self.guidance_scale = self.lb.guidance_scale
self.guidance_scale_mid_damper = self.lb.guidance_scale_mid_damper
self.mid_compression_scaler = self.lb.mid_compression_scaler
self.branch1_crossfeed_power = self.lb.branch1_crossfeed_power
self.branch1_crossfeed_range = self.lb.branch1_crossfeed_range
self.branch1_crossfeed_decay = self.lb.branch1_crossfeed_decay
self.parental_crossfeed_power = self.lb.parental_crossfeed_power
self.parental_crossfeed_range = self.lb.parental_crossfeed_range
self.parental_crossfeed_power_decay = self.lb.parental_crossfeed_power_decay
-def init_save_dir(self):
-r"""
-Initializes the directory where stuff is being saved.
+def movie_img_selected(self, data: gr.SelectData, button):
+self.idx_img_movie_selected = data.index
+print(f"movie image {self.idx_img_movie_selected} selected")
You can specify this directory in a ".env" file in your latentblending root, setting
DIR_OUT='/path/to/saving'
"""
load_dotenv(find_dotenv(), verbose=False)
self.dp_out = os.getenv("DIR_OUT")
if self.dp_out is None:
self.dp_out = ""
self.dp_imgs = os.path.join(self.dp_out, "imgs")
os.makedirs(self.dp_imgs, exist_ok=True)
self.dp_movies = os.path.join(self.dp_out, "movies")
os.makedirs(self.dp_movies, exist_ok=True)
self.save_empty_image()
-def save_empty_image(self):
-r"""
-Saves an empty/black dummy image.
-"""
-self.fp_img_empty = os.path.join(self.dp_imgs, 'empty.jpg')
-Image.fromarray(np.zeros((self.height, self.width, 3), dtype=np.uint8)).save(self.fp_img_empty, quality=5)
+def compute_imgs(self, prompt, negative_prompt):
+self.prompt = prompt
+self.negative_prompt = negative_prompt
+self.be.set_prompt1(prompt)
+self.be.set_prompt2(prompt)
+self.be.set_negative_prompt(negative_prompt)
self.list_seeds = []
self.list_images_preview = []
self.idx_img_preview_selected = None
for i in range(self.nmb_preview_images):
seed = np.random.randint(0, np.iinfo(np.int32).max)
self.be.seed1 = seed
self.list_seeds.append(seed)
img = self.be.compute_latents1(return_image=True)
fn_img_tmp = f"image_{uuid.uuid4()}.jpg"
temp_img_path = os.path.join(tempfile.gettempdir(), fn_img_tmp)
img.save(temp_img_path)
img.save(temp_img_path, quality=self.jpg_quality, optimize=True)
self.list_images_preview.append(temp_img_path)
return self.list_images_preview
def randomize_seed1(self):
r"""
Randomizes the first seed
"""
seed = np.random.randint(0, 10000000)
self.seed1 = int(seed)
print(f"randomize_seed1: new seed = {self.seed1}")
return seed
-def randomize_seed2(self):
-r"""
+def get_list_images_movie(self):
+return [entry["preview_image"] for entry in self.data]
Randomizes the second seed
"""
seed = np.random.randint(0, 10000000)
self.seed2 = int(seed)
print(f"randomize_seed2: new seed = {self.seed2}")
return seed
def setup_lb(self, list_ui_vals):
r"""
Sets all parameters from the UI. Since gradio does not support to pass dictionaries,
we have to instead pass keys (list_ui_keys, global) and values (list_ui_vals)
"""
# Collect latent blending variables
self.lb.set_width(list_ui_vals[list_ui_keys.index('width')])
self.lb.set_height(list_ui_vals[list_ui_keys.index('height')])
self.lb.set_prompt1(list_ui_vals[list_ui_keys.index('prompt1')])
self.lb.set_prompt2(list_ui_vals[list_ui_keys.index('prompt2')])
self.lb.set_negative_prompt(list_ui_vals[list_ui_keys.index('negative_prompt')])
self.lb.guidance_scale = list_ui_vals[list_ui_keys.index('guidance_scale')]
self.lb.guidance_scale_mid_damper = list_ui_vals[list_ui_keys.index('guidance_scale_mid_damper')]
self.t_compute_max_allowed = list_ui_vals[list_ui_keys.index('duration_compute')]
self.lb.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.lb.sdh.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.duration_video = list_ui_vals[list_ui_keys.index('duration_video')]
self.lb.seed1 = list_ui_vals[list_ui_keys.index('seed1')]
self.lb.seed2 = list_ui_vals[list_ui_keys.index('seed2')]
self.lb.branch1_crossfeed_power = list_ui_vals[list_ui_keys.index('branch1_crossfeed_power')]
self.lb.branch1_crossfeed_range = list_ui_vals[list_ui_keys.index('branch1_crossfeed_range')]
self.lb.branch1_crossfeed_decay = list_ui_vals[list_ui_keys.index('branch1_crossfeed_decay')]
self.lb.parental_crossfeed_power = list_ui_vals[list_ui_keys.index('parental_crossfeed_power')]
self.lb.parental_crossfeed_range = list_ui_vals[list_ui_keys.index('parental_crossfeed_range')]
self.lb.parental_crossfeed_power_decay = list_ui_vals[list_ui_keys.index('parental_crossfeed_power_decay')]
self.num_inference_steps = list_ui_vals[list_ui_keys.index('num_inference_steps')]
self.depth_strength = list_ui_vals[list_ui_keys.index('depth_strength')]
-if len(list_ui_vals[list_ui_keys.index('user_id')]) > 1:
-self.user_id = list_ui_vals[list_ui_keys.index('user_id')]
+def init_new_movie(self):
+current_time = datetime.datetime.now()
self.fp_movie = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".mp4"
self.fp_json = "movie_" + current_time.strftime("%y%m%d_%H%M") + ".json"
def write_json(self):
# Write the data list to a JSON file
data_copy = self.data.copy()
data_copy.insert(0, {"settings": "sdxl", "width": self.be.dh.width_img, "height": self.be.dh.height_img, "num_inference_steps": self.be.dh.num_inference_steps})
with open(self.fp_json, 'w') as f:
json.dump(data_copy, f, indent=4)
def add_image_to_video(self):
if self.prompt is None:
print("Cannot take because no prompt was set!")
return self.get_list_images_movie()
if self.idx_movie == 0:
self.init_new_movie()
self.data.append({"iteration": self.idx_movie,
"seed": self.list_seeds[self.idx_img_preview_selected],
"prompt": self.prompt,
"negative_prompt": self.negative_prompt,
"preview_image": self.list_images_preview[self.idx_img_preview_selected]
})
self.write_json()
self.idx_movie += 1
return self.get_list_images_movie()
def img_movie_delete(self):
if self.idx_img_movie_selected is not None and 0 <= self.idx_img_movie_selected < len(self.data)+1:
del self.data[self.idx_img_movie_selected]
self.idx_img_movie_selected = None
else:
-# generate new user id
-self.user_id = uuid.uuid4().hex
+print(f"Invalid movie image index for deletion: {self.idx_img_movie_selected}")
+return self.get_list_images_movie()
print(f"made new user_id: {self.user_id} at {get_time('second')}")
-def save_latents(self, fp_latents, list_latents):
-r"""
-Saves a latent trajectory on disk, in npy format.
-"""
-list_latents_cpu = [l.cpu().numpy() for l in list_latents]
-np.save(fp_latents, list_latents_cpu)
+def img_movie_later(self):
+if self.idx_img_movie_selected is not None and self.idx_img_movie_selected < len(self.data):
+# Swap the selected image with the next one
+self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected + 1] = \
+self.data[self.idx_img_movie_selected+1], self.data[self.idx_img_movie_selected]
+self.idx_img_movie_selected = None
else:
print("Cannot move the image later in the sequence.")
return self.get_list_images_movie()
-def load_latents(self, fp_latents):
-r"""
-Loads a latent trajectory from disk, converts to torch tensor.
-"""
-list_latents_cpu = np.load(fp_latents)
-list_latents = [torch.from_numpy(l).to(self.lb.device) for l in list_latents_cpu]
-return list_latents
+def img_movie_earlier(self):
+if self.idx_img_movie_selected is not None and self.idx_img_movie_selected > 0:
+# Swap the selected image with the previous one
+self.data[self.idx_img_movie_selected-1], self.data[self.idx_img_movie_selected] = \
+self.data[self.idx_img_movie_selected], self.data[self.idx_img_movie_selected-1]
+self.idx_img_movie_selected = None
print("Cannot move the image earlier in the sequence.")
return self.get_list_images_movie()
def compute_img1(self, *args):
r"""
Computes the first transition image and returns it for display.
Sets all other transition images and last image to empty (as they are obsolete with this operation)
"""
list_ui_vals = args
self.setup_lb(list_ui_vals)
fp_img1 = os.path.join(self.dp_imgs, f"img1_{self.user_id}")
img1 = Image.fromarray(self.lb.compute_latents1(return_image=True))
img1.save(fp_img1 + ".jpg")
self.save_latents(fp_img1 + ".npy", self.lb.tree_latents[0])
self.recycle_img1 = True
self.recycle_img2 = False
return [fp_img1 + ".jpg", self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.user_id]
-def compute_img2(self, *args):
-r"""
-Computes the last transition image and returns it for display.
-Sets all other transition images to empty (as they are obsolete with this operation)
-"""
+def generate_movie(self, t_per_segment=10):
+print("starting movie gen")
+list_prompts = []
+list_negative_prompts = []
+list_seeds = []
if not os.path.isfile(os.path.join(self.dp_imgs, f"img1_{self.user_id}.jpg")): # don't do anything
return [self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, self.user_id]
list_ui_vals = args
self.setup_lb(list_ui_vals)
-self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
-fp_img2 = os.path.join(self.dp_imgs, f"img2_{self.user_id}")
-img2 = Image.fromarray(self.lb.compute_latents2(return_image=True))
-img2.save(fp_img2 + '.jpg')
-self.save_latents(fp_img2 + ".npy", self.lb.tree_latents[-1])
+# Extract prompts, negative prompts, and seeds from the data
+for item in self.data:
+list_prompts.append(item["prompt"])
+list_negative_prompts.append(item["negative_prompt"])
+list_seeds.append(item["seed"])
self.recycle_img2 = True
# fixme save seeds. change filenames?
return [self.fp_img_empty, self.fp_img_empty, self.fp_img_empty, fp_img2 + ".jpg", self.user_id]
-def compute_transition(self, *args):
-r"""
-Computes transition images and movie.
-"""
-list_ui_vals = args
-self.setup_lb(list_ui_vals)
-print("STARTING TRANSITION...")
-fixed_seeds = [self.seed1, self.seed2]
-# Inject loaded latents (other user interference)
-self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
-self.lb.tree_latents[-1] = self.load_latents(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"))
-imgs_transition = self.lb.run_transition(
-recycle_img1=self.recycle_img1,
-recycle_img2=self.recycle_img2,
-num_inference_steps=self.num_inference_steps,
-depth_strength=self.depth_strength,
-t_compute_max_allowed=self.t_compute_max_allowed,
+list_movie_parts = []
+for i in range(len(list_prompts) - 1):
+# For a multi transition we can save some computation time and recycle the latents
+if i == 0:
+self.be.set_prompt1(list_prompts[i])
+self.be.set_negative_prompt(list_negative_prompts[i])
+self.be.set_prompt2(list_prompts[i + 1])
+recycle_img1 = False
+else:
+self.be.swap_forward()
+self.be.set_negative_prompt(list_negative_prompts[i+1])
+self.be.set_prompt2(list_prompts[i + 1])
+recycle_img1 = True
+fp_movie_part = f"tmp_part_{str(i).zfill(3)}.mp4"
+fixed_seeds = list_seeds[i:i + 2]
+# Run latent blending
+self.be.run_transition(
+recycle_img1=recycle_img1,
fixed_seeds=fixed_seeds)
-print(f"Latent Blending pass finished ({get_time('second')}). Resulted in {len(imgs_transition)} images")
-# Subselect three preview images
-idx_img_prev = np.round(np.linspace(0, len(imgs_transition) - 1, 5)[1:-1]).astype(np.int32)
-list_imgs_preview = []
-for j in idx_img_prev:
-list_imgs_preview.append(Image.fromarray(imgs_transition[j]))
+# Save movie
+self.be.write_movie_transition(fp_movie_part, t_per_segment)
+list_movie_parts.append(fp_movie_part)
+# Finally, concatenate the result
+concatenate_movies(self.fp_movie, list_movie_parts)
+print(f"DONE! MOVIE SAVED IN {self.fp_movie}")
return self.fp_movie
# Save the preview imgs as jpgs on disk so we are not sending umcompressed data around
current_timestamp = get_time('second')
self.list_fp_imgs_current = []
for i in range(len(list_imgs_preview)):
fp_img = os.path.join(self.dp_imgs, f"img_preview_{i}_{current_timestamp}.jpg")
list_imgs_preview[i].save(fp_img)
self.list_fp_imgs_current.append(fp_img)
# Insert cheap frames for the movie
imgs_transition_ext = add_frames_linear_interp(imgs_transition, self.duration_video, self.fps)
# Save as movie
self.fp_movie = self.get_fp_video_last()
if os.path.isfile(self.fp_movie):
os.remove(self.fp_movie)
ms = MovieSaver(self.fp_movie, fps=self.fps)
for img in tqdm(imgs_transition_ext):
ms.write_frame(img)
ms.finalize()
print("DONE SAVING MOVIE! SENDING BACK...")
# Assemble Output, updating the preview images and le movie
list_return = self.list_fp_imgs_current + [self.fp_movie]
return list_return
def stack_forward(self, prompt2, seed2):
r"""
Allows to generate multi-segment movies. Sets last image -> first image with all
relevant parameters.
"""
# Save preview images, prompts and seeds into dictionary for stacking
if len(self.list_all_segments) == 0:
timestamp_session = get_time('second')
self.dp_session = os.path.join(self.dp_out, f"session_{timestamp_session}")
os.makedirs(self.dp_session)
idx_segment = len(self.list_all_segments)
dp_segment = os.path.join(self.dp_session, f"segment_{str(idx_segment).zfill(3)}")
self.list_all_segments.append(dp_segment)
self.lb.write_imgs_transition(dp_segment)
fp_movie_last = self.get_fp_video_last()
fp_movie_next = self.get_fp_video_next()
shutil.copyfile(fp_movie_last, fp_movie_next)
self.lb.tree_latents[0] = self.load_latents(os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
self.lb.tree_latents[-1] = self.load_latents(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"))
self.lb.swap_forward()
shutil.copyfile(os.path.join(self.dp_imgs, f"img2_{self.user_id}.npy"), os.path.join(self.dp_imgs, f"img1_{self.user_id}.npy"))
fp_multi = self.multi_concat()
list_out = [fp_multi]
list_out.extend([os.path.join(self.dp_imgs, f"img2_{self.user_id}.jpg")])
list_out.extend([self.fp_img_empty] * 4)
list_out.append(gr.update(interactive=False, value=prompt2))
list_out.append(gr.update(interactive=False, value=seed2))
list_out.append("")
list_out.append(np.random.randint(0, 10000000))
print(f"stack_forward: fp_multi {fp_multi}")
return list_out
def multi_concat(self):
r"""
Concatentates all stacked segments into one long movie.
"""
list_fp_movies = self.get_fp_video_all()
# Concatenate movies and save
fp_final = os.path.join(self.dp_session, f"concat_{self.user_id}.mp4")
concatenate_movies(fp_final, list_fp_movies)
return fp_final
def get_fp_video_all(self):
r"""
Collects all stacked movie segments.
"""
list_all = os.listdir(self.dp_movies)
str_beg = f"movie_{self.user_id}_"
list_user = [l for l in list_all if str_beg in l]
list_user.sort()
list_user = [os.path.join(self.dp_movies, l) for l in list_user]
return list_user
def get_fp_video_next(self):
r"""
Gets the filepath of the next movie segment.
"""
list_videos = self.get_fp_video_all()
if len(list_videos) == 0:
idx_next = 0
else:
idx_next = len(list_videos)
fp_video_next = os.path.join(self.dp_movies, f"movie_{self.user_id}_{str(idx_next).zfill(3)}.mp4")
return fp_video_next
def get_fp_video_last(self):
r"""
Gets the current video that was saved.
"""
fp_video_last = os.path.join(self.dp_movies, f"last_{self.user_id}.mp4")
return fp_video_last
#%% Runtime engine
if __name__ == "__main__":
# fp_ckpt = hf_hub_download(repo_id="stabilityai/stable-diffusion-2-1-base", filename="v2-1_512-ema-pruned.ckpt")
fp_ckpt = hf_hub_download(repo_id="stabilityai/stable-diffusion-2-1", filename="v2-1_768-ema-pruned.ckpt")
bf = BlendingFrontend(StableDiffusionHolder(fp_ckpt))
# self = BlendingFrontend(None)
# Change Parameters below
parser = argparse.ArgumentParser(description="Latent Blending GUI")
parser.add_argument("--do_compile", type=bool, default=False)
parser.add_argument("--nmb_preview_images", type=int, default=4)
parser.add_argument("--server_name", type=str, default=None)
try:
args = parser.parse_args()
nmb_preview_images = args.nmb_preview_images
do_compile = args.do_compile
server_name = args.server_name
except SystemExit:
# If the script is run in an interactive environment (like Jupyter), parse_args might fail.
nmb_preview_images = 4
do_compile = False # compile SD pipes with sdfast
server_name = None
mur = MultiUserRouter(do_compile=do_compile)
with gr.Blocks() as demo:
-gr.HTML("""<h1>Latent Blending</h1>
-<p>Create butter-smooth transitions between prompts, powered by stable diffusion</p>
+with gr.Accordion("Setup", open=True) as accordion_setup:
+# New user registration, model selection, ...
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
<br/>
<a href="https://huggingface.co/spaces/lunarring/latentblending?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>""")
with gr.Row():
-prompt1 = gr.Textbox(label="prompt 1")
-prompt2 = gr.Textbox(label="prompt 2")
+model = gr.Dropdown(mur.list_models, value=mur.list_models[0], label="model")
+width = gr.Slider(256, 2048, 512, step=128, label='width', interactive=True)
height = gr.Slider(256, 2048, 512, step=128, label='height', interactive=True)
user_id = gr.Textbox(label="user id (filled automatically)", interactive=False)
b_start_session = gr.Button('start session', variant='primary')
with gr.Accordion("Latent Blending (expand with arrow on right side after you clicked 'start session')", open=False) as accordion_latentblending:
with gr.Row():
-duration_compute = gr.Slider(10, 25, bf.t_compute_max_allowed, step=1, label='waiting time', interactive=True)
+prompt = gr.Textbox(label="prompt")
duration_video = gr.Slider(1, 100, bf.duration_video, step=0.1, label='video duration', interactive=True)
height = gr.Slider(256, 1024, bf.height, step=128, label='height', interactive=True)
width = gr.Slider(256, 1024, bf.width, step=128, label='width', interactive=True)
with gr.Accordion("Advanced Settings (click to expand)", open=False):
with gr.Accordion("Diffusion settings", open=True):
with gr.Row():
num_inference_steps = gr.Slider(5, 100, bf.num_inference_steps, step=1, label='num_inference_steps', interactive=True)
guidance_scale = gr.Slider(1, 25, bf.guidance_scale, step=0.1, label='guidance_scale', interactive=True)
negative_prompt = gr.Textbox(label="negative prompt")
b_compute = gr.Button('generate preview images', variant='primary')
with gr.Accordion("Seed control: adjust seeds for first and last images", open=True): b_select = gr.Button('add selected image to video', variant='primary')
with gr.Row():
b_newseed1 = gr.Button("randomize seed 1", variant='secondary')
seed1 = gr.Number(bf.seed1, label="seed 1", interactive=True)
seed2 = gr.Number(bf.seed2, label="seed 2", interactive=True)
b_newseed2 = gr.Button("randomize seed 2", variant='secondary')
with gr.Accordion("Last image crossfeeding.", open=True):
with gr.Row():
branch1_crossfeed_power = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_power, step=0.01, label='branch1 crossfeed power', interactive=True)
branch1_crossfeed_range = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_range, step=0.01, label='branch1 crossfeed range', interactive=True)
branch1_crossfeed_decay = gr.Slider(0.0, 1.0, bf.branch1_crossfeed_decay, step=0.01, label='branch1 crossfeed decay', interactive=True)
with gr.Accordion("Transition settings", open=True):
with gr.Row():
parental_crossfeed_power = gr.Slider(0.0, 1.0, bf.parental_crossfeed_power, step=0.01, label='parental crossfeed power', interactive=True)
parental_crossfeed_range = gr.Slider(0.0, 1.0, bf.parental_crossfeed_range, step=0.01, label='parental crossfeed range', interactive=True)
parental_crossfeed_power_decay = gr.Slider(0.0, 1.0, bf.parental_crossfeed_power_decay, step=0.01, label='parental crossfeed decay', interactive=True)
with gr.Row():
depth_strength = gr.Slider(0.01, 0.99, bf.depth_strength, step=0.01, label='depth_strength', interactive=True)
guidance_scale_mid_damper = gr.Slider(0.01, 2.0, bf.guidance_scale_mid_damper, step=0.01, label='guidance_scale_mid_damper', interactive=True)
with gr.Row():
-b_compute1 = gr.Button('step1: compute first image', variant='primary')
-b_compute2 = gr.Button('step2: compute last image', variant='primary')
-b_compute_transition = gr.Button('step3: compute transition', variant='primary')
+gallery_preview = gr.Gallery(
+label="Generated images", show_label=False, elem_id="gallery"
+, columns=[nmb_preview_images], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
with gr.Row():
-img1 = gr.Image(label="1/5")
-img2 = gr.Image(label="2/5", show_progress=False)
-img3 = gr.Image(label="3/5", show_progress=False)
-img4 = gr.Image(label="4/5", show_progress=False)
-img5 = gr.Image(label="5/5")
+gr.Markdown("Your movie contains the following images (see below)")
+with gr.Row():
+gallery_movie = gr.Gallery(
+label="Generated images", show_label=False, elem_id="gallery"
+, columns=[20], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
with gr.Row():
-vid_single = gr.Video(label="current single trans")
-vid_multi = gr.Video(label="concatented multi trans")
+b_delete = gr.Button('delete selected image')
+b_move_earlier = gr.Button('move image to earlier time')
+b_move_later = gr.Button('move image to later time')
with gr.Row():
-b_stackforward = gr.Button('append last movie segment (left) to multi movie (right)', variant='primary')
+b_generate_movie = gr.Button('generate movie', variant='primary')
+t_per_segment = gr.Slider(1, 30, 10, step=0.1, label='time per segment', interactive=True)
with gr.Row():
-gr.Markdown(
+movie = gr.Video()
"""
# Parameters
## Main
- waiting time: set your waiting time for the transition. high values = better quality
- video duration: seconds per segment
- height/width: in pixels
## Diffusion settings # bindings
- num_inference_steps: number of diffusion steps b_start_session.click(mur.register_new_user, inputs=[model, width, height], outputs=user_id)
- guidance_scale: latent blending seems to prefer lower values here b_compute.click(mur.compute_imgs, inputs=[user_id, prompt, negative_prompt], outputs=gallery_preview)
- negative prompt: enter negative prompt here, applied for all images b_select.click(mur.add_image_to_video, user_id, gallery_movie)
gallery_preview.select(mur.preview_img_selected, user_id, None)
gallery_movie.select(mur.movie_img_selected, user_id, None)
b_delete.click(mur.img_movie_delete, user_id, gallery_movie)
b_move_earlier.click(mur.img_movie_earlier, user_id, gallery_movie)
b_move_later.click(mur.img_movie_later, user_id, gallery_movie)
b_generate_movie.click(mur.generate_movie, [user_id, t_per_segment], movie)
## Last image crossfeeding
- branch1_crossfeed_power: Controls the level of cross-feeding between the first and last image branch. For preserving structures.
- branch1_crossfeed_range: Sets the duration of active crossfeed during development. High values enforce strong structural similarity.
- branch1_crossfeed_decay: Sets decay for branch1_crossfeed_power. Lower values make the decay stronger across the range.
## Transition settings if server_name is None:
- parental_crossfeed_power: Similar to branch1_crossfeed_power, however applied for the images withinin the transition. demo.launch(share=False, inbrowser=True, inline=False)
- parental_crossfeed_range: Similar to branch1_crossfeed_range, however applied for the images withinin the transition. else:
- parental_crossfeed_power_decay: Similar to branch1_crossfeed_decay, however applied for the images withinin the transition. demo.launch(share=False, inbrowser=True, inline=False, server_name=server_name)
- depth_strength: Determines when the blending process will begin in terms of diffusion steps. Low values more inventive but can cause motion.
- guidance_scale_mid_damper: Decreases the guidance scale in the middle of a transition.
""")
with gr.Row():
    user_id = gr.Textbox(label="user id", interactive=False)
# Collect all UI elements in a list to easily pass them as inputs in gradio
dict_ui_elem = {}
dict_ui_elem["prompt1"] = prompt1
dict_ui_elem["negative_prompt"] = negative_prompt
dict_ui_elem["prompt2"] = prompt2
dict_ui_elem["duration_compute"] = duration_compute
dict_ui_elem["duration_video"] = duration_video
dict_ui_elem["height"] = height
dict_ui_elem["width"] = width
dict_ui_elem["depth_strength"] = depth_strength
dict_ui_elem["branch1_crossfeed_power"] = branch1_crossfeed_power
dict_ui_elem["branch1_crossfeed_range"] = branch1_crossfeed_range
dict_ui_elem["branch1_crossfeed_decay"] = branch1_crossfeed_decay
dict_ui_elem["num_inference_steps"] = num_inference_steps
dict_ui_elem["guidance_scale"] = guidance_scale
dict_ui_elem["guidance_scale_mid_damper"] = guidance_scale_mid_damper
dict_ui_elem["seed1"] = seed1
dict_ui_elem["seed2"] = seed2
dict_ui_elem["parental_crossfeed_range"] = parental_crossfeed_range
dict_ui_elem["parental_crossfeed_power"] = parental_crossfeed_power
dict_ui_elem["parental_crossfeed_power_decay"] = parental_crossfeed_power_decay
dict_ui_elem["user_id"] = user_id
# Convert to list, as gradio doesn't seem to accept dicts
list_ui_vals = []
list_ui_keys = []
for k in dict_ui_elem.keys():
list_ui_vals.append(dict_ui_elem[k])
list_ui_keys.append(k)
bf.list_ui_keys = list_ui_keys
b_newseed1.click(bf.randomize_seed1, outputs=seed1)
b_newseed2.click(bf.randomize_seed2, outputs=seed2)
b_compute1.click(bf.compute_img1, inputs=list_ui_vals, outputs=[img1, img2, img3, img4, img5, user_id])
b_compute2.click(bf.compute_img2, inputs=list_ui_vals, outputs=[img2, img3, img4, img5, user_id])
b_compute_transition.click(bf.compute_transition,
                           inputs=list_ui_vals,
                           outputs=[img2, img3, img4, vid_single])
b_stackforward.click(bf.stack_forward,
                     inputs=[prompt2, seed2],
                     outputs=[vid_multi, img1, img2, img3, img4, img5, prompt1, seed1, prompt2])
demo.launch(share=bf.share, inbrowser=True, inline=False)
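Both versions of the interface rely on the same Gradio wiring: Button.click(fn, inputs, outputs) registers a callback whose return values are written into the listed output components, and demo.launch() starts the app. Below is a minimal, self-contained sketch of that pattern; the component and function names are illustrative and not taken from this repository.

import gradio as gr

def greet(name):
    # Callback: receives the current values of `inputs`, returns values for `outputs`.
    return f"Hello, {name}!"

with gr.Blocks() as demo:
    name = gr.Textbox(label="name")
    greeting = gr.Textbox(label="greeting")
    b_greet = gr.Button("greet", variant="primary")
    # Same binding pattern as b_compute.click(...) / b_generate_movie.click(...) above.
    b_greet.click(greet, inputs=name, outputs=greeting)

demo.launch(share=False, inbrowser=True, inline=False)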

View File

@@ -1,301 +0,0 @@
# Copyright 2022 Lunar Ring. All rights reserved.
# Written by Johannes Stelzer, email stelzer@lunar-ring.ai twitter @j_stelzer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import subprocess
import os
import numpy as np
from tqdm import tqdm
import cv2
from typing import List
import ffmpeg # pip install ffmpeg-python. if error with broken pipe: conda update ffmpeg
class MovieSaver():
def __init__(
self,
fp_out: str,
fps: int = 24,
shape_hw: List[int] = None,
crf: int = 21,
codec: str = 'libx264',
preset: str = 'fast',
pix_fmt: str = 'yuv420p',
silent_ffmpeg: bool = True):
r"""
Initializes the movie saver class - a human-friendly ffmpeg wrapper.
After you init the class, you can dump numpy arrays x into moviesaver.write_frame(x).
Don't forget to finalize the movie file with moviesaver.finalize().
Args:
fp_out: str
Output file name. If it already exists, it will be deleted.
fps: int
Frames per second.
shape_hw: List[int, int]
Output shape, optional argument. Can be initialized automatically when first frame is written.
crf: int
ffmpeg doc: the range of the CRF scale is 0-51, where 0 is lossless
(for 8 bit only, for 10 bit use -qp 0), 23 is the default, and 51 is worst quality possible.
A lower value generally leads to higher quality, and a subjectively sane range is 17-28.
Consider 17 or 18 to be visually lossless or nearly so;
it should look the same or nearly the same as the input but it isn't technically lossless.
The range is exponential, so increasing the CRF value +6 results in
roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
codec: str
Video codec passed to ffmpeg for encoding, e.g. 'libx264'.
preset: str
Choose between ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow.
ffmpeg doc: A preset is a collection of options that will provide a certain encoding speed
to compression ratio. A slower preset will provide better compression
(compression is quality per filesize).
This means that, for example, if you target a certain file size or constant bit rate,
you will achieve better quality with a slower preset. Similarly, for constant quality encoding,
you will simply save bitrate by choosing a slower preset.
pix_fmt: str
Pixel format. Run 'ffmpeg -pix_fmts' in your shell to see all options.
silent_ffmpeg: bool
Suppress the output from ffmpeg.
"""
if len(os.path.split(fp_out)[0]) > 0:
assert os.path.isdir(os.path.split(fp_out)[0]), "Directory does not exist!"
self.fp_out = fp_out
self.fps = fps
self.crf = crf
self.pix_fmt = pix_fmt
self.codec = codec
self.preset = preset
self.silent_ffmpeg = silent_ffmpeg
if os.path.isfile(fp_out):
os.remove(fp_out)
self.init_done = False
self.nmb_frames = 0
if shape_hw is None:
self.shape_hw = [-1, 1]
else:
if len(shape_hw) == 2:
shape_hw.append(3)
self.shape_hw = shape_hw
self.initialize()
print(f"MovieSaver initialized. fps={fps} crf={crf} pix_fmt={pix_fmt} codec={codec} preset={preset}")
def initialize(self):
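# Build the ffmpeg command via ffmpeg-python: raw rgb24 frames are read from stdin at the
# configured fps and encoded into fp_out with the chosen codec/crf/preset. The encoder is
# spawned as a subprocess so that write_frame() can stream frames into its stdin.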
args = (
ffmpeg
.input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(self.shape_hw[1], self.shape_hw[0]), framerate=self.fps)
.output(self.fp_out, crf=self.crf, pix_fmt=self.pix_fmt, c=self.codec, preset=self.preset)
.overwrite_output()
.compile()
)
if self.silent_ffmpeg:
self.ffmpg_process = subprocess.Popen(args, stdin=subprocess.PIPE, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
else:
self.ffmpg_process = subprocess.Popen(args, stdin=subprocess.PIPE)
self.init_done = True
self.shape_hw = tuple(self.shape_hw)
print(f"Initialization done. Movie shape: {self.shape_hw}")
def write_frame(self, out_frame: np.ndarray):
r"""
Function to dump a numpy array as frame of a movie.
Args:
out_frame: np.ndarray
Numpy array, in np.uint8 format. Convert with x.astype(np.uint8).
Dim 0: y
Dim 1: x
Dim 2: RGB
"""
assert out_frame.dtype == np.uint8, "Convert to np.uint8 before"
assert len(out_frame.shape) == 3, "out_frame needs to be three dimensional, Y X C"
assert out_frame.shape[2] == 3, f"need three color channels, but you provided {out_frame.shape[2]}."
if not self.init_done:
self.shape_hw = out_frame.shape
self.initialize()
assert self.shape_hw == out_frame.shape, f"You cannot change the image size after init. Initialized with {self.shape_hw}, out_frame {out_frame.shape}"
# write frame
self.ffmpg_process.stdin.write(
out_frame
.astype(np.uint8)
.tobytes()
)
self.nmb_frames += 1
def finalize(self):
r"""
Call this function to finalize the movie. If you forget to call it your movie will be garbage.
"""
if self.nmb_frames == 0:
print("You did not write any frames yet! nmb_frames = 0. Cannot save.")
return
self.ffmpg_process.stdin.close()
self.ffmpg_process.wait()
duration = int(self.nmb_frames / self.fps)
print(f"Movie saved, {duration}s playtime, watch here: \n{self.fp_out}")
def concatenate_movies(fp_final: str, list_fp_movies: List[str]):
r"""
Concatenate multiple movie segments into one long movie, using ffmpeg.
Parameters
----------
fp_final : str
Full path of the final movie file. Should end with .mp4
list_fp_movies : list[str]
List of full paths of movie segments.
"""
assert fp_final[-4] == ".", f"fp_final seems to miss file extension: {fp_final}"
for fp in list_fp_movies:
assert os.path.isfile(fp), f"Input movie does not exist: {fp}"
assert os.path.getsize(fp) > 100, f"Input movie seems empty: {fp}"
if os.path.isfile(fp_final):
os.remove(fp_final)
# make a list for ffmpeg
list_concat = []
for fp_part in list_fp_movies:
list_concat.append(f"""file '{fp_part}'""")
# save this list
fp_list = "tmp_move.txt"
with open(fp_list, "w") as fa:
for item in list_concat:
fa.write("%s\n" % item)
cmd = f'ffmpeg -f concat -safe 0 -i {fp_list} -c copy {fp_final}'
subprocess.call(cmd, shell=True)
os.remove(fp_list)
if os.path.isfile(fp_final):
print(f"concatenate_movies: success! Watch here: {fp_final}")
def add_sound(fp_final, fp_silentmovie, fp_sound):
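# Muxes the video stream of fp_silentmovie with the audio stream of fp_sound into fp_final,
# using stream copy (-map 0:v:0 -map 1:a:0), i.e. without re-encoding.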
cmd = f'ffmpeg -i {fp_silentmovie} -i {fp_sound} -c copy -map 0:v:0 -map 1:a:0 {fp_final}'
subprocess.call(cmd, shell=True)
if os.path.isfile(fp_final):
print(f"add_sound: success! Watch here: {fp_final}")
def add_subtitles_to_video(
fp_input: str,
fp_output: str,
subtitles: list,
fontsize: int = 50,
font_name: str = "Arial",
color: str = 'yellow'
):
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
r"""
Function to add subtitles to a video.
Args:
fp_input (str): File path of the input video.
fp_output (str): File path of the output video with subtitles.
subtitles (list): List of dictionaries containing subtitle information
(start, duration, text). Example:
subtitles = [
{"start": 1, "duration": 3, "text": "hello test"},
{"start": 4, "duration": 2, "text": "this works"},
]
fontsize (int): Font size of the subtitles.
font_name (str): Font name of the subtitles.
color (str): Color of the subtitles.
"""
# Check if the input file exists
if not os.path.isfile(fp_input):
raise FileNotFoundError(f"Input file not found: {fp_input}")
# Check the subtitles format and sort them by the start time
time_points = []
for subtitle in subtitles:
if not isinstance(subtitle, dict):
raise ValueError("Each subtitle must be a dictionary containing 'start', 'duration' and 'text'.")
if not all(key in subtitle for key in ["start", "duration", "text"]):
raise ValueError("Each subtitle dictionary must contain 'start', 'duration' and 'text'.")
if subtitle['start'] < 0 or subtitle['duration'] <= 0:
raise ValueError("'start' should be non-negative and 'duration' should be positive.")
time_points.append((subtitle['start'], subtitle['start'] + subtitle['duration']))
# Check for overlaps
time_points.sort()
for i in range(1, len(time_points)):
if time_points[i][0] < time_points[i - 1][1]:
raise ValueError("Subtitle time intervals should not overlap.")
# Load the video clip
video = VideoFileClip(fp_input)
# Create a list to store subtitle clips
subtitle_clips = []
# Loop through the subtitle information and create TextClip for each
for subtitle in subtitles:
text_clip = TextClip(subtitle["text"], fontsize=fontsize, color=color, font=font_name)
text_clip = text_clip.set_position(('center', 'bottom')).set_start(subtitle["start"]).set_duration(subtitle["duration"])
subtitle_clips.append(text_clip)
# Overlay the subtitles on the video
video = CompositeVideoClip([video] + subtitle_clips)
# Write the final clip to a new file
video.write_videofile(fp_output)
class MovieReader():
r"""
Class to read in a movie.
"""
def __init__(self, fp_movie):
self.video_player_object = cv2.VideoCapture(fp_movie)
self.nmb_frames = int(self.video_player_object.get(cv2.CAP_PROP_FRAME_COUNT))
self.fps_movie = int(self.video_player_object.get(cv2.CAP_PROP_FPS))
self.shape = [100, 100, 3]
self.shape_is_set = False
def get_next_frame(self):
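# Returns the next frame as a numpy array (BGR channel order, as delivered by OpenCV).
# Once the stream is exhausted, a zero array with the last known frame shape is returned instead.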
success, image = self.video_player_object.read()
if success:
if not self.shape_is_set:
self.shape_is_set = True
self.shape = image.shape
return image
else:
return np.zeros(self.shape)
if __name__ == "__main__":
fps = 2
list_fp_movies = []
for k in range(4):
fp_movie = f"/tmp/my_random_movie_{k}.mp4"
list_fp_movies.append(fp_movie)
ms = MovieSaver(fp_movie, fps=fps)
for fn in tqdm(range(30)):
img = (np.random.rand(512, 1024, 3) * 255).astype(np.uint8)
ms.write_frame(img)
ms.finalize()
fp_final = "/tmp/my_concatenated_movie.mp4"
concatenate_movies(fp_final, list_fp_movies)

View File

@@ -1,6 +1,6 @@
 lpips==0.1.4
 opencv-python
-ffmpeg-python
 diffusers==0.25.0
 transformers
 pytest
+accelerate

View File

@@ -6,14 +6,14 @@ with open('requirements.txt') as f:
 setup(
     name='latentblending',
-    version='0.2',
+    version='0.3',
     url='https://github.com/lunarring/latentblending',
     description='Butter-smooth video transitions',
     long_description=open('README.md').read(),
-    install_requires=required,
-    dependency_links=[
-        'git+https://github.com/lunarring/lunar_tools#egg=lunar_tools'
-    ],
+    install_requires=[
+        'lunar_tools @ git+https://github.com/lunarring/lunar_tools.git#egg=lunar_tools'
+    ] + required,
     include_package_data=False,
 )