diff --git a/example1_standard.py b/example1_standard.py
new file mode 100644
index 0000000..cabd035
--- /dev/null
+++ b/example1_standard.py
@@ -0,0 +1,102 @@
+# Copyright 2022 Lunar Ring. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, sys
+dp_git = "/home/lugo/git/"  # NOTE(review): machine-specific absolute path; adjust for your checkout
+sys.path.append(os.path.join(dp_git,'garden4'))
+sys.path.append('util')  # relative entry: resolved against the current working directory
+import torch
+torch.backends.cudnn.benchmark = False  # do not let cuDNN benchmark and auto-select convolution algorithms
+import numpy as np
+import warnings
+warnings.filterwarnings('ignore')  # silences every warning for the rest of the process
+import time
+import subprocess
+import warnings  # NOTE(review): duplicate import, already imported above
+import torch  # NOTE(review): duplicate import, already imported above
+from tqdm.auto import tqdm
+from diffusers import StableDiffusionInpaintPipeline
+from diffusers import StableDiffusionPipeline
+from diffusers.schedulers import DDIMScheduler
+from PIL import Image
+import matplotlib.pyplot as plt
+import torch  # NOTE(review): duplicate import, already imported above
+from movie_man import MovieSaver  # NOTE(review): presumably found via the sys.path entries added above - confirm
+import datetime
+from typing import Callable, List, Optional, Union
+import inspect
+from latent_blending import LatentBlending, add_frames_linear_interp
+torch.set_grad_enabled(False)  # nothing is trained in this script: switch gradient tracking off globally
+
+#%% First let us spawn a diffusers pipe using DDIMScheduler
+device = "cuda:0"
+model_path = "../stable_diffusion_models/stable-diffusion-v1-5"  # relative to the current working directory
+
+scheduler = DDIMScheduler(beta_start=0.00085,
+                          beta_end=0.012,
+                          beta_schedule="scaled_linear",
+                          clip_sample=False,
+                          set_alpha_to_one=False)
+
+pipe = StableDiffusionPipeline.from_pretrained(
+    model_path,
+    revision="fp16",
+    torch_dtype=torch.float16,
+    scheduler=scheduler,
+    use_auth_token=True
+)
+pipe = pipe.to(device)
+
+#%% Next let's set up all parameters
+
+# We want 30 diffusion steps, begin with 2 branches, have 6 branches at step 9 (=0.3*30)
+# 30 branches at step 22 (=round(0.73*30)) and 100 branches at step 28 (=round(0.93*30))
+# Furthermore we want seed 993621550 for keyframeA and seed 753528763 for keyframeB
+
+num_inference_steps = 30 # Number of diffusion iterations
+list_nmb_branches = [2, 6, 30, 100] # Number of branches per level (same length as list_injection_strength)
+list_injection_strength = [0.0, 0.3, 0.73, 0.93] # Injection point per level, as a fraction of num_inference_steps
+width = 512
+height = 512
+guidance_scale = 5
+#fixed_seeds = [993621550, 326814432]
+#fixed_seeds = [993621550, 888839807]
+fixed_seeds = [993621550, 753528763]
+
+lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale)
+prompt1 = "photo of a beautiful forest covered in white flowers, ambient light, very detailed, magic"
+prompt2 = "photo of a mystical sculpture in the middle of the desert, warm sunlight, sand, eery feeling"
+lb.set_prompt1(prompt1)
+lb.set_prompt2(prompt2)
+
+imgs_transition = lb.run_transition(list_nmb_branches, list_injection_strength, fixed_seeds=fixed_seeds)
+
+#%%
+
+# let's get more frames: the helper is given the target duration (presumably seconds - confirm) and the fps
+duration_transition = 12
+fps = 60
+imgs_transition_ext = add_frames_linear_interp(imgs_transition, duration_transition, fps)
+
+# movie saving: delete a movie left over from a previous run, then write the frames one by one
+fp_movie = f"/home/lugo/tmp/latentblending/bobo_incoming.mp4"  # NOTE(review): machine-specific path; f-string has no placeholders
+if os.path.isfile(fp_movie):
+    os.remove(fp_movie)
+ms = MovieSaver(fp_movie, fps=fps, profile='save')
+for img in tqdm(imgs_transition_ext):
+    ms.write_frame(img)
+ms.finalize()
+
+
+# MOVIE TODO: overwrite! bad prints.
\ No newline at end of file diff --git a/latent_blending.py b/latent_blending.py index e4e456a..ba506a9 100644 --- a/latent_blending.py +++ b/latent_blending.py @@ -115,7 +115,7 @@ class LatentBlending(): self.mask_image = None self.mode = 'inpaint' else: - self.mode = 'default' + self.mode = 'standard' def init_inpainting( @@ -214,14 +214,16 @@ class LatentBlending(): if list_injection_idx is None: assert list_injection_strength is not None, "Supply either list_injection_idx or list_injection_strength" + # Create the injection indexes list_injection_idx = [int(round(x*self.num_inference_steps)) for x in list_injection_strength] assert min(np.diff(list_injection_idx)) > 0, 'Injection idx needs to be increasing' if min(np.diff(list_injection_idx)) < 2: print("Warning: your injection spacing is very tight. consider increasing the distances") - assert type(list_injection_strength[0]) is float, "Need to supply floats for list_injection_strength" + assert type(list_injection_strength[1]) is float, "Need to supply floats for list_injection_strength" + # we are checking element 1 in list_injection_strength because "0" is an int... [0, 0.5] + assert max(list_injection_idx) < self.num_inference_steps, "Decrease the injection index or strength" assert len(list_injection_idx) == len(list_nmb_branches), "Need to have same length" - assert max(list_injection_idx) < self.num_inference_steps,"Injection index cannot happen after last diffusion step! Decrease list_injection_idx or list_injection_strength[-1]" if fixed_seeds is not None: @@ -364,7 +366,7 @@ class LatentBlending(): return_image: Optional[bool] = False ): r""" - Wrapper function for run_diffusion_default and run_diffusion_inpaint. + Wrapper function for run_diffusion_standard and run_diffusion_inpaint. Depending on the mode, the correct one will be executed. 
Args: @@ -381,8 +383,8 @@ class LatentBlending(): """ - if self.mode == 'default': - return self.run_diffusion_default(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image) + if self.mode == 'standard': + return self.run_diffusion_standard(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image) elif self.mode == 'inpaint': assert self.image_source is not None, "image_source is None. Please run init_inpainting first." @@ -391,7 +393,7 @@ class LatentBlending(): @torch.no_grad() - def run_diffusion_default( + def run_diffusion_standard( self, text_embeddings: torch.FloatTensor, latents_for_injection: torch.FloatTensor = None, @@ -936,7 +938,7 @@ if __name__ == "__main__": height = 512 guidance_scale = 5 seed = 421 - mode = 'default' + mode = 'standard' fps_target = 24 duration_target = 10 gpu_id = 0 @@ -962,7 +964,7 @@ if __name__ == "__main__": pipe = pipe.to(device) - #%% DEFAULT TRANS RE SANITY + #%% standard TRANS RE SANITY lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale, seed) self = lb @@ -1472,7 +1474,7 @@ if __name__ == "__main__": height = 512 guidance_scale = 5 seed = 421 - mode = 'default' + mode = 'standard' fps_target = 30 duration_target = 15 gpu_id = 0 @@ -1490,17 +1492,56 @@ if __name__ == "__main__": ) pipe = pipe.to(device) + #%% seed cherrypicking + + prompt1 = "photo of a surreal brutalistic vault that is glowing in the night, futuristic, greek ornaments, spider webs" + lb.set_prompt1(prompt1) + + for i in range(1): + seed = 753528763 #np.random.randint(753528763) + lb.set_seed(seed) + txt = f"{i} {seed}" + img = lb.run_diffusion(lb.text_embedding1, return_image=True) + plt.imshow(img) + plt.title(txt) + plt.show() + print(txt) + + #%% make nice images of latents + num_inference_steps = 10 # Number of diffusion interations + list_nmb_branches = [2, 3, 7, 12] # 
Specify the branching structure + list_injection_idx = [0, 2, 5, 8] # Specify the branching structure + width = 512 + height = 512 + guidance_scale = 5 + fixed_seeds = [993621550, 326814432] + + lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale) + prompt1 = "photo of a beautiful forest covered in white flowers, ambient light, very detailed, magic" + prompt2 = "photo of a mystical sculpture in the middle of the desert, warm sunlight, sand, eery feeling" + lb.set_prompt1(prompt1) + lb.set_prompt2(prompt2) - - - + imgs_transition = lb.run_transition(list_nmb_branches, list_injection_idx=list_injection_idx, fixed_seeds=fixed_seeds) +#%% + dp_tmp= "/home/lugo/tmp/latentblending" + for d in range(len(lb.tree_latents)): + for b in range(list_nmb_branches[d]): + for x in range(len(lb.tree_latents[d][b])): + lati = lb.tree_latents[d][b][x] + img = lb.latent2image(lati) + fn = f"d{d}_b{b}_x{x}.jpg" + ip.save(os.path.join(dp_tmp, fn), img) + + + + #%% """ TODO Coding: - list_nmb_branches > num inference auto mode (quality settings) refactor movie man make movie combiner in movie man