From 8cdfa020834f75850d2e5a7a5b4870cd3cb8bea7 Mon Sep 17 00:00:00 2001 From: Johannes Stelzer Date: Tue, 22 Nov 2022 00:07:55 +0100 Subject: [PATCH] intermediate progress --- cherry_picknick.py | 103 ++++++++++++++++++++++++++++++++++ example1_standard.py | 4 +- example3_multitrans.py | 122 +++++++++++++++++++++++++++++++++++++++++ latent_blending.py | 103 +--------------------------------- movie_util.py | 5 ++ 5 files changed, 235 insertions(+), 102 deletions(-) create mode 100644 cherry_picknick.py create mode 100644 example3_multitrans.py diff --git a/cherry_picknick.py b/cherry_picknick.py new file mode 100644 index 0000000..f59b049 --- /dev/null +++ b/cherry_picknick.py @@ -0,0 +1,103 @@ +# Copyright 2022 Lunar Ring. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os, sys +import torch +torch.backends.cudnn.benchmark = False +import numpy as np +import warnings +warnings.filterwarnings('ignore') +import warnings +import torch +from tqdm.auto import tqdm +from diffusers import StableDiffusionPipeline +from diffusers.schedulers import DDIMScheduler +from PIL import Image +import matplotlib.pyplot as plt +import torch +from movie_util import MovieSaver +from typing import Callable, List, Optional, Union +from latent_blending import LatentBlending, add_frames_linear_interp +torch.set_grad_enabled(False) + +#%% First let us spawn a diffusers pipe using DDIMScheduler +device = "cuda:0" +model_path = "../stable_diffusion_models/stable-diffusion-v1-5" + +scheduler = DDIMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False) + +pipe = StableDiffusionPipeline.from_pretrained( + model_path, + revision="fp16", + torch_dtype=torch.float16, + scheduler=scheduler, + use_auth_token=True +) +pipe = pipe.to(device) + +#%% Next let's set up all parameters +num_inference_steps = 30 # Number of diffusion iterations +list_nmb_branches = [2, 3, 10, 24]#, 50] # Branching structure: how many branches +list_injection_strength = [0.0, 0.6, 0.8, 0.9]#, 0.95] # Branching structure: how deep is the blending + +width = 512 +height = 512 +guidance_scale = 5 +fps = 30 +duration_target = 10 +width = 512 +height = 512 + +lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale) + + +list_prompts = [] +list_prompts.append("surrealistic statue made of glitter and dirt, standing in a lake, atmospheric light, strange glow") +list_prompts.append("weird statue of a frog monkey, many colors, standing next to the ruins of an ancient city") +list_prompts.append("statue of a mix between a tree and human, made of marble, incredibly detailed") +list_prompts.append("statue made of hot metal, bizzarre, dark clouds in the sky") +list_prompts.append("statue of a 
spider that looked like a human") +list_prompts.append("statue of a bird that looked like a scorpion") +list_prompts.append("statue of an ancient cybernetic messenger annoucing good news, golden, futuristic") + +k = 6 + +prompt = list_prompts[k] +for i in range(4): + lb.set_prompt1(prompt) + + seed = np.random.randint(999999999) + lb.set_seed(seed) + plt.imshow(lb.run_diffusion(lb.text_embedding1, return_image=True)) + plt.title(f"{i} seed {seed}") + plt.show() + print(f"prompt {k} seed {seed} trial {i}") + +#%% + +""" + +prompt 3 seed 28652396 trial 2 +prompt 4 seed 783279867 trial 3 +prompt 5 seed 831049796 trial 3 + +prompt 6 seed 798876383 trial 2 +prompt 6 seed 750494819 trial 2 +prompt 6 seed 416472011 trial 1 + +""" \ No newline at end of file diff --git a/example1_standard.py b/example1_standard.py index 6856787..ae9c138 100644 --- a/example1_standard.py +++ b/example1_standard.py @@ -26,7 +26,7 @@ from diffusers.schedulers import DDIMScheduler from PIL import Image import matplotlib.pyplot as plt import torch -from movie_man import MovieSaver +from movie_util import MovieSaver from typing import Callable, List, Optional, Union from latent_blending import LatentBlending, add_frames_linear_interp torch.set_grad_enabled(False) @@ -81,7 +81,7 @@ imgs_transition_ext = add_frames_linear_interp(imgs_transition, duration_transit fp_movie = f"/home/lugo/tmp/latentblending/bobo_incoming.mp4" if os.path.isfile(fp_movie): os.remove(fp_movie) -ms = MovieSaver(fp_movie, fps=fps, profile='save') +ms = MovieSaver(fp_movie, fps=fps) for img in tqdm(imgs_transition_ext): ms.write_frame(img) ms.finalize() diff --git a/example3_multitrans.py b/example3_multitrans.py new file mode 100644 index 0000000..2ff12b7 --- /dev/null +++ b/example3_multitrans.py @@ -0,0 +1,122 @@ +# Copyright 2022 Lunar Ring. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os, sys +import torch +torch.backends.cudnn.benchmark = False +import numpy as np +import warnings +warnings.filterwarnings('ignore') +import warnings +import torch +from tqdm.auto import tqdm +from diffusers import StableDiffusionPipeline +from diffusers.schedulers import DDIMScheduler +from PIL import Image +import matplotlib.pyplot as plt +import torch +from movie_util import MovieSaver +from typing import Callable, List, Optional, Union +from latent_blending import LatentBlending, add_frames_linear_interp +torch.set_grad_enabled(False) + +#%% First let us spawn a diffusers pipe using DDIMScheduler +device = "cuda:0" +model_path = "../stable_diffusion_models/stable-diffusion-v1-5" + +scheduler = DDIMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False) + +pipe = StableDiffusionPipeline.from_pretrained( + model_path, + revision="fp16", + torch_dtype=torch.float16, + scheduler=scheduler, + use_auth_token=True +) +pipe = pipe.to(device) + +#%% MULTITRANS +# XXX FIXME AssertionError: Need to supply floats for list_injection_strength +# GO AS DEEP AS POSSIBLE WITHOUT CAUSING MOTION + +num_inference_steps = 100 # Number of diffusion iterations +#list_nmb_branches = [2, 12, 24, 55, 77] # Branching structure: how many branches +#list_injection_strength = [0.0, 0.35, 0.5, 0.65, 0.95] # Branching structure: how deep is the blending +list_nmb_branches = list(np.linspace(2, 600, 15).astype(int)) # +list_injection_strength = list(np.linspace(0.45, 0.97, 14).astype(np.float32)) # 
Branching structure: how deep is the blending +list_injection_strength = [float(x) for x in list_injection_strength] +list_injection_strength.insert(0,0.0) + +width = 512 +height = 512 +guidance_scale = 5 +fps = 30 +duration_target = 20 +width = 512 +height = 512 + +lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale) + +#list_nmb_branches = [2, 3, 10, 24] # Branching structure: how many branches +#list_injection_strength = [0.0, 0.6, 0.8, 0.9] # + +list_prompts = [] +list_prompts.append("surrealistic statue made of glitter and dirt, standing in a lake, atmospheric light, strange glow") +list_prompts.append("statue of a mix between a tree and human, made of marble, incredibly detailed") +list_prompts.append("weird statue of a frog monkey, many colors, standing next to the ruins of an ancient city") +list_prompts.append("statue made of hot metal, bizzarre, dark clouds in the sky") +list_prompts.append("statue of a spider that looked like a human") +list_prompts.append("statue of a bird that looked like a scorpion") +list_prompts.append("statue of an ancient cybernetic messenger annoucing good news, golden, futuristic") + + +list_seeds = [234187386, 422209351, 241845736, 28652396, 783279867, 831049796, 234903931] + + +fp_movie = "/home/lugo/tmp/latentblending/bubu.mp4" +ms = MovieSaver(fp_movie, fps=fps) + +for i in range(len(list_prompts)-1): + print(f"Starting movie segment {i+1}/{len(list_prompts)-1}") + + if i==0: + lb.set_prompt1(list_prompts[i]) + lb.set_prompt2(list_prompts[i+1]) + recycle_img1 = False + else: + lb.swap_forward() + lb.set_prompt2(list_prompts[i+1]) + recycle_img1 = True + + local_seeds = [list_seeds[i], list_seeds[i+1]] + list_imgs = lb.run_transition(list_nmb_branches, list_injection_strength, recycle_img1=recycle_img1, fixed_seeds=local_seeds) + list_imgs_interp = add_frames_linear_interp(list_imgs, fps, duration_target) + + # Save movie frame + for img in list_imgs_interp: + ms.write_frame(img) + 
+ms.finalize() + +#%% +#for img in lb.tree_final_imgs: +# if img is not None: +# ms.write_frame(img) +# +#ms.finalize() + diff --git a/latent_blending.py b/latent_blending.py index c34b599..cd23b88 100644 --- a/latent_blending.py +++ b/latent_blending.py @@ -314,7 +314,7 @@ class LatentBlending(): fract_mixing = self.tree_fracts[t_block][idx_leaf_deep] list_fract_mixing_prev = self.tree_fracts[t_block_prev] b_parent1, b_parent2 = get_closest_idx(fract_mixing, list_fract_mixing_prev) - assert self.tree_status[t_block_prev][b_parent1] != 'untouched', 'This should never happen!' + assert self.tree_status[t_block_prev][b_parent1] != 'untouched', 'Branch destruction??? This should never happen!' if self.tree_status[t_block_prev][b_parent2] == 'untouched': self.tree_status[t_block_prev][b_parent2] = 'prefetched' list_local_stem.append([t_block_prev, b_parent2]) @@ -933,115 +933,18 @@ def get_time(resolution=None): #%% le main if __name__ == "__main__": - - #%% TMP SURGERY - num_inference_steps = 20 # Number of diffusion interations - list_nmb_branches = [2, 3, 10, 24] # Branching structure: how many branches - list_injection_strength = [0.0, 0.6, 0.8, 0.9] # Branching structure: how deep is the blending - width = 512 - height = 512 - guidance_scale = 5 - fixed_seeds = [993621550, 280335986] - - lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale) - prompt1 = "photo of a beautiful forest covered in white flowers, ambient light, very detailed, magic" - prompt2 = "photo of an eerie statue surrounded by ferns and vines, analog photograph kodak portra, mystical ambience, incredible detail" - lb.set_prompt1(prompt1) - lb.set_prompt2(prompt2) - imgs_transition = lb.run_transition(list_nmb_branches, list_injection_strength, fixed_seeds=fixed_seeds) - - - #%% LOOP - list_prompts = [] - list_prompts.append("paiting of a medieval city") - list_prompts.append("paiting of a forest") - list_prompts.append("photo of a desert landscape") - 
list_prompts.append("photo of a jungle") - # we provide a mask for that image1 - mask_image = 255*np.ones([512,512], dtype=np.uint8) - mask_image[200:300, 200:300] = 0 - - list_nmb_branches = [2, 4, 12] - list_injection_idx = [0, 4, 12] - - # we provide a new prompt for image2 - prompt2 = list_prompts[1]# "beautiful painting ocean sunset colorful" - # self.swap_forward() - self.randomize_seed() - self.set_prompt2(prompt2) - self.init_inpainting(image_source=img1, mask_image=mask_image) - list_imgs = self.run_transition(list_nmb_branches, list_injection_idx=list_injection_idx, recycle_img1=True, fixed_seeds='randomize') - - # now we switch them around so image2 becomes image1 - img1 = list_imgs[-1] - - - - - #%% GOOD MOVIE ENGINE - num_inference_steps = 30 - width = 512 - height = 512 - guidance_scale = 5 - list_nmb_branches = [2, 4, 10, 50] - list_injection_idx = [0, 17, 24, 27] - fps_target = 30 - duration_target = 10 - width = 512 - height = 512 - - - list_prompts = [] - list_prompts.append('painting of the first beer that was drunk in mesopotamia') - list_prompts.append('painting of a greek wine symposium') - - lb = LatentBlending(pipe, device, height, width, num_inference_steps, guidance_scale, seed) - dp_movie = "/home/lugo/tmp/movie" - - - - #%% EXAMPLE3 MOVIE ENGINE - list_injection_steps = [2, 3, 4, 5] - list_injection_strength = [0.55, 0.69, 0.8, 0.92] - num_inference_steps = 30 - width = 768 - height = 512 - guidance_scale = 5 - seed = 421 - mode = 'standard' - fps_target = 30 - duration_target = 15 - gpu_id = 0 - - device = "cuda:"+str(gpu_id) - model_path = "../stable_diffusion_models/stable-diffusion-v1-5" - pipe = StableDiffusionPipeline.from_pretrained( - model_path, - revision="fp16", - torch_dtype=torch.float16, - scheduler=DDIMScheduler(), - use_auth_token=True - ) - pipe = pipe.to(device) - - #%% """ TODO Coding: + RUNNING WITHOUT PROMPT! 
+ auto mode (quality settings) - refactor movie man - make movie combiner in movie man - check how default args handled in proper python code... save value ranges, can it be trashed? - documentation in code - example1: single transition - example2: single transition inpaint - example3: make movie set all variables in init! self.img2... TODO Other: diff --git a/movie_util.py b/movie_util.py index 08ccf32..88f10f4 100644 --- a/movie_util.py +++ b/movie_util.py @@ -202,6 +202,11 @@ class MovieReader(): #%% if __name__ == "__main__": + ms = MovieSaver("/tmp/bubu.mp4", fps=fps) + for img in list_imgs_interp: + ms.write_frame(img) + ms.finalize() +if False: fps=2 list_fp_movies = [] for k in range(4):