From 6189391fbc19aa4caeb598f5f07d7085cb5977ac Mon Sep 17 00:00:00 2001
From: Johannes Stelzer
Date: Sat, 6 Jan 2024 18:16:36 +0100
Subject: [PATCH] ok version in latent_blending.py

---
 diffusers_holder.py  |  4 +++
 example1_standard.py | 14 ++++++----
 latent_blending.py   | 66 ++++++++++++++++++++++++++++++++++++++++++--
 movie_util.py        |  1 -
 4 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/diffusers_holder.py b/diffusers_holder.py
index b848d4c..e0d81de 100644
--- a/diffusers_holder.py
+++ b/diffusers_holder.py
@@ -773,6 +773,10 @@ if __name__ == "__main__":
     self.run_diffusion_sd_xl(text_embeddings_mix, latents_start_mixed, idx_start=idx_start, return_image=True)
 
+    #%%
+    fract = 0.8
+    latentsmix = interpolate_spherical(latents1[-1], latents2[-1], fract)
+    self.latent2image(latentsmix)

diff --git a/example1_standard.py b/example1_standard.py
index 125ce61..eac5bfb 100644
--- a/example1_standard.py
+++ b/example1_standard.py
@@ -18,21 +18,24 @@ import warnings
 from latent_blending import LatentBlending
 from diffusers_holder import DiffusersHolder
 from diffusers import DiffusionPipeline
+
+from diffusers import AutoPipelineForText2Image
 warnings.filterwarnings('ignore')
 torch.set_grad_enabled(False)
 torch.backends.cudnn.benchmark = False
 
 # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
-pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
-pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16)
-pipe.to('cuda')
+pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
+pipe.to("cuda")
+
 dh = DiffusersHolder(pipe)
 
 # %% Next let's set up all parameters
 depth_strength = 0.55  # Specifies how deep (in terms of diffusion iterations the first branching happens)
-t_compute_max_allowed = 60  # Determines the quality of the transition in terms of compute time you grant it
-num_inference_steps = 30
+t_compute_max_allowed = 10  # Determines the quality of the transition in terms of compute time you grant it
+num_inference_steps = 4
 size_output = (1024, 1024)
+
 prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
 prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
 negative_prompt = "blurry, ugly, pale"  # Optional
@@ -46,6 +49,7 @@ lb.set_prompt1(prompt1)
 lb.set_prompt2(prompt2)
 lb.set_dimensions(size_output)
 lb.set_negative_prompt(negative_prompt)
+lb.set_guidance_scale(0)
 
 # Run latent blending
 lb.run_transition(

diff --git a/latent_blending.py b/latent_blending.py
index 21846e1..63a7746 100644
--- a/latent_blending.py
+++ b/latent_blending.py
@@ -797,10 +797,10 @@ class LatentBlending():
             Used to determine the optimal point of insertion to create smooth transitions.
             High values indicate low similarity.
         """
-        tensorA = torch.from_numpy(imgA).float().cuda(self.device)
+        tensorA = torch.from_numpy(np.asarray(imgA)).float().cuda(self.device)
         tensorA = 2 * tensorA / 255.0 - 1
         tensorA = tensorA.permute([2, 0, 1]).unsqueeze(0)
-        tensorB = torch.from_numpy(imgB).float().cuda(self.device)
+        tensorB = torch.from_numpy(np.asarray(imgB)).float().cuda(self.device)
         tensorB = 2 * tensorB / 255.0 - 1
         tensorB = tensorB.permute([2, 0, 1]).unsqueeze(0)
         lploss = self.lpips(tensorA, tensorB)
@@ -831,3 +831,65 @@ class LatentBlending():
             b_parent1 = tmp
 
         return b_parent1, b_parent2
+
+
+if __name__ == "__main__":
+
+    # %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
+    from diffusers_holder import DiffusersHolder
+    from diffusers import DiffusionPipeline
+    from diffusers import AutoencoderTiny
+    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
+    pipe.to("cuda")
+    # pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
+    # pipe.vae = pipe.vae.cuda()
+
+    dh = DiffusersHolder(pipe)
+    # %% Next let's set up all parameters
+    depth_strength = 0.5  # Specifies how deep (in terms of diffusion iterations the first branching happens)
+    t_compute_max_allowed = 3  # Determines the quality of the transition in terms of compute time you grant it
+    num_inference_steps = 4
+    size_output = (512, 512)
+
+
+    prompt1 = "underwater landscape, fish, und the sea, incredible detail, high resolution"
+    prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
+    negative_prompt = "blurry, ugly, pale"  # Optional
+
+    fp_movie = 'movie_example1.mp4'
+    duration_transition = 12  # In seconds
+
+    # Spawn latent blending
+    lb = LatentBlending(dh)
+    lb.set_prompt1(prompt1)
+    lb.set_prompt2(prompt2)
+    lb.set_dimensions(size_output)
+    lb.set_negative_prompt(negative_prompt)
+    lb.set_guidance_scale(0)
+
+    lb.branch1_crossfeed_power = 0.3
+    lb.branch1_crossfeed_range = 0.6
+    lb.branch1_crossfeed_decay = 0.99
+
+    lb.parental_crossfeed_power = 0.8
+    lb.parental_crossfeed_power_decay = 1.0
+    lb.parental_crossfeed_range = 1.0
+
+    # Run latent blending
+    lb.run_transition(
+        depth_strength=depth_strength,
+        num_inference_steps=num_inference_steps,
+        t_compute_max_allowed=t_compute_max_allowed)
+
+
+    # Save movie
+    lb.write_movie_transition(fp_movie, duration_transition)
+
+    #%%
+
+    """
+    checkout sizes
+    checkout good tree for num inference steps
+    checkout that good nmb inference step given
+
+    """
\ No newline at end of file

diff --git a/movie_util.py b/movie_util.py
index e6e0c6a..eb7e157 100644
--- a/movie_util.py
+++ b/movie_util.py
@@ -262,7 +262,6 @@ def add_subtitles_to_video(
 
 
 
-
 class MovieReader():
     r"""
     Class to read in a movie.
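
Note for reviewers (not part of the patch): the new cell added to diffusers_holder.py blends two denoised latent stacks with interpolate_spherical before decoding. That helper lives elsewhere in latent_blending.py; the sketch below is a hypothetical, self-contained version of spherical linear interpolation (slerp) over latent tensors, written only to illustrate what the patched call is doing. The function name and argument order mirror the call in the hunk, but the body is an assumption, not the repository's implementation.

# Hypothetical sketch of spherical latent interpolation (slerp); assumed, not
# copied from latent_blending.py.
import torch


def interpolate_spherical(p0: torch.Tensor, p1: torch.Tensor, fract: float) -> torch.Tensor:
    # Measure the angle between the two latents, treated as flat vectors.
    eps = 1e-8
    a = p0.flatten().float()
    b = p1.flatten().float()
    dot = torch.clamp(torch.dot(a / (a.norm() + eps), b / (b.norm() + eps)), -1.0, 1.0)
    omega = torch.arccos(dot)
    sin_omega = torch.sin(omega) + eps
    # Weight each endpoint so the blend follows the great circle between them,
    # which preserves the latent norm better than a plain linear mix.
    w0 = torch.sin((1.0 - fract) * omega) / sin_omega
    w1 = torch.sin(fract * omega) / sin_omega
    return w0 * p0 + w1 * p1


# Usage analogous to the patched cell (fract=0.8 weights the blend toward the second latent):
# latentsmix = interpolate_spherical(latents1[-1], latents2[-1], 0.8)
# image = self.latent2image(latentsmix)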