diff --git a/README.md b/README.md
index a6559c7..e1c5558 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,6 @@
 from latentblending.blending_engine import BlendingEngine
-from latentblending.diffusers_holder import DiffusersHolder
 pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16").to("cuda")
-dh = DiffusersHolder(pipe)
-be = BlendingEngine(dh)
-be.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
+be = BlendingEngine(pipe)
+be.set_prompt1("photo of underwater landscape, fish, and the sea, incredible detail, high resolution")
 be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
 be.set_negative_prompt("blurry, ugly, pale")
@@ -31,7 +30,7 @@
 Install https://github.com/chengzeyi/stable-fast
 Then enable pipe compilation by setting *do_compile=True*
 ```python
-be = BlendingEngine(dh, do_compile=True)
+be = BlendingEngine(pipe, do_compile=True)
 ```
 
 ## Gradio UI
diff --git a/examples/multi_trans.py b/examples/multi_trans.py
index e83cd0f..7a7efc7 100644
--- a/examples/multi_trans.py
+++ b/examples/multi_trans.py
@@ -3,7 +3,7 @@ import warnings
+import numpy as np
 from diffusers import AutoPipelineForText2Image
 from latentblending.movie_util import concatenate_movies
 from latentblending.blending_engine import BlendingEngine
-from latentblending.diffusers_holder import DiffusersHolder
 torch.set_grad_enabled(False)
 torch.backends.cudnn.benchmark = False
 warnings.filterwarnings('ignore')
@@ -11,7 +10,7 @@ warnings.filterwarnings('ignore')
-# %% First let us spawn a stable diffusion holder. Uncomment your version of choice.
+# %% First let us create the diffusers pipeline and the blending engine.
 pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
 pipe.to('cuda')
-dh = DiffusersHolder(pipe)
+be = BlendingEngine(pipe)
 
 # %% Let's setup the multi transition
 fps = 30
@@ -24,10 +23,10 @@
 list_prompts.append("Photo of an elephant in african savannah")
 list_prompts.append("photo of a house, high detail")
 
-# You can optionally specify the seeds
-list_seeds = [95437579, 33259350, 956051013]
+# Specify the seeds
+list_seeds = np.random.randint(0, 10**9, len(list_prompts))
 fp_movie = 'movie_example2.mp4'
-be = BlendingEngine(dh)
+
 list_movie_parts = []
 for i in range(len(list_prompts) - 1):
diff --git a/examples/single_trans.py b/examples/single_trans.py
index 1467398..3f99f12 100644
--- a/examples/single_trans.py
+++ b/examples/single_trans.py
@@ -2,7 +2,6 @@ import torch
 import warnings
 from diffusers import AutoPipelineForText2Image
 from latentblending.blending_engine import BlendingEngine
-from latentblending.diffusers_holder import DiffusersHolder
 
 warnings.filterwarnings('ignore')
 torch.set_grad_enabled(False)
@@ -12,9 +11,7 @@ torch.backends.cudnn.benchmark = False
 
 pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
 pipe.to("cuda")
-dh = DiffusersHolder(pipe)
-
-be = BlendingEngine(dh)
-be.set_prompt1("photo of underwater landscape, fish, und the sea, incredible detail, high resolution")
+be = BlendingEngine(pipe)
+be.set_prompt1("photo of underwater landscape, fish, and the sea, incredible detail, high resolution")
 be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
 be.set_negative_prompt("blurry, ugly, pale")
diff --git a/latentblending/blending_engine.py b/latentblending/blending_engine.py
index e3373a5..6cd004f 100644
--- a/latentblending/blending_engine.py
+++ b/latentblending/blending_engine.py
@@ -9,7 +9,8 @@ from latentblending.movie_util import MovieSaver
 from typing import List, Optional
 import lpips
 import platform
-from latentblending.utils import interpolate_spherical, interpolate_linear, add_frames_linear_interp, yml_load, yml_save
+from latentblending.diffusers_holder import DiffusersHolder
+from latentblending.utils import interpolate_spherical, interpolate_linear, add_frames_linear_interp
 warnings.filterwarnings('ignore')
 torch.backends.cudnn.benchmark = False
 torch.set_grad_enabled(False)
@@ -18,13 +19,15 @@ torch.set_grad_enabled(False)
 class BlendingEngine():
     def __init__(
             self,
-            dh: None,
+            pipe: None,
             do_compile: bool = False,
             guidance_scale_mid_damper: float = 0.5,
             mid_compression_scaler: float = 1.2):
         r"""
         Initializes the latent blending class.
         Args:
+            pipe: diffusers pipeline (SDXL)
+            do_compile: compile the pipeline with stable-fast for faster inference
             guidance_scale_mid_damper: float = 0.5
                 Reduces the guidance scale towards the middle of the transition.
                 A value of 0.5 would decrease the guidance_scale towards the middle
                 linearly by 0.5.
@@ -37,7 +40,8 @@
             and guidance_scale_mid_damper <= 1.0, \
-            f"guidance_scale_mid_damper neees to be in interval (0,1], you provided {guidance_scale_mid_damper}"
+            f"guidance_scale_mid_damper needs to be in interval (0,1], you provided {guidance_scale_mid_damper}"
 
-        self.dh = dh
+
+        self.dh = DiffusersHolder(pipe)
         self.device = self.dh.device
         self.set_dimensions()
@@ -97,7 +101,7 @@
         """
         Measures the time per diffusion step and for the vae decoding
         """
-
+        print("starting speed benchmark...")
         text_embeddings = self.dh.get_text_embedding("test")
         latents_start = self.dh.get_noise(np.random.randint(111111))
         # warmup
@@ -111,6 +115,7 @@
         t0 = time.time()
         img = self.dh.latent2image(list_latents[-1])
         self.dt_vae = time.time() - t0
+        print(f"time per unet iteration: {self.dt_unet_step}s, time for vae decoding: {self.dt_vae}s")
 
     def set_dimensions(self, size_output=None):
         r"""
@@ -701,12 +706,6 @@
             ms.write_frame(img)
         ms.finalize()
 
-    def save_statedict(self, fp_yml):
-        # Dump everything relevant into yaml
-        imgs_transition = self.tree_final_imgs
-        state_dict = self.get_state_dict()
-        state_dict['nmb_images'] = len(imgs_transition)
-        yml_save(fp_yml, state_dict)
 
     def get_state_dict(self):
         state_dict = {}
@@ -828,14 +827,16 @@ if __name__ == "__main__":
     from diffusers import AutoencoderTiny
     # pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
     pretrained_model_name_or_path = "stabilityai/sdxl-turbo"
+    pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path)
 
-    pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16, variant="fp16")
+    # pipe.to("mps")
     pipe.to("cuda")
-    pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
-    pipe.vae = pipe.vae.cuda()
+
+    # pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesdxl', torch_device='cuda', torch_dtype=torch.float16)
+    # pipe.vae = pipe.vae.cuda()
     dh = DiffusersHolder(pipe)
 
     # %% Next let's set up all parameters
-    prompt1 = "photo of underwater landscape, fish, und the sea, incredible detail, high resolution"
+    prompt1 = "photo of underwater landscape, fish, and the sea, incredible detail, high resolution"
     prompt2 = "rendering of an alien planet, strange plants, strange creatures, surreal"
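
For reference, the call pattern this diff converges on: `BlendingEngine` now takes the diffusers pipeline directly and constructs the `DiffusersHolder` internally. A minimal sketch assembled from the README and example hunks above (the subsequent transition/movie-writing steps are unchanged and omitted here):

```python
import torch
from diffusers import AutoPipelineForText2Image
from latentblending.blending_engine import BlendingEngine

# Load the SDXL-turbo pipeline and move it to the GPU.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

# The engine wraps the pipeline itself; no DiffusersHolder at the call site.
# Pass do_compile=True to compile the pipeline with stable-fast.
be = BlendingEngine(pipe)
be.set_prompt1("photo of underwater landscape, fish, and the sea, incredible detail, high resolution")
be.set_prompt2("rendering of an alien planet, strange plants, strange creatures, surreal")
be.set_negative_prompt("blurry, ugly, pale")
```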