From fd5916a5985041557c9e55ac7e5251e2782133b4 Mon Sep 17 00:00:00 2001
From: DGX
Date: Fri, 29 Mar 2024 14:44:23 +0000
Subject: [PATCH] new gradio interface

---
 README.md                   |   9 +-
 latentblending/gradio_ui.py | 196 +++++++++++++++++++++++++-----------
 2 files changed, 143 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index 4b52899..761119e 100644
--- a/README.md
+++ b/README.md
@@ -35,8 +35,12 @@ be = BlendingEngine(pipe, do_compile=True)
 ```
 
 ## Gradio UI
-We made a UI, in latentblending/gradio_ui.py
-The idea is to generate the keyframes iteratively, selecting the best prompt and seed, and saving the result as .json. Next the video production can be run as a second step using example_multi_trans_json.py
+We can launch the user interface with:
+```commandline
+python latentblending/gradio_ui.py
+```
+
+With the UI, you can iteratively generate your desired keyframes and then render the final movie with latent blending at the end.
 
 ## Example 1: Simple transition
 ![](example1.jpg)
@@ -136,7 +140,6 @@ With latent blending, we can create transitions that appear to defy the laws of
 
 # Coming soon...
 - [ ] MacOS support
-- [ ] Gradio interface
 - [ ] Huggingface Space
 - [ ] Controlnet
 - [ ] IP-Adapter
diff --git a/latentblending/gradio_ui.py b/latentblending/gradio_ui.py
index e6cb1a0..a4a4a63 100644
--- a/latentblending/gradio_ui.py
+++ b/latentblending/gradio_ui.py
@@ -16,6 +16,7 @@ import datetime
 import tempfile
 import json
 from lunar_tools import concatenate_movies
+import argparse
 
 """
 TODO
@@ -25,12 +26,74 @@ TODO
 - hf spaces integration
 """
 
-
-class BlendingFrontend():
+class MultiUserRouter():
     def __init__(
             self,
-            be,
-            share=False):
+            do_compile=False
+            ):
+        self.user_blendingvariableholder = {}
+        self.do_compile = do_compile
+        self.list_models = ["stabilityai/sdxl-turbo", "stabilityai/stable-diffusion-xl-base-1.0"]
+
+        self.init_models()
+
+    def init_models(self):
+        self.dict_blendingengines = {}
+        for m in self.list_models:
+            pipe = AutoPipelineForText2Image.from_pretrained(m, torch_dtype=torch.float16, variant="fp16")
+            pipe.to("cuda")
+            be = BlendingEngine(pipe, do_compile=self.do_compile)
+
+            self.dict_blendingengines[m] = be
+
+    def register_new_user(self, model, width, height):
+        user_id = str(uuid.uuid4().hex.upper()[0:8])
+        be = self.dict_blendingengines[model]
+        be.set_dimensions((width, height))
+        self.user_blendingvariableholder[user_id] = BlendingVariableHolder(be)
+        return user_id
+
+    def user_overflow_protection(self):
+        pass
+
+    def preview_img_selected(self, user_id, data: gr.SelectData, button):
+        return self.user_blendingvariableholder[user_id].preview_img_selected(data, button)
+
+    def movie_img_selected(self, user_id, data: gr.SelectData, button):
+        return self.user_blendingvariableholder[user_id].movie_img_selected(data, button)
+
+    def compute_imgs(self, user_id, prompt, negative_prompt):
+        return self.user_blendingvariableholder[user_id].compute_imgs(prompt, negative_prompt)
+
+    def get_list_images_movie(self, user_id):
+        return self.user_blendingvariableholder[user_id].get_list_images_movie()
+
+    def init_new_movie(self, user_id):
+        return self.user_blendingvariableholder[user_id].init_new_movie()
+
+    def write_json(self, user_id):
+        return self.user_blendingvariableholder[user_id].write_json()
+
+    def add_image_to_video(self, user_id):
+        return self.user_blendingvariableholder[user_id].add_image_to_video()
+
+    def img_movie_delete(self, user_id):
+        return self.user_blendingvariableholder[user_id].img_movie_delete()
+
+    def img_movie_later(self, user_id):
+        return self.user_blendingvariableholder[user_id].img_movie_later()
+
+    def img_movie_earlier(self, user_id):
+        return self.user_blendingvariableholder[user_id].img_movie_earlier()
+
+    def generate_movie(self, user_id, t_per_segment):
+        return self.user_blendingvariableholder[user_id].generate_movie(t_per_segment)
+
+#%% BlendingVariableHolder Class
+class BlendingVariableHolder():
+    def __init__(
+            self,
+            be):
         r"""
         Gradio Helper Class to collect UI data and start latent blending.
         Args:
@@ -40,7 +103,6 @@ class BlendingFrontend():
                 Set true to get a shareable gradio link (e.g. for running a remote server)
         """
         self.be = be
-        self.share = share
 
         # UI Defaults
         self.seed1 = 420
@@ -62,7 +124,6 @@ class BlendingFrontend():
         self.idx_img_movie_selected = None
         self.jpg_quality = 80
         self.fp_movie = ''
-        self.duration_single_trans = 10
 
     def preview_img_selected(self, data: gr.SelectData, button):
         self.idx_img_preview_selected = data.index
@@ -91,7 +152,7 @@ class BlendingFrontend():
             img.save(temp_img_path)
             img.save(temp_img_path, quality=self.jpg_quality, optimize=True)
             self.list_images_preview.append(temp_img_path)
-            return self.list_images_preview
+        return self.list_images_preview
 
 
     def get_list_images_movie(self):
@@ -134,7 +195,7 @@ class BlendingFrontend():
             del self.data[self.idx_img_movie_selected]
             self.idx_img_movie_selected = None
         else:
-            print("Invalid movie image index for deletion.")
+            print(f"Invalid movie image index for deletion: {self.idx_img_movie_selected}")
         return self.get_list_images_movie()
 
     def img_movie_later(self):
@@ -158,7 +219,7 @@ class BlendingFrontend():
 
         return self.get_list_images_movie()
 
-    def generate_movie(self):
+    def generate_movie(self, t_per_segment=10):
         print("starting movie gen")
         list_prompts = []
         list_negative_prompts = []
@@ -192,7 +253,7 @@ class BlendingFrontend():
                 fixed_seeds=fixed_seeds)
 
             # Save movie
-            self.be.write_movie_transition(fp_movie_part, self.duration_single_trans)
+            self.be.write_movie_transition(fp_movie_part, t_per_segment)
             list_movie_parts.append(fp_movie_part)
 
         # Finally, concatenate the result
@@ -200,67 +261,84 @@ class BlendingFrontend():
         print(f"DONE! MOVIE SAVED IN {self.fp_movie}")
         return self.fp_movie
 
-
+#%% Runtime engine
 if __name__ == "__main__":
-    width = 512
-    height = 512
-    num_inference_steps = 4
-
-    pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
-    # pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16")
-    pipe.to("cuda")
-    be = BlendingEngine(pipe)
-    be.set_dimensions((width, height))
-    be.set_num_inference_steps(num_inference_steps)
+    # Parse command-line parameters
+    parser = argparse.ArgumentParser(description="Latent Blending GUI")
+    parser.add_argument("--do_compile", action="store_true")
+    parser.add_argument("--nmb_preview_images", type=int, default=4)
+    parser.add_argument("--server_name", type=str, default=None)
+    try:
+        args = parser.parse_args()
+        nmb_preview_images = args.nmb_preview_images
+        do_compile = args.do_compile
+        server_name = args.server_name
-    bf = BlendingFrontend(be)
+    except SystemExit:
+        # If the script is run in an interactive environment (like Jupyter), parse_args might fail; fall back to defaults.
+        nmb_preview_images = 4
+        do_compile = False  # compile SD pipes with sdfast
+        server_name = None
 
+    mur = MultiUserRouter(do_compile=do_compile)
     with gr.Blocks() as demo:
+        with gr.Accordion("Setup", open=True) as accordion_setup:
+            # New user registration, model selection, ...
+            with gr.Row():
+                model = gr.Dropdown(mur.list_models, value=mur.list_models[0], label="model")
+                width = gr.Slider(256, 2048, 512, step=128, label='width', interactive=True)
+                height = gr.Slider(256, 2048, 512, step=128, label='height', interactive=True)
+                user_id = gr.Textbox(label="user id (filled automatically)", interactive=False)
+                b_start_session = gr.Button('start session', variant='primary')
 
-        with gr.Row():
-            prompt = gr.Textbox(label="prompt")
-            negative_prompt = gr.Textbox(label="negative prompt")
-            b_compute = gr.Button('generate preview images', variant='primary')
-            b_select = gr.Button('add selected image to video', variant='primary')
+        with gr.Accordion("Latent Blending (expand with arrow on right side after you clicked 'start session')", open=False) as accordion_latentblending:
+            with gr.Row():
+                prompt = gr.Textbox(label="prompt")
+                negative_prompt = gr.Textbox(label="negative prompt")
+                b_compute = gr.Button('generate preview images', variant='primary')
+                b_select = gr.Button('add selected image to video', variant='primary')
+
+            with gr.Row():
+                gallery_preview = gr.Gallery(
+                    label="Generated images", show_label=False, elem_id="gallery"
+                    , columns=[nmb_preview_images], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
 
-        with gr.Row():
-            gallery_preview = gr.Gallery(
-                label="Generated images", show_label=False, elem_id="gallery"
-                , columns=[bf.nmb_preview_images], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
-
-
-        with gr.Row():
-            gr.Markdown("Your movie contains so far the below frames")
-        with gr.Row():
-            gallery_movie = gr.Gallery(
-                label="Generated images", show_label=False, elem_id="gallery"
-                , columns=[20], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
+            with gr.Row():
+                gr.Markdown("Your movie contains the following images (see below)")
+            with gr.Row():
+                gallery_movie = gr.Gallery(
+                    label="Generated images", show_label=False, elem_id="gallery"
+                    , columns=[20], rows=[1], object_fit="contain", height="auto", allow_preview=False, interactive=False)
 
-        with gr.Row():
-            b_delete = gr.Button('delete selected image')
-            b_move_earlier = gr.Button('move to earlier time')
-            b_move_later = gr.Button('move to later time')
+            with gr.Row():
+                b_delete = gr.Button('delete selected image')
+                b_move_earlier = gr.Button('move image to earlier time')
+                b_move_later = gr.Button('move image to later time')
 
-        with gr.Row():
-            b_generate_movie = gr.Button('generate movie', variant='primary')
+            with gr.Row():
+                b_generate_movie = gr.Button('generate movie', variant='primary')
+                t_per_segment = gr.Slider(1, 30, 10, step=0.1, label='time per segment', interactive=True)
 
-        with gr.Row():
-            movie = gr.Video()
+            with gr.Row():
+                movie = gr.Video()
 
-        # bindings
-        b_compute.click(bf.compute_imgs, inputs=[prompt, negative_prompt], outputs=gallery_preview)
-        b_select.click(bf.add_image_to_video, None, gallery_movie)
-        b_generate_movie.click(bf.generate_movie, None, movie)
-        gallery_preview.select(bf.preview_img_selected, None, None)
-        gallery_movie.select(bf.movie_img_selected, None, None)
-        b_delete.click(bf.img_movie_delete, None, gallery_movie)
-        b_move_earlier.click(bf.img_movie_earlier, None, gallery_movie)
-        b_move_later.click(bf.img_movie_later, None, gallery_movie)
+        # bindings
+        b_start_session.click(mur.register_new_user, inputs=[model, width, height], outputs=user_id)
+        b_compute.click(mur.compute_imgs, inputs=[user_id, prompt, negative_prompt], outputs=gallery_preview)
+        b_select.click(mur.add_image_to_video, user_id, gallery_movie)
+        gallery_preview.select(mur.preview_img_selected, user_id, None)
+        gallery_movie.select(mur.movie_img_selected, user_id, None)
+        b_delete.click(mur.img_movie_delete, user_id, gallery_movie)
+        b_move_earlier.click(mur.img_movie_earlier, user_id, gallery_movie)
+        b_move_later.click(mur.img_movie_later, user_id, gallery_movie)
+        b_generate_movie.click(mur.generate_movie, [user_id, t_per_segment], movie)
 
-
-    demo.launch(share=bf.share, inbrowser=True, inline=False)
+    if server_name is None:
+        demo.launch(share=False, inbrowser=True, inline=False)
+    else:
+        demo.launch(share=False, inbrowser=True, inline=False, server_name=server_name)
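
For reference, a minimal sketch of how the multi-user flow introduced in this patch could be driven without the Gradio bindings. This is not part of the patch: it assumes `MultiUserRouter` imports cleanly from `latentblending.gradio_ui`, that a CUDA GPU is available (the pipelines are moved to `"cuda"`), and it sets the preview selection index directly instead of going through `gr.SelectData`; prompts and durations are placeholder values.

```python
# Hypothetical usage sketch of the MultiUserRouter API added above (assumptions noted in the text).
from latentblending.gradio_ui import MultiUserRouter

router = MultiUserRouter(do_compile=False)  # loads one BlendingEngine per entry in list_models
user_id = router.register_new_user(router.list_models[0], 512, 512)  # new per-user session id

# Generate preview images for two keyframes and append one image from each batch to the movie.
for prompt in ["photo of a beach at sunrise", "photo of a beach at night"]:
    router.compute_imgs(user_id, prompt, "blurry, lowres")
    # The UI sets this index via gallery_preview.select(); here we assume the first preview is chosen.
    router.user_blendingvariableholder[user_id].idx_img_preview_selected = 0
    router.add_image_to_video(user_id)

# Render all transitions with 10 seconds per segment and print the resulting movie path.
print(router.generate_movie(user_id, t_per_segment=10))
```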