From 16da42c10e934c1e237285ce1ebe4df81cbeb8cf Mon Sep 17 00:00:00 2001
From: Johannes Stelzer
Date: Sun, 8 Jan 2023 11:48:44 +0100
Subject: [PATCH] negative prompts

---
 gradio_ui.py               |  35 +++----
 latent_blending.py         |  20 ++++
 stable_diffusion_holder.py | 193 ++++++-------------------------------
 3 files changed, 63 insertions(+), 185 deletions(-)

diff --git a/gradio_ui.py b/gradio_ui.py
index 0382d15..4230b41 100644
--- a/gradio_ui.py
+++ b/gradio_ui.py
@@ -35,9 +35,6 @@ import copy
 
 """
 experiment with slider as output -> does it change in the browser?
-guidance scale has no effect
-get a movie as result
-seed bug: also shows changes from before
 mid compression scaler can destroy tree
 """
 
@@ -77,9 +74,10 @@ class BlendingFrontend():
         self.guidance_scale = 4.0
         self.guidance_scale_mid_damper = 0.5
         self.mid_compression_scaler = 1.2
-        self.prompt1 = ''
-        self.prompt2 = ''
-        self.dp_base = '/home/lugo/latentblending'
+        self.prompt1 = ""
+        self.prompt2 = ""
+        self.negative_prompt = ""
+        self.dp_base = "/home/lugo/latentblending"
         self.list_settings = []
         self.state_prev = {}
         self.state_current = {}
@@ -137,6 +135,9 @@ class BlendingFrontend():
         self.prompt2 = value
         # print(f"changed prompt2 to {value}")
 
+    def change_negative_prompt(self, value):
+        self.negative_prompt = value
+
     def change_seed1(self, value):
         self.seed1 = int(value)
 
@@ -170,15 +171,6 @@ class BlendingFrontend():
         self.lb.sdh.height = self.height
         self.lb.sdh.width = self.width
 
-        # list_nmb_branches = [2, 6, 15]
-        # list_injection_strength = [0.0, self.depth_strength, 0.9]
-
-        # self.lb.setup_branching(
-        #     num_inference_steps = self.num_inference_steps,
-        #     list_nmb_branches = list_nmb_branches,
-        #     list_injection_strength = list_injection_strength
-        #     )
-
         self.lb.autosetup_branching(
             depth_strength = self.depth_strength,
             num_inference_steps = self.num_inference_steps,
@@ -187,6 +179,7 @@ class BlendingFrontend():
 
         self.lb.set_prompt1(self.prompt1)
         self.lb.set_prompt2(self.prompt2)
+        self.lb.set_negative_prompt(self.negative_prompt)
 
         self.lb.guidance_scale = self.guidance_scale
         self.lb.guidance_scale_mid_damper = self.guidance_scale_mid_damper
@@ -276,8 +269,8 @@ self = BlendingFrontend()
 
 with gr.Blocks() as demo:
     with gr.Row():
-        text1 = gr.Textbox(label="prompt 1")
-        text2 = gr.Textbox(label="prompt 2")
+        prompt1 = gr.Textbox(label="prompt 1")
+        prompt2 = gr.Textbox(label="prompt 2")
 
     with gr.Row():
         depth_strength = gr.Slider(0.01, 0.99, self.depth_strength, step=0.01, label='depth_strength', interactive=True)
@@ -288,7 +281,8 @@ with gr.Blocks() as demo:
     with gr.Row():
         num_inference_steps = gr.Slider(5, 100, self.num_inference_steps, step=1, label='num_inference_steps', interactive=True)
         height = gr.Slider(256, 2048, self.height, step=128, label='height', interactive=True)
-        width = gr.Slider(256, 2048, self.width, step=128, label='width', interactive=True)
+        width = gr.Slider(256, 2048, self.width, step=128, label='width', interactive=True)
+        negative_prompt = gr.Textbox(label="negative prompt")
 
     with gr.Row():
         b_newseed1 = gr.Button("rand seed 1")
@@ -324,10 +318,11 @@ with gr.Blocks() as demo:
     height.change(fn=self.change_height, inputs=height)
     width.change(fn=self.change_width, inputs=width)
-    text1.change(fn=self.change_prompt1, inputs=text1)
-    text2.change(fn=self.change_prompt2, inputs=text2)
+    prompt1.change(fn=self.change_prompt1, inputs=prompt1)
+    prompt2.change(fn=self.change_prompt2, inputs=prompt2)
     seed1.change(fn=self.change_seed1, inputs=seed1)
     seed2.change(fn=self.change_seed2, inputs=seed2)
+    negative_prompt.change(fn=self.change_negative_prompt, inputs=negative_prompt)
 
     b_newseed1.click(self.randomize_seed1, outputs=seed1)
     b_newseed2.click(self.randomize_seed2, outputs=seed2)

diff --git a/latent_blending.py b/latent_blending.py
index 6ab572b..5866ce5 100644
--- a/latent_blending.py
+++ b/latent_blending.py
@@ -86,6 +86,7 @@ class LatentBlending():
         # Initialize vars
         self.prompt1 = ""
         self.prompt2 = ""
+        self.negative_prompt = ""
         self.tree_latents = None
         self.tree_fracts = None
         self.tree_status = None
@@ -127,6 +128,12 @@ class LatentBlending():
         self.guidance_scale = guidance_scale
         self.sdh.guidance_scale = guidance_scale
 
+    def set_negative_prompt(self, negative_prompt):
+        r"""Set the negative prompt. Currently only one negative prompt is supported.
+        """
+        self.negative_prompt = negative_prompt
+        self.sdh.set_negative_prompt(negative_prompt)
+
     def set_guidance_mid_dampening(self, fract_mixing):
         r"""
         Tunes the guidance scale down as a linear function of fract_mixing,
@@ -1096,10 +1103,23 @@ if __name__ == "__main__":
     dp_img = "/home/lugo/latentblending/230107_144533"
     self.run_upscaling_step2(dp_img)
 
+
 #%%
 """
+mr stealy
+        elif isinstance(negative_prompt, str):
+            uncond_tokens = [negative_prompt]
+
+
+
+"""
+
+
+#%%
+"""
+
 TODO Coding:
     CHECK IF ALL STUFF WORKS STILL: STANDARD MODEL, INPAINTING
     RUNNING WITHOUT PROMPT!

diff --git a/stable_diffusion_holder.py b/stable_diffusion_holder.py
index 17b31ae..d76bc1e 100644
--- a/stable_diffusion_holder.py
+++ b/stable_diffusion_holder.py
@@ -144,6 +144,8 @@ class StableDiffusionHolder:
         self.mask_empty = Image.fromarray(255*np.ones([self.width, self.height], dtype=np.uint8))
         self.image_empty = Image.fromarray(np.zeros([self.width, self.height, 3], dtype=np.uint8))
 
+        self.negative_prompt = [""]
+
     def init_model(self, fp_ckpt, fp_config):
         assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}"
@@ -158,7 +160,17 @@ class StableDiffusionHolder:
         self.model = self.model.to(self.device)
         self.sampler = DDIMSampler(self.model)
 
+    def set_negative_prompt(self, negative_prompt):
+        r"""Set the negative prompt. Currently only one negative prompt is supported.
+        """
+        if isinstance(negative_prompt, str):
+            self.negative_prompt = [negative_prompt]
+        else:
+            self.negative_prompt = negative_prompt
+
+        if len(self.negative_prompt) > 1:
+            self.negative_prompt = [self.negative_prompt[0]]
 
     def init_auto_res(self):
         r"""Automatically set the resolution to the one used in training.
@@ -266,7 +278,7 @@ class StableDiffusionHolder:
         with precision_scope("cuda"):
             with self.model.ema_scope():
                 if self.guidance_scale != 1.0:
-                    uc = self.model.get_learned_conditioning([""])
+                    uc = self.model.get_learned_conditioning(self.negative_prompt)
                 else:
                     uc = None
                 shape_latents = [self.C, self.height // self.f, self.width // self.f]
@@ -532,177 +544,28 @@ class StableDiffusionHolder:
 
 if __name__ == "__main__":
-    fp_ckpt= "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
-    fp_config = 'configs/x4-upscaling.yaml'
-    num_inference_steps = 100
-    self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps=num_inference_steps)
-    xxx
-    #%% image A
-    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
-    image = image.resize((32*20, 32*12))
-    promptA = "photo of a an ancient castle surrounded by a forest"
-    noise_level = 20 #gradio min=0, max=350, value=20
-    text_embeddingA = self.get_text_embedding(promptA)
-    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
-
-    list_samplesA = self.run_diffusion_upscaling(cond, uc_full)
-    image_result = Image.fromarray(self.latent2image(list_samplesA[-1]))
-    image_result.save('/home/lugo/latentblending/test1/high/imgA.jpg')
-
-
-    #%% image B
-    from latent_blending import interpolate_linear, interpolate_spherical
-    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg')
-    image = image.resize((32*20, 32*12))
-    promptA = "photo of a an ancient castle surrounded by a forest"
-    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
-    noise_level = 20 #gradio min=0, max=350, value=20
-    text_embeddingA = self.get_text_embedding(promptA)
-    text_embeddingB = self.get_text_embedding(promptB)
-    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
-
-    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
-
-    list_samplesB = self.run_diffusion_upscaling(cond, uc_full)
-    image_result = Image.fromarray(self.latent2image(list_samplesB[-1]))
-    image_result.save('/home/lugo/latentblending/test1/high/imgB.jpg')
-
-
-    #%% reality check: run only for 50 iter.
-    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
-    image = image.resize((32*20, 32*12))
-    promptA = "photo of a an ancient castle surrounded by a forest"
-    noise_level = 20 #gradio min=0, max=350, value=20
-    text_embeddingA = self.get_text_embedding(promptA)
-    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
-
-    latents_inject = list_samplesA[50]
-    list_samplesAx = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=50)
-    image_result = Image.fromarray(self.latent2image(list_samplesAx[-1]))
-    image_result.save('/home/lugo/latentblending/test1/high/imgA_restart.jpg')
-
-    # RESULTS ARE NOT EXACTLY IDENTICAL! INVESTIGATE WHY
-
-    #%% mix in the middle! which uc_full should be taken?
-    # expA: take the one from A
-    idx_start = 90
-    latentsA = list_samplesA[idx_start]
-    latentsB = list_samplesB[idx_start]
-    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
-
-    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
-    image = image.resize((32*20, 32*12))
-    promptA = "photo of a an ancient castle surrounded by a forest"
-    noise_level = 20 #gradio min=0, max=350, value=20
-    text_embeddingA = self.get_text_embedding(promptA)
-    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
-
-    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
-    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
-    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expA_late.jpg')
-
-
-    #%% mix in the middle! which uc_full should be taken?
-    # expA: take the one from B
-    idx_start = 90
-    latentsA = list_samplesA[idx_start]
-    latentsB = list_samplesB[idx_start]
-    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
-
-    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
-    promptA = "photo of a an ancient castle surrounded by a forest"
-    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
-    noise_level = 20 #gradio min=0, max=350, value=20
-    text_embeddingA = self.get_text_embedding(promptA)
-    text_embeddingB = self.get_text_embedding(promptB)
-    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
-    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
-
-    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
-    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
-    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expB_late.jpg')
-
-    #%% lets blend the uc_full too!
-    # expC
+    num_inference_steps = 20 # Number of diffusion iterations
-    idx_start = 50
-    list_mix = np.linspace(0, 1, 20)
-    for fract_mix in list_mix:
-        # fract_mix = 0.75
-        latentsA = list_samplesA[idx_start]
-        latentsB = list_samplesB[idx_start]
-        latents_inject = interpolate_spherical(latentsA, latentsB, fract_mix)
-
-        text_embeddingA = self.get_text_embedding(promptA)
-        text_embeddingB = self.get_text_embedding(promptB)
-        text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
-
-        imageA = Image.open('/home/lugo/latentblending/test1/img_0007.jpg').resize((32*20, 32*12))
-        condA, uc_fullA = self.get_cond_upscaling(imageA, text_embedding, noise_level)
-
-        imageB = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
-        condB, uc_fullB = self.get_cond_upscaling(imageB, text_embedding, noise_level)
-
-        condA['c_concat'][0] = interpolate_spherical(condA['c_concat'][0], condB['c_concat'][0], fract_mix)
-        uc_fullA['c_concat'][0] = interpolate_spherical(uc_fullA['c_concat'][0], uc_fullB['c_concat'][0], fract_mix)
+    # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
+    # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
 
-        list_samples = self.run_diffusion_upscaling(condA, uc_fullA, latents_inject, idx_start=idx_start)
-        image_result = Image.fromarray(self.latent2image(list_samples[-1]))
-        image_result.save(f'/home/lugo/latentblending/test1/high/img_mix_expC_{fract_mix}_start{idx_start}.jpg')
+    # fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
+    # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
 
-
-
-#%%
+    fp_ckpt = "../stable_diffusion_models/ckpt/v2-1_768-ema-pruned.ckpt"
+    fp_config = 'configs/v2-inference-v.yaml'
 
-list_imgs = os.listdir('/home/lugo/latentblending/test1/high/')
-list_imgs = [l for l in list_imgs if "expC" in l]
-list_imgs.pop(0)
-
-lx = []
-for fn in list_imgs:
-    Image.open
-
-
-#%%
+
+    self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
+
+    #%%
+    prompt = "painting of a house"
+    te = self.get_text_embedding(prompt)
+
+    img = self.run_diffusion_standard(te, return_image=True)
 
-
-    if False:
-
-        num_inference_steps = 20 # Number of diffusion interations
-
-        # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
-        # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
-
-        fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
-        fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
-
-        sdh = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
-        # fp_ckpt= "../stable_diffusion_models/ckpt/512-base-ema.ckpt"
-        # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inference.yaml'
-
-
-
-        image_source = Image.fromarray((255*np.random.rand(512,512,3)).astype(np.uint8))
-        mask = 255*np.ones([512,512], dtype=np.uint8)
-        mask[0:50, 0:50] = 0
-        mask = Image.fromarray(mask)
-
-        sdh.init_inpainting(image_source, mask)
-        text_embedding = sdh.get_text_embedding("photo of a strange house, surreal painting")
-        list_latents = sdh.run_diffusion_inpaint(text_embedding)
-
-        idx_inject = 3
-        img_orig = sdh.latent2image(list_latents[-1])
-        list_inject = sdh.run_diffusion_inpaint(text_embedding, list_latents[idx_inject], idx_start=idx_inject+1)
-        img_inject = sdh.latent2image(list_inject[-1])
-
-        img_diff = img_orig - img_inject
-        import matplotlib.pyplot as plt
-        plt.imshow(np.concatenate((img_orig, img_inject, img_diff), axis=1))
-
-
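
Usage sketch (not part of the diff above): with this patch, a single negative prompt string flows from the gradio textbox through LatentBlending.set_negative_prompt() into StableDiffusionHolder, where it replaces the empty-string unconditional conditioning whenever guidance_scale != 1.0. A minimal end-to-end flow, assuming the v2-1 checkpoint/config paths from the patched __main__ section and the existing LatentBlending(sdh) constructor:

    from stable_diffusion_holder import StableDiffusionHolder
    from latent_blending import LatentBlending

    # Paths taken from the patched __main__ section; adjust to your local setup.
    fp_ckpt = "../stable_diffusion_models/ckpt/v2-1_768-ema-pruned.ckpt"
    fp_config = 'configs/v2-inference-v.yaml'

    sdh = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps=30)
    lb = LatentBlending(sdh)
    lb.set_prompt1("photo of an ancient castle surrounded by a forest")
    lb.set_prompt2("photo of a beautiful island on the horizon")
    # One global negative prompt, applied to both blending endpoints;
    # set_negative_prompt() truncates a list to its first entry.
    lb.set_negative_prompt("blurry, lowres, oversaturated")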