negative prompts

2023-01-08 11:48:44 +01:00 · 2023-01-08 11:48:44 +01:00 · 16da42c10e
parent 34cc1edd57
commit 16da42c10e
3 changed files with 63 additions and 185 deletions
--- a/gradio_ui.py
+++ b/gradio_ui.py
@ -35,9 +35,6 @@ import copy
 """
 experiment with slider as output -> does it change in the browser?
 guidance scale has no effect
 get a movie as result
 seed bug: also shows changes from before
 mid compression scaler can destroy tree
 """
@ -77,9 +74,10 @@ class BlendingFrontend():
        self.guidance_scale = 4.0
        self.guidance_scale_mid_damper = 0.5
        self.mid_compression_scaler = 1.2
-        self.prompt1 = ''
+        self.prompt1 = ""
-        self.prompt2 = ''
+        self.prompt2 = ""
-        self.dp_base = '/home/lugo/latentblending'
+        self.negative_prompt = ""
        self.dp_base = "/home/lugo/latentblending"
        self.list_settings = []
        self.state_prev = {}
        self.state_current = {}
@ -137,6 +135,9 @@ class BlendingFrontend():
        self.prompt2 = value
        # print(f"changed prompt2 to {value}")
    def change_negative_prompt(self, value):
        """Remember the negative prompt text entered in the UI.

        The value is only stored here as ``self.negative_prompt``; it is not
        validated or converted.

        Args:
            value: new negative prompt (presumably a str coming from the
                gradio textbox -- confirm against the UI wiring).
        """
        self.negative_prompt = value
    def change_seed1(self, value):
        self.seed1 = int(value)
@ -170,15 +171,6 @@ class BlendingFrontend():
        self.lb.sdh.height = self.height
        self.lb.sdh.width = self.width
        # list_nmb_branches = [2, 6, 15]
        # list_injection_strength = [0.0, self.depth_strength, 0.9]
        # self.lb.setup_branching(
        #                     num_inference_steps = self.num_inference_steps,
        #                     list_nmb_branches = list_nmb_branches, 
        #                     list_injection_strength = list_injection_strength
        #                   )
        self.lb.autosetup_branching(
                depth_strength = self.depth_strength,
                num_inference_steps = self.num_inference_steps,
@ -187,6 +179,7 @@ class BlendingFrontend():
        self.lb.set_prompt1(self.prompt1)
        self.lb.set_prompt2(self.prompt2)
        self.lb.set_negative_prompt(self.negative_prompt)
        self.lb.guidance_scale = self.guidance_scale
        self.lb.guidance_scale_mid_damper = self.guidance_scale_mid_damper
@ -276,8 +269,8 @@ self = BlendingFrontend()
 with gr.Blocks() as demo:
    with gr.Row():
-        text1 = gr.Textbox(label="prompt 1")
+        prompt1 = gr.Textbox(label="prompt 1")
-        text2 = gr.Textbox(label="prompt 2")
+        prompt2 = gr.Textbox(label="prompt 2")
    with gr.Row():
        depth_strength = gr.Slider(0.01, 0.99, self.depth_strength, step=0.01, label='depth_strength', interactive=True) 
@ -289,6 +282,7 @@ with gr.Blocks() as demo:
        num_inference_steps = gr.Slider(5, 100, self.num_inference_steps, step=1, label='num_inference_steps', interactive=True)
        height = gr.Slider(256, 2048, self.height, step=128, label='height', interactive=True)
        width = gr.Slider(256, 2048, self.width, step=128, label='width', interactive=True) 
        negative_prompt = gr.Textbox(label="negative prompt")          
    with gr.Row():
        b_newseed1 = gr.Button("rand seed 1")
@ -324,10 +318,11 @@ with gr.Blocks() as demo:
    height.change(fn=self.change_height, inputs=height)
    width.change(fn=self.change_width, inputs=width)
-    text1.change(fn=self.change_prompt1, inputs=text1)
+    prompt1.change(fn=self.change_prompt1, inputs=prompt1)
-    text2.change(fn=self.change_prompt2, inputs=text2)
+    prompt2.change(fn=self.change_prompt2, inputs=prompt2)
    seed1.change(fn=self.change_seed1, inputs=seed1)
    seed2.change(fn=self.change_seed2, inputs=seed2)
    negative_prompt.change(fn=self.change_negative_prompt, inputs=negative_prompt)
    b_newseed1.click(self.randomize_seed1, outputs=seed1)
    b_newseed2.click(self.randomize_seed2, outputs=seed2)
--- a/latent_blending.py
+++ b/latent_blending.py
@ -86,6 +86,7 @@ class LatentBlending():
        # Initialize vars
        self.prompt1 = ""
        self.prompt2 = ""
        self.negative_prompt = ""
        self.tree_latents = None
        self.tree_fracts = None
        self.tree_status = None
@ -127,6 +128,12 @@ class LatentBlending():
        self.guidance_scale = guidance_scale
        self.sdh.guidance_scale = guidance_scale
    def set_negative_prompt(self, negative_prompt):
        r"""Set the negative prompt. Currently only one negative prompt is supported.

        The value is stored on this object as ``self.negative_prompt`` and is
        then forwarded unchanged to ``self.sdh.set_negative_prompt``.

        Args:
            negative_prompt: the negative prompt to use; passed through as-is.
        """
        self.negative_prompt = negative_prompt
        # Keep the holder in sync with the value stored above.
        self.sdh.set_negative_prompt(negative_prompt)
    def set_guidance_mid_dampening(self, fract_mixing):
        r"""
        Tunes the guidance scale down as a linear function of fract_mixing, 
@ -1096,10 +1103,23 @@ if __name__ == "__main__":
    dp_img = "/home/lugo/latentblending/230107_144533" 
    self.run_upscaling_step2(dp_img)
 #%%
 """
 mr stealy
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
 """
 #%%
 """
 TODO Coding:
    CHECK IF ALL STUFF WORKS STILL: STANDARD MODEL, INPAINTING
    RUNNING WITHOUT PROMPT!
--- a/stable_diffusion_holder.py
+++ b/stable_diffusion_holder.py
@ -144,6 +144,8 @@ class StableDiffusionHolder:
        self.mask_empty = Image.fromarray(255*np.ones([self.width, self.height], dtype=np.uint8))
        self.image_empty = Image.fromarray(np.zeros([self.width, self.height, 3], dtype=np.uint8))
        self.negative_prompt = [""]
    def init_model(self, fp_ckpt, fp_config):
        assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}"
@ -158,7 +160,17 @@ class StableDiffusionHolder:
        self.model = self.model.to(self.device)
        self.sampler = DDIMSampler(self.model)
    def set_negative_prompt(self, negative_prompt):
        r"""Set the negative prompt. Currently only one negative prompt is supported.

        The prompt is always stored in ``self.negative_prompt`` as a new list
        holding exactly one entry.

        Args:
            negative_prompt: a single string, or a sequence of strings of which
                only the first entry is kept. ``None`` or an empty sequence
                resets the negative prompt to the empty string.
        """
        if negative_prompt is None:
            prompts = [""]
        elif isinstance(negative_prompt, str):
            prompts = [negative_prompt]
        else:
            # Copy instead of aliasing the caller's sequence, keep only the
            # first entry, and never end up with an empty list.
            prompts = list(negative_prompt)[:1] or [""]
        self.negative_prompt = prompts
    def init_auto_res(self):
        r"""Automatically set the resolution to the one used in training.
@ -266,7 +278,7 @@ class StableDiffusionHolder:
        with precision_scope("cuda"):
            with self.model.ema_scope():
                if self.guidance_scale != 1.0:
-                    uc = self.model.get_learned_conditioning([""])
+                    uc = self.model.get_learned_conditioning(self.negative_prompt)
                else:
                    uc = None
                shape_latents = [self.C, self.height // self.f, self.width // self.f]
@ -532,177 +544,28 @@ class StableDiffusionHolder:
 if __name__ == "__main__":
    fp_ckpt= "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
    fp_config = 'configs/x4-upscaling.yaml'
    num_inference_steps = 100
    self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps=num_inference_steps)
    xxx
    #%% image A
    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg') 
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
    list_samplesA = self.run_diffusion_upscaling(cond, uc_full)
    image_result = Image.fromarray(self.latent2image(list_samplesA[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgA.jpg')
    #%% image B
    from latent_blending import interpolate_linear, interpolate_spherical
    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg') 
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    text_embeddingB = self.get_text_embedding(promptB)
    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
    list_samplesB = self.run_diffusion_upscaling(cond, uc_full)
    image_result = Image.fromarray(self.latent2image(list_samplesB[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgB.jpg')
    #%% reality check: run only for 50 iter.
    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg') 
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
    latents_inject = list_samplesA[50]
    list_samplesAx = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=50)
    image_result = Image.fromarray(self.latent2image(list_samplesAx[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgA_restart.jpg')
    # RESULTS ARE NOT EXACTLY IDENTICAL! INVESTIGATE WHY
    #%% mix in the middle! which uc_full should be taken? 
    # expA: take the one from A
    idx_start = 90
    latentsA = list_samplesA[idx_start]
    latentsB = list_samplesB[idx_start]
    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg') 
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expA_late.jpg')
    #%% mix in the middle! which uc_full should be taken? 
    # expB: take the one from B
    idx_start = 90
    latentsA = list_samplesA[idx_start]
    latentsB = list_samplesB[idx_start]
    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    text_embeddingB = self.get_text_embedding(promptB)
    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expB_late.jpg')
-    #%% lets blend the uc_full too! 
+    num_inference_steps = 20 # Number of diffusion interations
    # expC
-    idx_start = 50
+    # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
-    list_mix = np.linspace(0, 1, 20)
+    # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
    for fract_mix in list_mix:
        # fract_mix = 0.75
        latentsA = list_samplesA[idx_start]
        latentsB = list_samplesB[idx_start]
        latents_inject = interpolate_spherical(latentsA, latentsB, fract_mix)
-        text_embeddingA = self.get_text_embedding(promptA)
+    # fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
-        text_embeddingB = self.get_text_embedding(promptB)
+    # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
        text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
-        imageA = Image.open('/home/lugo/latentblending/test1/img_0007.jpg').resize((32*20, 32*12))
+    fp_ckpt = "../stable_diffusion_models/ckpt/v2-1_768-ema-pruned.ckpt"
-        condA, uc_fullA = self.get_cond_upscaling(imageA, text_embedding, noise_level)
+    fp_config = 'configs/v2-inference-v.yaml'
        imageB = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
        condB, uc_fullB = self.get_cond_upscaling(imageB, text_embedding, noise_level)
        condA['c_concat'][0] = interpolate_spherical(condA['c_concat'][0], condB['c_concat'][0], fract_mix)
        uc_fullA['c_concat'][0] = interpolate_spherical(uc_fullA['c_concat'][0], uc_fullB['c_concat'][0], fract_mix)
        list_samples = self.run_diffusion_upscaling(condA, uc_fullA, latents_inject, idx_start=idx_start)
        image_result = Image.fromarray(self.latent2image(list_samples[-1]))
        image_result.save(f'/home/lugo/latentblending/test1/high/img_mix_expC_{fract_mix}_start{idx_start}.jpg')
    self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
-#%%
+    #%%
-
+    prompt = "painting of a house"
-list_imgs = os.listdir('/home/lugo/latentblending/test1/high/')
+    te = self.get_text_embedding(prompt)
 list_imgs = [l for l in list_imgs if "expC" in l]
 list_imgs.pop(0)
 lx = []
 for fn in list_imgs:
    Image.open
 #%%
    if False:
        num_inference_steps = 20 # Number of diffusion interations
        # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
        # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
        fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
        fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
        sdh = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
        # fp_ckpt= "../stable_diffusion_models/ckpt/512-base-ema.ckpt"
        # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inference.yaml'
        image_source = Image.fromarray((255*np.random.rand(512,512,3)).astype(np.uint8))
        mask = 255*np.ones([512,512], dtype=np.uint8)
        mask[0:50, 0:50] = 0
        mask = Image.fromarray(mask)
        sdh.init_inpainting(image_source, mask)
        text_embedding = sdh.get_text_embedding("photo of a strange house, surreal painting")
        list_latents = sdh.run_diffusion_inpaint(text_embedding)
        idx_inject = 3
        img_orig = sdh.latent2image(list_latents[-1])
        list_inject = sdh.run_diffusion_inpaint(text_embedding, list_latents[idx_inject], idx_start=idx_inject+1)
        img_inject = sdh.latent2image(list_inject[-1])
        img_diff = img_orig - img_inject
        import matplotlib.pyplot as plt
        plt.imshow(np.concatenate((img_orig, img_inject, img_diff), axis=1))
    img = self.run_diffusion_standard(te, return_image=True)