negative prompts

This commit is contained in:
Johannes Stelzer 2023-01-08 11:48:44 +01:00
parent 34cc1edd57
commit 16da42c10e
3 changed files with 63 additions and 185 deletions

View File

@ -35,9 +35,6 @@ import copy
""" """
experiment with slider as output -> does it change in the browser? experiment with slider as output -> does it change in the browser?
guidance scale has no effect
get a movie as result
seed bug: also shows changes from before
mid compression scaler can destroy tree mid compression scaler can destroy tree
""" """
@ -77,9 +74,10 @@ class BlendingFrontend():
self.guidance_scale = 4.0 self.guidance_scale = 4.0
self.guidance_scale_mid_damper = 0.5 self.guidance_scale_mid_damper = 0.5
self.mid_compression_scaler = 1.2 self.mid_compression_scaler = 1.2
self.prompt1 = '' self.prompt1 = ""
self.prompt2 = '' self.prompt2 = ""
self.dp_base = '/home/lugo/latentblending' self.negative_prompt = ""
self.dp_base = "/home/lugo/latentblending"
self.list_settings = [] self.list_settings = []
self.state_prev = {} self.state_prev = {}
self.state_current = {} self.state_current = {}
@ -137,6 +135,9 @@ class BlendingFrontend():
self.prompt2 = value self.prompt2 = value
# print(f"changed prompt2 to {value}") # print(f"changed prompt2 to {value}")
def change_negative_prompt(self, value):
    r"""Callback that remembers the negative prompt on the frontend.

    Args:
        value: the new negative prompt; stored unchanged in
            self.negative_prompt.
    """
    self.negative_prompt = value
def change_seed1(self, value): def change_seed1(self, value):
self.seed1 = int(value) self.seed1 = int(value)
@ -170,15 +171,6 @@ class BlendingFrontend():
self.lb.sdh.height = self.height self.lb.sdh.height = self.height
self.lb.sdh.width = self.width self.lb.sdh.width = self.width
# list_nmb_branches = [2, 6, 15]
# list_injection_strength = [0.0, self.depth_strength, 0.9]
# self.lb.setup_branching(
# num_inference_steps = self.num_inference_steps,
# list_nmb_branches = list_nmb_branches,
# list_injection_strength = list_injection_strength
# )
self.lb.autosetup_branching( self.lb.autosetup_branching(
depth_strength = self.depth_strength, depth_strength = self.depth_strength,
num_inference_steps = self.num_inference_steps, num_inference_steps = self.num_inference_steps,
@ -187,6 +179,7 @@ class BlendingFrontend():
self.lb.set_prompt1(self.prompt1) self.lb.set_prompt1(self.prompt1)
self.lb.set_prompt2(self.prompt2) self.lb.set_prompt2(self.prompt2)
self.lb.set_negative_prompt(self.negative_prompt)
self.lb.guidance_scale = self.guidance_scale self.lb.guidance_scale = self.guidance_scale
self.lb.guidance_scale_mid_damper = self.guidance_scale_mid_damper self.lb.guidance_scale_mid_damper = self.guidance_scale_mid_damper
@ -276,8 +269,8 @@ self = BlendingFrontend()
with gr.Blocks() as demo: with gr.Blocks() as demo:
with gr.Row(): with gr.Row():
text1 = gr.Textbox(label="prompt 1") prompt1 = gr.Textbox(label="prompt 1")
text2 = gr.Textbox(label="prompt 2") prompt2 = gr.Textbox(label="prompt 2")
with gr.Row(): with gr.Row():
depth_strength = gr.Slider(0.01, 0.99, self.depth_strength, step=0.01, label='depth_strength', interactive=True) depth_strength = gr.Slider(0.01, 0.99, self.depth_strength, step=0.01, label='depth_strength', interactive=True)
@ -289,6 +282,7 @@ with gr.Blocks() as demo:
num_inference_steps = gr.Slider(5, 100, self.num_inference_steps, step=1, label='num_inference_steps', interactive=True) num_inference_steps = gr.Slider(5, 100, self.num_inference_steps, step=1, label='num_inference_steps', interactive=True)
height = gr.Slider(256, 2048, self.height, step=128, label='height', interactive=True) height = gr.Slider(256, 2048, self.height, step=128, label='height', interactive=True)
width = gr.Slider(256, 2048, self.width, step=128, label='width', interactive=True) width = gr.Slider(256, 2048, self.width, step=128, label='width', interactive=True)
negative_prompt = gr.Textbox(label="negative prompt")
with gr.Row(): with gr.Row():
b_newseed1 = gr.Button("rand seed 1") b_newseed1 = gr.Button("rand seed 1")
@ -324,10 +318,11 @@ with gr.Blocks() as demo:
height.change(fn=self.change_height, inputs=height) height.change(fn=self.change_height, inputs=height)
width.change(fn=self.change_width, inputs=width) width.change(fn=self.change_width, inputs=width)
text1.change(fn=self.change_prompt1, inputs=text1) prompt1.change(fn=self.change_prompt1, inputs=prompt1)
text2.change(fn=self.change_prompt2, inputs=text2) prompt2.change(fn=self.change_prompt2, inputs=prompt2)
seed1.change(fn=self.change_seed1, inputs=seed1) seed1.change(fn=self.change_seed1, inputs=seed1)
seed2.change(fn=self.change_seed2, inputs=seed2) seed2.change(fn=self.change_seed2, inputs=seed2)
negative_prompt.change(fn=self.change_negative_prompt, inputs=negative_prompt)
b_newseed1.click(self.randomize_seed1, outputs=seed1) b_newseed1.click(self.randomize_seed1, outputs=seed1)
b_newseed2.click(self.randomize_seed2, outputs=seed2) b_newseed2.click(self.randomize_seed2, outputs=seed2)

View File

@ -86,6 +86,7 @@ class LatentBlending():
# Initialize vars # Initialize vars
self.prompt1 = "" self.prompt1 = ""
self.prompt2 = "" self.prompt2 = ""
self.negative_prompt = ""
self.tree_latents = None self.tree_latents = None
self.tree_fracts = None self.tree_fracts = None
self.tree_status = None self.tree_status = None
@ -127,6 +128,12 @@ class LatentBlending():
self.guidance_scale = guidance_scale self.guidance_scale = guidance_scale
self.sdh.guidance_scale = guidance_scale self.sdh.guidance_scale = guidance_scale
def set_negative_prompt(self, negative_prompt):
    r"""
    Sets the negative prompt. Currently only one negative prompt is supported.
    The value is kept on this object and also handed to the diffusion
    holder (self.sdh) through its own set_negative_prompt.
    Args:
        negative_prompt:
            The negative prompt, passed on unchanged.
    """
    # Remember the value locally first, then forward it to the holder.
    self.negative_prompt = negative_prompt
    holder = self.sdh
    holder.set_negative_prompt(negative_prompt)
def set_guidance_mid_dampening(self, fract_mixing): def set_guidance_mid_dampening(self, fract_mixing):
r""" r"""
Tunes the guidance scale down as a linear function of fract_mixing, Tunes the guidance scale down as a linear function of fract_mixing,
@ -1096,10 +1103,23 @@ if __name__ == "__main__":
dp_img = "/home/lugo/latentblending/230107_144533" dp_img = "/home/lugo/latentblending/230107_144533"
self.run_upscaling_step2(dp_img) self.run_upscaling_step2(dp_img)
#%% #%%
""" """
mr stealy
elif isinstance(negative_prompt, str):
uncond_tokens = [negative_prompt]
"""
#%%
"""
TODO Coding: TODO Coding:
CHECK IF ALL STUFF WORKS STILL: STANDARD MODEL, INPAINTING CHECK IF ALL STUFF WORKS STILL: STANDARD MODEL, INPAINTING
RUNNING WITHOUT PROMPT! RUNNING WITHOUT PROMPT!

View File

@ -144,6 +144,8 @@ class StableDiffusionHolder:
self.mask_empty = Image.fromarray(255*np.ones([self.width, self.height], dtype=np.uint8)) self.mask_empty = Image.fromarray(255*np.ones([self.width, self.height], dtype=np.uint8))
self.image_empty = Image.fromarray(np.zeros([self.width, self.height, 3], dtype=np.uint8)) self.image_empty = Image.fromarray(np.zeros([self.width, self.height, 3], dtype=np.uint8))
self.negative_prompt = [""]
def init_model(self, fp_ckpt, fp_config): def init_model(self, fp_ckpt, fp_config):
assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}" assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}"
@ -158,7 +160,17 @@ class StableDiffusionHolder:
self.model = self.model.to(self.device) self.model = self.model.to(self.device)
self.sampler = DDIMSampler(self.model) self.sampler = DDIMSampler(self.model)
def set_negative_prompt(self, negative_prompt):
    r"""Set the negative prompt. Currently only one negative prompt is supported.

    The value is normalised to a list holding exactly one entry and stored
    in self.negative_prompt.

    Args:
        negative_prompt:
            A single string, a sequence of strings (only the first entry is
            kept), or None. None and empty sequences fall back to [""],
            i.e. no negative prompt.
    """
    if negative_prompt is None:
        # Previously this crashed on len(None); treat it as "no negative prompt".
        candidates = []
    elif isinstance(negative_prompt, str):
        candidates = [negative_prompt]
    else:
        # Copy into a list so tuples and other sequences behave like lists.
        candidates = list(negative_prompt)

    # Keep only the first entry; an empty result would leave the holder with
    # zero prompts, so fall back to the empty prompt instead.
    self.negative_prompt = candidates[:1] or [""]
def init_auto_res(self): def init_auto_res(self):
r"""Automatically set the resolution to the one used in training. r"""Automatically set the resolution to the one used in training.
@ -266,7 +278,7 @@ class StableDiffusionHolder:
with precision_scope("cuda"): with precision_scope("cuda"):
with self.model.ema_scope(): with self.model.ema_scope():
if self.guidance_scale != 1.0: if self.guidance_scale != 1.0:
uc = self.model.get_learned_conditioning([""]) uc = self.model.get_learned_conditioning(self.negative_prompt)
else: else:
uc = None uc = None
shape_latents = [self.C, self.height // self.f, self.width // self.f] shape_latents = [self.C, self.height // self.f, self.width // self.f]
@ -532,177 +544,28 @@ class StableDiffusionHolder:
if __name__ == "__main__": if __name__ == "__main__":
fp_ckpt= "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
fp_config = 'configs/x4-upscaling.yaml'
num_inference_steps = 100
self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps=num_inference_steps)
xxx
#%% image A
image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
image = image.resize((32*20, 32*12))
promptA = "photo of a an ancient castle surrounded by a forest"
noise_level = 20 #gradio min=0, max=350, value=20
text_embeddingA = self.get_text_embedding(promptA)
cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
list_samplesA = self.run_diffusion_upscaling(cond, uc_full)
image_result = Image.fromarray(self.latent2image(list_samplesA[-1]))
image_result.save('/home/lugo/latentblending/test1/high/imgA.jpg')
#%% image B
from latent_blending import interpolate_linear, interpolate_spherical
image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg')
image = image.resize((32*20, 32*12))
promptA = "photo of a an ancient castle surrounded by a forest"
promptB = "photo of a beautiful island on the horizon, blue sea with waves"
noise_level = 20 #gradio min=0, max=350, value=20
text_embeddingA = self.get_text_embedding(promptA)
text_embeddingB = self.get_text_embedding(promptB)
text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
list_samplesB = self.run_diffusion_upscaling(cond, uc_full)
image_result = Image.fromarray(self.latent2image(list_samplesB[-1]))
image_result.save('/home/lugo/latentblending/test1/high/imgB.jpg')
#%% reality check: run only for 50 iter.
image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
image = image.resize((32*20, 32*12))
promptA = "photo of a an ancient castle surrounded by a forest"
noise_level = 20 #gradio min=0, max=350, value=20
text_embeddingA = self.get_text_embedding(promptA)
cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
latents_inject = list_samplesA[50]
list_samplesAx = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=50)
image_result = Image.fromarray(self.latent2image(list_samplesAx[-1]))
image_result.save('/home/lugo/latentblending/test1/high/imgA_restart.jpg')
# RESULTS ARE NOT EXACTLY IDENTICAL! INVESTIGATE WHY
#%% mix in the middle! which uc_full should be taken?
# expA: take the one from A
idx_start = 90
latentsA = list_samplesA[idx_start]
latentsB = list_samplesB[idx_start]
latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
image = image.resize((32*20, 32*12))
promptA = "photo of a an ancient castle surrounded by a forest"
noise_level = 20 #gradio min=0, max=350, value=20
text_embeddingA = self.get_text_embedding(promptA)
cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)
list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
image_result = Image.fromarray(self.latent2image(list_samples[-1]))
image_result.save('/home/lugo/latentblending/test1/high/img_mix_expA_late.jpg')
#%% mix in the middle! which uc_full should be taken?
# expA: take the one from B
idx_start = 90
latentsA = list_samplesA[idx_start]
latentsB = list_samplesB[idx_start]
latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)
image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
promptA = "photo of a an ancient castle surrounded by a forest"
promptB = "photo of a beautiful island on the horizon, blue sea with waves"
noise_level = 20 #gradio min=0, max=350, value=20
text_embeddingA = self.get_text_embedding(promptA)
text_embeddingB = self.get_text_embedding(promptB)
text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)
list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
image_result = Image.fromarray(self.latent2image(list_samples[-1]))
image_result.save('/home/lugo/latentblending/test1/high/img_mix_expB_late.jpg')
#%% lets blend the uc_full too! num_inference_steps = 20 # Number of diffusion interations
# expC
idx_start = 50 # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
list_mix = np.linspace(0, 1, 20) # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
for fract_mix in list_mix:
# fract_mix = 0.75
latentsA = list_samplesA[idx_start]
latentsB = list_samplesB[idx_start]
latents_inject = interpolate_spherical(latentsA, latentsB, fract_mix)
text_embeddingA = self.get_text_embedding(promptA) # fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
text_embeddingB = self.get_text_embedding(promptB) # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
imageA = Image.open('/home/lugo/latentblending/test1/img_0007.jpg').resize((32*20, 32*12)) fp_ckpt = "../stable_diffusion_models/ckpt/v2-1_768-ema-pruned.ckpt"
condA, uc_fullA = self.get_cond_upscaling(imageA, text_embedding, noise_level) fp_config = 'configs/v2-inference-v.yaml'
imageB = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
condB, uc_fullB = self.get_cond_upscaling(imageB, text_embedding, noise_level)
condA['c_concat'][0] = interpolate_spherical(condA['c_concat'][0], condB['c_concat'][0], fract_mix)
uc_fullA['c_concat'][0] = interpolate_spherical(uc_fullA['c_concat'][0], uc_fullB['c_concat'][0], fract_mix)
list_samples = self.run_diffusion_upscaling(condA, uc_fullA, latents_inject, idx_start=idx_start)
image_result = Image.fromarray(self.latent2image(list_samples[-1]))
image_result.save(f'/home/lugo/latentblending/test1/high/img_mix_expC_{fract_mix}_start{idx_start}.jpg')
self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
#%% #%%
prompt = "painting of a house"
list_imgs = os.listdir('/home/lugo/latentblending/test1/high/') te = self.get_text_embedding(prompt)
list_imgs = [l for l in list_imgs if "expC" in l]
list_imgs.pop(0)
lx = []
for fn in list_imgs:
Image.open
#%%
if False:
num_inference_steps = 20 # Number of diffusion interations
# fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
# fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
sdh = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
# fp_ckpt= "../stable_diffusion_models/ckpt/512-base-ema.ckpt"
# fp_config = '../stablediffusion/configs//stable-diffusion/v2-inference.yaml'
image_source = Image.fromarray((255*np.random.rand(512,512,3)).astype(np.uint8))
mask = 255*np.ones([512,512], dtype=np.uint8)
mask[0:50, 0:50] = 0
mask = Image.fromarray(mask)
sdh.init_inpainting(image_source, mask)
text_embedding = sdh.get_text_embedding("photo of a strange house, surreal painting")
list_latents = sdh.run_diffusion_inpaint(text_embedding)
idx_inject = 3
img_orig = sdh.latent2image(list_latents[-1])
list_inject = sdh.run_diffusion_inpaint(text_embedding, list_latents[idx_inject], idx_start=idx_inject+1)
img_inject = sdh.latent2image(list_inject[-1])
img_diff = img_orig - img_inject
import matplotlib.pyplot as plt
plt.imshow(np.concatenate((img_orig, img_inject, img_diff), axis=1))
img = self.run_diffusion_standard(te, return_image=True)