upscaling x4 model support
This commit is contained in:
parent
ca0f818317
commit
cd45b2e585
|
@ -1,105 +0,0 @@
|
||||||
# Copyright 2022 Lunar Ring. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
import torch
|
|
||||||
torch.backends.cudnn.benchmark = False
|
|
||||||
import numpy as np
|
|
||||||
import warnings
|
|
||||||
warnings.filterwarnings('ignore')
|
|
||||||
import warnings
|
|
||||||
import torch
|
|
||||||
from tqdm.auto import tqdm
|
|
||||||
from PIL import Image
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import torch
|
|
||||||
from movie_util import MovieSaver
|
|
||||||
from typing import Callable, List, Optional, Union
|
|
||||||
from latent_blending import LatentBlending, add_frames_linear_interp
|
|
||||||
from stable_diffusion_holder import StableDiffusionHolder
|
|
||||||
torch.set_grad_enabled(False)
|
|
||||||
|
|
||||||
|
|
||||||
#%% First let us spawn a stable diffusion holder
|
|
||||||
use_inpaint = True
|
|
||||||
|
|
||||||
device = "cuda"
|
|
||||||
fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
|
|
||||||
fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'
|
|
||||||
|
|
||||||
|
|
||||||
# fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
|
|
||||||
# fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'
|
|
||||||
|
|
||||||
sdh = StableDiffusionHolder(fp_ckpt, fp_config, device)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#%% Next let's set up all parameters
|
|
||||||
num_inference_steps = 30 # Number of diffusion iterations
|
|
||||||
|
|
||||||
guidance_scale = 5
|
|
||||||
|
|
||||||
lb = LatentBlending(sdh, num_inference_steps, guidance_scale)
|
|
||||||
|
|
||||||
list_prompts = []
|
|
||||||
list_prompts.append("photo of a beautiful forest covered in white flowers, ambient light, very detailed, magic")
|
|
||||||
list_prompts.append("photo of an golden statue with a funny hat, surrounded by ferns and vines, grainy analog photograph, mystical ambience, incredible detail")
|
|
||||||
|
|
||||||
|
|
||||||
for k, prompt in enumerate(list_prompts):
|
|
||||||
# k = 6
|
|
||||||
|
|
||||||
# prompt = list_prompts[k]
|
|
||||||
for i in range(10):
|
|
||||||
lb.set_prompt1(prompt)
|
|
||||||
|
|
||||||
seed = np.random.randint(999999999)
|
|
||||||
lb.set_seed(seed)
|
|
||||||
plt.imshow(lb.run_diffusion(lb.text_embedding1, return_image=True))
|
|
||||||
plt.title(f"prompt {k}, seed {i} {seed}")
|
|
||||||
plt.show()
|
|
||||||
print(f"prompt {k} seed {seed} trial {i}")
|
|
||||||
|
|
||||||
#%%
|
|
||||||
#%% Let's make a source image and mask.
|
|
||||||
k=0
|
|
||||||
for i in range(10):
|
|
||||||
seed = 190791709# np.random.randint(999999999)
|
|
||||||
# seed0 = 629575320
|
|
||||||
|
|
||||||
lb = LatentBlending(sdh)
|
|
||||||
lb.autosetup_branching(quality='medium', depth_strength=0.65)
|
|
||||||
|
|
||||||
prompt1 = "photo of a futuristic alien temple in a desert, mystic, glowing, organic, intricate, sci-fi movie, mesmerizing, scary"
|
|
||||||
lb.set_prompt1(prompt1)
|
|
||||||
lb.init_inpainting(init_empty=True)
|
|
||||||
lb.set_seed(seed)
|
|
||||||
plt.imshow(lb.run_diffusion(lb.text_embedding1, return_image=True))
|
|
||||||
plt.title(f"prompt1 {k}, seed {i} {seed}")
|
|
||||||
plt.show()
|
|
||||||
print(f"prompt1 {k} seed {seed} trial {i}")
|
|
||||||
|
|
||||||
xx
|
|
||||||
|
|
||||||
#%%
|
|
||||||
mask_image = 255*np.ones([512,512], dtype=np.uint8)
|
|
||||||
mask_image[340:420, 170:280, ] = 0
|
|
||||||
mask_image = Image.fromarray(mask_image)
|
|
||||||
|
|
||||||
#%%
|
|
||||||
|
|
||||||
"""
|
|
||||||
69731932, 504430820
|
|
||||||
"""
|
|
|
@ -27,7 +27,7 @@ import warnings
|
||||||
import torch
|
import torch
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import matplotlib.pyplot as plt
|
# import matplotlib.pyplot as plt
|
||||||
import torch
|
import torch
|
||||||
from movie_util import MovieSaver
|
from movie_util import MovieSaver
|
||||||
import datetime
|
import datetime
|
||||||
|
@ -41,7 +41,10 @@ from contextlib import nullcontext
|
||||||
|
|
||||||
from ldm.models.diffusion.ddim import DDIMSampler
|
from ldm.models.diffusion.ddim import DDIMSampler
|
||||||
from ldm.util import instantiate_from_config
|
from ldm.util import instantiate_from_config
|
||||||
|
from ldm.models.diffusion.ddpm import LatentUpscaleDiffusion, LatentInpaintDiffusion
|
||||||
from stable_diffusion_holder import StableDiffusionHolder
|
from stable_diffusion_holder import StableDiffusionHolder
|
||||||
|
import yaml
|
||||||
|
|
||||||
#%%
|
#%%
|
||||||
class LatentBlending():
|
class LatentBlending():
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -49,7 +52,7 @@ class LatentBlending():
|
||||||
sdh: None,
|
sdh: None,
|
||||||
guidance_scale: float = 4,
|
guidance_scale: float = 4,
|
||||||
guidance_scale_mid_damper: float = 0.5,
|
guidance_scale_mid_damper: float = 0.5,
|
||||||
mid_compression_scaler: float = 2.0,
|
mid_compression_scaler: float = 1.2,
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
Initializes the latent blending class.
|
Initializes the latent blending class.
|
||||||
|
@ -77,7 +80,8 @@ class LatentBlending():
|
||||||
self.height = self.sdh.height
|
self.height = self.sdh.height
|
||||||
self.guidance_scale_mid_damper = guidance_scale_mid_damper
|
self.guidance_scale_mid_damper = guidance_scale_mid_damper
|
||||||
self.mid_compression_scaler = mid_compression_scaler
|
self.mid_compression_scaler = mid_compression_scaler
|
||||||
self.seed = 420 # Run self.set_seed or fixed_seeds argument in run_transition
|
self.seed1 = 0
|
||||||
|
self.seed2 = 0
|
||||||
|
|
||||||
# Initialize vars
|
# Initialize vars
|
||||||
self.prompt1 = ""
|
self.prompt1 = ""
|
||||||
|
@ -90,20 +94,25 @@ class LatentBlending():
|
||||||
self.list_injection_idx_prev = []
|
self.list_injection_idx_prev = []
|
||||||
self.text_embedding1 = None
|
self.text_embedding1 = None
|
||||||
self.text_embedding2 = None
|
self.text_embedding2 = None
|
||||||
|
self.image1_lowres = None
|
||||||
|
self.image2_lowres = None
|
||||||
self.stop_diffusion = False
|
self.stop_diffusion = False
|
||||||
self.negative_prompt = None
|
self.negative_prompt = None
|
||||||
self.num_inference_steps = -1
|
self.num_inference_steps = self.sdh.num_inference_steps
|
||||||
|
self.noise_level_upscaling = 20
|
||||||
self.list_injection_idx = None
|
self.list_injection_idx = None
|
||||||
self.list_nmb_branches = None
|
self.list_nmb_branches = None
|
||||||
self.set_guidance_scale(guidance_scale)
|
self.set_guidance_scale(guidance_scale)
|
||||||
self.init_mode()
|
self.init_mode()
|
||||||
|
|
||||||
|
|
||||||
def init_mode(self, mode='standard'):
|
def init_mode(self):
|
||||||
r"""
|
r"""
|
||||||
Sets the mode of this class, either inpaint of standard.
|
Sets the operational mode. Currently supported are standard, inpainting and x4 upscaling.
|
||||||
"""
|
"""
|
||||||
if mode == 'inpaint':
|
if isinstance(self.sdh.model, LatentUpscaleDiffusion):
|
||||||
|
self.mode = 'upscale'
|
||||||
|
elif isinstance(self.sdh.model, LatentInpaintDiffusion):
|
||||||
self.sdh.image_source = None
|
self.sdh.image_source = None
|
||||||
self.sdh.mask_image = None
|
self.sdh.mask_image = None
|
||||||
self.mode = 'inpaint'
|
self.mode = 'inpaint'
|
||||||
|
@ -152,10 +161,26 @@ class LatentBlending():
|
||||||
self.prompt2 = prompt
|
self.prompt2 = prompt
|
||||||
self.text_embedding2 = self.get_text_embeddings(self.prompt2)
|
self.text_embedding2 = self.get_text_embeddings(self.prompt2)
|
||||||
|
|
||||||
def autosetup_branching(
|
def set_image1(self, image: Image):
|
||||||
|
r"""
|
||||||
|
Sets the first image (keyframe), relevant for the upscaling model transitions.
|
||||||
|
Args:
|
||||||
|
image: Image
|
||||||
|
"""
|
||||||
|
self.image1_lowres = image
|
||||||
|
|
||||||
|
def set_image2(self, image: Image):
|
||||||
|
r"""
|
||||||
|
Sets the second image (keyframe), relevant for the upscaling model transitions.
|
||||||
|
Args:
|
||||||
|
image: Image
|
||||||
|
"""
|
||||||
|
self.image2_lowres = image
|
||||||
|
|
||||||
|
def load_branching_profile(
|
||||||
self,
|
self,
|
||||||
quality: str = 'medium',
|
quality: str = 'medium',
|
||||||
deepth_strength: float = 0.65,
|
depth_strength: float = 0.65,
|
||||||
nmb_frames: int = 360,
|
nmb_frames: int = 360,
|
||||||
nmb_mindist: int = 3,
|
nmb_mindist: int = 3,
|
||||||
):
|
):
|
||||||
|
@ -167,7 +192,7 @@ class LatentBlending():
|
||||||
Determines how many diffusion steps are being made + how many branches in total.
|
Determines how many diffusion steps are being made + how many branches in total.
|
||||||
Tradeoff between quality and speed of computation.
|
Tradeoff between quality and speed of computation.
|
||||||
Choose: lowest, low, medium, high, ultra
|
Choose: lowest, low, medium, high, ultra
|
||||||
deepth_strength: float = 0.65,
|
depth_strength: float = 0.65,
|
||||||
Determines how deep the first injection will happen.
|
Determines how deep the first injection will happen.
|
||||||
Deeper injections will cause (unwanted) formation of new structures,
|
Deeper injections will cause (unwanted) formation of new structures,
|
||||||
more shallow values will go into alpha-blendy land.
|
more shallow values will go into alpha-blendy land.
|
||||||
|
@ -175,7 +200,6 @@ class LatentBlending():
|
||||||
total number of frames
|
total number of frames
|
||||||
nmb_mindist: int = 3
|
nmb_mindist: int = 3
|
||||||
minimum distance in terms of diffusion iterations between subsequent injections
|
minimum distance in terms of diffusion iterations between subsequent injections
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if quality == 'lowest':
|
if quality == 'lowest':
|
||||||
|
@ -193,10 +217,42 @@ class LatentBlending():
|
||||||
elif quality == 'ultra':
|
elif quality == 'ultra':
|
||||||
num_inference_steps = 100
|
num_inference_steps = 100
|
||||||
nmb_branches_final = nmb_frames//2
|
nmb_branches_final = nmb_frames//2
|
||||||
|
elif quality == 'upscaling_step1':
|
||||||
|
num_inference_steps = 40
|
||||||
|
nmb_branches_final = 12
|
||||||
|
elif quality == 'upscaling_step2':
|
||||||
|
num_inference_steps = 100
|
||||||
|
nmb_branches_final = 4
|
||||||
else:
|
else:
|
||||||
raise ValueError("quality = '{quality}' not supported")
|
raise ValueError(f"quality = '{quality}' not supported")
|
||||||
|
|
||||||
|
self.autosetup_branching(depth_strength, num_inference_steps, nmb_branches_final)
|
||||||
|
|
||||||
idx_injection_first = int(np.round(num_inference_steps*deepth_strength))
|
|
||||||
|
def autosetup_branching(
|
||||||
|
self,
|
||||||
|
depth_strength: float = 0.65,
|
||||||
|
num_inference_steps: int = 30,
|
||||||
|
nmb_branches_final: int = 20,
|
||||||
|
nmb_mindist: int = 3,
|
||||||
|
):
|
||||||
|
r"""
|
||||||
|
Automatically sets up the branching schedule.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
depth_strength: float = 0.65,
|
||||||
|
Determines how deep the first injection will happen.
|
||||||
|
Deeper injections will cause (unwanted) formation of new structures,
|
||||||
|
more shallow values will go into alpha-blendy land.
|
||||||
|
num_inference_steps: int
|
||||||
|
Number of diffusion steps. Larger values will take more compute time.
|
||||||
|
nmb_branches_final (int): The number of diffusion-generated images
|
||||||
|
at the end of the inference.
|
||||||
|
nmb_mindist (int): The minimum number of diffusion steps
|
||||||
|
between two injections.
|
||||||
|
"""
|
||||||
|
|
||||||
|
idx_injection_first = int(np.round(num_inference_steps*depth_strength))
|
||||||
idx_injection_last = num_inference_steps - 3
|
idx_injection_last = num_inference_steps - 3
|
||||||
nmb_injections = int(np.floor(num_inference_steps/5)) - 1
|
nmb_injections = int(np.floor(num_inference_steps/5)) - 1
|
||||||
|
|
||||||
|
@ -219,10 +275,6 @@ class LatentBlending():
|
||||||
list_injection_idx = list_injection_idx_clean
|
list_injection_idx = list_injection_idx_clean
|
||||||
list_nmb_branches = list_nmb_branches_clean
|
list_nmb_branches = list_nmb_branches_clean
|
||||||
|
|
||||||
# print(f"num_inference_steps: {num_inference_steps}")
|
|
||||||
# print(f"list_injection_idx: {list_injection_idx}")
|
|
||||||
# print(f"list_nmb_branches: {list_nmb_branches}")
|
|
||||||
|
|
||||||
list_nmb_branches = list_nmb_branches
|
list_nmb_branches = list_nmb_branches
|
||||||
list_injection_idx = list_injection_idx
|
list_injection_idx = list_injection_idx
|
||||||
self.setup_branching(num_inference_steps, list_nmb_branches=list_nmb_branches, list_injection_idx=list_injection_idx)
|
self.setup_branching(num_inference_steps, list_nmb_branches=list_nmb_branches, list_injection_idx=list_injection_idx)
|
||||||
|
@ -313,6 +365,7 @@ class LatentBlending():
|
||||||
recycle_img1: Optional[bool] = False,
|
recycle_img1: Optional[bool] = False,
|
||||||
recycle_img2: Optional[bool] = False,
|
recycle_img2: Optional[bool] = False,
|
||||||
fixed_seeds: Optional[List[int]] = None,
|
fixed_seeds: Optional[List[int]] = None,
|
||||||
|
premature_stop: Optional[int] = np.inf,
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
Returns a list of transition images using spherical latent blending.
|
Returns a list of transition images using spherical latent blending.
|
||||||
|
@ -324,6 +377,8 @@ class LatentBlending():
|
||||||
fixed_seeds: Optional[List[int]]:
|
fixed_seeds: Optional[List[int]]:
|
||||||
You can supply two seeds that are used for the first and second keyframe (prompt1 and prompt2).
|
You can supply two seeds that are used for the first and second keyframe (prompt1 and prompt2).
|
||||||
Otherwise random seeds will be taken.
|
Otherwise random seeds will be taken.
|
||||||
|
premature_stop: Optional[int]:
|
||||||
|
Stop the computation after premature_stop frames have been computed in the transition
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Sanity checks first
|
# Sanity checks first
|
||||||
|
@ -336,28 +391,16 @@ class LatentBlending():
|
||||||
fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))
|
fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))
|
||||||
else:
|
else:
|
||||||
assert len(fixed_seeds)==2, "Supply a list with len = 2"
|
assert len(fixed_seeds)==2, "Supply a list with len = 2"
|
||||||
|
|
||||||
|
self.seed1 = fixed_seeds[0]
|
||||||
|
self.seed2 = fixed_seeds[1]
|
||||||
|
|
||||||
# Process interruption variable
|
# Process interruption variable
|
||||||
self.stop_diffusion = False
|
self.stop_diffusion = False
|
||||||
|
|
||||||
# Ensure correct num_inference_steps in holder
|
# Ensure correct num_inference_steps in holder
|
||||||
self.sdh.num_inference_steps = self.num_inference_steps
|
self.sdh.num_inference_steps = self.num_inference_steps
|
||||||
|
|
||||||
# # Recycling? There are requirements
|
|
||||||
# if recycle_img1 or recycle_img2:
|
|
||||||
# # if self.list_nmb_branches_prev == []:
|
|
||||||
# # print("Warning. You want to recycle but there is nothing here. Disabling recycling.")
|
|
||||||
# # recycle_img1 = False
|
|
||||||
# # recycle_img2 = False
|
|
||||||
# if self.list_nmb_branches_prev != self.list_nmb_branches:
|
|
||||||
# print("Warning. Cannot change list_nmb_branches if recycling latent. Disabling recycling.")
|
|
||||||
# recycle_img1 = False
|
|
||||||
# recycle_img2 = False
|
|
||||||
# elif self.list_injection_idx_prev != self.list_injection_idx:
|
|
||||||
# print("Warning. Cannot change list_nmb_branches if recycling latent. Disabling recycling.")
|
|
||||||
# recycle_img1 = False
|
|
||||||
# recycle_img2 = False
|
|
||||||
|
|
||||||
# Make a backup for future reference
|
# Make a backup for future reference
|
||||||
self.list_nmb_branches_prev = self.list_nmb_branches[:]
|
self.list_nmb_branches_prev = self.list_nmb_branches[:]
|
||||||
self.list_injection_idx_prev = self.list_injection_idx[:]
|
self.list_injection_idx_prev = self.list_injection_idx[:]
|
||||||
|
@ -415,15 +458,19 @@ class LatentBlending():
|
||||||
|
|
||||||
# Diffusion computations start here
|
# Diffusion computations start here
|
||||||
time_start = time.time()
|
time_start = time.time()
|
||||||
for t_block, idx_branch in tqdm(list_compute, desc="computing transition", smoothing=-1):
|
for t_block, idx_branch in tqdm(list_compute, desc="computing transition", smoothing=0.01):
|
||||||
if self.stop_diffusion:
|
if self.stop_diffusion:
|
||||||
print("run_transition: process interrupted")
|
print("run_transition: process interrupted")
|
||||||
return self.tree_final_imgs
|
return self.tree_final_imgs
|
||||||
|
if idx_branch > premature_stop:
|
||||||
|
print(f"run_transition: premature_stop criterion reached. returning tree with {premature_stop} branches")
|
||||||
|
return self.tree_final_imgs
|
||||||
|
|
||||||
# print(f"computing t_block {t_block} idx_branch {idx_branch}")
|
# print(f"computing t_block {t_block} idx_branch {idx_branch}")
|
||||||
idx_stop = self.list_injection_idx_ext[t_block+1]
|
idx_stop = self.list_injection_idx_ext[t_block+1]
|
||||||
fract_mixing = self.tree_fracts[t_block][idx_branch]
|
fract_mixing = self.tree_fracts[t_block][idx_branch]
|
||||||
text_embeddings_mix = interpolate_linear(self.text_embedding1, self.text_embedding2, fract_mixing)
|
|
||||||
|
list_conditionings = self.get_mixed_conditioning(fract_mixing)
|
||||||
self.set_guidance_mid_dampening(fract_mixing)
|
self.set_guidance_mid_dampening(fract_mixing)
|
||||||
# print(f"fract_mixing {fract_mixing} guid {self.sdh.guidance_scale}")
|
# print(f"fract_mixing {fract_mixing} guid {self.sdh.guidance_scale}")
|
||||||
if t_block == 0:
|
if t_block == 0:
|
||||||
|
@ -432,7 +479,7 @@ class LatentBlending():
|
||||||
self.set_seed(fixed_seeds[0])
|
self.set_seed(fixed_seeds[0])
|
||||||
elif idx_branch == self.list_nmb_branches[0] -1:
|
elif idx_branch == self.list_nmb_branches[0] -1:
|
||||||
self.set_seed(fixed_seeds[1])
|
self.set_seed(fixed_seeds[1])
|
||||||
list_latents = self.run_diffusion(text_embeddings_mix, idx_stop=idx_stop)
|
list_latents = self.run_diffusion(list_conditionings, idx_stop=idx_stop)
|
||||||
else:
|
else:
|
||||||
# find parents latents
|
# find parents latents
|
||||||
b_parent1, b_parent2 = get_closest_idx(fract_mixing, self.tree_fracts[t_block-1])
|
b_parent1, b_parent2 = get_closest_idx(fract_mixing, self.tree_fracts[t_block-1])
|
||||||
|
@ -444,7 +491,7 @@ class LatentBlending():
|
||||||
idx_start = self.list_injection_idx_ext[t_block]
|
idx_start = self.list_injection_idx_ext[t_block]
|
||||||
fract_mixing_parental = (fract_mixing - self.tree_fracts[t_block-1][b_parent1]) / (self.tree_fracts[t_block-1][b_parent2] - self.tree_fracts[t_block-1][b_parent1])
|
fract_mixing_parental = (fract_mixing - self.tree_fracts[t_block-1][b_parent1]) / (self.tree_fracts[t_block-1][b_parent2] - self.tree_fracts[t_block-1][b_parent1])
|
||||||
latents_for_injection = interpolate_spherical(latents1, latents2, fract_mixing_parental)
|
latents_for_injection = interpolate_spherical(latents1, latents2, fract_mixing_parental)
|
||||||
list_latents = self.run_diffusion(text_embeddings_mix, latents_for_injection, idx_start=idx_start, idx_stop=idx_stop)
|
list_latents = self.run_diffusion(list_conditionings, latents_for_injection, idx_start=idx_start, idx_stop=idx_stop)
|
||||||
|
|
||||||
self.tree_latents[t_block][idx_branch] = list_latents
|
self.tree_latents[t_block][idx_branch] = list_latents
|
||||||
self.tree_status[t_block][idx_branch] = 'computed'
|
self.tree_status[t_block][idx_branch] = 'computed'
|
||||||
|
@ -459,21 +506,20 @@ class LatentBlending():
|
||||||
|
|
||||||
def run_multi_transition(
|
def run_multi_transition(
|
||||||
self,
|
self,
|
||||||
|
fp_movie: str,
|
||||||
list_prompts: List[str],
|
list_prompts: List[str],
|
||||||
list_seeds: List[int] = None,
|
list_seeds: List[int] = None,
|
||||||
ms: MovieSaver = None,
|
|
||||||
fps: float = 24,
|
fps: float = 24,
|
||||||
duration_single_trans: float = 15,
|
duration_single_trans: float = 15,
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
Runs multiple transitions and stitches them together. You can supply the seeds for each prompt.
|
Runs multiple transitions and stitches them together. You can supply the seeds for each prompt.
|
||||||
Args:
|
Args:
|
||||||
|
fp_movie: file path for movie saving
|
||||||
list_prompts: List[str]:
|
list_prompts: List[str]:
|
||||||
list of the prompts. There will be a transition starting from the first to the last.
|
list of the prompts. There will be a transition starting from the first to the last.
|
||||||
list_seeds: List[int] = None:
|
list_seeds: List[int] = None:
|
||||||
Random Seeds for each prompt.
|
Random Seeds for each prompt.
|
||||||
ms: MovieSaver
|
|
||||||
You need to spawn a moviesaver instance.
|
|
||||||
fps: float:
|
fps: float:
|
||||||
frames per second
|
frames per second
|
||||||
duration_single_trans: float:
|
duration_single_trans: float:
|
||||||
|
@ -486,7 +532,8 @@ class LatentBlending():
|
||||||
|
|
||||||
if list_seeds is None:
|
if list_seeds is None:
|
||||||
list_seeds = list(np.random.randint(0, 10e10, len(list_prompts)))
|
list_seeds = list(np.random.randint(0, 10e10, len(list_prompts)))
|
||||||
|
|
||||||
|
ms = MovieSaver(fp_movie, fps=fps)
|
||||||
|
|
||||||
for i in range(len(list_prompts)-1):
|
for i in range(len(list_prompts)-1):
|
||||||
print(f"Starting movie segment {i+1}/{len(list_prompts)-1}")
|
print(f"Starting movie segment {i+1}/{len(list_prompts)-1}")
|
||||||
|
@ -516,7 +563,7 @@ class LatentBlending():
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def run_diffusion(
|
def run_diffusion(
|
||||||
self,
|
self,
|
||||||
text_embeddings: torch.FloatTensor,
|
list_conditionings,
|
||||||
latents_for_injection: torch.FloatTensor = None,
|
latents_for_injection: torch.FloatTensor = None,
|
||||||
idx_start: int = -1,
|
idx_start: int = -1,
|
||||||
idx_stop: int = -1,
|
idx_stop: int = -1,
|
||||||
|
@ -527,8 +574,7 @@ class LatentBlending():
|
||||||
Depending on the mode, the correct one will be executed.
|
Depending on the mode, the correct one will be executed.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text_embeddings: torch.FloatTensor
|
list_conditionings: List of all conditionings for the diffusion model.
|
||||||
Text embeddings used for diffusion
|
|
||||||
latents_for_injection: torch.FloatTensor
|
latents_for_injection: torch.FloatTensor
|
||||||
Latents that are used for injection
|
Latents that are used for injection
|
||||||
idx_start: int
|
idx_start: int
|
||||||
|
@ -541,15 +587,131 @@ class LatentBlending():
|
||||||
|
|
||||||
# Ensure correct num_inference_steps in Holder
|
# Ensure correct num_inference_steps in Holder
|
||||||
self.sdh.num_inference_steps = self.num_inference_steps
|
self.sdh.num_inference_steps = self.num_inference_steps
|
||||||
|
assert type(list_conditionings) is list, "list_conditionings need to be a list"
|
||||||
|
|
||||||
if self.mode == 'standard':
|
if self.mode == 'standard':
|
||||||
|
text_embeddings = list_conditionings[0]
|
||||||
return self.sdh.run_diffusion_standard(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image)
|
return self.sdh.run_diffusion_standard(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image)
|
||||||
|
|
||||||
elif self.mode == 'inpaint':
|
elif self.mode == 'inpaint':
|
||||||
|
text_embeddings = list_conditionings[0]
|
||||||
assert self.sdh.image_source is not None, "image_source is None. Please run init_inpainting first."
|
assert self.sdh.image_source is not None, "image_source is None. Please run init_inpainting first."
|
||||||
assert self.sdh.mask_image is not None, "image_source is None. Please run init_inpainting first."
|
assert self.sdh.mask_image is not None, "image_source is None. Please run init_inpainting first."
|
||||||
return self.sdh.run_diffusion_inpaint(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image)
|
return self.sdh.run_diffusion_inpaint(text_embeddings, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image)
|
||||||
|
|
||||||
|
elif self.mode == 'upscale':
|
||||||
|
cond = list_conditionings[0]
|
||||||
|
uc_full = list_conditionings[1]
|
||||||
|
return self.sdh.run_diffusion_upscaling(cond, uc_full, latents_for_injection=latents_for_injection, idx_start=idx_start, idx_stop=idx_stop, return_image=return_image)
|
||||||
|
|
||||||
|
def run_upscaling_step1(
|
||||||
|
self,
|
||||||
|
dp_img: str,
|
||||||
|
quality: str = 'upscaling_step1',
|
||||||
|
depth_strength: float = 0.65,
|
||||||
|
fixed_seeds: Optional[List[int]] = None,
|
||||||
|
overwrite_folder: bool = False,
|
||||||
|
):
|
||||||
|
r"""
|
||||||
|
Runs the first upscaling step: computes the low-resolution transition and writes its images and a yaml file to dp_img.
|
||||||
|
Args:
|
||||||
|
dp_img:
|
||||||
|
Path to directory where the low-res images and yaml will be saved to.
|
||||||
|
This directory must not exist yet; it will be created here.
|
||||||
|
quality: str
|
||||||
|
Determines how many diffusion steps are being made + how many branches in total.
|
||||||
|
We suggest leaving it at upscaling_step1, which has 12 final branches.
|
||||||
|
depth_strength: float = 0.65,
|
||||||
|
Determines how deep the first injection will happen.
|
||||||
|
Deeper injections will cause (unwanted) formation of new structures,
|
||||||
|
more shallow values will go into alpha-blendy land.
|
||||||
|
fixed_seeds: Optional[List[int]]:
|
||||||
|
You can supply two seeds that are used for the first and second keyframe (prompt1 and prompt2).
|
||||||
|
Otherwise random seeds will be taken.
|
||||||
|
"""
|
||||||
|
assert self.text_embedding1 is not None, 'run set_prompt1(yourprompt1) first'
|
||||||
|
assert self.text_embedding2 is not None, 'run set_prompt2(yourprompt2) first'
|
||||||
|
assert not os.path.isdir(dp_img), f"directory already exists: {dp_img}"
|
||||||
|
|
||||||
|
if fixed_seeds is None:
|
||||||
|
fixed_seeds = list(np.random.randint(0, 1000000, 2).astype(np.int32))
|
||||||
|
|
||||||
|
# Run latent blending
|
||||||
|
self.autosetup_branching(quality='upscaling_step1', depth_strength=depth_strength)
|
||||||
|
imgs_transition = self.run_transition(fixed_seeds=fixed_seeds)
|
||||||
|
|
||||||
|
self.write_imgs_transition(dp_img, imgs_transition)
|
||||||
|
|
||||||
|
|
||||||
|
print(f"run_upscaling_step1: completed! {dp_img}")
|
||||||
|
|
||||||
|
|
||||||
|
def run_upscaling_step2(
|
||||||
|
self,
|
||||||
|
dp_img: str,
|
||||||
|
quality: str = 'upscaling_step2',
|
||||||
|
depth_strength: float = 0.65,
|
||||||
|
fixed_seeds: Optional[List[int]] = None,
|
||||||
|
overwrite_folder: bool = False,
|
||||||
|
):
|
||||||
|
|
||||||
|
fp_yml = os.path.join(dp_img, "lowres.yaml")
|
||||||
|
fp_movie = os.path.join(dp_img, "movie.mp4")
|
||||||
|
fps = 24
|
||||||
|
ms = MovieSaver(fp_movie, fps=fps)
|
||||||
|
assert os.path.isfile(fp_yml), "lowres.yaml does not exist. did you forget run_upscaling_step1?"
|
||||||
|
dict_stuff = yml_load(fp_yml)
|
||||||
|
|
||||||
|
# load lowres images
|
||||||
|
nmb_images_lowres = dict_stuff['nmb_images']
|
||||||
|
prompt1 = dict_stuff['prompt1']
|
||||||
|
prompt2 = dict_stuff['prompt2']
|
||||||
|
imgs_lowres = []
|
||||||
|
for i in range(nmb_images_lowres):
|
||||||
|
fp_img_lowres = os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg")
|
||||||
|
assert os.path.isfile(fp_img_lowres), f"{fp_img_lowres} does not exist. did you forget run_upscaling_step1?"
|
||||||
|
imgs_lowres.append(Image.open(fp_img_lowres))
|
||||||
|
|
||||||
|
|
||||||
|
# set up upscaling
|
||||||
|
text_embeddingA = self.sdh.get_text_embedding(prompt1)
|
||||||
|
text_embeddingB = self.sdh.get_text_embedding(prompt2)
|
||||||
|
|
||||||
|
self.autosetup_branching(quality='upscaling_step2', depth_strength=depth_strength)
|
||||||
|
|
||||||
|
# list_nmb_branches = [2, 3, 4]
|
||||||
|
# list_injection_strength = [0.0, 0.6, 0.95]
|
||||||
|
# num_inference_steps = 100
|
||||||
|
# self.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)
|
||||||
|
|
||||||
|
duration_single_trans = 3
|
||||||
|
list_fract_mixing = np.linspace(0, 1, nmb_images_lowres-1)
|
||||||
|
|
||||||
|
for i in range(nmb_images_lowres-1):
|
||||||
|
print(f"Starting movie segment {i+1}/{nmb_images_lowres-1}")
|
||||||
|
|
||||||
|
self.text_embedding1 = interpolate_linear(text_embeddingA, text_embeddingB, list_fract_mixing[i])
|
||||||
|
self.text_embedding2 = interpolate_linear(text_embeddingA, text_embeddingB, 1-list_fract_mixing[i])
|
||||||
|
|
||||||
|
if i==0:
|
||||||
|
recycle_img1 = False
|
||||||
|
else:
|
||||||
|
self.swap_forward()
|
||||||
|
recycle_img1 = True
|
||||||
|
|
||||||
|
self.set_image1(imgs_lowres[i])
|
||||||
|
self.set_image2(imgs_lowres[i+1])
|
||||||
|
list_imgs = self.run_transition(recycle_img1=recycle_img1)
|
||||||
|
list_imgs_interp = add_frames_linear_interp(list_imgs, fps, duration_single_trans)
|
||||||
|
|
||||||
|
# Save movie frame
|
||||||
|
for img in list_imgs_interp:
|
||||||
|
ms.write_frame(img)
|
||||||
|
|
||||||
|
ms.finalize()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def init_inpainting(
|
def init_inpainting(
|
||||||
self,
|
self,
|
||||||
image_source: Union[Image.Image, np.ndarray] = None,
|
image_source: Union[Image.Image, np.ndarray] = None,
|
||||||
|
@ -567,10 +729,29 @@ class LatentBlending():
|
||||||
Initialize inpainting with an empty image and mask, effectively disabling inpainting,
|
Initialize inpainting with an empty image and mask, effectively disabling inpainting,
|
||||||
useful for generating a first image for transitions using diffusion.
|
useful for generating a first image for transitions using diffusion.
|
||||||
"""
|
"""
|
||||||
self.init_mode('inpaint')
|
self.init_mode()
|
||||||
self.sdh.init_inpainting(image_source, mask_image, init_empty)
|
self.sdh.init_inpainting(image_source, mask_image, init_empty)
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
|
||||||
|
def get_mixed_conditioning(self, fract_mixing):
|
||||||
|
if self.mode == 'standard':
|
||||||
|
text_embeddings_mix = interpolate_linear(self.text_embedding1, self.text_embedding2, fract_mixing)
|
||||||
|
list_conditionings = [text_embeddings_mix]
|
||||||
|
elif self.mode == 'inpaint':
|
||||||
|
text_embeddings_mix = interpolate_linear(self.text_embedding1, self.text_embedding2, fract_mixing)
|
||||||
|
list_conditionings = [text_embeddings_mix]
|
||||||
|
elif self.mode == 'upscale':
|
||||||
|
text_embeddings_mix = interpolate_linear(self.text_embedding1, self.text_embedding2, fract_mixing)
|
||||||
|
cond, uc_full = self.sdh.get_cond_upscaling(self.image1_lowres, text_embeddings_mix, self.noise_level_upscaling)
|
||||||
|
condB, uc_fullB = self.sdh.get_cond_upscaling(self.image2_lowres, text_embeddings_mix, self.noise_level_upscaling)
|
||||||
|
cond['c_concat'][0] = interpolate_spherical(cond['c_concat'][0], condB['c_concat'][0], fract_mixing)
|
||||||
|
uc_full['c_concat'][0] = interpolate_spherical(uc_full['c_concat'][0], uc_fullB['c_concat'][0], fract_mixing)
|
||||||
|
list_conditionings = [cond, uc_full]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"mix_conditioning: unknown mode {self.mode}")
|
||||||
|
return list_conditionings
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def get_text_embeddings(
|
def get_text_embeddings(
|
||||||
self,
|
self,
|
||||||
|
@ -587,6 +768,27 @@ class LatentBlending():
|
||||||
return self.sdh.get_text_embedding(prompt)
|
return self.sdh.get_text_embedding(prompt)
|
||||||
|
|
||||||
|
|
||||||
|
def write_imgs_transition(self, dp_img, imgs_transition):
    r"""
    Writes the transition images into the folder dp_img, plus a lowres.yaml
    that records the settings used to create them.

    Args:
        dp_img: str
            Output folder. It is created when missing; an existing folder is reused
            and files with the same names are overwritten.
        imgs_transition: list
            Images of the transition; every entry is passed to Image.fromarray.
    """
    # exist_ok=True: this runs after the (expensive) transition has been computed,
    # so an already existing folder must not abort with FileExistsError.
    os.makedirs(dp_img, exist_ok=True)
    for i, img in enumerate(imgs_transition):
        img_leaf = Image.fromarray(img)
        img_leaf.save(os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg"))

    # Dump everything relevant into yaml
    dict_stuff = {
        'prompt1': self.prompt1,
        'prompt2': self.prompt2,
        # cast so that the seeds are written as plain integers
        'seed1': int(self.seed1),
        'seed2': int(self.seed2),
        'num_inference_steps': self.num_inference_steps,
        'height': self.sdh.height,
        'width': self.sdh.width,
        'nmb_images': len(imgs_transition),
    }
    yml_save(os.path.join(dp_img, "lowres.yaml"), dict_stuff)
|
||||||
|
|
||||||
def randomize_seed(self):
|
def randomize_seed(self):
|
||||||
r"""
|
r"""
|
||||||
Set a random seed for a fresh start.
|
Set a random seed for a fresh start.
|
||||||
|
@ -815,7 +1017,7 @@ def add_frames_linear_interp(
|
||||||
return list_imgs_interp
|
return list_imgs_interp
|
||||||
|
|
||||||
|
|
||||||
def get_spacing(nmb_points:int, scaling: float):
|
def get_spacing(nmb_points: int, scaling: float):
|
||||||
"""
|
"""
|
||||||
Helper function for getting nonlinear spacing between 0 and 1, symmetric around 0.5
|
Helper function for getting nonlinear spacing between 0 and 1, symmetric around 0.5
|
||||||
Args:
|
Args:
|
||||||
|
@ -834,9 +1036,7 @@ def get_spacing(nmb_points:int, scaling: float):
|
||||||
else:
|
else:
|
||||||
left_side = np.abs(np.linspace(1, 0, nmb_points_per_side)**scaling / 2 - 0.5)[0:-1]
|
left_side = np.abs(np.linspace(1, 0, nmb_points_per_side)**scaling / 2 - 0.5)[0:-1]
|
||||||
right_side = 1-left_side[::-1]
|
right_side = 1-left_side[::-1]
|
||||||
|
|
||||||
all_fracts = np.hstack([left_side, right_side])
|
all_fracts = np.hstack([left_side, right_side])
|
||||||
|
|
||||||
return all_fracts
|
return all_fracts
|
||||||
|
|
||||||
|
|
||||||
|
@ -861,16 +1061,126 @@ def get_time(resolution=None):
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
|
def yml_load(fp_yml, print_fields=False):
    """
    Helper function for loading yaml files

    Args:
        fp_yml: str
            Path of the yaml file.
        print_fields: bool
            If True, every loaded key/value pair is printed.

    Returns:
        dict: content of the file; an empty dict if the file holds no document.
    """
    with open(fp_yml) as f:
        data = yaml.load(f, Loader=yaml.loader.SafeLoader)
    # An empty file is parsed as None, which dict() cannot convert.
    dict_data = {} if data is None else dict(data)
    print("load: loaded {}".format(fp_yml))
    if print_fields:
        for key, value in dict_data.items():
            print("{}: {}".format(key, value))
    return dict_data
|
||||||
|
|
||||||
|
def yml_save(fp_yml, dict_stuff):
    """
    Helper function for saving yaml files

    Args:
        fp_yml: str
            Path of the yaml file that is written (an existing file is replaced).
        dict_stuff: dict
            Content to store; the key order is kept (sort_keys=False).
    """
    with open(fp_yml, 'w') as stream:
        # block style output, written straight into the open file
        yaml.dump(dict_stuff, stream, sort_keys=False, default_flow_style=False)
    print("yml_save: saved {}".format(fp_yml))
|
||||||
|
|
||||||
|
|
||||||
#%% le main
|
#%% le main
|
||||||
if __name__ == "__main__":
    # Experimental driver for the two-step upscaling workflow. Step 1 (low-res
    # transition) is kept below as commented-out code, step 2 (x4 upscaling of the
    # stored low-res images into a movie) is live. All paths are machine specific.
    # xxxx
    # #%% First let us spawn a stable diffusion holder
    # device = "cuda:0"
    # fp_ckpt = "../stable_diffusion_models/ckpt/v2-1_512-ema-pruned.ckpt"
    # fp_config = 'configs/v2-inference.yaml'
    # sdh = StableDiffusionHolder(fp_ckpt, fp_config, device, height=384, width=512)
    # #%%
    # # Spawn latent blending
    # self = LatentBlending(sdh)

    # dp_img = '/home/lugo/latentblending/test5'

    # fn1 = '230105_211545_photo_of_a_pyroclastic_ash_cloud_racing_down_mount_etna.txt'
    # fn2 = '230105_211815_a_breathtaking_drone_photo_of_a_bizarre_cliff_structure,_lava_streams_flowing_down_into_the_ocean.txt'

    # dp_cherries ='/home/lugo/latentblending/cherries/'

    # dict1 = yml_load(os.path.join(dp_cherries, fn1))
    # dict2 = yml_load(os.path.join(dp_cherries, fn2))

    # # prompt1 = "painting of a big pine tree"
    # # prompt2 = "painting of the full moon shining, mountains in the background, rocks, eery"
    # prompt1 = dict1['prompt']
    # prompt2 = dict2['prompt']
    # self.set_prompt1(prompt1)
    # self.set_prompt2(prompt2)
    # fixed_seeds = [dict1['seed'], dict2['seed']]
    # self.run_upscaling_step1(dp_img, fixed_seeds=fixed_seeds, depth_strength=0.6)

    # # FIXME: depth_strength=0.6 CAN cause trouble. why?!

    #%% RUN UPSCALING_STEP2 (highres)

    fp_ckpt= "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
    fp_config = 'configs/x4-upscaling.yaml'
    sdh = StableDiffusionHolder(fp_ckpt, fp_config)
    # self.run_upscaling_step2(dp_img)
    #%% /home/lugo/latentblending/230106_210812 /
    # 'self' is an ordinary variable here; the name mirrors the method bodies,
    # presumably so code can be moved into the class later.
    self = LatentBlending(sdh)
    dp_img = '/home/lugo/latentblending/230107_144533'
    fp_yml = os.path.join(dp_img, "lowres.yaml")
    fp_movie = os.path.join(dp_img, "movie.mp4")
    fps = 24
    ms = MovieSaver(fp_movie, fps=fps)
    assert os.path.isfile(fp_yml), "lowres.yaml does not exist. did you forget run_upscaling_step1?"
    dict_stuff = yml_load(fp_yml)

    # load lowres images
    nmb_images_lowres = dict_stuff['nmb_images']
    prompt1 = dict_stuff['prompt1']
    prompt2 = dict_stuff['prompt2']
    imgs_lowres = []
    for i in range(nmb_images_lowres):
        # same file naming scheme as used by write_imgs_transition
        fp_img_lowres = os.path.join(dp_img, f"lowres_img_{str(i).zfill(4)}.jpg")
        assert os.path.isfile(fp_img_lowres), f"{fp_img_lowres} does not exist. did you forget run_upscaling_step1?"
        imgs_lowres.append(Image.open(fp_img_lowres))


    # set up upscaling
    text_embeddingA = self.sdh.get_text_embedding(prompt1)
    text_embeddingB = self.sdh.get_text_embedding(prompt2)

    list_nmb_branches = [2, 3, 6]
    list_injection_strength = [0.0, 0.6, 0.95]
    num_inference_steps = 100
    duration_single_trans = 3
    self.setup_branching(num_inference_steps, list_nmb_branches, list_injection_strength)
    # one mixing fraction per movie segment (there are nmb_images_lowres-1 segments)
    list_fract_mixing = np.linspace(0, 1, nmb_images_lowres-1)

    # Each pair of neighbouring low-res images becomes one high-res movie segment.
    for i in range(nmb_images_lowres-1):
        print(f"Starting movie segment {i+1}/{nmb_images_lowres-1}")

        self.text_embedding1 = interpolate_linear(text_embeddingA, text_embeddingB, list_fract_mixing[i])
        # NOTE(review): the second embedding uses 1-fraction instead of the fraction of
        # the next segment, so both embeddings only meet in the middle of the movie.
        # Looks unintended -- confirm.
        self.text_embedding2 = interpolate_linear(text_embeddingA, text_embeddingB, 1-list_fract_mixing[i])

        if i==0:
            recycle_img1 = False
        else:
            # from the second segment on, swap_forward() is called and image 1 is
            # recycled (presumably reusing the previous segment's end point -- confirm)
            self.swap_forward()
            recycle_img1 = True

        self.set_image1(imgs_lowres[i])
        self.set_image2(imgs_lowres[i+1])
        list_imgs = self.run_transition(recycle_img1=recycle_img1)
        # the key frames of every segment are also stored in their own sub folder
        self.write_imgs_transition(os.path.join(dp_img, f"highres_{str(i).zfill(4)}"), list_imgs)
        list_imgs_interp = add_frames_linear_interp(list_imgs, fps, duration_single_trans)

        # Save movie frame
        for img in list_imgs_interp:
            ms.write_frame(img)

    ms.finalize()
|
||||||
|
|
||||||
#%%
|
#%%
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
TODO Coding:
|
TODO Coding:
|
||||||
|
CHECK IF ALL STUFF WORKS STILL: STANDARD MODEL, INPAINTING
|
||||||
RUNNING WITHOUT PROMPT!
|
RUNNING WITHOUT PROMPT!
|
||||||
save value ranges, can it be trashed?
|
save value ranges, can it be trashed?
|
||||||
in the middle: have more branches + lower guidance scale
|
in the middle: have more branches + lower guidance scale
|
||||||
|
@ -878,8 +1188,6 @@ TODO Coding:
|
||||||
TODO Other:
|
TODO Other:
|
||||||
github
|
github
|
||||||
write text
|
write text
|
||||||
requirements
|
|
||||||
make graphic explaining
|
|
||||||
make colab
|
make colab
|
||||||
license
|
license
|
||||||
twitter et al
|
twitter et al
|
||||||
|
|
|
@ -27,7 +27,7 @@ import warnings
|
||||||
import torch
|
import torch
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import matplotlib.pyplot as plt
|
# import matplotlib.pyplot as plt
|
||||||
import torch
|
import torch
|
||||||
from movie_util import MovieSaver
|
from movie_util import MovieSaver
|
||||||
import datetime
|
import datetime
|
||||||
|
@ -40,29 +40,21 @@ from torch import autocast
|
||||||
from contextlib import nullcontext
|
from contextlib import nullcontext
|
||||||
from ldm.util import instantiate_from_config
|
from ldm.util import instantiate_from_config
|
||||||
from ldm.models.diffusion.ddim import DDIMSampler
|
from ldm.models.diffusion.ddim import DDIMSampler
|
||||||
from einops import repeat
|
from einops import repeat, rearrange
|
||||||
|
|
||||||
|
#%%
|
||||||
|
|
||||||
|
|
||||||
def load_model_from_config(config, ckpt, verbose=False):
|
def pad_image(input_image):
    """
    Pads an image at its right and bottom border so that width and height become
    multiples of 64, with a minimum of 128 pixels per side.

    The new pixels repeat the nearest border pixel (numpy 'edge' mode).

    Args:
        input_image: PIL.Image.Image
            Image to pad. Images with a channel axis (e.g. RGB) as well as images
            without one (e.g. mode "L") are supported.

    Returns:
        PIL.Image.Image: the padded image.
    """
    size = np.array(input_image.size)  # PIL reports (width, height)
    # round up to full 64 pixel blocks, using at least two blocks per side
    nmb_blocks = np.maximum(2, np.ceil(size / 64).astype(int))
    pad_w, pad_h = nmb_blocks * 64 - size

    pixels = np.array(input_image)  # rows (height) first, then columns (width)
    # Only rows and columns are padded. Building the spec from ndim leaves any
    # further axes untouched and also works when there is no channel axis at all.
    pad_spec = ((0, pad_h), (0, pad_w)) + ((0, 0),) * (pixels.ndim - 2)
    im_padded = Image.fromarray(np.pad(pixels, pad_spec, mode='edge'))
    return im_padded
|
||||||
model = instantiate_from_config(config.model)
|
|
||||||
m, u = model.load_state_dict(sd, strict=False)
|
|
||||||
if len(m) > 0 and verbose:
|
|
||||||
print("missing keys:")
|
|
||||||
print(m)
|
|
||||||
if len(u) > 0 and verbose:
|
|
||||||
print("unexpected keys:")
|
|
||||||
print(u)
|
|
||||||
|
|
||||||
model.cuda()
|
|
||||||
model.eval()
|
|
||||||
return model
|
|
||||||
|
|
||||||
def make_batch_sd(
|
|
||||||
|
def make_batch_inpaint(
|
||||||
image,
|
image,
|
||||||
mask,
|
mask,
|
||||||
txt,
|
txt,
|
||||||
|
@ -89,16 +81,42 @@ def make_batch_sd(
|
||||||
}
|
}
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
|
|
||||||
|
def make_batch_superres(
        image,
        txt,
        device,
        num_samples=1,
    ):
    """
    Builds the input batch for the super resolution model from one image.

    Args:
        image: image object offering convert("RGB") (a PIL image in this file).
        txt: prompt, stored num_samples times under "txt".
        device: device the image tensor is moved to.
        num_samples: how often the image and the prompt are repeated.

    Returns:
        dict with "lr" (image tensor, n c h w) and "txt" (list of prompts).
    """
    pixels = np.array(image.convert("RGB"))
    # rescale from [0, 255] to [-1, 1]
    lowres = torch.from_numpy(pixels).to(dtype=torch.float32) / 127.5 - 1.0
    # channels first plus a leading batch axis of size one
    lowres = rearrange(lowres, 'h w c -> 1 c h w').to(device=device)
    return {
        "lr": repeat(lowres, "1 ... -> n ...", n=num_samples),
        "txt": num_samples * [txt],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def make_noise_augmentation(model, batch, noise_level=None):
    """
    Runs the model's low_scale_model on the low resolution entry of batch.

    Args:
        model: object providing low_scale_key and a callable low_scale_model.
        batch: dict holding the low resolution tensor under model.low_scale_key.
        noise_level: forwarded unchanged to model.low_scale_model.

    Returns:
        tuple: the two values returned by model.low_scale_model
        (augmented input, noise level).
    """
    # the low scale model is fed a contiguous float32 tensor
    lowres = batch[model.low_scale_key].to(memory_format=torch.contiguous_format).float()
    augmented, level = model.low_scale_model(lowres, noise_level)
    return augmented, level
|
||||||
|
|
||||||
|
|
||||||
class StableDiffusionHolder:
|
class StableDiffusionHolder:
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
fp_ckpt: str = None,
|
fp_ckpt: str = None,
|
||||||
fp_config: str = None,
|
fp_config: str = None,
|
||||||
device: str = None,
|
num_inference_steps: int = 30,
|
||||||
height: Optional[int] = None,
|
height: Optional[int] = None,
|
||||||
width: Optional[int] = None,
|
width: Optional[int] = None,
|
||||||
num_inference_steps: int = 30,
|
device: str = None,
|
||||||
precision: str='autocast',
|
precision: str='autocast',
|
||||||
):
|
):
|
||||||
|
|
||||||
self.seed = 42
|
self.seed = 42
|
||||||
self.guidance_scale = 5.0
|
self.guidance_scale = 5.0
|
||||||
|
|
||||||
|
@ -130,13 +148,15 @@ class StableDiffusionHolder:
|
||||||
def init_model(self, fp_ckpt, fp_config):
|
def init_model(self, fp_ckpt, fp_config):
|
||||||
assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}"
|
assert os.path.isfile(fp_ckpt), f"Your model checkpoint file does not exist: {fp_ckpt}"
|
||||||
assert os.path.isfile(fp_config), f"Your config file does not exist: {fp_config}"
|
assert os.path.isfile(fp_config), f"Your config file does not exist: {fp_config}"
|
||||||
config = OmegaConf.load(fp_config)
|
self.fp_ckpt = fp_ckpt
|
||||||
self.model = load_model_from_config(config, fp_ckpt)
|
|
||||||
|
|
||||||
|
config = OmegaConf.load(fp_config)
|
||||||
|
|
||||||
|
self.model = instantiate_from_config(config.model)
|
||||||
|
self.model.load_state_dict(torch.load(fp_ckpt)["state_dict"], strict=False)
|
||||||
|
|
||||||
self.model = self.model.to(self.device)
|
self.model = self.model.to(self.device)
|
||||||
self.sampler = DDIMSampler(self.model)
|
self.sampler = DDIMSampler(self.model)
|
||||||
self.fp_ckpt = fp_ckpt
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -186,6 +206,26 @@ class StableDiffusionHolder:
|
||||||
def get_text_embedding(self, prompt):
    """Returns what self.model.get_learned_conditioning yields for prompt."""
    return self.model.get_learned_conditioning(prompt)
|
||||||
|
|
||||||
|
@torch.no_grad()
def get_cond_upscaling(self, image, text_embedding, noise_level):
    r"""
    Initializes the conditioning for the x4 upscaling model.

    Args:
        image: low resolution image; it is padded by pad_image before use.
        text_embedding: text conditioning that goes into the conditional branch.
        noise_level: noise level for the noise augmentation of the low-res image.

    Returns:
        tuple (cond, uc_full): conditional and unconditional conditioning dicts.
        Both refer to the same augmented image tensor and noise level and
        differ only in "c_crossattn".
    """
    padded = pad_image(image)
    # one-element integer tensor, placed on the device of the sampler's model
    level = torch.Tensor(1 * [noise_level]).to(self.sampler.model.device).long()
    batch = make_batch_superres(padded, txt="placeholder", device=self.device, num_samples=1)
    x_augment, level = make_noise_augmentation(self.model, batch, level)

    # the unconditional branch is built from the empty prompt
    uc_cross = self.model.get_unconditional_conditioning(1, "")

    cond = {"c_concat": [x_augment], "c_crossattn": [text_embedding], "c_adm": level}
    uc_full = {"c_concat": [x_augment], "c_crossattn": [uc_cross], "c_adm": level}
    return cond, uc_full
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def run_diffusion_standard(
|
def run_diffusion_standard(
|
||||||
|
@ -317,7 +357,7 @@ class StableDiffusionHolder:
|
||||||
with precision_scope("cuda"):
|
with precision_scope("cuda"):
|
||||||
with self.model.ema_scope():
|
with self.model.ema_scope():
|
||||||
|
|
||||||
batch = make_batch_sd(self.image_source, self.mask_image, txt="willbereplaced", device=self.device, num_samples=1)
|
batch = make_batch_inpaint(self.image_source, self.mask_image, txt="willbereplaced", device=self.device, num_samples=1)
|
||||||
c = text_embeddings
|
c = text_embeddings
|
||||||
c_cat = list()
|
c_cat = list()
|
||||||
for ck in self.model.concat_keys:
|
for ck in self.model.concat_keys:
|
||||||
|
@ -383,7 +423,93 @@ class StableDiffusionHolder:
|
||||||
return self.latent2image(latents)
|
return self.latent2image(latents)
|
||||||
else:
|
else:
|
||||||
return list_latents_out
|
return list_latents_out
|
||||||
|
|
||||||
|
@torch.no_grad()
def run_diffusion_upscaling(
        self,
        cond,
        uc_full,
        latents_for_injection: torch.FloatTensor = None,
        idx_start: int = -1,
        idx_stop: int = -1,
        return_image: Optional[bool] = False
    ):
    r"""
    Runs the DDIM sampling loop with the upscaling conditioning and collects
    the latents of every executed step.

    Args:
        cond: dict
            Conditioning passed to the sampler (see get_cond_upscaling).
        uc_full: dict
            Unconditional conditioning. The spatial size of the latents is read
            from uc_full['c_concat'][0].
        latents_for_injection: torch.FloatTensor
            Latents that are used for injection. None disables injection.
        idx_start: int
            With injection: all steps before idx_start are skipped and the latents
            are replaced by a copy of latents_for_injection right before step idx_start.
        idx_stop: int
            The loop returns the latents collected so far once it reaches this step
            index. The default -1 never matches, so all steps are run.
        return_image: Optional[bool]
            Optionally return image directly

    Returns:
        List of latents (one per executed step), or the decoded image of the
        final latents if return_image is set and the loop was not stopped early.
    """
    # NOTE(review): the nesting of this body was reconstructed -- confirm that the
    # idx_stop check sits at loop level and that everything runs inside the scopes.

    if latents_for_injection is None:
        do_inject_latents = False
    else:
        do_inject_latents = True

    precision_scope = autocast if self.precision == "autocast" else nullcontext
    # seeded generator: the start noise is reproducible for a given self.seed
    generator = torch.Generator(device=self.device).manual_seed(int(self.seed))

    # the latents take their height/width from the image conditioning
    h = uc_full['c_concat'][0].shape[2]
    w = uc_full['c_concat'][0].shape[3]

    with precision_scope("cuda"):
        with self.model.ema_scope():

            shape_latents = [self.model.channels, h, w]

            self.sampler.make_schedule(ddim_num_steps=self.num_inference_steps-1, ddim_eta=self.ddim_eta, verbose=False)
            C, H, W = shape_latents
            size = (1, C, H, W)
            b = size[0]

            latents = torch.randn(size, generator=generator, device=self.device)

            timesteps = self.sampler.ddim_timesteps

            # the loop walks through the schedule in reversed order
            time_range = np.flip(timesteps)
            total_steps = timesteps.shape[0]

            # collect latents
            list_latents_out = []
            for i, step in enumerate(time_range):
                if do_inject_latents:
                    # Inject latent at right place
                    if i < idx_start:
                        continue
                    elif i == idx_start:
                        latents = latents_for_injection.clone()

                if i == idx_stop:
                    return list_latents_out

                # print(f"diffusion iter {i}")
                # position within the (non-reversed) schedule
                index = total_steps - i - 1
                ts = torch.full((b,), step, device=self.device, dtype=torch.long)
                outs = self.sampler.p_sample_ddim(latents, cond, ts, index=index, use_original_steps=False,
                                          quantize_denoised=False, temperature=1.0,
                                          noise_dropout=0.0, score_corrector=None,
                                          corrector_kwargs=None,
                                          unconditional_guidance_scale=self.guidance_scale,
                                          unconditional_conditioning=uc_full,
                                          dynamic_threshold=None)
                latents, pred_x0 = outs
                # stored after the step: entry k holds the latents after the k-th executed step
                list_latents_out.append(latents.clone())

            if return_image:
                return self.latent2image(latents)
            else:
                return list_latents_out
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def latent2image(
|
def latent2image(
|
||||||
|
@ -405,47 +531,178 @@ class StableDiffusionHolder:
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Scratch experiments for the x4 upscaling model, organised in '#%%' cells.
    # All image paths are machine specific.

    fp_ckpt= "../stable_diffusion_models/ckpt/x4-upscaler-ema.ckpt"
    fp_config = 'configs/x4-upscaling.yaml'
    num_inference_steps = 100
    # 'self' is an ordinary variable here, mirroring the naming inside the class
    self = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps=num_inference_steps)
    # NOTE(review): 'xxx' is not defined anywhere visible, so running this file as a
    # script presumably stops here with a NameError -- looks like a deliberate halt so
    # that the cells below are only run interactively. Confirm.
    xxx
    #%% image A
    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)

    list_samplesA = self.run_diffusion_upscaling(cond, uc_full)
    image_result = Image.fromarray(self.latent2image(list_samplesA[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgA.jpg')

    #%% image B
    from latent_blending import interpolate_linear, interpolate_spherical
    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg')
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    text_embeddingB = self.get_text_embedding(promptB)
    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)

    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)

    list_samplesB = self.run_diffusion_upscaling(cond, uc_full)
    image_result = Image.fromarray(self.latent2image(list_samplesB[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgB.jpg')

    #%% reality check: run only for 50 iter.
    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)

    latents_inject = list_samplesA[50]
    list_samplesAx = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=50)
    image_result = Image.fromarray(self.latent2image(list_samplesAx[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/imgA_restart.jpg')

    # RESULTS ARE NOT EXACTLY IDENTICAL! INVESTIGATE WHY
    # NOTE(review): list_samplesA[50] holds the latents after step 50, while
    # idx_start=50 injects them before step 50 is executed, so that step would run
    # twice. The inpaint check further below uses idx_start=idx_inject+1. Possibly
    # the cause of the mismatch -- confirm.

    #%% mix in the middle! which uc_full should be taken?
    # expA: take the one from A
    idx_start = 90
    latentsA = list_samplesA[idx_start]
    latentsB = list_samplesB[idx_start]
    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)

    image = Image.open('/home/lugo/latentblending/test1/img_0007.jpg')
    image = image.resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    cond, uc_full = self.get_cond_upscaling(image, text_embeddingA, noise_level)

    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expA_late.jpg')

    #%% mix in the middle! which uc_full should be taken?
    # expB: take the one from B
    idx_start = 90
    latentsA = list_samplesA[idx_start]
    latentsB = list_samplesB[idx_start]
    latents_inject = interpolate_spherical(latentsA, latentsB, 0.5)

    image = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
    promptA = "photo of a an ancient castle surrounded by a forest"
    promptB = "photo of a beautiful island on the horizon, blue sea with waves"
    noise_level = 20 #gradio min=0, max=350, value=20
    text_embeddingA = self.get_text_embedding(promptA)
    text_embeddingB = self.get_text_embedding(promptB)
    text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)
    cond, uc_full = self.get_cond_upscaling(image, text_embedding, noise_level)

    list_samples = self.run_diffusion_upscaling(cond, uc_full, latents_inject, idx_start=idx_start)
    image_result = Image.fromarray(self.latent2image(list_samples[-1]))
    image_result.save('/home/lugo/latentblending/test1/high/img_mix_expB_late.jpg')


    #%% lets blend the uc_full too!
    # expC: blend the latents and the image conditioning of both runs
    # with the same mixing fraction

    idx_start = 50
    list_mix = np.linspace(0, 1, 20)
    for fract_mix in list_mix:
        # fract_mix = 0.75
        latentsA = list_samplesA[idx_start]
        latentsB = list_samplesB[idx_start]
        latents_inject = interpolate_spherical(latentsA, latentsB, fract_mix)

        text_embeddingA = self.get_text_embedding(promptA)
        text_embeddingB = self.get_text_embedding(promptB)
        text_embedding = interpolate_linear(text_embeddingA, text_embeddingB, 1/8)

        imageA = Image.open('/home/lugo/latentblending/test1/img_0007.jpg').resize((32*20, 32*12))
        condA, uc_fullA = self.get_cond_upscaling(imageA, text_embedding, noise_level)

        imageB = Image.open('/home/lugo/latentblending/test1/img_0006.jpg').resize((32*20, 32*12))
        condB, uc_fullB = self.get_cond_upscaling(imageB, text_embedding, noise_level)

        # only the image part (c_concat) of both conditionings is blended
        condA['c_concat'][0] = interpolate_spherical(condA['c_concat'][0], condB['c_concat'][0], fract_mix)
        uc_fullA['c_concat'][0] = interpolate_spherical(uc_fullA['c_concat'][0], uc_fullB['c_concat'][0], fract_mix)

        list_samples = self.run_diffusion_upscaling(condA, uc_fullA, latents_inject, idx_start=idx_start)
        image_result = Image.fromarray(self.latent2image(list_samples[-1]))
        image_result.save(f'/home/lugo/latentblending/test1/high/img_mix_expC_{fract_mix}_start{idx_start}.jpg')


    #%%

    list_imgs = os.listdir('/home/lugo/latentblending/test1/high/')
    list_imgs = [l for l in list_imgs if "expC" in l]
    list_imgs.pop(0)

    lx = []
    for fn in list_imgs:
        # NOTE(review): unfinished -- Image.open is only referenced, not called,
        # so this loop has no effect and lx stays empty.
        Image.open

    #%%

    # Earlier inpainting injection check, disabled via 'if False'.
    if False:

        num_inference_steps = 20 # Number of diffusion interations

        # fp_ckpt = "../stable_diffusion_models/ckpt/768-v-ema.ckpt"
        # fp_config = '../stablediffusion/configs/stable-diffusion/v2-inference-v.yaml'

        fp_ckpt= "../stable_diffusion_models/ckpt/512-inpainting-ema.ckpt"
        fp_config = '../stablediffusion/configs//stable-diffusion/v2-inpainting-inference.yaml'

        sdh = StableDiffusionHolder(fp_ckpt, fp_config, num_inference_steps)
        # fp_ckpt= "../stable_diffusion_models/ckpt/512-base-ema.ckpt"
        # fp_config = '../stablediffusion/configs//stable-diffusion/v2-inference.yaml'


        # random noise image; the mask is 255 everywhere except a zeroed 50x50 corner
        image_source = Image.fromarray((255*np.random.rand(512,512,3)).astype(np.uint8))
        mask = 255*np.ones([512,512], dtype=np.uint8)
        mask[0:50, 0:50] = 0
        mask = Image.fromarray(mask)

        sdh.init_inpainting(image_source, mask)
        text_embedding = sdh.get_text_embedding("photo of a strange house, surreal painting")
        list_latents = sdh.run_diffusion_inpaint(text_embedding)

        # restart from the latents of step idx_inject and compare with the full run
        idx_inject = 3
        img_orig = sdh.latent2image(list_latents[-1])
        list_inject = sdh.run_diffusion_inpaint(text_embedding, list_latents[idx_inject], idx_start=idx_inject+1)
        img_inject = sdh.latent2image(list_inject[-1])

        img_diff = img_orig - img_inject
        import matplotlib.pyplot as plt
        plt.imshow(np.concatenate((img_orig, img_inject, img_diff), axis=1))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
next steps:
|
|
||||||
incorporate into lb
|
|
||||||
incorporate into outpaint
|
|
||||||
"""
|
|
||||||
|
|
Loading…
Reference in New Issue