# generate.py
if __name__ == "__main__":
    import os
    import json
    import math
    import uuid
    import torch
    from datetime import datetime
    from tqdm import trange
    from PIL import Image
    from concurrent.futures import ThreadPoolExecutor
    from v_diffusion import *
    from argparse import ArgumentParser

    parser = ArgumentParser()
parser.add_argument("--data-root", type=str, default="~/datasets")
parser.add_argument("--batch-size", type=int, default=128)
parser.add_argument("--total-size", type=int, default=50000)
parser.add_argument("--default-config-path", default="./configs/defaults.json", type=str)
parser.add_argument("--config-path", type=str, required=True)
parser.add_argument("--ckpt-path", type=str, required=True)
parser.add_argument("--save-dir", type=str, default="./images/eval")
parser.add_argument("--device", type=str, default="cuda:0")
parser.add_argument("--use-ema", action="store_true")
parser.add_argument("--use-ddim", action="store_true")
parser.add_argument("--sample-timesteps", type=int, default=1024)
parser.add_argument("--uncond", action="store_true")
parser.add_argument("--w-guide", type=float, default=0.1)
args = parser.parse_args()
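
    # Example invocation (a sketch; the config and checkpoint paths below are
    # placeholders, not files confirmed to ship with this repository):
    #   python generate.py --config-path ./configs/cifar10.json \
    #       --ckpt-path ./checkpoints/cifar10.pt --use-ema --use-ddim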
    device = torch.device(args.device)
    ckpt_path = args.ckpt_path
    use_ema = args.use_ema
    if use_ema:
        # load the shadow (exponential moving average) weights instead of the raw ones
        state_dict = torch.load(ckpt_path, map_location=device)["ema"]["shadow"]
    else:
        state_dict = torch.load(ckpt_path, map_location=device)["model"]
    for k in list(state_dict.keys()):
        if k.startswith("module."):  # state_dict saved from a DDP-wrapped model
            state_dict[k.split(".", maxsplit=1)[1]] = state_dict.pop(k)
    # classifier-free guidance is only available if the model carries a class embedding
    use_cfg = "class_embed" in {k.split(".")[0] for k in state_dict.keys()}
    config_path = args.config_path
    exp_name = os.path.splitext(os.path.basename(config_path))[0]  # config name without extension
    with open(config_path, "r") as f:
        config: dict = json.load(f)
    with open(args.default_config_path, "r") as f:
        defaults: dict = json.load(f)
    fill_with_defaults(config, defaults)

    dataset = config["data"]["name"]
    data_root = args.data_root
    if "~" in data_root:
        data_root = os.path.expanduser(data_root)
    if "$" in data_root:
        data_root = os.path.expandvars(data_root)

    in_channels = DATA_INFO[dataset]["channels"]
    image_res = DATA_INFO[dataset]["resolution"][0]
    multitags = DATA_INFO[dataset].get("multitags", False)
    if use_cfg:
        num_classes = DATA_INFO[dataset]["num_classes"]
        # --uncond disables guidance by zeroing the guidance weight
        w_guide = 0. if args.uncond else args.w_guide
    else:
        num_classes = 0
        w_guide = 0.
    diffusion_kwargs = config["diffusion"]
    logsnr_schedule = diffusion_kwargs.pop("logsnr_schedule")
    logsnr_max = diffusion_kwargs.pop("logsnr_max")
    logsnr_min = diffusion_kwargs.pop("logsnr_min")
    logsnr_fn = get_logsnr_schedule(
        logsnr_schedule, logsnr_min, logsnr_max, rescale=diffusion_kwargs.pop("allow_rescale"))
    # sampling uses its own step count; the training step count is irrelevant here
    diffusion_kwargs["sample_timesteps"] = args.sample_timesteps
    diffusion_kwargs.pop("train_timesteps")
    diffusion = GaussianDiffusion(
        logsnr_fn=logsnr_fn,
        w_guide=w_guide,
        **diffusion_kwargs)
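
    # w_guide is presumably the classifier-free guidance weight w applied inside
    # GaussianDiffusion, i.e. something along the lines of (an assumption, not
    # verified against v_diffusion internals):
    #   out_guided = (1 + w) * out(x_t, y) - w * out(x_t, y_null)
    # so w_guide = 0 reduces to plain conditional (or unconditional) sampling.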
    model_out_type = diffusion_kwargs.get("model_out_type", "both")
    # "both" doubles the output channels: the model predicts two quantities per pixel
    out_channels = (2 if model_out_type == "both" else 1) * in_channels
    model = UNet(
        out_channels=out_channels,
        num_classes=num_classes,
        multitags=multitags,
        **config["model"],
    )
    model.to(device)
    model.load_state_dict(state_dict)
    model.eval()
    # inference only: freeze all parameters
    for p in model.parameters():
        if p.requires_grad:
            p.requires_grad_(False)
    timestamp = datetime.now().strftime("%Y-%m-%dT%H%M%S%f")
    save_dir = os.path.join(args.save_dir, exp_name, timestamp)
    os.makedirs(save_dir, exist_ok=True)

    batch_size = args.batch_size
    total_size = args.total_size
    num_eval_batches = math.ceil(total_size / batch_size)
    shape = (batch_size, 3, image_res, image_res)  # assumes 3-channel RGB data
    # record the generation arguments alongside the images for reproducibility
    with open(os.path.join(save_dir, "args.txt"), "w") as f:
        json.dump(vars(args), f)

    def save_image(arr):
        # arr: uint8 array of shape (H, W, 3)
        with Image.fromarray(arr, mode="RGB") as im:
            im.save(f"{save_dir}/{uuid.uuid4()}.png")

    if torch.backends.cudnn.is_available():  # noqa
        torch.backends.cudnn.benchmark = True  # noqa
    uncond = args.uncond
    if multitags:
        # note: use the expanded data_root, so ~ and $VARS in --data-root resolve
        labels = DATA_INFO[dataset]["data"](root=data_root, split="all").targets

        def get_label_loader(to_device):
            # multi-tag datasets: each label is a float multi-hot vector
            while True:
                if uncond:
                    yield torch.zeros((batch_size, num_classes), dtype=torch.float32, device=to_device)
                else:
                    yield labels[torch.randint(len(labels), size=(batch_size, ))].float().to(to_device)
    else:
        def get_label_loader(to_device):
            # single-label datasets: class indices are shifted by 1,
            # with 0 reserved for the unconditional (null) class
            while True:
                if uncond:
                    yield torch.zeros((batch_size, ), dtype=torch.int64, device=to_device)
                else:
                    yield torch.randint(num_classes, size=(batch_size, ), device=to_device) + 1

    label_loader = get_label_loader(to_device=device)
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        for i in trange(num_eval_batches):
            if i == num_eval_batches - 1:
                # the last batch may be smaller if total_size is not a multiple of batch_size
                batch_size = total_size - i * batch_size
                shape = (batch_size, 3, image_res, image_res)
            x = diffusion.p_sample(
                model, shape=shape, device=device,
                noise=torch.randn(shape, device=device),
                label=next(label_loader)[:batch_size],
                use_ddim=args.use_ddim
            ).cpu()
            # map from [-1, 1] to uint8 [0, 255] and move channels last for PIL
            x = (x * 127.5 + 127.5).clamp(0, 255).to(torch.uint8).permute(0, 2, 3, 1).numpy()
            pool.map(save_image, list(x))