remove asymmetric
Browse files- README.md +0 -1
- asymmetric_vae/config.json +0 -38
- asymmetric_vae/diffusion_pytorch_model.safetensors +0 -3
- asymmetric_vae_new/config.json +0 -45
- asymmetric_vae_new/diffusion_pytorch_model.safetensors +0 -3
- eval_alchemist.py +129 -38
- samples/sample_0.jpg +0 -3
- samples/sample_1.jpg +0 -3
- samples/sample_2.jpg +0 -3
- samples/sample_decoded.jpg +0 -3
- samples/sample_real.jpg +0 -3
- train_sdxl_vae_gpt5.py → train_sdxl_vae.py +3 -3
- vae/diffusion_pytorch_model.safetensors +1 -1
README.md
CHANGED
@@ -24,7 +24,6 @@ Alchemist eval (512px)
|
|
24 |
| madebyollin/sdxl-vae-fp16 | 100% | 100% | 100% |
|
25 |
| KBlueLeaf/EQ-SDXL-VAE | 107.8% | 100.1% | 95.5% |
|
26 |
| AiArtLab/sdxl_vae | 112.3% | 101.8% | 106.6% |
|
27 |
-
| AiArtLab/sdxl_vae_asym | 111.7% | 101.1% | 89.4% |
|
28 |
| FLUX.1-schnell-vae | 324.0% | 119.8% | 292.0% |
|
29 |
|
30 |
[](https://imgsli.com/NDA3OTgz)
|
|
|
24 |
| madebyollin/sdxl-vae-fp16 | 100% | 100% | 100% |
|
25 |
| KBlueLeaf/EQ-SDXL-VAE | 107.8% | 100.1% | 95.5% |
|
26 |
| AiArtLab/sdxl_vae | 112.3% | 101.8% | 106.6% |
|
|
|
27 |
| FLUX.1-schnell-vae | 324.0% | 119.8% | 292.0% |
|
28 |
|
29 |
[](https://imgsli.com/NDA3OTgz)
|
asymmetric_vae/config.json
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "AsymmetricAutoencoderKL",
|
3 |
-
"_diffusers_version": "0.34.0",
|
4 |
-
"_name_or_path": "asymmetric_vae_empty",
|
5 |
-
"act_fn": "silu",
|
6 |
-
"down_block_out_channels": [
|
7 |
-
128,
|
8 |
-
256,
|
9 |
-
512,
|
10 |
-
512
|
11 |
-
],
|
12 |
-
"down_block_types": [
|
13 |
-
"DownEncoderBlock2D",
|
14 |
-
"DownEncoderBlock2D",
|
15 |
-
"DownEncoderBlock2D",
|
16 |
-
"DownEncoderBlock2D"
|
17 |
-
],
|
18 |
-
"in_channels": 3,
|
19 |
-
"latent_channels": 4,
|
20 |
-
"layers_per_down_block": 2,
|
21 |
-
"layers_per_up_block": 3,
|
22 |
-
"norm_num_groups": 32,
|
23 |
-
"out_channels": 3,
|
24 |
-
"sample_size": 1024,
|
25 |
-
"scaling_factor": 0.13025,
|
26 |
-
"up_block_out_channels": [
|
27 |
-
128,
|
28 |
-
256,
|
29 |
-
512,
|
30 |
-
512
|
31 |
-
],
|
32 |
-
"up_block_types": [
|
33 |
-
"UpDecoderBlock2D",
|
34 |
-
"UpDecoderBlock2D",
|
35 |
-
"UpDecoderBlock2D",
|
36 |
-
"UpDecoderBlock2D"
|
37 |
-
]
|
38 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
asymmetric_vae/diffusion_pytorch_model.safetensors
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ded3c30322578e3371f32a58423b6a3be3a2c3b81d3eb5d35433772be796a1ba
|
3 |
-
size 421473052
|
|
|
|
|
|
|
|
asymmetric_vae_new/config.json
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "AsymmetricAutoencoderKL",
|
3 |
-
"_diffusers_version": "0.35.0.dev0",
|
4 |
-
"_name_or_path": "asymmetric_vae",
|
5 |
-
"act_fn": "silu",
|
6 |
-
"block_out_channels": [
|
7 |
-
128,
|
8 |
-
256,
|
9 |
-
512,
|
10 |
-
512
|
11 |
-
],
|
12 |
-
"down_block_out_channels": [
|
13 |
-
128,
|
14 |
-
256,
|
15 |
-
512,
|
16 |
-
512
|
17 |
-
],
|
18 |
-
"down_block_types": [
|
19 |
-
"DownEncoderBlock2D",
|
20 |
-
"DownEncoderBlock2D",
|
21 |
-
"DownEncoderBlock2D",
|
22 |
-
"DownEncoderBlock2D"
|
23 |
-
],
|
24 |
-
"force_upcast": false,
|
25 |
-
"in_channels": 3,
|
26 |
-
"latent_channels": 4,
|
27 |
-
"layers_per_down_block": 2,
|
28 |
-
"layers_per_up_block": 3,
|
29 |
-
"norm_num_groups": 32,
|
30 |
-
"out_channels": 3,
|
31 |
-
"sample_size": 1024,
|
32 |
-
"scaling_factor": 0.13025,
|
33 |
-
"up_block_out_channels": [
|
34 |
-
128,
|
35 |
-
256,
|
36 |
-
512,
|
37 |
-
512
|
38 |
-
],
|
39 |
-
"up_block_types": [
|
40 |
-
"UpDecoderBlock2D",
|
41 |
-
"UpDecoderBlock2D",
|
42 |
-
"UpDecoderBlock2D",
|
43 |
-
"UpDecoderBlock2D"
|
44 |
-
]
|
45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
asymmetric_vae_new/diffusion_pytorch_model.safetensors
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:df9380b1e8d8b1a36b3d0f9501a854717a911ae9b8d2aebe18809a6eefa9318b
|
3 |
-
size 421473052
|
|
|
|
|
|
|
|
eval_alchemist.py
CHANGED
@@ -5,29 +5,71 @@ import lpips
|
|
5 |
from PIL import Image, UnidentifiedImageError
|
6 |
from tqdm import tqdm
|
7 |
from torch.utils.data import Dataset, DataLoader
|
8 |
-
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop
|
9 |
from diffusers import AutoencoderKL, AsymmetricAutoencoderKL
|
10 |
import random
|
11 |
|
12 |
# --------------------------- Параметры ---------------------------
|
13 |
DEVICE = "cuda"
|
14 |
DTYPE = torch.float16
|
15 |
-
IMAGE_FOLDER = "/workspace/alchemist"
|
16 |
MIN_SIZE = 1280
|
17 |
CROP_SIZE = 512
|
18 |
-
BATCH_SIZE =
|
19 |
-
MAX_IMAGES =
|
20 |
-
NUM_WORKERS = 4
|
|
|
|
|
21 |
|
22 |
# Список VAE для тестирования
|
23 |
VAE_LIST = [
|
|
|
|
|
|
|
24 |
("madebyollin/sdxl-vae-fp16", AutoencoderKL, "madebyollin/sdxl-vae-fp16-fix", None),
|
25 |
-
("
|
26 |
("AiArtLab/sdxl_vae", AutoencoderKL, "AiArtLab/sdxl_vae", None),
|
27 |
-
("AiArtLab/sdxl_vae_asym", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae"),
|
28 |
-
("
|
|
|
|
|
29 |
]
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
# --------------------------- Dataset ---------------------------
|
32 |
class ImageFolderDataset(Dataset):
|
33 |
def __init__(self, root_dir, extensions=('.png',), min_size=1024, crop_size=512, limit=None):
|
@@ -36,18 +78,15 @@ class ImageFolderDataset(Dataset):
|
|
36 |
self.crop_size = crop_size
|
37 |
self.paths = []
|
38 |
|
39 |
-
# Собираем пути к файлам
|
40 |
print("Сканирование папки...")
|
41 |
for root, _, files in os.walk(root_dir):
|
42 |
for fname in files:
|
43 |
if fname.lower().endswith(extensions):
|
44 |
self.paths.append(os.path.join(root, fname))
|
45 |
|
46 |
-
# Ограничение количества
|
47 |
if limit:
|
48 |
self.paths = self.paths[:limit]
|
49 |
|
50 |
-
# Быстрая проверка валидности (опционально, можно убрать для скорости)
|
51 |
print("Проверка изображений...")
|
52 |
valid = []
|
53 |
for p in tqdm(self.paths, desc="Проверка"):
|
@@ -62,11 +101,9 @@ class ImageFolderDataset(Dataset):
|
|
62 |
if len(self.paths) == 0:
|
63 |
raise RuntimeError(f"Не найдено валидных изображений в {root_dir}")
|
64 |
|
65 |
-
# Перемешиваем для случайности
|
66 |
random.shuffle(self.paths)
|
67 |
print(f"Найдено {len(self.paths)} изображений")
|
68 |
|
69 |
-
# Трансформации
|
70 |
self.transform = Compose([
|
71 |
Resize(min_size, interpolation=Image.LANCZOS),
|
72 |
CenterCrop(crop_size),
|
@@ -89,9 +126,14 @@ def process(x):
|
|
89 |
def deprocess(x):
    """Apply the affine map ``x * 0.5 + 0.5`` and return the result.

    The map sends -1 to 0 and 1 to 1; it works on anything that supports
    multiplication and addition with a float.
    """
    half = 0.5
    return half + x * half
|
91 |
|
|
|
|
|
|
|
92 |
# --------------------------- Основной код ---------------------------
|
93 |
if __name__ == "__main__":
|
94 |
-
|
|
|
|
|
95 |
dataset = ImageFolderDataset(
|
96 |
IMAGE_FOLDER,
|
97 |
extensions=('.png',),
|
@@ -103,16 +145,14 @@ if __name__ == "__main__":
|
|
103 |
dataloader = DataLoader(
|
104 |
dataset,
|
105 |
batch_size=BATCH_SIZE,
|
106 |
-
shuffle=False,
|
107 |
num_workers=NUM_WORKERS,
|
108 |
pin_memory=True,
|
109 |
drop_last=False
|
110 |
)
|
111 |
|
112 |
-
# Инициализация LPIPS
|
113 |
lpips_net = lpips.LPIPS(net="vgg").eval().to(DEVICE).requires_grad_(False)
|
114 |
|
115 |
-
# Загрузка VAE моделей
|
116 |
print("\nЗагрузка VAE моделей...")
|
117 |
vaes = []
|
118 |
names = []
|
@@ -120,67 +160,118 @@ if __name__ == "__main__":
|
|
120 |
for name, vae_class, model_path, subfolder in VAE_LIST:
|
121 |
try:
|
122 |
print(f" Загружаю {name}...")
|
123 |
-
|
|
|
|
|
|
|
|
|
124 |
vae = vae.to(DEVICE, DTYPE).eval()
|
125 |
vaes.append(vae)
|
126 |
names.append(name)
|
127 |
except Exception as e:
|
128 |
print(f" ❌ Ошибка загрузки {name}: {e}")
|
129 |
|
130 |
-
# Оценка метрик
|
131 |
print("\nОценка метрик...")
|
132 |
-
results = {name: {"mse": 0.0, "psnr": 0.0, "lpips": 0.0, "count": 0} for name in names}
|
133 |
-
|
|
|
|
|
|
|
134 |
with torch.no_grad():
|
|
|
135 |
for batch in tqdm(dataloader, desc="Обработка батчей"):
|
136 |
-
batch = batch.to(DEVICE)
|
137 |
-
test_inp = process(batch).to(DTYPE)
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
141 |
latent = vae.encode(test_inp).latent_dist.mode()
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
145 |
for i in range(batch.shape[0]):
|
146 |
img_orig = batch[i:i+1]
|
147 |
img_recon = recon[i:i+1]
|
148 |
-
|
149 |
mse = F.mse_loss(img_orig, img_recon).item()
|
150 |
psnr = 10 * torch.log10(1 / torch.tensor(mse)).item()
|
151 |
lpips_val = lpips_net(img_orig, img_recon, normalize=True).mean().item()
|
152 |
-
|
153 |
results[name]["mse"] += mse
|
154 |
results[name]["psnr"] += psnr
|
155 |
results[name]["lpips"] += lpips_val
|
|
|
156 |
results[name]["count"] += 1
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
# Усреднение результатов
|
159 |
for name in names:
|
160 |
count = results[name]["count"]
|
161 |
results[name]["mse"] /= count
|
162 |
results[name]["psnr"] /= count
|
163 |
results[name]["lpips"] /= count
|
|
|
164 |
|
165 |
# Вывод абсолютных значений
|
166 |
print("\n=== Абсолютные значения ===")
|
167 |
for name in names:
|
168 |
-
print(f"{name:30s}: MSE: {results[name]['mse']:.3e}, PSNR: {results[name]['psnr']:.4f},
|
|
|
169 |
|
170 |
# Вывод таблицы с процентами
|
171 |
print("\n=== Сравнение с первой моделью (%) ===")
|
172 |
-
print(f"| {'Модель':30s} | {'MSE':>10s} | {'PSNR':>10s} | {'LPIPS':>10s} |")
|
173 |
-
print(f"|{'-'*32}|{'-'*12}|{'-'*12}|{'-'*12}|")
|
174 |
|
175 |
baseline = names[0]
|
176 |
for name in names:
|
|
|
177 |
mse_pct = (results[baseline]["mse"] / results[name]["mse"]) * 100
|
|
|
178 |
psnr_pct = (results[name]["psnr"] / results[baseline]["psnr"]) * 100
|
|
|
179 |
lpips_pct = (results[baseline]["lpips"] / results[name]["lpips"]) * 100
|
|
|
180 |
|
181 |
if name == baseline:
|
182 |
-
print(f"| {name:30s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} |")
|
183 |
else:
|
184 |
-
print(f"| {name:30s} | {f'{mse_pct:.1f}%':>10s} | {f'{psnr_pct:.1f}%':>10s} |
|
|
|
185 |
|
186 |
-
print("\n✅ Готово!")
|
|
|
5 |
from PIL import Image, UnidentifiedImageError
|
6 |
from tqdm import tqdm
|
7 |
from torch.utils.data import Dataset, DataLoader
|
8 |
+
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop,ToPILImage
|
9 |
from diffusers import AutoencoderKL, AsymmetricAutoencoderKL
|
10 |
import random
|
11 |
|
12 |
# --------------------------- Parameters ---------------------------
DEVICE = "cuda"
DTYPE = torch.float16
# Dataset archive:
#   wget https://huggingface.co/datasets/AiArtLab/alchemist/resolve/main/alchemist.zip
IMAGE_FOLDER = "/workspace/alchemist"
MIN_SIZE = 1280
CROP_SIZE = 512
BATCH_SIZE = 5
MAX_IMAGES = 100
NUM_WORKERS = 4
NUM_SAMPLES_TO_SAVE = 10  # how many examples to save (0 = do not save any)
SAMPLES_FOLDER = "vaetest"

# VAEs to evaluate. Each entry is
#   (display name, model class, repo id / path, subfolder or None).
# The percentage table printed at the end is relative to the first model that
# loads successfully, so keep the reference model first among active entries.
VAE_LIST = [
    # ("stable-diffusion-v1-5/stable-diffusion-v1-5", AutoencoderKL, "stable-diffusion-v1-5/stable-diffusion-v1-5", "vae"),
    # ("cross-attention/asymmetric-autoencoder-kl-x-1-5", AsymmetricAutoencoderKL, "cross-attention/asymmetric-autoencoder-kl-x-1-5", None),
    ("madebyollin/sdxl-vae-fp16", AutoencoderKL, "madebyollin/sdxl-vae-fp16-fix", None),
    # ("AiArtLab/sdxs", AutoencoderKL, "AiArtLab/sdxs", "vae"),
    ("AiArtLab/sdxl_vae", AutoencoderKL, "AiArtLab/sdxl_vae", None),
    # NOTE(review): the change that removed the asymmetric VAEs deleted the
    # "asymmetric_vae" and "asymmetric_vae_new" folders (presumably from the
    # AiArtLab/sdxl_vae repo these entries point at - confirm). Loading them
    # can then only end in the "load error" branch, so both are disabled.
    # Re-enable only against a revision that still ships those folders.
    # ("AiArtLab/sdxl_vae_asym", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae"),
    # ("AiArtLab/sdxl_vae_asym_new", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae_new"),
    # ("KBlueLeaf/EQ-SDXL-VAE", AutoencoderKL, "KBlueLeaf/EQ-SDXL-VAE", None),
    # ("FLUX.1-schnell-vae", AutoencoderKL, "black-forest-labs/FLUX.1-schnell", "vae"),
]
|
37 |
|
38 |
+
# --------------------------- Sobel Edge Detection ---------------------------
# Sobel kernels are built once at module level, shaped [1, 1, 3, 3] so they can
# be tiled per channel for a depthwise convolution.
_sobel_kx = torch.tensor(
    [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32
).view(1, 1, 3, 3)
_sobel_ky = torch.tensor(
    [[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32
).view(1, 1, 3, 3)


def sobel_edges(x: torch.Tensor) -> torch.Tensor:
    """Return the per-channel Sobel gradient magnitude of an image batch.

    Args:
        x: tensor of shape [B, C, H, W]. Callers in this file pass values in
            [-1, 1]; the computation itself does not depend on the range.

    Returns:
        Tensor of shape [B, C, H, W] holding sqrt(gx^2 + gy^2 + 1e-12), where
        gx and gy are the horizontal and vertical Sobel responses. Each
        channel is filtered independently (groups == C) and the input is
        zero-padded by one pixel so the spatial size is preserved.
    """
    channels = x.size(1)
    # Same filter for every channel: tile the kernel and run a depthwise conv
    # on the input's own device and dtype.
    grad_x, grad_y = (
        F.conv2d(
            x,
            kernel.to(x.device, x.dtype).repeat(channels, 1, 1, 1),
            padding=1,
            groups=channels,
        )
        for kernel in (_sobel_kx, _sobel_ky)
    )
    squared_magnitude = grad_x * grad_x + grad_y * grad_y + 1e-12
    return squared_magnitude.sqrt()
|
55 |
+
|
56 |
+
def compute_edge_loss(real: torch.Tensor, fake: torch.Tensor) -> float:
    """Return the L1 distance between the Sobel edge maps of two image batches.

    Args:
        real: reference images, [B, C, H, W], expected in [0, 1].
        fake: reconstructed images, same shape and range as ``real``.

    Returns:
        The mean absolute difference between the two edge maps as a Python
        float.
    """
    # Both inputs are rescaled from [0, 1] to [-1, 1] before edge extraction.
    edges_fake, edges_real = (sobel_edges(img * 2 - 1) for img in (fake, real))
    return F.l1_loss(edges_fake, edges_real).item()
|
72 |
+
|
73 |
# --------------------------- Dataset ---------------------------
|
74 |
class ImageFolderDataset(Dataset):
|
75 |
def __init__(self, root_dir, extensions=('.png',), min_size=1024, crop_size=512, limit=None):
|
|
|
78 |
self.crop_size = crop_size
|
79 |
self.paths = []
|
80 |
|
|
|
81 |
print("Сканирование папки...")
|
82 |
for root, _, files in os.walk(root_dir):
|
83 |
for fname in files:
|
84 |
if fname.lower().endswith(extensions):
|
85 |
self.paths.append(os.path.join(root, fname))
|
86 |
|
|
|
87 |
if limit:
|
88 |
self.paths = self.paths[:limit]
|
89 |
|
|
|
90 |
print("Проверка изображений...")
|
91 |
valid = []
|
92 |
for p in tqdm(self.paths, desc="Проверка"):
|
|
|
101 |
if len(self.paths) == 0:
|
102 |
raise RuntimeError(f"Не найдено валидных изображений в {root_dir}")
|
103 |
|
|
|
104 |
random.shuffle(self.paths)
|
105 |
print(f"Найдено {len(self.paths)} изображений")
|
106 |
|
|
|
107 |
self.transform = Compose([
|
108 |
Resize(min_size, interpolation=Image.LANCZOS),
|
109 |
CenterCrop(crop_size),
|
|
|
126 |
def deprocess(x):
    """Apply the affine map ``x * 0.5 + 0.5`` and return the result.

    The map sends -1 to 0 and 1 to 1; it works on anything that supports
    multiplication and addition with a float.
    """
    half = 0.5
    return half + x * half
|
128 |
|
129 |
+
def _sanitize_name(name: str) -> str:
|
130 |
+
return name.replace('/', '_').replace('-', '_')
|
131 |
+
|
132 |
# --------------------------- Основной код ---------------------------
|
133 |
if __name__ == "__main__":
|
134 |
+
if NUM_SAMPLES_TO_SAVE > 0:
|
135 |
+
os.makedirs(SAMPLES_FOLDER, exist_ok=True)
|
136 |
+
|
137 |
dataset = ImageFolderDataset(
|
138 |
IMAGE_FOLDER,
|
139 |
extensions=('.png',),
|
|
|
145 |
dataloader = DataLoader(
|
146 |
dataset,
|
147 |
batch_size=BATCH_SIZE,
|
148 |
+
shuffle=False,
|
149 |
num_workers=NUM_WORKERS,
|
150 |
pin_memory=True,
|
151 |
drop_last=False
|
152 |
)
|
153 |
|
|
|
154 |
lpips_net = lpips.LPIPS(net="vgg").eval().to(DEVICE).requires_grad_(False)
|
155 |
|
|
|
156 |
print("\nЗагрузка VAE моделей...")
|
157 |
vaes = []
|
158 |
names = []
|
|
|
160 |
for name, vae_class, model_path, subfolder in VAE_LIST:
|
161 |
try:
|
162 |
print(f" Загружаю {name}...")
|
163 |
+
# Исправлена загрузка для variant
|
164 |
+
if "sdxs" in model_path:
|
165 |
+
vae = vae_class.from_pretrained(model_path, subfolder=subfolder, variant="fp16")
|
166 |
+
else:
|
167 |
+
vae = vae_class.from_pretrained(model_path, subfolder=subfolder)
|
168 |
vae = vae.to(DEVICE, DTYPE).eval()
|
169 |
vaes.append(vae)
|
170 |
names.append(name)
|
171 |
except Exception as e:
|
172 |
print(f" ❌ Ошибка загрузки {name}: {e}")
|
173 |
|
|
|
174 |
print("\nОценка метрик...")
|
175 |
+
results = {name: {"mse": 0.0, "psnr": 0.0, "lpips": 0.0, "edge": 0.0, "count": 0} for name in names}
|
176 |
+
|
177 |
+
to_pil = ToPILImage()
|
178 |
+
|
179 |
+
# >>>>>>>> ОСНОВНЫЕ ИЗМЕНЕНИЯ ЗДЕСЬ (KISS) <<<<<<<<
|
180 |
with torch.no_grad():
|
181 |
+
images_saved = 0 # считаем именно КОЛ-ВО ИЗОБРАЖЕНИЙ, а не сохранённых файлов
|
182 |
for batch in tqdm(dataloader, desc="Обработка батчей"):
|
183 |
+
batch = batch.to(DEVICE) # [B,3,H,W] в [0,1]
|
184 |
+
test_inp = process(batch).to(DTYPE) # [-1,1] для энкодера
|
185 |
+
|
186 |
+
# 1) считаем реконструкции для всех VAE на весь батч
|
187 |
+
recon_list = []
|
188 |
+
for vae in vaes:
|
189 |
latent = vae.encode(test_inp).latent_dist.mode()
|
190 |
+
dec = vae.decode(latent).sample.float() # [-1,1] (как правило)
|
191 |
+
recon = deprocess(dec).clamp(0.0, 1.0) # -> [0,1], clamp убирает артефакты
|
192 |
+
recon_list.append(recon)
|
193 |
+
|
194 |
+
# 2) обновляем метрики (по каждой VAE)
|
195 |
+
for recon, name in zip(recon_list, names):
|
196 |
for i in range(batch.shape[0]):
|
197 |
img_orig = batch[i:i+1]
|
198 |
img_recon = recon[i:i+1]
|
|
|
199 |
mse = F.mse_loss(img_orig, img_recon).item()
|
200 |
psnr = 10 * torch.log10(1 / torch.tensor(mse)).item()
|
201 |
lpips_val = lpips_net(img_orig, img_recon, normalize=True).mean().item()
|
202 |
+
edge_loss = compute_edge_loss(img_orig, img_recon)
|
203 |
results[name]["mse"] += mse
|
204 |
results[name]["psnr"] += psnr
|
205 |
results[name]["lpips"] += lpips_val
|
206 |
+
results[name]["edge"] += edge_loss
|
207 |
results[name]["count"] += 1
|
208 |
+
|
209 |
+
# 3) сохраняем ровно NUM_SAMPLES_TO_SAVE изображений (orig + все VAE + общий коллаж)
|
210 |
+
if NUM_SAMPLES_TO_SAVE > 0:
|
211 |
+
for i in range(batch.shape[0]):
|
212 |
+
if images_saved >= NUM_SAMPLES_TO_SAVE:
|
213 |
+
break
|
214 |
+
idx_str = f"{images_saved + 1:03d}"
|
215 |
+
|
216 |
+
# original
|
217 |
+
orig_pil = to_pil(batch[i].detach().float().cpu())
|
218 |
+
orig_pil.save(os.path.join(SAMPLES_FOLDER, f"{idx_str}_orig.png"))
|
219 |
+
|
220 |
+
# per-VAE decodes
|
221 |
+
tiles = [orig_pil]
|
222 |
+
for recon, name in zip(recon_list, names):
|
223 |
+
recon_pil = to_pil(recon[i].detach().cpu())
|
224 |
+
recon_pil.save(os.path.join(
|
225 |
+
SAMPLES_FOLDER, f"{idx_str}_decoded_{_sanitize_name(name)}.png"
|
226 |
+
))
|
227 |
+
tiles.append(recon_pil)
|
228 |
+
|
229 |
+
# общий коллаж: [orig | vae1 | vae2 | ...]
|
230 |
+
collage_w = CROP_SIZE * len(tiles)
|
231 |
+
collage_h = CROP_SIZE
|
232 |
+
collage = Image.new("RGB", (collage_w, collage_h))
|
233 |
+
x = 0
|
234 |
+
for tile in tiles:
|
235 |
+
collage.paste(tile, (x, 0))
|
236 |
+
x += CROP_SIZE
|
237 |
+
collage.save(os.path.join(SAMPLES_FOLDER, f"{idx_str}_all.png"))
|
238 |
+
|
239 |
+
images_saved += 1
|
240 |
+
|
241 |
+
|
242 |
# Усреднение результатов
|
243 |
for name in names:
|
244 |
count = results[name]["count"]
|
245 |
results[name]["mse"] /= count
|
246 |
results[name]["psnr"] /= count
|
247 |
results[name]["lpips"] /= count
|
248 |
+
results[name]["edge"] /= count
|
249 |
|
250 |
# Вывод абсолютных значений
|
251 |
print("\n=== Абсолютные значения ===")
|
252 |
for name in names:
|
253 |
+
print(f"{name:30s}: MSE: {results[name]['mse']:.3e}, PSNR: {results[name]['psnr']:.4f}, "
|
254 |
+
f"LPIPS: {results[name]['lpips']:.4f}, Edge: {results[name]['edge']:.4f}")
|
255 |
|
256 |
# Вывод таблицы с процентами
|
257 |
print("\n=== Сравнение с первой моделью (%) ===")
|
258 |
+
print(f"| {'Модель':30s} | {'MSE':>10s} | {'PSNR':>10s} | {'LPIPS':>10s} | {'Edge':>10s} |")
|
259 |
+
print(f"|{'-'*32}|{'-'*12}|{'-'*12}|{'-'*12}|{'-'*12}|")
|
260 |
|
261 |
baseline = names[0]
|
262 |
for name in names:
|
263 |
+
# Для MSE, LPIPS и Edge: меньше = лучше, поэтому инвертируем
|
264 |
mse_pct = (results[baseline]["mse"] / results[name]["mse"]) * 100
|
265 |
+
# Для PSNR: больше = лучше
|
266 |
psnr_pct = (results[name]["psnr"] / results[baseline]["psnr"]) * 100
|
267 |
+
# Для LPIPS и Edge: меньше = лучше
|
268 |
lpips_pct = (results[baseline]["lpips"] / results[name]["lpips"]) * 100
|
269 |
+
edge_pct = (results[baseline]["edge"] / results[name]["edge"]) * 100
|
270 |
|
271 |
if name == baseline:
|
272 |
+
print(f"| {name:30s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} |")
|
273 |
else:
|
274 |
+
print(f"| {name:30s} | {f'{mse_pct:.1f}%':>10s} | {f'{psnr_pct:.1f}%':>10s} | "
|
275 |
+
f"{f'{lpips_pct:.1f}%':>10s} | {f'{edge_pct:.1f}%':>10s} |")
|
276 |
|
277 |
+
print("\n✅ Готово!")
|
samples/sample_0.jpg
DELETED
Git LFS Details
|
samples/sample_1.jpg
DELETED
Git LFS Details
|
samples/sample_2.jpg
DELETED
Git LFS Details
|
samples/sample_decoded.jpg
DELETED
Git LFS Details
|
samples/sample_real.jpg
DELETED
Git LFS Details
|
train_sdxl_vae_gpt5.py → train_sdxl_vae.py
RENAMED
@@ -24,7 +24,7 @@ from collections import deque
|
|
24 |
|
25 |
# --------------------------- Параметры ---------------------------
|
26 |
ds_path = "/workspace/png"
|
27 |
-
project = "
|
28 |
batch_size = 3
|
29 |
base_learning_rate = 6e-6
|
30 |
min_learning_rate = 1e-6
|
@@ -50,7 +50,7 @@ clip_grad_norm = 1.0
|
|
50 |
mixed_precision = "no" # или "fp16"/"bf16" при поддержке
|
51 |
gradient_accumulation_steps = 5
|
52 |
generated_folder = "samples"
|
53 |
-
save_as = "
|
54 |
num_workers = 0
|
55 |
device = None # accelerator задаст устройство
|
56 |
|
@@ -65,7 +65,7 @@ loss_ratios = {
|
|
65 |
median_coeff_steps = 256 # за сколько шагов считать медианные коэффициенты
|
66 |
|
67 |
# --------------------------- параметры препроцессинга ---------------------------
|
68 |
-
resize_long_side = 1280 # если None или 0 — ресайза не будет; рекомендовано
|
69 |
|
70 |
Path(generated_folder).mkdir(parents=True, exist_ok=True)
|
71 |
|
|
|
24 |
|
25 |
# --------------------------- Параметры ---------------------------
|
26 |
ds_path = "/workspace/png"
|
27 |
+
project = "vae"
|
28 |
batch_size = 3
|
29 |
base_learning_rate = 6e-6
|
30 |
min_learning_rate = 1e-6
|
|
|
50 |
mixed_precision = "no" # или "fp16"/"bf16" при поддержке
|
51 |
gradient_accumulation_steps = 5
|
52 |
generated_folder = "samples"
|
53 |
+
save_as = "vae_nightly"
|
54 |
num_workers = 0
|
55 |
device = None # accelerator задаст устройство
|
56 |
|
|
|
65 |
median_coeff_steps = 256 # за сколько шагов считать медианные коэффициенты
|
66 |
|
67 |
# --------------------------- параметры препроцессинга ---------------------------
|
68 |
+
resize_long_side = 1280 # если None или 0 — ресайза не будет; рекомендовано 1280
|
69 |
|
70 |
Path(generated_folder).mkdir(parents=True, exist_ok=True)
|
71 |
|
vae/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 334643268
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03f2412467f6bedce9efeddba5860b5ec0d3267931d14c500d4bd7a878e14cbd
|
3 |
size 334643268
|