notebooks
Browse files
- 123456789.jpg +3 -0
- README.md +2 -2
- challenge.ipynb +0 -0
- contents/buildings.jpg +3 -0
- contents/chinatown.jpg +3 -0
- contents/cosplayers.jpg +3 -0
- contents/faces.jpg +3 -0
- contents/flowers.jpg +3 -0
- contents/magazines.jpg +3 -0
- vae_comp.ipynb +199 -0
- vae_results.png +3 -0
- vaetest/decoded_AiArtLab_vae.png +3 -0
- vaetest/decoded_EQ-SDXL-VAE.png +3 -0
- vaetest/decoded_sdxl.png +3 -0
- vaetest/dist_AiArtLab_vae.png +3 -0
- vaetest/dist_EQ-SDXL-VAE.png +3 -0
- vaetest/dist_sdxl.png +3 -0
- vaetest/original.jpg +3 -0
123456789.jpg
ADDED
![]() |
Git LFS Details
|
README.md
CHANGED
@@ -45,9 +45,9 @@ library_name: diffusers
|
|
45 |
- Balanced LPIPS and MAE at 90/10 ratio.
|
46 |
- Used median perceptual_loss_weight for better balance.
|
47 |
|
48 |
-
##
|
49 |
|
50 |
-
https://imgsli.com/
|
51 |
|
52 |
## Donations
|
53 |
|
|
|
45 |
- Balanced LPIPS and MAE at 90/10 ratio.
|
46 |
- Used median perceptual_loss_weight for better balance.
|
47 |
|
48 |
+
## Compare
|
49 |
|
50 |
+
https://imgsli.com/NDA3Njgw/2/3
|
51 |
|
52 |
## Donations
|
53 |
|
challenge.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
contents/buildings.jpg
ADDED
![]() |
Git LFS Details
|
contents/chinatown.jpg
ADDED
![]() |
Git LFS Details
|
contents/cosplayers.jpg
ADDED
![]() |
Git LFS Details
|
contents/faces.jpg
ADDED
![]() |
Git LFS Details
|
contents/flowers.jpg
ADDED
![]() |
Git LFS Details
|
contents/magazines.jpg
ADDED
![]() |
Git LFS Details
|
vae_comp.ipynb
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"id": "b3b23a40-8354-4287-bac2-32f9d084fff3",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"sdxl log-variance: -0.105\n",
|
14 |
+
"AiArtLab_vae log-variance: -0.105\n"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"data": {
|
19 |
+
"application/vnd.jupyter.widget-view+json": {
|
20 |
+
"model_id": "0d4d4ef4209b42ec82f6125e39067eed",
|
21 |
+
"version_major": 2,
|
22 |
+
"version_minor": 0
|
23 |
+
},
|
24 |
+
"text/plain": [
|
25 |
+
"config.json: 0.00B [00:00, ?B/s]"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
"metadata": {},
|
29 |
+
"output_type": "display_data"
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"data": {
|
33 |
+
"application/vnd.jupyter.widget-view+json": {
|
34 |
+
"model_id": "c88119452e114e3e91ef56cb56c06a0c",
|
35 |
+
"version_major": 2,
|
36 |
+
"version_minor": 0
|
37 |
+
},
|
38 |
+
"text/plain": [
|
39 |
+
"diffusion_pytorch_model.safetensors: 0%| | 0.00/167M [00:00<?, ?B/s]"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
"metadata": {},
|
43 |
+
"output_type": "display_data"
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"name": "stdout",
|
47 |
+
"output_type": "stream",
|
48 |
+
"text": [
|
49 |
+
"EQ-SDXL-VAE log-variance: -3.922\n",
|
50 |
+
"Готово\n"
|
51 |
+
]
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"source": [
|
55 |
+
"import torch\n",
|
56 |
+
"from PIL import Image\n",
|
57 |
+
"from diffusers import AutoencoderKL\n",
|
58 |
+
"from torchvision.transforms.functional import to_pil_image\n",
|
59 |
+
"import matplotlib.pyplot as plt\n",
|
60 |
+
"import os\n",
|
61 |
+
"from torchvision.transforms import ToTensor, Normalize, CenterCrop\n",
|
62 |
+
"\n",
|
63 |
+
"# путь к вашей картинке\n",
|
64 |
+
"IMG_PATH = \"123456789.jpg\"\n",
|
65 |
+
"OUT_DIR = \"vaetest\"\n",
|
66 |
+
"device = \"cuda\"\n",
|
67 |
+
"dtype = torch.float32 # ← единый float32\n",
|
68 |
+
"os.makedirs(OUT_DIR, exist_ok=True)\n",
|
69 |
+
"\n",
|
70 |
+
"# список VAE\n",
|
71 |
+
"VAES = {\n",
|
72 |
+
" \"sdxl\": \"madebyollin/sdxl-vae-fp16-fix\",\n",
|
73 |
+
" \"AiArtLab_vae\": \"AiArtLab/sdxl_vae\",\n",
|
74 |
+
" \"EQ-SDXL-VAE\": \"KBlueLeaf/EQ-SDXL-VAE\"\n",
|
75 |
+
"}\n",
|
76 |
+
"\n",
|
77 |
+
"def load_image(path):\n",
|
78 |
+
" img = Image.open(path).convert('RGB')\n",
|
79 |
+
" # обрезаем до кратности 8\n",
|
80 |
+
" w, h = img.size\n",
|
81 |
+
" img = CenterCrop((h // 8 * 8, w // 8 * 8))(img)\n",
|
82 |
+
" tensor = ToTensor()(img).unsqueeze(0) # [0,1]\n",
|
83 |
+
" tensor = Normalize(mean=[0.5]*3, std=[0.5]*3)(tensor) # [-1,1]\n",
|
84 |
+
" return img, tensor.to(device, dtype=dtype)\n",
|
85 |
+
"\n",
|
86 |
+
"# обратно в PIL\n",
|
87 |
+
"def tensor_to_img(t):\n",
|
88 |
+
" t = (t * 0.5 + 0.5).clamp(0, 1)\n",
|
89 |
+
" return to_pil_image(t[0])\n",
|
90 |
+
"\n",
|
91 |
+
"def logvariance(latents):\n",
|
92 |
+
" \"\"\"Возвращает лог-дисперсию по всем элементам.\"\"\"\n",
|
93 |
+
" return torch.log(latents.var() + 1e-8).item()\n",
|
94 |
+
"\n",
|
95 |
+
"def plot_latent_distribution(latents, title, save_path):\n",
|
96 |
+
" \"\"\"Гистограмма + QQ-plot.\"\"\"\n",
|
97 |
+
" lat = latents.detach().cpu().numpy().flatten()\n",
|
98 |
+
" plt.figure(figsize=(10, 4))\n",
|
99 |
+
"\n",
|
100 |
+
" # гистограмма\n",
|
101 |
+
" plt.subplot(1, 2, 1)\n",
|
102 |
+
" plt.hist(lat, bins=100, density=True, alpha=0.7, color='steelblue')\n",
|
103 |
+
" plt.title(f\"{title} histogram\")\n",
|
104 |
+
" plt.xlabel(\"latent value\")\n",
|
105 |
+
" plt.ylabel(\"density\")\n",
|
106 |
+
"\n",
|
107 |
+
" # QQ-plot\n",
|
108 |
+
" from scipy.stats import probplot\n",
|
109 |
+
" plt.subplot(1, 2, 2)\n",
|
110 |
+
" probplot(lat, dist=\"norm\", plot=plt)\n",
|
111 |
+
" plt.title(f\"{title} QQ-plot\")\n",
|
112 |
+
"\n",
|
113 |
+
" plt.tight_layout()\n",
|
114 |
+
" plt.savefig(save_path)\n",
|
115 |
+
" plt.close()\n",
|
116 |
+
"\n",
|
117 |
+
"for name, repo in VAES.items():\n",
|
118 |
+
" if name==\"flux\":\n",
|
119 |
+
" vae = AutoencoderKL.from_pretrained(repo, torch_dtype=dtype).to(device)\n",
|
120 |
+
" else:\n",
|
121 |
+
" vae = AutoencoderKL.from_pretrained(repo, torch_dtype=dtype).to(device)#, subfolder=\"vae\", variant=\"fp16\"\n",
|
122 |
+
"\n",
|
123 |
+
" cfg = vae.config\n",
|
124 |
+
" scale = getattr(cfg, \"scaling_factor\", 1.)\n",
|
125 |
+
" shift = getattr(cfg, \"shift_factor\", 0.0)\n",
|
126 |
+
" mean = getattr(cfg, \"latents_mean\", None)\n",
|
127 |
+
" std = getattr(cfg, \"latents_std\", None)\n",
|
128 |
+
"\n",
|
129 |
+
" C = 4 # 4 для SDXL\n",
|
130 |
+
" if mean is not None:\n",
|
131 |
+
" mean = torch.tensor(mean, device=device, dtype=dtype).view(1, C, 1, 1)\n",
|
132 |
+
" if std is not None:\n",
|
133 |
+
" std = torch.tensor(std, device=device, dtype=dtype).view(1, C, 1, 1)\n",
|
134 |
+
" if shift is not None:\n",
|
135 |
+
" shift = torch.tensor(shift, device=device, dtype=dtype)\n",
|
136 |
+
" else:\n",
|
137 |
+
" shift = 0.0 \n",
|
138 |
+
"\n",
|
139 |
+
" scale = torch.tensor(scale, device=device, dtype=dtype)\n",
|
140 |
+
"\n",
|
141 |
+
" img, x = load_image(IMG_PATH)\n",
|
142 |
+
" img.save(os.path.join(OUT_DIR, f\"original.jpg\"))\n",
|
143 |
+
"\n",
|
144 |
+
" with torch.no_grad():\n",
|
145 |
+
" # encode\n",
|
146 |
+
" latents = vae.encode(x).latent_dist.sample().to(dtype)\n",
|
147 |
+
" if mean is not None and std is not None:\n",
|
148 |
+
" latents = (latents - mean) / std\n",
|
149 |
+
" latents = latents * scale + shift\n",
|
150 |
+
"\n",
|
151 |
+
" lv = logvariance(latents)\n",
|
152 |
+
" print(f\"{name} log-variance: {lv:.3f}\")\n",
|
153 |
+
"\n",
|
154 |
+
" # график\n",
|
155 |
+
" plot_latent_distribution(latents, f\"{name}_latents\",\n",
|
156 |
+
" os.path.join(OUT_DIR, f\"dist_{name}.png\"))\n",
|
157 |
+
"\n",
|
158 |
+
" # decode\n",
|
159 |
+
" latents = (latents - shift) / scale\n",
|
160 |
+
" if mean is not None and std is not None:\n",
|
161 |
+
" latents = latents * std + mean\n",
|
162 |
+
" rec = vae.decode(latents).sample\n",
|
163 |
+
"\n",
|
164 |
+
" tensor_to_img(rec).save(os.path.join(OUT_DIR, f\"decoded_{name}.png\"))\n",
|
165 |
+
"\n",
|
166 |
+
"print(\"Готово\")"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "code",
|
171 |
+
"execution_count": null,
|
172 |
+
"id": "200b72ab-1978-4d71-9aba-b1ef97cf0b27",
|
173 |
+
"metadata": {},
|
174 |
+
"outputs": [],
|
175 |
+
"source": []
|
176 |
+
}
|
177 |
+
],
|
178 |
+
"metadata": {
|
179 |
+
"kernelspec": {
|
180 |
+
"display_name": "Python 3 (ipykernel)",
|
181 |
+
"language": "python",
|
182 |
+
"name": "python3"
|
183 |
+
},
|
184 |
+
"language_info": {
|
185 |
+
"codemirror_mode": {
|
186 |
+
"name": "ipython",
|
187 |
+
"version": 3
|
188 |
+
},
|
189 |
+
"file_extension": ".py",
|
190 |
+
"mimetype": "text/x-python",
|
191 |
+
"name": "python",
|
192 |
+
"nbconvert_exporter": "python",
|
193 |
+
"pygments_lexer": "ipython3",
|
194 |
+
"version": "3.11.10"
|
195 |
+
}
|
196 |
+
},
|
197 |
+
"nbformat": 4,
|
198 |
+
"nbformat_minor": 5
|
199 |
+
}
|
vae_results.png
ADDED
![]() |
Git LFS Details
|
vaetest/decoded_AiArtLab_vae.png
ADDED
![]() |
Git LFS Details
|
vaetest/decoded_EQ-SDXL-VAE.png
ADDED
![]() |
Git LFS Details
|
vaetest/decoded_sdxl.png
ADDED
![]() |
Git LFS Details
|
vaetest/dist_AiArtLab_vae.png
ADDED
![]() |
Git LFS Details
|
vaetest/dist_EQ-SDXL-VAE.png
ADDED
![]() |
Git LFS Details
|
vaetest/dist_sdxl.png
ADDED
![]() |
Git LFS Details
|
vaetest/original.jpg
ADDED
![]() |
Git LFS Details
|