notebooks

Browse files

Files changed (18) hide show

123456789.jpg +3 -0
README.md +2 -2
challenge.ipynb +0 -0
contents/buildings.jpg +3 -0
contents/chinatown.jpg +3 -0
contents/cosplayers.jpg +3 -0
contents/faces.jpg +3 -0
contents/flowers.jpg +3 -0
contents/magazines.jpg +3 -0
vae_comp.ipynb +199 -0
vae_results.png +3 -0
vaetest/decoded_AiArtLab_vae.png +3 -0
vaetest/decoded_EQ-SDXL-VAE.png +3 -0
vaetest/decoded_sdxl.png +3 -0
vaetest/dist_AiArtLab_vae.png +3 -0
vaetest/dist_EQ-SDXL-VAE.png +3 -0
vaetest/dist_sdxl.png +3 -0
vaetest/original.jpg +3 -0

123456789.jpg ADDED Viewed

Git LFS Details

SHA256: 131522c2f1db361170fb7f8819138893ccec8c1be544509b03aee277c3118e31
Pointer size: 131 Bytes
Size of remote file: 215 kB

README.md CHANGED Viewed

@@ -45,9 +45,9 @@ library_name: diffusers
  - Balanced LPIPS and MAE at 90/10 ratio.
  - Used median perceptual_loss_weight for better balance.
-## Results
-https://imgsli.com/NDA3NTEy/1/2
 ## Donations

  - Balanced LPIPS and MAE at 90/10 ratio.
  - Used median perceptual_loss_weight for better balance.
+## Compare
+https://imgsli.com/NDA3Njgw/2/3
 ## Donations

challenge.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

contents/buildings.jpg ADDED Viewed

Git LFS Details

SHA256: 1a4a5187b863b6a261a28589838aa6c06f17e81ec2cd24172be81a39e051d975
Pointer size: 131 Bytes
Size of remote file: 650 kB

contents/chinatown.jpg ADDED Viewed

Git LFS Details

SHA256: 94daa5d2fe904a798ce59a4a1e8a7b023ebd7fbb35c5ba264dca1bb5ee03fbd0
Pointer size: 131 Bytes
Size of remote file: 340 kB

contents/cosplayers.jpg ADDED Viewed

Git LFS Details

SHA256: 2bfc7ec5ca5d7b68ef0f75bd7eef3fb32895ee9df865c2bcf51d0afa6e8f31f5
Pointer size: 131 Bytes
Size of remote file: 253 kB

contents/faces.jpg ADDED Viewed

Git LFS Details

SHA256: db124297c0da55ab93d86d84b1491285ab3a762bcb5b0e0f27bcc3341d57b9bf
Pointer size: 131 Bytes
Size of remote file: 262 kB

contents/flowers.jpg ADDED Viewed

Git LFS Details

SHA256: b87e5685178f4202da2f32190d1088a554c304363945e4fe21443c47a8e74413
Pointer size: 132 Bytes
Size of remote file: 2.9 MB

contents/magazines.jpg ADDED Viewed

Git LFS Details

SHA256: 65bb54b0219a6ce12e0494dfc58b9ba60bdf54bc61c003d9b8e7ae2a15706d51
Pointer size: 131 Bytes
Size of remote file: 788 kB

vae_comp.ipynb ADDED Viewed

	@@ -0,0 +1,199 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b3b23a40-8354-4287-bac2-32f9d084fff3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sdxl log-variance: -0.105\n",
+      "AiArtLab_vae log-variance: -0.105\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0d4d4ef4209b42ec82f6125e39067eed",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c88119452e114e3e91ef56cb56c06a0c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "diffusion_pytorch_model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "EQ-SDXL-VAE log-variance: -3.922\n",
+      "Готово\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "from PIL import Image\n",
+    "from diffusers import AutoencoderKL\n",
+    "from torchvision.transforms.functional import to_pil_image\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os\n",
+    "from torchvision.transforms import ToTensor, Normalize, CenterCrop\n",
+    "\n",
+    "# path to the input image\n",
+    "IMG_PATH = \"123456789.jpg\"\n",
+    "OUT_DIR  = \"vaetest\"\n",
+    "# fall back to CPU so the notebook still runs on machines without a GPU\n",
+    "device   = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "dtype    = torch.float32          # single float32 precision everywhere\n",
+    "os.makedirs(OUT_DIR, exist_ok=True)\n",
+    "\n",
+    "# VAEs to compare: short display name -> Hugging Face repo id\n",
+    "VAES = {\n",
+    "    \"sdxl\": \"madebyollin/sdxl-vae-fp16-fix\",\n",
+    "    \"AiArtLab_vae\": \"AiArtLab/sdxl_vae\",\n",
+    "    \"EQ-SDXL-VAE\": \"KBlueLeaf/EQ-SDXL-VAE\",\n",
+    "}\n",
+    "\n",
+    "def load_image(path):\n",
+    "    \"\"\"Open `path` as RGB and return (cropped PIL image, normalized batch tensor).\n",
+    "\n",
+    "    The image is center-cropped so both sides are multiples of 8, then\n",
+    "    converted to a batched tensor scaled to [-1, 1] on `device` / `dtype`.\n",
+    "    \"\"\"\n",
+    "    rgb = Image.open(path).convert('RGB')\n",
+    "    width, height = rgb.size\n",
+    "    # trim each side down to the nearest multiple of 8\n",
+    "    crop_hw = (height - height % 8, width - width % 8)\n",
+    "    rgb = CenterCrop(crop_hw)(rgb)\n",
+    "    batch = ToTensor()(rgb)[None]                       # [0, 1], batch dim added\n",
+    "    to_signed = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])\n",
+    "    batch = to_signed(batch)                            # [-1, 1]\n",
+    "    return rgb, batch.to(device, dtype=dtype)\n",
+    "\n",
+    "# back to PIL\n",
+    "def tensor_to_img(t):\n",
+    "    \"\"\"Map a [-1, 1] batch tensor to [0, 1] and return its first item as a PIL image.\"\"\"\n",
+    "    unit = t.mul(0.5).add(0.5).clamp(min=0, max=1)\n",
+    "    first = unit[0]\n",
+    "    return to_pil_image(first)\n",
+    "\n",
+    "def logvariance(latents):\n",
+    "    \"\"\"Return the log of the variance over all elements as a Python float.\"\"\"\n",
+    "    eps = 1e-8  # keeps log() finite when the variance is zero\n",
+    "    variance = latents.var()\n",
+    "    return torch.log(variance + eps).item()\n",
+    "\n",
+    "def plot_latent_distribution(latents, title, save_path):\n",
+    "    \"\"\"Save a two-panel figure: histogram and normal QQ-plot of the latents.\"\"\"\n",
+    "    from scipy.stats import probplot\n",
+    "\n",
+    "    values = latents.detach().cpu().numpy().flatten()\n",
+    "    fig, (ax_hist, ax_qq) = plt.subplots(1, 2, figsize=(10, 4))\n",
+    "\n",
+    "    # left panel: density histogram\n",
+    "    ax_hist.hist(values, bins=100, density=True, alpha=0.7, color='steelblue')\n",
+    "    ax_hist.set_title(f\"{title} histogram\")\n",
+    "    ax_hist.set_xlabel(\"latent value\")\n",
+    "    ax_hist.set_ylabel(\"density\")\n",
+    "\n",
+    "    # right panel: QQ-plot against a normal distribution\n",
+    "    probplot(values, dist=\"norm\", plot=ax_qq)\n",
+    "    ax_qq.set_title(f\"{title} QQ-plot\")\n",
+    "\n",
+    "    fig.tight_layout()\n",
+    "    fig.savefig(save_path)\n",
+    "    plt.close(fig)\n",
+    "\n",
+    "# fixed seed so the sampled latents (and every saved artifact) are reproducible\n",
+    "SEED = 0\n",
+    "\n",
+    "# the input image is the same for every VAE: load and save it once\n",
+    "img, x = load_image(IMG_PATH)\n",
+    "img.save(os.path.join(OUT_DIR, \"original.jpg\"))\n",
+    "\n",
+    "for name, repo in VAES.items():\n",
+    "    vae = AutoencoderKL.from_pretrained(repo, torch_dtype=dtype).to(device)\n",
+    "\n",
+    "    cfg   = vae.config\n",
+    "    # config entries may be absent or explicitly None; fall back to identity values\n",
+    "    scale = getattr(cfg, \"scaling_factor\", None)\n",
+    "    shift = getattr(cfg, \"shift_factor\", None)\n",
+    "    mean  = getattr(cfg, \"latents_mean\", None)\n",
+    "    std   = getattr(cfg, \"latents_std\",  None)\n",
+    "\n",
+    "    scale = torch.tensor(1.0 if scale is None else scale, device=device, dtype=dtype)\n",
+    "    shift = torch.tensor(0.0 if shift is None else shift, device=device, dtype=dtype)\n",
+    "\n",
+    "    # per-channel statistics; the channel count is inferred instead of hard-coded\n",
+    "    if mean is not None:\n",
+    "        mean = torch.tensor(mean, device=device, dtype=dtype).view(1, -1, 1, 1)\n",
+    "    if std is not None:\n",
+    "        std  = torch.tensor(std,  device=device, dtype=dtype).view(1, -1, 1, 1)\n",
+    "\n",
+    "    with torch.no_grad():\n",
+    "        # encode; reseed so every VAE draws the same posterior noise\n",
+    "        torch.manual_seed(SEED)\n",
+    "        latents = vae.encode(x).latent_dist.sample().to(dtype)\n",
+    "        if mean is not None and std is not None:\n",
+    "            latents = (latents - mean) / std\n",
+    "        # same order as the diffusers pipelines: subtract the shift, then scale\n",
+    "        latents = (latents - shift) * scale\n",
+    "\n",
+    "        lv = logvariance(latents)\n",
+    "        print(f\"{name} log-variance: {lv:.3f}\")\n",
+    "\n",
+    "        # distribution plot\n",
+    "        plot_latent_distribution(latents, f\"{name}_latents\",\n",
+    "                                 os.path.join(OUT_DIR, f\"dist_{name}.png\"))\n",
+    "\n",
+    "        # decode: exact inverse of the normalization applied above\n",
+    "        latents = latents / scale + shift\n",
+    "        if mean is not None and std is not None:\n",
+    "            latents = latents * std + mean\n",
+    "        rec = vae.decode(latents).sample\n",
+    "\n",
+    "    tensor_to_img(rec).save(os.path.join(OUT_DIR, f\"decoded_{name}.png\"))\n",
+    "\n",
+    "print(\"Готово\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "200b72ab-1978-4d71-9aba-b1ef97cf0b27",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}