In [2]:
from diffusers.models import AsymmetricAutoencoderKL
import torch

# Architecture description of the asymmetric VAE. Only a subset of these
# entries is forwarded to the constructor below; "_class_name" and
# "shift_factor" are stored in the dict but not passed on.
config = dict(
    _class_name="AsymmetricAutoencoderKL",
    act_fn="silu",
    down_block_out_channels=[128, 256, 512, 512],
    down_block_types=[
        "DownEncoderBlock2D",
        "DownEncoderBlock2D",
        "DownEncoderBlock2D",
        "DownEncoderBlock2D",
    ],
    in_channels=3,
    latent_channels=4,
    norm_num_groups=32,
    out_channels=3,
    sample_size=1024,
    scaling_factor=0.13025,
    shift_factor=0,
    up_block_out_channels=[128, 256, 512, 512],
    up_block_types=[
        "UpDecoderBlock2D",
        "UpDecoderBlock2D",
        "UpDecoderBlock2D",
        "UpDecoderBlock2D",
    ],
)

# Build the model: forward the selected config entries by name and set the
# per-block layer counts explicitly (2 for the encoder side, 3 for the
# decoder side).
vae = AsymmetricAutoencoderKL(
    **{
        name: config[name]
        for name in (
            "act_fn",
            "down_block_out_channels",
            "down_block_types",
            "in_channels",
            "latent_channels",
            "norm_num_groups",
            "out_channels",
            "sample_size",
            "scaling_factor",
            "up_block_out_channels",
            "up_block_types",
        )
    },
    layers_per_down_block=2,
    layers_per_up_block=3,
)

# Persist the randomly initialised model so a later cell can load it by path,
# then show the module tree.
vae.save_pretrained("asymmetric_vae_empty")
print(vae)

AsymmetricAutoencoderKL(
 (encoder): Encoder(
 (conv_in): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 (down_blocks): ModuleList(
 (0): DownEncoderBlock2D(
 (resnets): ModuleList(
 (0-1): 2 x ResnetBlock2D(
 (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
 (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
 (dropout): Dropout(p=0.0, inplace=False)
 (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 (nonlinearity): SiLU()
 )
 )
 (downsamplers): ModuleList(
 (0): Downsample2D(
 (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
 )
 )
 )
 (1): DownEncoderBlock2D(
 (resnets): ModuleList(
 (0): ResnetBlock2D(
 (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
 (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
 (dropout): Dropout(p=0.0, inplace=False)
 (conv2): Conv2d(256,

In [3]:
import torch
from diffusers import AsymmetricAutoencoderKL,AutoencoderKL
from tqdm import tqdm
import torch.nn.init as init

def log(message):
 """Print ``message`` to standard output."""
 print(message)

def main():
    """Copy matching weights from a pretrained VAE into the asymmetric VAE.

    Loads an ``AutoencoderKL`` from ``"vae"`` and an ``AsymmetricAutoencoderKL``
    from ``"asymmetric_vae_empty"``. Every tensor of the source state dict
    whose key also exists in the target with the same shape is cloned into the
    target. The merged model is saved to ``"asymmetric_vae"``.

    Prints the transfer statistics and the sorted list of target keys that
    received no source weight (they keep the values loaded from the target
    checkpoint).
    """
    checkpoint_path_old = "vae"
    checkpoint_path_new = "asymmetric_vae_empty"
    # Copying tensors between state dicts does not need a GPU, so fall back to
    # the CPU instead of crashing on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float32

    # Load both models.
    source_vae = AutoencoderKL.from_pretrained(checkpoint_path_old).to(device, dtype=dtype)
    target_vae = AsymmetricAutoencoderKL.from_pretrained(checkpoint_path_new).to(device, dtype=dtype)

    old_state_dict = source_vae.state_dict()
    new_state_dict = target_vae.state_dict()

    transferred_state_dict = {}
    transfer_stats = {
        "перенесено": 0,
        "несовпадение_размеров": 0,
        "пропущено": 0,
    }

    # Walk over every key of the source model.
    for old_key, old_tensor in tqdm(old_state_dict.items(), desc="Перенос весов"):
        # Keys are matched by identical name; no renaming is applied.
        new_key = old_key

        if new_key not in new_state_dict:
            log(f"? Ключ не найден в новой модели: {old_key} -> {old_tensor.shape}")
            transfer_stats["пропущено"] += 1
            continue

        new_shape = new_state_dict[new_key].shape
        if old_tensor.shape != new_shape:
            log(f"✗ Несовпадение размеров: {old_key} ({old_tensor.shape}) -> {new_key} ({new_shape})")
            transfer_stats["несовпадение_размеров"] += 1
            continue

        transferred_state_dict[new_key] = old_tensor.clone()
        transfer_stats["перенесено"] += 1

    # Overlay the transferred tensors on the target state and persist the result.
    new_state_dict.update(transferred_state_dict)
    target_vae.load_state_dict(new_state_dict)
    target_vae.save_pretrained("asymmetric_vae")

    # Target keys that did not receive a source weight.
    non_transferred_keys = sorted(set(new_state_dict) - set(transferred_state_dict))

    print("Статистика переноса:", transfer_stats)
    print("Неперенесенные ключи в новой модели:")
    for key in non_transferred_keys:
        print(key)

# Entry point: run the weight transfer when this cell/script is executed directly.
if __name__ == "__main__":
 main()

The config attributes {'block_out_channels': [128, 256, 512, 512], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.
Перенос весов: 100%|██████████| 248/248 [00:00<00:00, 30427.29it/s]


Статистика переноса: {'перенесено': 248, 'несовпадение_размеров': 0, 'пропущено': 0}
Неперенесенные ключи в новой модели:
decoder.condition_encoder.layers.0.bias
decoder.condition_encoder.layers.0.weight
decoder.condition_encoder.layers.1.bias
decoder.condition_encoder.layers.1.weight
decoder.condition_encoder.layers.2.bias
decoder.condition_encoder.layers.2.weight
decoder.condition_encoder.layers.3.bias
decoder.condition_encoder.layers.3.weight
decoder.condition_encoder.layers.4.bias
decoder.condition_encoder.layers.4.weight
decoder.up_blocks.0.resnets.3.conv1.bias
decoder.up_blocks.0.resnets.3.conv1.weight
decoder.up_blocks.0.resnets.3.conv2.bias
decoder.up_blocks.0.resnets.3.conv2.weight
decoder.up_blocks.0.resnets.3.norm1.bias
decoder.up_blocks.0.resnets.3.norm1.weight
decoder.up_blocks.0.resnets.3.norm2.bias
decoder.up_blocks.0.resnets.3.norm2.weight
decoder.up_blocks.1.resnets.3.conv1.bias
decoder.up_blocks.1.resnets.3.conv1.weight
decoder.up_blocks.1.resnets.3.conv2.bias
decoder

In [1]:
import torch

from torchvision import transforms, utils

import diffusers
from diffusers import AsymmetricAutoencoderKL

from diffusers.utils import load_image

def crop_image_to_nearest_divisible_by_8(img):
    """Center-crop an image tensor so that its height and width are multiples of 8.

    Args:
        img: tensor whose dimension 1 is the height and dimension 2 is the
            width (i.e. a 3-D ``(C, H, W)`` image).

    Returns:
        ``img`` itself, unchanged, when both sides are already divisible by 8.
        Otherwise a center crop to the closest lower multiples of 8, converted
        to ``torch.float32`` and clamped to ``[-1, 1]``.

    Raises:
        ValueError: if a side that needs cropping is shorter than 8 pixels, so
            the crop would be empty.
    """
    height, width = img.shape[1], img.shape[2]
    extra_h, extra_w = height % 8, width % 8

    # Nothing to do when both sides are already multiples of 8.
    if extra_h == 0 and extra_w == 0:
        return img

    # Closest lower resolution divisible by 8.
    new_height = height - extra_h
    new_width = width - extra_w
    if new_height == 0 or new_width == 0:
        raise ValueError(
            f"image of size {height}x{width} is too small to crop to a multiple of 8"
        )

    # Center crop by slicing. transforms.CenterCrop accepts only a size, so the
    # previous call with an `interpolation` keyword raised TypeError here.
    top = extra_h // 2
    left = extra_w // 2
    img = img[:, top:top + new_height, left:left + new_width]

    # Dtype conversion and clamp are applied on the cropped path only, as before.
    return img.to(torch.float32).clamp(-1, 1)
 
# Converter from a loaded image to a tensor.
to_tensor = transforms.ToTensor()

device = "cuda"
dtype = torch.float16
vae = AsymmetricAutoencoderKL.from_pretrained("asymmetric_vae", torch_dtype=dtype).to(device).eval()

image = load_image("123456789.jpg")

# Tensor conversion -> crop to a multiple of 8 -> add a batch dimension ->
# move to the model's device and dtype.
image = crop_image_to_nearest_divisible_by_8(to_tensor(image)).unsqueeze(0).to(device, dtype=dtype)

# NOTE(review): the input is fed to the VAE exactly as produced by ToTensor;
# confirm that this value range matches what the checkpoint was trained on.
# Inference only: disable autograd so no graph/activations are kept in memory.
with torch.no_grad():
    upscaled_image = vae(image).sample

# Save the reconstructed image
utils.save_image(upscaled_image, "test.png")
print('ok')

The config attributes {'block_out_channels': [128, 256, 512, 512], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.


ok
