import os

from modelscope import snapshot_download
from typing_extensions import Literal, TypeAlias

from diffsynth.extensions.ImageQualityMetric.aesthetic import AestheticScore
from diffsynth.extensions.ImageQualityMetric.imagereward import ImageRewardScore
from diffsynth.extensions.ImageQualityMetric.pickscore import PickScore
from diffsynth.extensions.ImageQualityMetric.clip import CLIPScore
from diffsynth.extensions.ImageQualityMetric.hps import HPScore_v2
from diffsynth.extensions.ImageQualityMetric.mps import MPScore

preference_model_id: TypeAlias = Literal[
    "ImageReward",
    "Aesthetic",
    "PickScore",
    "CLIP",
    "HPSv2",
    "HPSv2.1",
    "MPS",
]

model_dict = {
    "ImageReward": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "ImageReward/ImageReward.safetensors",
            "ImageReward/med_config.json",
            "bert-base-uncased/config.json",
            "bert-base-uncased/model.safetensors",
            "bert-base-uncased/tokenizer.json",
            "bert-base-uncased/tokenizer_config.json",
            "bert-base-uncased/vocab.txt",
        ],
        "load_path": {
            "imagereward": "ImageReward/ImageReward.safetensors",
            "med_config": "ImageReward/med_config.json",
            "bert_model_path": "bert-base-uncased",
        },
        "model_class": ImageRewardScore
    },
    "Aesthetic": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "aesthetic-predictor/sac+logos+ava1-l14-linearMSE.safetensors",
            "clip-vit-large-patch14/config.json",
            "clip-vit-large-patch14/merges.txt",
            "clip-vit-large-patch14/model.safetensors",
            "clip-vit-large-patch14/preprocessor_config.json",
            "clip-vit-large-patch14/special_tokens_map.json",
            "clip-vit-large-patch14/tokenizer.json",
            "clip-vit-large-patch14/tokenizer_config.json",
            "clip-vit-large-patch14/vocab.json",
        ],
        "load_path": {
            "aesthetic_predictor": "aesthetic-predictor/sac+logos+ava1-l14-linearMSE.safetensors",
            "clip-large": "clip-vit-large-patch14",
        },
        "model_class": AestheticScore
    },
    "PickScore": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "PickScore_v1/*",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/merges.txt",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/preprocessor_config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/special_tokens_map.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/tokenizer.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/tokenizer_config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/vocab.json",
        ],
        "load_path": {
            "pickscore": "PickScore_v1",
            "clip": "CLIP-ViT-H-14-laion2B-s32B-b79K",
        },
        "model_class": PickScore
    },
    "CLIP": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "CLIP-ViT-H-14-laion2B-s32B-b79K/open_clip_pytorch_model.bin",
            "bpe_simple_vocab_16e6.txt.gz",
        ],
        "load_path": {
            "open_clip": "CLIP-ViT-H-14-laion2B-s32B-b79K/open_clip_pytorch_model.bin",
            "open_clip_bpe": "bpe_simple_vocab_16e6.txt.gz",
        },
        "model_class": CLIPScore
    },
    "HPSv2": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "HPS_v2/HPS_v2_compressed.safetensors",
            "bpe_simple_vocab_16e6.txt.gz",
        ],
        "load_path": {
            "hpsv2": "HPS_v2/HPS_v2_compressed.safetensors",
            "open_clip_bpe": "bpe_simple_vocab_16e6.txt.gz",
        },
        "model_class": HPScore_v2,
        "extra_kwargs": {"model_version": "v2"}
    },
    "HPSv2.1": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "HPS_v2/HPS_v2.1_compressed.safetensors",
            "bpe_simple_vocab_16e6.txt.gz",
        ],
        "load_path": {
            "hpsv2.1": "HPS_v2/HPS_v2.1_compressed.safetensors",
            "open_clip_bpe": "bpe_simple_vocab_16e6.txt.gz",
        },
        "model_class": HPScore_v2,
        "extra_kwargs": {"model_version": "v21"}
    },
    "MPS": {
        "model_id": "DiffSynth-Studio/QualityMetric_reward_pretrained",
        "allow_file_pattern": [
            "MPS_overall_checkpoint/MPS_overall_checkpoint_diffsynth.safetensors",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/merges.txt",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/preprocessor_config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/special_tokens_map.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/tokenizer.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/tokenizer_config.json",
            "CLIP-ViT-H-14-laion2B-s32B-b79K/vocab.json",
        ],
        "load_path": {
            "mps": "MPS_overall_checkpoint/MPS_overall_checkpoint_diffsynth.safetensors",
            "clip": "CLIP-ViT-H-14-laion2B-s32B-b79K",
        },
        "model_class": MPScore
    },
}

def download_preference_model(model_name: preference_model_id, cache_dir="models"):
    """Download the weights for a preference model and return its local load paths."""
    metadata = model_dict[model_name]
    snapshot_download(
        model_id=metadata["model_id"],
        allow_file_pattern=metadata["allow_file_pattern"],
        cache_dir=cache_dir,
    )
    load_path = metadata["load_path"]
    load_path = {key: os.path.join(cache_dir, metadata["model_id"], path) for key, path in load_path.items()}
    return load_path

def load_preference_model(model_name: preference_model_id, device="cuda", path=None):
    """Instantiate the preference model class registered for `model_name`."""
    model_class = model_dict[model_name]["model_class"]
    extra_kwargs = model_dict[model_name].get("extra_kwargs", {})
    preference_model = model_class(device=device, path=path, **extra_kwargs)
    return preference_model
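
# Example usage (a minimal sketch; it assumes the load-path dict returned by
# download_preference_model is what the metric class expects as `path`):
#
#   paths = download_preference_model("HPSv2", cache_dir="models")
#   model = load_preference_model("HPSv2", device="cuda", path=paths)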