import gradio as gr
from diffsynth import ModelManager, SDImagePipeline, SDXLImagePipeline, SD3ImagePipeline, HunyuanDiTImagePipeline, FluxImagePipeline
import os, torch
from PIL import Image
import numpy as np
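

# Registry of supported model families: the local folder scanned for
# checkpoints, the DiffSynth pipeline class that runs them, and the default
# generation parameters restored when a checkpoint of that type is selected.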
config = {
    "model_config": {
        "Stable Diffusion": {
            "model_folder": "models/stable_diffusion",
            "pipeline_class": SDImagePipeline,
            "default_parameters": {
                "cfg_scale": 7.0,
                "height": 512,
                "width": 512,
            }
        },
        "Stable Diffusion XL": {
            "model_folder": "models/stable_diffusion_xl",
            "pipeline_class": SDXLImagePipeline,
            "default_parameters": {
                "cfg_scale": 7.0,
            }
        },
        "Stable Diffusion 3": {
            "model_folder": "models/stable_diffusion_3",
            "pipeline_class": SD3ImagePipeline,
            "default_parameters": {
                "cfg_scale": 7.0,
            }
        },
        "Stable Diffusion XL Turbo": {
            "model_folder": "models/stable_diffusion_xl_turbo",
            "pipeline_class": SDXLImagePipeline,
            "default_parameters": {
                "negative_prompt": "",
                "cfg_scale": 1.0,
                "num_inference_steps": 1,
                "height": 512,
                "width": 512,
            }
        },
        "Kolors": {
            "model_folder": "models/kolors",
            "pipeline_class": SDXLImagePipeline,
            "default_parameters": {
                "cfg_scale": 7.0,
            }
        },
        "HunyuanDiT": {
            "model_folder": "models/HunyuanDiT",
            "pipeline_class": HunyuanDiTImagePipeline,
            "default_parameters": {
                "cfg_scale": 7.0,
            }
        },
        "FLUX": {
            "model_folder": "models/FLUX",
            "pipeline_class": FluxImagePipeline,
            "default_parameters": {
                "cfg_scale": 1.0,
            }
        },
    },
    "max_num_painter_layers": 8,  # number of mask layers shown in the painter
    "max_num_model_cache": 1,     # models kept in memory before eviction
}
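

# List the checkpoints available for a model type. Single-file checkpoints
# are ".safetensors" files; HunyuanDiT, Kolors, and FLUX ship as multi-file
# folders, so subdirectories are offered as well for those types.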
def load_model_list(model_type):
    if model_type is None:
        return []
    folder = config["model_config"][model_type]["model_folder"]
    file_list = [i for i in os.listdir(folder) if i.endswith(".safetensors")]
    if model_type in ["HunyuanDiT", "Kolors", "FLUX"]:
        file_list += [i for i in os.listdir(folder) if os.path.isdir(os.path.join(folder, i))]
    file_list = sorted(file_list)
    return file_list
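

# Load a checkpoint and build its pipeline, reusing a cached copy when one
# exists. Multi-file model types enumerate their component weights explicitly.
# Before a new entry is cached, the oldest cached models are moved to CPU and
# evicted until the cache fits config["max_num_model_cache"].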
def load_model(model_type, model_path):
    global model_dict
    model_key = f"{model_type}:{model_path}"
    if model_key in model_dict:
        return model_dict[model_key]
    model_path = os.path.join(config["model_config"][model_type]["model_folder"], model_path)
    model_manager = ModelManager()
    if model_type == "HunyuanDiT":
        model_manager.load_models([
            os.path.join(model_path, "clip_text_encoder/pytorch_model.bin"),
            os.path.join(model_path, "mt5/pytorch_model.bin"),
            os.path.join(model_path, "model/pytorch_model_ema.pt"),
            os.path.join(model_path, "sdxl-vae-fp16-fix/diffusion_pytorch_model.bin"),
        ])
    elif model_type == "Kolors":
        model_manager.load_models([
            os.path.join(model_path, "text_encoder"),
            os.path.join(model_path, "unet/diffusion_pytorch_model.safetensors"),
            os.path.join(model_path, "vae/diffusion_pytorch_model.safetensors"),
        ])
    elif model_type == "FLUX":
        # FLUX weights are loaded in bfloat16 to reduce memory use.
        model_manager.torch_dtype = torch.bfloat16
        file_list = [
            os.path.join(model_path, "text_encoder/model.safetensors"),
            os.path.join(model_path, "text_encoder_2"),
        ]
        for file_name in os.listdir(model_path):
            if file_name.endswith(".safetensors"):
                file_list.append(os.path.join(model_path, file_name))
        model_manager.load_models(file_list)
    else:
        model_manager.load_model(model_path)
    pipe = config["model_config"][model_type]["pipeline_class"].from_model_manager(model_manager)
    while len(model_dict) + 1 > config["max_num_model_cache"]:
        key = next(iter(model_dict.keys()))
        model_manager_to_release, _ = model_dict[key]
        model_manager_to_release.to("cpu")
        del model_dict[key]
        torch.cuda.empty_cache()
    model_dict[model_key] = (model_manager, pipe)
    return model_manager, pipe
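

# Example usage outside the UI (hypothetical checkpoint name; any file listed
# by load_model_list works):
#   manager, pipe = load_model("Stable Diffusion", "v1-5-pruned-emaonly.safetensors")
#   image = pipe(prompt="a cat", num_inference_steps=20, height=512, width=512)

# Cache of loaded models, keyed by "model_type:model_path".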
model_dict = {}
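

# UI layout: the left column selects the model and generation parameters; the
# right column hosts the layer painter and the results panel.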
with gr.Blocks() as app:
    gr.Markdown("# DiffSynth-Studio Painter")
    with gr.Row():
        with gr.Column(scale=382, min_width=100):

            with gr.Accordion(label="Model"):
                model_type = gr.Dropdown(choices=list(config["model_config"]), label="Model type")
                model_path = gr.Dropdown(choices=[], interactive=True, label="Model path")
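
                # Refresh the checkpoint list whenever the model type changes.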
                @gr.on(inputs=model_type, outputs=model_path, triggers=model_type.change)
                def model_type_to_model_path(model_type):
                    return gr.Dropdown(choices=load_model_list(model_type))

            with gr.Accordion(label="Prompt"):
                prompt = gr.Textbox(label="Prompt", lines=3)
                negative_prompt = gr.Textbox(label="Negative prompt", lines=1)
                cfg_scale = gr.Slider(minimum=1.0, maximum=10.0, value=7.0, step=0.1, interactive=True, label="Classifier-free guidance scale")
                embedded_guidance = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, interactive=True, label="Embedded guidance scale (only for FLUX)")

            with gr.Accordion(label="Image"):
                num_inference_steps = gr.Slider(minimum=1, maximum=100, value=20, step=1, interactive=True, label="Inference steps")
                height = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, interactive=True, label="Height")
                width = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, interactive=True, label="Width")
                with gr.Column():
                    # Display-only here: generation always seeds from the value below.
                    use_fixed_seed = gr.Checkbox(value=True, interactive=False, label="Use fixed seed")
                    seed = gr.Number(minimum=0, maximum=10**9, value=0, interactive=True, label="Random seed", show_label=False)
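
            # When a checkpoint is selected, load it and apply that model type's
            # default parameters; settings the type does not define are kept as-is.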
            @gr.on(
                inputs=[model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width],
                outputs=[prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width],
                triggers=model_path.change
            )
            def model_path_to_default_params(model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width):
                load_model(model_type, model_path)
                defaults = config["model_config"][model_type]["default_parameters"]
                # Apply the negative-prompt default too (e.g. SDXL Turbo clears it).
                negative_prompt = defaults.get("negative_prompt", negative_prompt)
                cfg_scale = defaults.get("cfg_scale", cfg_scale)
                embedded_guidance = defaults.get("embedded_guidance", embedded_guidance)
                num_inference_steps = defaults.get("num_inference_steps", num_inference_steps)
                height = defaults.get("height", height)
                width = defaults.get("width", width)
                return prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width

        with gr.Column(scale=618, min_width=100):
            with gr.Accordion(label="Painter"):
                enable_local_prompt_list = []
                local_prompt_list = []
                mask_scale_list = []
                canvas_list = []
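
                # Each layer provides an optional local prompt, a mask strength, and
                # a hand-drawn RGBA mask on its own canvas tab.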
                for painter_layer_id in range(config["max_num_painter_layers"]):
                    with gr.Tab(label=f"Layer {painter_layer_id}"):
                        enable_local_prompt = gr.Checkbox(label="Enable", value=False, key=f"enable_local_prompt_{painter_layer_id}")
                        local_prompt = gr.Textbox(label="Local prompt", key=f"local_prompt_{painter_layer_id}")
                        mask_scale = gr.Slider(minimum=0.0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Mask scale", key=f"mask_scale_{painter_layer_id}")
                        canvas = gr.ImageEditor(
                            canvas_size=(512, 1), sources=None, layers=False, interactive=True, image_mode="RGBA",
                            brush=gr.Brush(default_size=100, default_color="#000000", colors=["#000000"]),
                            label="Painter", key=f"canvas_{painter_layer_id}",
                        )
                        @gr.on(inputs=[height, width, canvas], outputs=canvas, triggers=[height.change, width.change, canvas.clear, enable_local_prompt.change], show_progress="hidden")
                        def resize_canvas(height, width, canvas):
                            # An untouched editor may have no background yet.
                            if canvas is None or canvas["background"] is None:
                                return np.ones((height, width, 3), dtype=np.uint8) * 255
                            h, w = canvas["background"].shape[:2]
                            if h != height or w != width:
                                return np.ones((height, width, 3), dtype=np.uint8) * 255
                            else:
                                return canvas

                        enable_local_prompt_list.append(enable_local_prompt)
                        local_prompt_list.append(local_prompt)
                        mask_scale_list.append(mask_scale)
                        canvas_list.append(canvas)
            with gr.Accordion(label="Results"):
                run_button = gr.Button(value="Generate", variant="primary")
                output_image = gr.Image(sources=None, show_label=False, interactive=False, type="pil")
                with gr.Row():
                    with gr.Column():
                        output_to_painter_button = gr.Button(value="Set as painter's background")
                    with gr.Column():
                        output_to_input_button = gr.Button(value="Set as input image")
                # The states and the "Set as input image" button are declared but
                # not wired to a handler in this script.
                painter_background = gr.State(None)
                input_background = gr.State(None)
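
                # Generate an image with the selected pipeline. The per-layer painter
                # widgets arrive flattened in *args, in the order the lists were built.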
                @gr.on(
                    inputs=[model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width, seed] + enable_local_prompt_list + local_prompt_list + mask_scale_list + canvas_list,
                    outputs=[output_image],
                    triggers=run_button.click
                )
                def generate_image(model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width, seed, *args, progress=gr.Progress()):
                    _, pipe = load_model(model_type, model_path)
                    input_params = {
                        "prompt": prompt,
                        "negative_prompt": negative_prompt,
                        "cfg_scale": cfg_scale,
                        "num_inference_steps": num_inference_steps,
                        "height": height,
                        "width": width,
                        "progress_bar_cmd": progress.tqdm,
                    }
                    if isinstance(pipe, FluxImagePipeline):
                        # Embedded guidance is a FLUX-specific parameter.
                        input_params["embedded_guidance"] = embedded_guidance
                    # Split the flattened painter inputs back into per-widget lists.
                    num_layers = config["max_num_painter_layers"]
                    enable_local_prompt_list, local_prompt_list, mask_scale_list, canvas_list = (
                        args[0 * num_layers: 1 * num_layers],
                        args[1 * num_layers: 2 * num_layers],
                        args[2 * num_layers: 3 * num_layers],
                        args[3 * num_layers: 4 * num_layers],
                    )
                    local_prompts, masks, mask_scales = [], [], []
                    for enable_local_prompt, local_prompt, mask_scale, canvas in zip(
                        enable_local_prompt_list, local_prompt_list, mask_scale_list, canvas_list
                    ):
                        if enable_local_prompt:
                            local_prompts.append(local_prompt)
                            # The layer's mask is the alpha channel of its brush strokes.
                            masks.append(Image.fromarray(canvas["layers"][0][:, :, -1]).convert("RGB"))
                            mask_scales.append(mask_scale)
                    input_params.update({
                        "local_prompts": local_prompts,
                        "masks": masks,
                        "mask_scales": mask_scales,
                    })
                    # gr.Number may deliver the seed as a float; manual_seed needs an int.
                    torch.manual_seed(int(seed))
                    image = pipe(**input_params)
                    return image
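
                # Paste the generated image into every painter canvas as its new
                # background, resized to each canvas's current size.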
                @gr.on(inputs=[output_image] + canvas_list, outputs=canvas_list, triggers=output_to_painter_button.click)
                def send_output_to_painter_background(output_image, *canvas_list):
                    if output_image is None:
                        return tuple(canvas_list)
                    for canvas in canvas_list:
                        if canvas is None or canvas["background"] is None:
                            continue
                        h, w = canvas["background"].shape[:2]
                        canvas["background"] = output_image.resize((w, h))
                    return tuple(canvas_list)
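

# Start the Gradio server. launch() also accepts options such as share=True
# (temporary public link) or server_port if the default port is taken.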
app.launch()