OrlandoHugBot committed on
Commit c6c298d · verified · 1 Parent(s): 49b976a

Update README.md

Files changed (1)
  1. README.md +13 -39
README.md CHANGED
@@ -75,60 +75,34 @@ import torch
  from PIL import Image
  from unipicv2.pipeline_stable_diffusion_3_kontext import StableDiffusion3KontextPipeline
  from unipicv2.transformer_sd3_kontext import SD3Transformer2DKontextModel
- from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL, BitsAndBytesConfig
+ from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
  from transformers import CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel, T5TokenizerFast

  # Load model components
- pretrained_model_name_or_path = "/mnt/datasets_vlm/chris/hf_ckpt/Unipic2-t2i"
- # int4 is recommended for inference:lower VRAM with no quality loss {"int4", "fp16"}
- quant = "int4"
-
- # BitsAndBytes config
- bnb4 = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.bfloat16,
- )
- bnb8 = BitsAndBytesConfig(load_in_8bit=True)
-
- if quant == "int4":
-     transformer = SD3Transformer2DKontextModel.from_pretrained(
-         pretrained_model_name_or_path, subfolder="transformer",
-         quantization_config=bnb4, device_map="auto", low_cpu_mem_usage=True
-     ).cuda()
-     text_qconf = bnb8
-     vae_dtype = torch.float16
- else:  # fp16
-     transformer = SD3Transformer2DKontextModel.from_pretrained(
-         pretrained_model_name_or_path, subfolder="transformer",
-         torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
-     ).cuda()
-     text_qconf = None
-     vae_dtype = torch.float16
+ pretrained_model_name_or_path = "Skywork/UniPic2-SD3.5M-Kontext-2B"
+
+ transformer = SD3Transformer2DKontextModel.from_pretrained(
+     pretrained_model_name_or_path, subfolder="transformer", torch_dtype=torch.bfloat16).cuda()

  vae = AutoencoderKL.from_pretrained(
      pretrained_model_name_or_path, subfolder="vae",
-     torch_dtype=vae_dtype, device_map="auto", low_cpu_mem_usage=True
- )
+     torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+ ).cuda()

  # Load text encoders
  text_encoder = CLIPTextModelWithProjection.from_pretrained(
-     pretrained_model_name_or_path, subfolder="text_encoder",
-     quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
- )
+     pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+ ).cuda()
  tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")

  text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
-     pretrained_model_name_or_path, subfolder="text_encoder_2",
-     quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
- )
+     pretrained_model_name_or_path, subfolder="text_encoder_2", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+ ).cuda()
  tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_2")

  text_encoder_3 = T5EncoderModel.from_pretrained(
-     pretrained_model_name_or_path, subfolder="text_encoder_3",
-     quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
- )
+     pretrained_model_name_or_path, subfolder="text_encoder_3", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+ ).cuda()
  tokenizer_3 = T5TokenizerFast.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_3")

  scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
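The net effect of this hunk: the int4/fp16 BitsAndBytes branching and the internal checkpoint path are dropped in favor of a single bfloat16 loading path against the published Skywork/UniPic2-SD3.5M-Kontext-2B checkpoint. The hunk ends mid-way through the scheduler call, so for readers following along, here is a minimal sketch of how the assembly typically continues. It assumes (the diff does not confirm) that the custom StableDiffusion3KontextPipeline takes the same component arguments as diffusers' StableDiffusion3Pipeline and accepts an `image=` argument for Kontext-style editing; the file names, prompt, and sampler settings are illustrative, not taken from this README.

```python
# Sketch only: completes the truncated snippet under the assumptions above.
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    pretrained_model_name_or_path, subfolder="scheduler"
)

# Assumed constructor, mirroring diffusers' StableDiffusion3Pipeline components.
pipe = StableDiffusion3KontextPipeline(
    transformer=transformer, vae=vae, scheduler=scheduler,
    text_encoder=text_encoder, tokenizer=tokenizer,
    text_encoder_2=text_encoder_2, tokenizer_2=tokenizer_2,
    text_encoder_3=text_encoder_3, tokenizer_3=tokenizer_3,
)

image = Image.open("input.png").convert("RGB")  # hypothetical input file
result = pipe(
    image=image,                # assumed Kontext image-conditioning argument
    prompt="replace the sky with a golden sunset",  # illustrative prompt
    num_inference_steps=28,     # common SD3-family defaults, not from the diff
    guidance_scale=7.0,
).images[0]
result.save("output.png")
```

Note the trade-off the commit makes: loading every component in bfloat16 gives up the lower VRAM footprint of the removed nf4/int8 path in exchange for a shorter, single-path example with no quantization dependency.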