OrlandoHugBot committed
Commit 7cbbca1 · verified · 1 Parent(s): 41c8d05

Update README.md

Files changed (1): README.md (+81 −15)
README.md CHANGED

@@ -69,13 +69,56 @@ pip install -r requirements.txt
 
 ### 3. Text-to-Image Generation
-```bash
-export PYTHONPATH=./:$PYTHONPATH
-
-python scripts/text2image.py configs/models/qwen2_5_1_5b_kl16_mar_h.py \
-    --checkpoint checkpoint/pytorch_model.bin \
-    --image_size 1024 \
-    --prompt "A glossy-coated golden retriever stands on the park lawn beside a life-sized penguin statue." \
-    --output output.jpg
+```python
+import torch
+from PIL import Image
+from unipicv2.pipeline_stable_diffusion_3_kontext import StableDiffusion3KontextPipeline
+from unipicv2.transformer_sd3_kontext import SD3Transformer2DKontextModel
+from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
+from transformers import CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+
+# Load model components
+pretrained_model_name_or_path = "/path/to/unipicv2_sd_3_5m_kontext"
+
+transformer = SD3Transformer2DKontextModel.from_pretrained(
+    pretrained_model_name_or_path, subfolder="transformer", torch_dtype=torch.bfloat16).cuda()
+
+vae = AutoencoderKL.from_pretrained(
+    pretrained_model_name_or_path, subfolder="vae", torch_dtype=torch.bfloat16).cuda()
+
+# Load text encoders
+text_encoder = CLIPTextModelWithProjection.from_pretrained(
+    pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16).cuda()
+tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")
+
+text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
+    pretrained_model_name_or_path, subfolder="text_encoder_2", torch_dtype=torch.bfloat16).cuda()
+tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_2")
+
+text_encoder_3 = T5EncoderModel.from_pretrained(
+    pretrained_model_name_or_path, subfolder="text_encoder_3", torch_dtype=torch.bfloat16).cuda()
+tokenizer_3 = T5TokenizerFast.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_3")
+
+scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
+
+# Create pipeline
+pipeline = StableDiffusion3KontextPipeline(
+    transformer=transformer, vae=vae,
+    text_encoder=text_encoder, tokenizer=tokenizer,
+    text_encoder_2=text_encoder_2, tokenizer_2=tokenizer_2,
+    text_encoder_3=text_encoder_3, tokenizer_3=tokenizer_3,
+    scheduler=scheduler)
+
+# Generate image
+image = pipeline(
+    prompt='a pig with wings and a top hat flying over a happy futuristic scifi city',
+    negative_prompt='',
+    height=512, width=384,
+    num_inference_steps=50,
+    guidance_scale=3.5,
+    generator=torch.Generator(device=transformer.device).manual_seed(42)
+).images[0]
+
+image.save("text2image.png")
  ```
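The block above assembles the pipeline from individually loaded subfolder components, which makes the component layout explicit. Assuming the checkpoint directory is a complete diffusers-style layout and that `StableDiffusion3KontextPipeline` inherits `from_pretrained` from `diffusers.DiffusionPipeline` (as the stock SD3 pipelines do), a one-call variant along these lines may also work; treat it as an untested sketch rather than a documented entry point:

```python
import torch
from unipicv2.pipeline_stable_diffusion_3_kontext import StableDiffusion3KontextPipeline

# Assumption: DiffusionPipeline.from_pretrained is available on this class and
# the checkpoint folder contains every subfolder used above (transformer, vae,
# the three text encoder/tokenizer pairs, scheduler).
pipeline = StableDiffusion3KontextPipeline.from_pretrained(
    "/path/to/unipicv2_sd_3_5m_kontext", torch_dtype=torch.bfloat16
).to("cuda")

# If the bfloat16 stack does not fit in VRAM, the standard
# pipeline.enable_model_cpu_offload() trade-off (slower, less memory) applies.
```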
 
 
@@ -83,14 +126,37 @@ python scripts/text2image.py configs/models/qwen2_5_1_5b_kl16_mar_h.py \
 The image editing feature within this unified model is an exploratory research module and is not yet production-ready.
 
-```bash
-export PYTHONPATH=./:$PYTHONPATH
-
-python scripts/image_edit.py configs/models/qwen2_5_1_5b_kl16_mar_h.py \
-    --checkpoint checkpoint/pytorch_model.bin \
-    --image_size 1024 \
-    --image data/sample.png \
-    --prompt "Replace the stars with the candle." \
-    --output output.jpg
+```python
+# Load and preprocess image
+def fix_longer_edge(x, image_size, factor=32):
+    w, h = x.size
+    if w >= h:
+        target_w = image_size
+        target_h = h * (target_w / w)
+        target_h = round(target_h / factor) * factor
+    else:
+        target_h = image_size
+        target_w = w * (target_h / h)
+        target_w = round(target_w / factor) * factor
+    x = x.resize(size=(target_w, target_h))
+    return x
+
+image = Image.open("text2image.png")
+image = fix_longer_edge(image, image_size=512)
+
+negative_prompt = "blurry, low quality, low resolution, distorted, deformed, broken content, missing parts, damaged details, artifacts, glitch, noise, pixelated, grainy, compression artifacts, bad composition, wrong proportion, incomplete editing, unfinished, unedited areas."
+
+# Edit image
+edited_image = pipeline(
+    image=image,
+    prompt="remove the pig's hat",
+    negative_prompt=negative_prompt,
+    height=image.height, width=image.width,
+    num_inference_steps=50,
+    guidance_scale=3.5,
+    generator=torch.Generator(device=transformer.device).manual_seed(42)
+).images[0]
+
+edited_image.save("image_editing.png")
  ```
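Note that the editing call reuses the `pipeline` object built in the text-to-image block; passing `image=` is what appears to switch the Kontext pipeline into editing mode. The `factor=32` rounding in `fix_longer_edge` keeps both output dimensions divisible by 32, presumably so the resized image maps cleanly onto the model's latent grid. A quick arithmetic check of the rule on a hypothetical 800×600 input:

```python
# Pure arithmetic, no model needed: an 800x600 image with image_size=512
# and factor=32 resizes to 512x384.
w, h = 800, 600
image_size, factor = 512, 32

target_w = image_size                                    # longer edge pinned to 512
target_h = round(h * (target_w / w) / factor) * factor   # 600 * 0.64 = 384.0 -> 384

print(target_w, target_h)  # 512 384, both multiples of 32
```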
 
  ## 📄 License