Fix a few bugs
Browse files
1. Fix a bug when specifying image size
2. Fix bug: ratio index out of range
- hunyuan.py +5 -0
- tokenizer_wrapper.py +4 -3
hunyuan.py
CHANGED
@@ -2344,6 +2344,7 @@ class HunyuanImage3ForCausalMM(HunyuanImage3PreTrainedModel, GenerationMixin):
|
|
2344 |
extra_auto_stops = [tkw.boi_token_id]
|
2345 |
stop_token_id = dict(
|
2346 |
auto=[tkw.eos_token_id] + extra_auto_stops,
|
|
|
2347 |
recaption=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
|
2348 |
think=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
|
2349 |
img_ratio=extra_auto_stops,
|
@@ -2642,6 +2643,10 @@ class HunyuanImage3ForCausalMM(HunyuanImage3PreTrainedModel, GenerationMixin):
|
|
2642 |
prompt=prompt, cot_text=cot_text, bot_task="img_ratio", system_prompt=system_prompt, seed=seed)
|
2643 |
outputs = self._generate(**model_inputs, **kwargs, verbose=verbose)
|
2644 |
ratio_index = outputs[0, -1].item() - self._tkwrapper.ratio_token_offset
|
|
|
|
|
|
|
|
|
2645 |
reso = self.image_processor.reso_group[ratio_index]
|
2646 |
image_size = reso.height, reso.width
|
2647 |
|
|
|
2344 |
extra_auto_stops = [tkw.boi_token_id]
|
2345 |
stop_token_id = dict(
|
2346 |
auto=[tkw.eos_token_id] + extra_auto_stops,
|
2347 |
+
image=[tkw.eos_token_id],
|
2348 |
recaption=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
|
2349 |
think=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
|
2350 |
img_ratio=extra_auto_stops,
|
|
|
2643 |
prompt=prompt, cot_text=cot_text, bot_task="img_ratio", system_prompt=system_prompt, seed=seed)
|
2644 |
outputs = self._generate(**model_inputs, **kwargs, verbose=verbose)
|
2645 |
ratio_index = outputs[0, -1].item() - self._tkwrapper.ratio_token_offset
|
2646 |
+
# In some cases, the generated ratio_index is out of range. A valid ratio_index should be in [0, 32].
|
2647 |
+
# If ratio_index is out of range, we set it to 16 (i.e., 1:1).
|
2648 |
+
if ratio_index < 0 or ratio_index >= len(self.image_processor.reso_group):
|
2649 |
+
ratio_index = 16
|
2650 |
reso = self.image_processor.reso_group[ratio_index]
|
2651 |
image_size = reso.height, reso.width
|
2652 |
|
tokenizer_wrapper.py
CHANGED
@@ -1313,6 +1313,7 @@ class TokenizerWrapper(object):
|
|
1313 |
# We can add special tokens for the bot's latest message according to different tasks
|
1314 |
bot_response_prefix = dict(
|
1315 |
auto=_bot_prefix,
|
|
|
1316 |
think=f"{_bot_prefix}<think>",
|
1317 |
recaption=f"{_bot_prefix}<recaption>",
|
1318 |
img_ratio=f"{_bot_prefix}{answer_prefix}<boi><img_size_{image_base_size}>",
|
@@ -1345,15 +1346,15 @@ class TokenizerWrapper(object):
|
|
1345 |
batch_system_prompt: Optional[List[str]] = None,
|
1346 |
batch_cot_text: Optional[List[str]] = None,
|
1347 |
max_length: Optional[int] = None,
|
1348 |
-
bot_task: str = "auto", # auto/think/recaption/img_ratio
|
1349 |
image_base_size: int = 1024,
|
1350 |
sequence_template: str = "pretrain",
|
1351 |
cfg_factor: int = 1,
|
1352 |
add_assistant_prefix: Optional[bool] = None,
|
1353 |
drop_think: bool = False,
|
1354 |
) -> Dict[str, Any]:
|
1355 |
-
assert bot_task in ["auto", "think", "recaption", "img_ratio"], \
|
1356 |
-
f"bot_task should be one of ['auto', 'think', 'recaption', 'img_ratio'], but got {bot_task}."
|
1357 |
|
1358 |
if batch_message_list is None:
|
1359 |
# Simple text-to-image or text-cot-to-image task
|
|
|
1313 |
# We can add special tokens for the bot's latest message according to different tasks
|
1314 |
bot_response_prefix = dict(
|
1315 |
auto=_bot_prefix,
|
1316 |
+
image="",
|
1317 |
think=f"{_bot_prefix}<think>",
|
1318 |
recaption=f"{_bot_prefix}<recaption>",
|
1319 |
img_ratio=f"{_bot_prefix}{answer_prefix}<boi><img_size_{image_base_size}>",
|
|
|
1346 |
batch_system_prompt: Optional[List[str]] = None,
|
1347 |
batch_cot_text: Optional[List[str]] = None,
|
1348 |
max_length: Optional[int] = None,
|
1349 |
+
bot_task: str = "auto", # auto/image/think/recaption/img_ratio
|
1350 |
image_base_size: int = 1024,
|
1351 |
sequence_template: str = "pretrain",
|
1352 |
cfg_factor: int = 1,
|
1353 |
add_assistant_prefix: Optional[bool] = None,
|
1354 |
drop_think: bool = False,
|
1355 |
) -> Dict[str, Any]:
|
1356 |
+
assert bot_task in ["image", "auto", "think", "recaption", "img_ratio"], \
|
1357 |
+
f"bot_task should be one of ['image', 'auto', 'think', 'recaption', 'img_ratio'], but got {bot_task}."
|
1358 |
|
1359 |
if batch_message_list is None:
|
1360 |
# Simple text-to-image or text-cot-to-image task
|