Jarvis73 committed on
Commit
630d5da
·
verified ·
1 Parent(s): f4d0d24

Fix a few bugs

Browse files

1. Fix a bug when specifying image size
2. Fix bug: ratio index out of range

Files changed (2) hide show
  1. hunyuan.py +5 -0
  2. tokenizer_wrapper.py +4 -3
hunyuan.py CHANGED
@@ -2344,6 +2344,7 @@ class HunyuanImage3ForCausalMM(HunyuanImage3PreTrainedModel, GenerationMixin):
2344
  extra_auto_stops = [tkw.boi_token_id]
2345
  stop_token_id = dict(
2346
  auto=[tkw.eos_token_id] + extra_auto_stops,
 
2347
  recaption=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
2348
  think=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
2349
  img_ratio=extra_auto_stops,
@@ -2642,6 +2643,10 @@ class HunyuanImage3ForCausalMM(HunyuanImage3PreTrainedModel, GenerationMixin):
2642
  prompt=prompt, cot_text=cot_text, bot_task="img_ratio", system_prompt=system_prompt, seed=seed)
2643
  outputs = self._generate(**model_inputs, **kwargs, verbose=verbose)
2644
  ratio_index = outputs[0, -1].item() - self._tkwrapper.ratio_token_offset
 
 
 
 
2645
  reso = self.image_processor.reso_group[ratio_index]
2646
  image_size = reso.height, reso.width
2647
 
 
2344
  extra_auto_stops = [tkw.boi_token_id]
2345
  stop_token_id = dict(
2346
  auto=[tkw.eos_token_id] + extra_auto_stops,
2347
+ image=[tkw.eos_token_id],
2348
  recaption=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
2349
  think=[tkw.end_recaption_token_id, tkw.end_answer_token_id, tkw.eos_token_id],
2350
  img_ratio=extra_auto_stops,
 
2643
  prompt=prompt, cot_text=cot_text, bot_task="img_ratio", system_prompt=system_prompt, seed=seed)
2644
  outputs = self._generate(**model_inputs, **kwargs, verbose=verbose)
2645
  ratio_index = outputs[0, -1].item() - self._tkwrapper.ratio_token_offset
2646
+ # In some cases, the generated ratio_index is out of range. A valid ratio_index should be in [0, 32].
2647
+ # If ratio_index is out of range, we set it to 16 (i.e., 1:1).
2648
+ if ratio_index < 0 or ratio_index >= len(self.image_processor.reso_group):
2649
+ ratio_index = 16
2650
  reso = self.image_processor.reso_group[ratio_index]
2651
  image_size = reso.height, reso.width
2652
 
tokenizer_wrapper.py CHANGED
@@ -1313,6 +1313,7 @@ class TokenizerWrapper(object):
1313
  # We can add special tokens for the bot lastest message according to different tasks
1314
  bot_response_prefix = dict(
1315
  auto=_bot_prefix,
 
1316
  think=f"{_bot_prefix}<think>",
1317
  recaption=f"{_bot_prefix}<recaption>",
1318
  img_ratio=f"{_bot_prefix}{answer_prefix}<boi><img_size_{image_base_size}>",
@@ -1345,15 +1346,15 @@ class TokenizerWrapper(object):
1345
  batch_system_prompt: Optional[List[str]] = None,
1346
  batch_cot_text: Optional[List[str]] = None,
1347
  max_length: Optional[int] = None,
1348
- bot_task: str = "auto", # auto/think/recaption/img_ratio
1349
  image_base_size: int = 1024,
1350
  sequence_template: str = "pretrain",
1351
  cfg_factor: int = 1,
1352
  add_assistant_prefix: Optional[bool] = None,
1353
  drop_think: bool = False,
1354
  ) -> Dict[str, Any]:
1355
- assert bot_task in ["auto", "think", "recaption", "img_ratio"], \
1356
- f"bot_task should be one of ['auto', 'think', 'recaption', 'img_ratio'], but got {bot_task}."
1357
 
1358
  if batch_message_list is None:
1359
  # Simple text-to-image or text-cot-to-image task
 
1313
  # We can add special tokens for the bot lastest message according to different tasks
1314
  bot_response_prefix = dict(
1315
  auto=_bot_prefix,
1316
+ image="",
1317
  think=f"{_bot_prefix}<think>",
1318
  recaption=f"{_bot_prefix}<recaption>",
1319
  img_ratio=f"{_bot_prefix}{answer_prefix}<boi><img_size_{image_base_size}>",
 
1346
  batch_system_prompt: Optional[List[str]] = None,
1347
  batch_cot_text: Optional[List[str]] = None,
1348
  max_length: Optional[int] = None,
1349
+ bot_task: str = "auto", # auto/image/think/recaption/img_ratio
1350
  image_base_size: int = 1024,
1351
  sequence_template: str = "pretrain",
1352
  cfg_factor: int = 1,
1353
  add_assistant_prefix: Optional[bool] = None,
1354
  drop_think: bool = False,
1355
  ) -> Dict[str, Any]:
1356
+ assert bot_task in ["image", "auto", "think", "recaption", "img_ratio"], \
1357
+ f"bot_task should be one of ['image', 'auto', 'think', 'recaption', 'img_ratio'], but got {bot_task}."
1358
 
1359
  if batch_message_list is None:
1360
  # Simple text-to-image or text-cot-to-image task