How to solve `AttributeError: Qwen2TokenizerFast has no attribute start_image_token`
Traceback (most recent call last):
File "/home/Guanjq/Work/MTXray/test_script/lab4_eval_mtxray_version_1_6_compare.py", line 312, in
generator = InternVL3_5_8B()
File "/home/Guanjq/Work/MTXray/test_script/../projs/QWenVL/generation.py", line 235, in init
self.processor = AutoProcessor.from_pretrained(
File "/mnt/Guanjq/miniconda3/envs/qwen/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 376, in from_pretrained
return processor_class.from_pretrained(
File "/mnt/Guanjq/miniconda3/envs/qwen/lib/python3.10/site-packages/transformers/processing_utils.py", line 1187, in from_pretrained
return cls.from_args_and_dict(args, processor_dict, **kwargs)
File "/mnt/Guanjq/miniconda3/envs/qwen/lib/python3.10/site-packages/transformers/processing_utils.py", line 982, in from_args_and_dict
processor = cls(*args, **processor_dict)
File "/mnt/Guanjq/miniconda3/envs/qwen/lib/python3.10/site-packages/transformers/models/internvl/processing_internvl.py", line 95, in init
self.start_image_token = tokenizer.start_image_token
File "/mnt/Guanjq/miniconda3/envs/qwen/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1111, in getattr
raise AttributeError(f"{self.class.name} has no attribute {key}")
AttributeError: Qwen2TokenizerFast has no attribute start_image_token
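From the trace, the transformers-native InternVLProcessor reads tokenizer.start_image_token in its __init__, but the original OpenGVLab/InternVL3_5-8B repo ships a plain Qwen2 tokenizer that never defines it. A minimal sketch of the usual workaround, assuming the transformers-converted weights are published under the "-HF" suffix (the repo id below is an assumption):

```python
from transformers import AutoProcessor, AutoTokenizer

# Assumed repo id: the "-HF" variant carries the transformers-native processor
# and tokenizer configs, including the image special tokens read in __init__.
processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL3_5-8B-HF")

# The original (non-HF) repo targets the custom InternVL code path instead;
# if you stay on it, load via trust_remote_code rather than AutoProcessor:
tokenizer = AutoTokenizer.from_pretrained(
    "OpenGVLab/InternVL3_5-8B", trust_remote_code=True
)
```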
same
But the '-HF' models don't seem to have .generate()... :(
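If that comes from loading the "-HF" checkpoint through plain AutoModel, one possible explanation (a sketch under that assumption, not verified against the repo): AutoModel returns the bare backbone without a language-modeling head, while AutoModelForImageTextToText mixes in GenerationMixin and does expose .generate():

```python
from transformers import AutoModelForImageTextToText, AutoProcessor

# Assumed repo id; this auto class attaches the conditional-generation head,
# so model.generate() exists, unlike with a bare AutoModel load.
model = AutoModelForImageTextToText.from_pretrained(
    "OpenGVLab/InternVL3_5-8B-HF", torch_dtype="auto", device_map="auto"
)
processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL3_5-8B-HF")
print(hasattr(model, "generate"))  # True for generation-capable classes
```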
Same here.
$python3 -m sglang.bench_serving --backend sglang-oai-chat --dataset-name image --num-prompts 3 --apply-chat-template --random-input-len 128 --random-output-len 20 --image-resolution 560x560 --image-format jpeg --image-count 1 --image-content random --random-range-ratio 0.1 --port 30000 --max-concurrency 1 --profile
/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.
import pynvml # type: ignore[import]
benchmark_args=Namespace(backend='sglang-oai-chat', base_url=None, host='0.0.0.0', port=30000, dataset_name='image', dataset_path='', model=None, served_model_name=None, tokenizer=None, num_prompts=3, sharegpt_output_len=None, sharegpt_context_len=None, random_input_len=128, random_output_len=20, random_range_ratio=0.1, image_count=1, image_resolution='560x560', image_format='jpeg', image_content='random', request_rate=inf, use_trace_timestamps=False, max_concurrency=1, output_file=None, output_details=False, print_requests=False, disable_tqdm=False, disable_stream=False, return_logprob=False, seed=1, disable_ignore_eos=False, extra_request_body=None, apply_chat_template=True, profile=True, plot_throughput=False, profile_activities=['CPU', 'GPU'], profile_num_steps=None, profile_by_stage=False, profile_stages=None, lora_name=None, lora_request_distribution='uniform', lora_zipf_alpha=1.5, prompt_suffix='', pd_separated=False, profile_prefill_url=None, profile_decode_url=None, flush_cache=False, warmup_requests=1, tokenize_prompt=False, gsp_num_groups=64, gsp_prompts_per_group=16, gsp_system_prompt_len=2048, gsp_question_len=128, gsp_output_len=256, gsp_range_ratio=1.0, mooncake_slowdown_factor=1.0, mooncake_num_rounds=1, mooncake_workload='conversation', tag=None)
Namespace(backend='sglang-oai-chat', base_url=None, host='0.0.0.0', port=30000, dataset_name='image', dataset_path='', model='OpenGVLab/InternVL3_5-8B', served_model_name=None, tokenizer=None, num_prompts=3, sharegpt_output_len=None, sharegpt_context_len=None, random_input_len=128, random_output_len=20, random_range_ratio=0.1, image_count=1, image_resolution='560x560', image_format='jpeg', image_content='random', request_rate=inf, use_trace_timestamps=False, max_concurrency=1, output_file=None, output_details=False, print_requests=False, disable_tqdm=False, disable_stream=False, return_logprob=False, seed=1, disable_ignore_eos=False, extra_request_body=None, apply_chat_template=True, profile=True, plot_throughput=False, profile_activities=['CPU', 'GPU'], profile_num_steps=None, profile_by_stage=False, profile_stages=None, lora_name=None, lora_request_distribution='uniform', lora_zipf_alpha=1.5, prompt_suffix='', pd_separated=False, profile_prefill_url=None, profile_decode_url=None, flush_cache=False, warmup_requests=1, tokenize_prompt=False, gsp_num_groups=64, gsp_prompts_per_group=16, gsp_system_prompt_len=2048, gsp_question_len=128, gsp_output_len=256, gsp_range_ratio=1.0, mooncake_slowdown_factor=1.0, mooncake_num_rounds=1, mooncake_workload='conversation', tag=None)
processor_config.json: 100%|██████████| 72.0/72.0 [00:00<00:00, 543kB/s]
preprocessor_config.json: 100%|██████████| 666/666 [00:00<00:00, 5.54MB/s]
video_preprocessor_config.json: 1.34kB [00:00, 1.75MB/s]
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/opt/conda/lib/python3.10/site-packages/sglang/bench_serving.py", line 2950, in <module>
run_benchmark(args)
File "/opt/conda/lib/python3.10/site-packages/sglang/bench_serving.py", line 2526, in run_benchmark
input_requests = get_dataset(args, tokenizer, model_id)
File "/opt/conda/lib/python3.10/site-packages/sglang/bench_serving.py", line 811, in get_dataset
processor = get_processor(model_id)
File "/opt/conda/lib/python3.10/site-packages/sglang/bench_serving.py", line 781, in get_processor
return AutoProcessor.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 396, in from_pretrained
return processor_class.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py", line 1396, in from_pretrained
return cls.from_args_and_dict(args, processor_dict, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py", line 1197, in from_args_and_dict
processor = cls(*args, **valid_kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/internvl/processing_internvl.py", line 83, in __init__
self.start_image_token = tokenizer.start_image_token
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1127, in __getattr__
raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
AttributeError: Qwen2TokenizerFast has no attribute start_image_token
I ended up just using the InternVL3 model instead of 3.5.
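For anyone hitting this in bench_serving: it fails because sglang's get_processor simply calls AutoProcessor.from_pretrained(model_id), so the same tokenizer issue applies. A quick sanity check to see which checkpoints trip the error before launching a run (repo ids below are assumptions):

```python
from transformers import AutoProcessor

# Probe each candidate checkpoint; ones shipping a plain Qwen2 tokenizer
# raise the start_image_token AttributeError seen above.
for repo in ("OpenGVLab/InternVL3_5-8B", "OpenGVLab/InternVL3-8B"):
    try:
        AutoProcessor.from_pretrained(repo)
        print(f"{repo}: processor loads fine")
    except AttributeError as err:
        print(f"{repo}: {err}")
```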