RaushanTurganbay HF staff committed on
Commit
1501a36
·
1 Parent(s): e62441b

update extra special tokens

Browse files
preprocessor_config.json CHANGED
@@ -10,7 +10,7 @@
10
  0.5,
11
  0.5
12
  ],
13
- "image_processor_type": "Idefics3ImageProcessor",
14
  "image_std": [
15
  0.5,
16
  0.5,
@@ -19,15 +19,17 @@
19
  "max_image_size": {
20
  "longest_edge": 512
21
  },
22
- "video_sampling": {
23
- "fps": 1,
24
- "max_frames": 64,
25
- "video_size": {"longest_edge": 384}
26
- },
27
  "processor_class": "SmolVLMProcessor",
28
  "resample": 1,
29
  "rescale_factor": 0.00392156862745098,
30
  "size": {
31
  "longest_edge": 2048
 
 
 
 
 
 
 
32
  }
33
  }
 
10
  0.5,
11
  0.5
12
  ],
13
+ "image_processor_type": "SmolVLMImageProcessor",
14
  "image_std": [
15
  0.5,
16
  0.5,
 
19
  "max_image_size": {
20
  "longest_edge": 512
21
  },
 
 
 
 
 
22
  "processor_class": "SmolVLMProcessor",
23
  "resample": 1,
24
  "rescale_factor": 0.00392156862745098,
25
  "size": {
26
  "longest_edge": 2048
27
+ },
28
+ "video_sampling": {
29
+ "fps": 1,
30
+ "max_frames": 64,
31
+ "video_size": {
32
+ "longest_edge": 384
33
+ }
34
  }
35
  }
special_tokens_map.json CHANGED
@@ -1,26 +1,8 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<fake_token_around_image>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<image>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<end_of_utterance>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- }
24
  ],
25
  "bos_token": {
26
  "content": "<|im_start|>",
@@ -29,6 +11,7 @@
29
  "rstrip": false,
30
  "single_word": false
31
  },
 
32
  "eos_token": {
33
  "content": "<end_of_utterance>",
34
  "lstrip": false,
@@ -36,6 +19,9 @@
36
  "rstrip": false,
37
  "single_word": false
38
  },
 
 
 
39
  "pad_token": {
40
  "content": "<|im_end|>",
41
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ "<fake_token_around_image>",
4
+ "<image>",
5
+ "<end_of_utterance>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ],
7
  "bos_token": {
8
  "content": "<|im_start|>",
 
11
  "rstrip": false,
12
  "single_word": false
13
  },
14
+ "end_of_utterance_token": "<end_of_utterance>",
15
  "eos_token": {
16
  "content": "<end_of_utterance>",
17
  "lstrip": false,
 
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
+ "fake_image_token": "<fake_token_around_image>",
23
+ "global_image_token": "<global-img>",
24
+ "image_token": "<image>",
25
  "pad_token": {
26
  "content": "<|im_end|>",
27
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -1170,12 +1170,21 @@
1170
  "bos_token": "<|im_start|>",
1171
  "chat_template": "<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>\n{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
1172
  "clean_up_tokenization_spaces": false,
 
1173
  "eos_token": "<end_of_utterance>",
1174
- "extra_special_tokens": {},
 
 
 
 
 
 
 
 
1175
  "legacy": false,
1176
  "model_max_length": 8192,
1177
  "pad_token": "<|im_end|>",
1178
- "processor_class": "Idefics3Processor",
1179
  "tokenizer_class": "GPT2Tokenizer",
1180
  "truncation_side": "left",
1181
  "unk_token": "<|endoftext|>",
 
1170
  "bos_token": "<|im_start|>",
1171
  "chat_template": "<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>\n{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
1172
  "clean_up_tokenization_spaces": false,
1173
+ "end_of_utterance_token": "<end_of_utterance>",
1174
  "eos_token": "<end_of_utterance>",
1175
+ "extra_special_tokens": {
1176
+ "end_of_utterance_token": "<end_of_utterance>",
1177
+ "fake_image_token": "<fake_token_around_image>",
1178
+ "global_image_token": "<global-img>",
1179
+ "image_token": "<image>"
1180
+ },
1181
+ "fake_image_token": "<fake_token_around_image>",
1182
+ "global_image_token": "<global-img>",
1183
+ "image_token": "<image>",
1184
  "legacy": false,
1185
  "model_max_length": 8192,
1186
  "pad_token": "<|im_end|>",
1187
+ "processor_class": "SmolVLMProcessor",
1188
  "tokenizer_class": "GPT2Tokenizer",
1189
  "truncation_side": "left",
1190
  "unk_token": "<|endoftext|>",