doyoungkim committed on
Commit
e6e0901
·
verified ·
1 Parent(s): a5f0ee2

Add action tokens and update config

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/.gitattributes +35 -0
  2. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/README.md +270 -0
  3. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/added_tokens.json +386 -0
  4. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/chat_template.jinja +2 -0
  5. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/chat_template.json +3 -0
  6. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/config.json +141 -0
  7. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/generation_config.json +7 -0
  8. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/merges.txt +0 -0
  9. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/model.safetensors +3 -0
  10. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged.onnx +3 -0
  11. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_bnb4.onnx +3 -0
  12. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_fp16.onnx +3 -0
  13. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_int8.onnx +3 -0
  14. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_q4.onnx +3 -0
  15. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_q4f16.onnx +3 -0
  16. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_quantized.onnx +3 -0
  17. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_uint8.onnx +3 -0
  18. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens.onnx +3 -0
  19. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_bnb4.onnx +3 -0
  20. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_fp16.onnx +3 -0
  21. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_int8.onnx +3 -0
  22. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_q4.onnx +3 -0
  23. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_q4f16.onnx +3 -0
  24. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_quantized.onnx +3 -0
  25. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_uint8.onnx +3 -0
  26. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder.onnx +3 -0
  27. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_bnb4.onnx +3 -0
  28. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_fp16.onnx +3 -0
  29. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_int8.onnx +3 -0
  30. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_q4.onnx +3 -0
  31. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_q4f16.onnx +3 -0
  32. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_quantized.onnx +3 -0
  33. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_uint8.onnx +3 -0
  34. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/preprocessor_config.json +35 -0
  35. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/processor_config.json +4 -0
  36. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/special_tokens_map.json +78 -0
  37. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/tokenizer.json +0 -0
  38. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/tokenizer_config.json +3242 -0
  39. HuggingFaceTB_SmolVLM2-500M-Video-Instruct/vocab.json +0 -0
  40. README.md +269 -0
  41. SmolVLM2-500M-Video-Instruct-Action/.gitattributes +35 -0
  42. SmolVLM2-500M-Video-Instruct-Action/README.md +270 -0
  43. SmolVLM2-500M-Video-Instruct-Action/added_tokens.json +131 -0
  44. SmolVLM2-500M-Video-Instruct-Action/chat_template.jinja +2 -0
  45. SmolVLM2-500M-Video-Instruct-Action/chat_template.json +3 -0
  46. SmolVLM2-500M-Video-Instruct-Action/config.json +157 -0
  47. SmolVLM2-500M-Video-Instruct-Action/generation_config.json +7 -0
  48. SmolVLM2-500M-Video-Instruct-Action/merges.txt +0 -0
  49. SmolVLM2-500M-Video-Instruct-Action/model.safetensors +3 -0
  50. SmolVLM2-500M-Video-Instruct-Action/onnx/decoder_model_merged.onnx +3 -0
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/README.md ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ datasets:
5
+ - HuggingFaceM4/the_cauldron
6
+ - HuggingFaceM4/Docmatix
7
+ - lmms-lab/LLaVA-OneVision-Data
8
+ - lmms-lab/M4-Instruct-Data
9
+ - HuggingFaceFV/finevideo
10
+ - MAmmoTH-VL/MAmmoTH-VL-Instruct-12M
11
+ - lmms-lab/LLaVA-Video-178K
12
+ - orrzohar/Video-STaR
13
+ - Mutonix/Vript
14
+ - TIGER-Lab/VISTA-400K
15
+ - Enxin/MovieChat-1K_train
16
+ - ShareGPT4Video/ShareGPT4Video
17
+ pipeline_tag: image-text-to-text
18
+ language:
19
+ - en
20
+ base_model:
21
+ - HuggingFaceTB/SmolVLM-500M-Instruct
22
+ ---
23
+
24
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM2_banner.png" width="800" height="auto" alt="Image description">
25
+
26
+ # SmolVLM2-500M-Video
27
+
28
+ SmolVLM2-500M-Video is a lightweight multimodal model designed to analyze video content. The model processes videos, images, and text inputs to generate text outputs - whether answering questions about media files, comparing visual content, or transcribing text from images. Despite its compact size, requiring only 1.8GB of GPU RAM for video inference, it delivers robust performance on complex multimodal tasks. This efficiency makes it particularly well-suited for on-device applications where computational resources may be limited.
29
+ ## Model Summary
30
+
31
+ - **Developed by:** Hugging Face 🤗
32
+ - **Model type:** Multi-modal model (image/multi-image/video/text)
33
+ - **Language(s) (NLP):** English
34
+ - **License:** Apache 2.0
35
+ - **Architecture:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)
36
+
37
+ ## Resources
38
+
39
+ - **Demo:** [Video Highlight Generator](https://huggingface.co/spaces/HuggingFaceTB/SmolVLM2-HighlightGenerator)
40
+ - **Blog:** [Blog post](https://huggingface.co/blog/smolvlm2)
41
+
42
+ ## Uses
43
+
44
+ SmolVLM2 can be used for inference on multimodal (video / image / text) tasks where the input consists of text queries along with video or one or more images. Text and media files can be interleaved arbitrarily, enabling tasks like captioning, visual question answering, and storytelling based on visual content. The model does not support image or video generation.
45
+
46
+ To fine-tune SmolVLM2 on a specific task, you can follow [the fine-tuning tutorial](https://github.com/huggingface/smollm/blob/main/vision/finetuning/Smol_VLM_FT.ipynb).
47
+
48
+ ## Evaluation
49
+
50
+ We evaluated the performance of the SmolVLM2 family on the following scientific benchmarks:
51
+
52
+ | Size | Video-MME | MLVU | MVBench |
53
+ |----------|-----------------|----------|---------------|
54
+ | 2.2B | 52.1 | 55.2 | 46.27 |
55
+ | 500M | 42.2 | 47.3 | 39.73 |
56
+ | 256M | 33.7 | 40.6 | 32.7 |
57
+
58
+
59
+ ### How to get started
60
+
61
+ You can use transformers to load, run inference with, and fine-tune SmolVLM. Make sure you have num2words, flash-attn, and the latest version of transformers installed.
62
+ You can load the model as follows.
63
+
64
+ ```python
65
+ from transformers import AutoProcessor, AutoModelForImageTextToText
66
+ import torch
67
+
68
+ model_path = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
69
+ processor = AutoProcessor.from_pretrained(model_path)
70
+ model = AutoModelForImageTextToText.from_pretrained(
71
+ model_path,
72
+ torch_dtype=torch.bfloat16,
73
+ _attn_implementation="flash_attention_2"
74
+ ).to("cuda")
75
+ ```
76
+
77
+ #### Simple Inference
78
+
79
+ You can preprocess your inputs with the chat template and pass the result directly to the model.
80
+
81
+ ```python
82
+ messages = [
83
+ {
84
+ "role": "user",
85
+ "content": [
86
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
87
+ {"type": "text", "text": "Can you describe this image?"},
88
+ ]
89
+ },
90
+ ]
91
+
92
+ inputs = processor.apply_chat_template(
93
+ messages,
94
+ add_generation_prompt=True,
95
+ tokenize=True,
96
+ return_dict=True,
97
+ return_tensors="pt",
98
+ ).to(model.device, dtype=torch.bfloat16)
99
+
100
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
101
+ generated_texts = processor.batch_decode(
102
+ generated_ids,
103
+ skip_special_tokens=True,
104
+ )
105
+ print(generated_texts[0])
106
+ ```
107
+
108
+ #### Video Inference
109
+
110
+ To use SmolVLM2 for video inference, make sure you have decord installed.
111
+
112
+ ```python
113
+ messages = [
114
+ {
115
+ "role": "user",
116
+ "content": [
117
+ {"type": "video", "path": "path_to_video.mp4"},
118
+ {"type": "text", "text": "Describe this video in detail"}
119
+ ]
120
+ },
121
+ ]
122
+
123
+ inputs = processor.apply_chat_template(
124
+ messages,
125
+ add_generation_prompt=True,
126
+ tokenize=True,
127
+ return_dict=True,
128
+ return_tensors="pt",
129
+ ).to(model.device, dtype=torch.bfloat16)
130
+
131
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
132
+ generated_texts = processor.batch_decode(
133
+ generated_ids,
134
+ skip_special_tokens=True,
135
+ )
136
+
137
+ print(generated_texts[0])
138
+ ```
139
+ #### Multi-image Interleaved Inference
140
+
141
+ You can interleave multiple media with text using chat templates.
142
+
143
+ ```python
144
+ import torch
145
+
146
+
147
+ messages = [
148
+ {
149
+ "role": "user",
150
+ "content": [
151
+ {"type": "text", "text": "What is the similarity between these two images?"},
152
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
153
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"},
154
+ ]
155
+ },
156
+ ]
157
+
158
+ inputs = processor.apply_chat_template(
159
+ messages,
160
+ add_generation_prompt=True,
161
+ tokenize=True,
162
+ return_dict=True,
163
+ return_tensors="pt",
164
+ ).to(model.device, dtype=torch.bfloat16)
165
+
166
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
167
+ generated_texts = processor.batch_decode(
168
+ generated_ids,
169
+ skip_special_tokens=True,
170
+ )
171
+ print(generated_texts[0])
172
+ ```
173
+
174
+
175
+ ### Model optimizations
176
+
177
+ ## Misuse and Out-of-scope Use
178
+
179
+ SmolVLM is not intended for high-stakes scenarios or critical decision-making processes that affect an individual's well-being or livelihood. The model may produce content that appears factual but may not be accurate. Misuse includes, but is not limited to:
180
+
181
+ - Prohibited Uses:
182
+ - Evaluating or scoring individuals (e.g., in employment, education, credit)
183
+ - Critical automated decision-making
184
+ - Generating unreliable factual content
185
+ - Malicious Activities:
186
+ - Spam generation
187
+ - Disinformation campaigns
188
+ - Harassment or abuse
189
+ - Unauthorized surveillance
190
+
191
+ ### License
192
+
193
+ SmolVLM2 is built upon [SigLIP](https://huggingface.co/google/siglip-base-patch16-512) as the image encoder and [SmolLM2](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) as the text decoder.
194
+
195
+ We release the SmolVLM2 checkpoints under the Apache 2.0 license.
196
+
197
+ ## Citation information
198
+ You can cite us in the following way:
199
+ ```bibtex
200
+ @article{marafioti2025smolvlm,
201
+ title={SmolVLM: Redefining small and efficient multimodal models},
202
+ author={Andrés Marafioti and Orr Zohar and Miquel Farré and Merve Noyan and Elie Bakouch and Pedro Cuenca and Cyril Zakka and Loubna Ben Allal and Anton Lozhkov and Nouamane Tazi and Vaibhav Srivastav and Joshua Lochner and Hugo Larcher and Mathieu Morlon and Lewis Tunstall and Leandro von Werra and Thomas Wolf},
203
+ journal={arXiv preprint arXiv:2504.05299},
204
+ year={2025}
205
+ }
206
+ ```
207
+
208
+ ## Training Data
209
+ SmolVLM2 was trained on 3.3M samples originally drawn from ten different datasets: [LLaVA-OneVision](https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data), [M4-Instruct](https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data), [MAmmoTH-VL](https://huggingface.co/datasets/MAmmoTH-VL/MAmmoTH-VL-Instruct-12M), [LLaVA-Video-178K](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K), [FineVideo](https://huggingface.co/datasets/HuggingFaceFV/finevideo), [Video-STaR](https://huggingface.co/datasets/orrzohar/Video-STaR), [Vript](https://huggingface.co/datasets/Mutonix/Vript), [VISTA-400K](https://huggingface.co/datasets/TIGER-Lab/VISTA-400K), [MovieChat](https://huggingface.co/datasets/Enxin/MovieChat-1K_train), and [ShareGPT4Video](https://huggingface.co/datasets/ShareGPT4Video/ShareGPT4Video).
210
+ The following tables give a general overview of the samples across modalities and the source of those samples.
211
+ <!--
212
+ <center><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_data_split.png" width="auto" height="auto" alt="Image description">
213
+ </center>
214
+
215
+ ### Details
216
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_datadetails.png" width="auto" height="auto" alt="Image description"> -->
217
+
218
+ ## Data Split per modality
219
+
220
+ | Data Type | Percentage |
221
+ |--------------|------------|
222
+ | Image | 34.4% |
223
+ | Text | 20.2% |
224
+ | Video | 33.0% |
225
+ | Multi-image | 12.3% |
226
+
227
+
228
+ ## Granular dataset slices per modality
229
+
230
+ ### Text Datasets
231
+ | Dataset | Percentage |
232
+ |--------------------------------------------|------------|
233
+ | llava-onevision/magpie_pro_ft3_80b_mt | 6.8% |
234
+ | llava-onevision/magpie_pro_ft3_80b_tt | 6.8% |
235
+ | llava-onevision/magpie_pro_qwen2_72b_tt | 5.8% |
236
+ | llava-onevision/mathqa | 0.9% |
237
+
238
+ ### Multi-image Datasets
239
+ | Dataset | Percentage |
240
+ |--------------------------------------------|------------|
241
+ | m4-instruct-data/m4_instruct_multiimage | 10.4% |
242
+ | mammoth/multiimage-cap6 | 1.9% |
243
+
244
+ ### Image Datasets
245
+ | Dataset | Percentage |
246
+ |--------------------------------------------|------------|
247
+ | llava-onevision/other | 17.4% |
248
+ | llava-onevision/vision_flan | 3.9% |
249
+ | llava-onevision/mavis_math_metagen | 2.6% |
250
+ | llava-onevision/mavis_math_rule_geo | 2.5% |
251
+ | llava-onevision/sharegpt4o | 1.7% |
252
+ | llava-onevision/sharegpt4v_coco | 1.5% |
253
+ | llava-onevision/image_textualization | 1.3% |
254
+ | llava-onevision/sharegpt4v_llava | 0.9% |
255
+ | llava-onevision/mapqa | 0.9% |
256
+ | llava-onevision/qa | 0.8% |
257
+ | llava-onevision/textocr | 0.8% |
258
+
259
+ ### Video Datasets
260
+ | Dataset | Percentage |
261
+ |--------------------------------------------|------------|
262
+ | llava-video-178k/1-2m | 7.3% |
263
+ | llava-video-178k/2-3m | 7.0% |
264
+ | other-video/combined | 5.7% |
265
+ | llava-video-178k/hound | 4.4% |
266
+ | llava-video-178k/0-30s | 2.4% |
267
+ | video-star/starb | 2.2% |
268
+ | vista-400k/combined | 2.2% |
269
+ | vript/long | 1.0% |
270
+ | ShareGPT4Video/all | 0.8% |
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/added_tokens.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<action_0>": 49280,
3
+ "<action_100>": 49380,
4
+ "<action_101>": 49381,
5
+ "<action_102>": 49382,
6
+ "<action_103>": 49383,
7
+ "<action_104>": 49384,
8
+ "<action_105>": 49385,
9
+ "<action_106>": 49386,
10
+ "<action_107>": 49387,
11
+ "<action_108>": 49388,
12
+ "<action_109>": 49389,
13
+ "<action_10>": 49290,
14
+ "<action_110>": 49390,
15
+ "<action_111>": 49391,
16
+ "<action_112>": 49392,
17
+ "<action_113>": 49393,
18
+ "<action_114>": 49394,
19
+ "<action_115>": 49395,
20
+ "<action_116>": 49396,
21
+ "<action_117>": 49397,
22
+ "<action_118>": 49398,
23
+ "<action_119>": 49399,
24
+ "<action_11>": 49291,
25
+ "<action_120>": 49400,
26
+ "<action_121>": 49401,
27
+ "<action_122>": 49402,
28
+ "<action_123>": 49403,
29
+ "<action_124>": 49404,
30
+ "<action_125>": 49405,
31
+ "<action_126>": 49406,
32
+ "<action_127>": 49407,
33
+ "<action_128>": 49408,
34
+ "<action_129>": 49409,
35
+ "<action_12>": 49292,
36
+ "<action_130>": 49410,
37
+ "<action_131>": 49411,
38
+ "<action_132>": 49412,
39
+ "<action_133>": 49413,
40
+ "<action_134>": 49414,
41
+ "<action_135>": 49415,
42
+ "<action_136>": 49416,
43
+ "<action_137>": 49417,
44
+ "<action_138>": 49418,
45
+ "<action_139>": 49419,
46
+ "<action_13>": 49293,
47
+ "<action_140>": 49420,
48
+ "<action_141>": 49421,
49
+ "<action_142>": 49422,
50
+ "<action_143>": 49423,
51
+ "<action_144>": 49424,
52
+ "<action_145>": 49425,
53
+ "<action_146>": 49426,
54
+ "<action_147>": 49427,
55
+ "<action_148>": 49428,
56
+ "<action_149>": 49429,
57
+ "<action_14>": 49294,
58
+ "<action_150>": 49430,
59
+ "<action_151>": 49431,
60
+ "<action_152>": 49432,
61
+ "<action_153>": 49433,
62
+ "<action_154>": 49434,
63
+ "<action_155>": 49435,
64
+ "<action_156>": 49436,
65
+ "<action_157>": 49437,
66
+ "<action_158>": 49438,
67
+ "<action_159>": 49439,
68
+ "<action_15>": 49295,
69
+ "<action_160>": 49440,
70
+ "<action_161>": 49441,
71
+ "<action_162>": 49442,
72
+ "<action_163>": 49443,
73
+ "<action_164>": 49444,
74
+ "<action_165>": 49445,
75
+ "<action_166>": 49446,
76
+ "<action_167>": 49447,
77
+ "<action_168>": 49448,
78
+ "<action_169>": 49449,
79
+ "<action_16>": 49296,
80
+ "<action_170>": 49450,
81
+ "<action_171>": 49451,
82
+ "<action_172>": 49452,
83
+ "<action_173>": 49453,
84
+ "<action_174>": 49454,
85
+ "<action_175>": 49455,
86
+ "<action_176>": 49456,
87
+ "<action_177>": 49457,
88
+ "<action_178>": 49458,
89
+ "<action_179>": 49459,
90
+ "<action_17>": 49297,
91
+ "<action_180>": 49460,
92
+ "<action_181>": 49461,
93
+ "<action_182>": 49462,
94
+ "<action_183>": 49463,
95
+ "<action_184>": 49464,
96
+ "<action_185>": 49465,
97
+ "<action_186>": 49466,
98
+ "<action_187>": 49467,
99
+ "<action_188>": 49468,
100
+ "<action_189>": 49469,
101
+ "<action_18>": 49298,
102
+ "<action_190>": 49470,
103
+ "<action_191>": 49471,
104
+ "<action_192>": 49472,
105
+ "<action_193>": 49473,
106
+ "<action_194>": 49474,
107
+ "<action_195>": 49475,
108
+ "<action_196>": 49476,
109
+ "<action_197>": 49477,
110
+ "<action_198>": 49478,
111
+ "<action_199>": 49479,
112
+ "<action_19>": 49299,
113
+ "<action_1>": 49281,
114
+ "<action_200>": 49480,
115
+ "<action_201>": 49481,
116
+ "<action_202>": 49482,
117
+ "<action_203>": 49483,
118
+ "<action_204>": 49484,
119
+ "<action_205>": 49485,
120
+ "<action_206>": 49486,
121
+ "<action_207>": 49487,
122
+ "<action_208>": 49488,
123
+ "<action_209>": 49489,
124
+ "<action_20>": 49300,
125
+ "<action_210>": 49490,
126
+ "<action_211>": 49491,
127
+ "<action_212>": 49492,
128
+ "<action_213>": 49493,
129
+ "<action_214>": 49494,
130
+ "<action_215>": 49495,
131
+ "<action_216>": 49496,
132
+ "<action_217>": 49497,
133
+ "<action_218>": 49498,
134
+ "<action_219>": 49499,
135
+ "<action_21>": 49301,
136
+ "<action_220>": 49500,
137
+ "<action_221>": 49501,
138
+ "<action_222>": 49502,
139
+ "<action_223>": 49503,
140
+ "<action_224>": 49504,
141
+ "<action_225>": 49505,
142
+ "<action_226>": 49506,
143
+ "<action_227>": 49507,
144
+ "<action_228>": 49508,
145
+ "<action_229>": 49509,
146
+ "<action_22>": 49302,
147
+ "<action_230>": 49510,
148
+ "<action_231>": 49511,
149
+ "<action_232>": 49512,
150
+ "<action_233>": 49513,
151
+ "<action_234>": 49514,
152
+ "<action_235>": 49515,
153
+ "<action_236>": 49516,
154
+ "<action_237>": 49517,
155
+ "<action_238>": 49518,
156
+ "<action_239>": 49519,
157
+ "<action_23>": 49303,
158
+ "<action_240>": 49520,
159
+ "<action_241>": 49521,
160
+ "<action_242>": 49522,
161
+ "<action_243>": 49523,
162
+ "<action_244>": 49524,
163
+ "<action_245>": 49525,
164
+ "<action_246>": 49526,
165
+ "<action_247>": 49527,
166
+ "<action_248>": 49528,
167
+ "<action_249>": 49529,
168
+ "<action_24>": 49304,
169
+ "<action_250>": 49530,
170
+ "<action_251>": 49531,
171
+ "<action_252>": 49532,
172
+ "<action_253>": 49533,
173
+ "<action_254>": 49534,
174
+ "<action_255>": 49535,
175
+ "<action_25>": 49305,
176
+ "<action_26>": 49306,
177
+ "<action_27>": 49307,
178
+ "<action_28>": 49308,
179
+ "<action_29>": 49309,
180
+ "<action_2>": 49282,
181
+ "<action_30>": 49310,
182
+ "<action_31>": 49311,
183
+ "<action_32>": 49312,
184
+ "<action_33>": 49313,
185
+ "<action_34>": 49314,
186
+ "<action_35>": 49315,
187
+ "<action_36>": 49316,
188
+ "<action_37>": 49317,
189
+ "<action_38>": 49318,
190
+ "<action_39>": 49319,
191
+ "<action_3>": 49283,
192
+ "<action_40>": 49320,
193
+ "<action_41>": 49321,
194
+ "<action_42>": 49322,
195
+ "<action_43>": 49323,
196
+ "<action_44>": 49324,
197
+ "<action_45>": 49325,
198
+ "<action_46>": 49326,
199
+ "<action_47>": 49327,
200
+ "<action_48>": 49328,
201
+ "<action_49>": 49329,
202
+ "<action_4>": 49284,
203
+ "<action_50>": 49330,
204
+ "<action_51>": 49331,
205
+ "<action_52>": 49332,
206
+ "<action_53>": 49333,
207
+ "<action_54>": 49334,
208
+ "<action_55>": 49335,
209
+ "<action_56>": 49336,
210
+ "<action_57>": 49337,
211
+ "<action_58>": 49338,
212
+ "<action_59>": 49339,
213
+ "<action_5>": 49285,
214
+ "<action_60>": 49340,
215
+ "<action_61>": 49341,
216
+ "<action_62>": 49342,
217
+ "<action_63>": 49343,
218
+ "<action_64>": 49344,
219
+ "<action_65>": 49345,
220
+ "<action_66>": 49346,
221
+ "<action_67>": 49347,
222
+ "<action_68>": 49348,
223
+ "<action_69>": 49349,
224
+ "<action_6>": 49286,
225
+ "<action_70>": 49350,
226
+ "<action_71>": 49351,
227
+ "<action_72>": 49352,
228
+ "<action_73>": 49353,
229
+ "<action_74>": 49354,
230
+ "<action_75>": 49355,
231
+ "<action_76>": 49356,
232
+ "<action_77>": 49357,
233
+ "<action_78>": 49358,
234
+ "<action_79>": 49359,
235
+ "<action_7>": 49287,
236
+ "<action_80>": 49360,
237
+ "<action_81>": 49361,
238
+ "<action_82>": 49362,
239
+ "<action_83>": 49363,
240
+ "<action_84>": 49364,
241
+ "<action_85>": 49365,
242
+ "<action_86>": 49366,
243
+ "<action_87>": 49367,
244
+ "<action_88>": 49368,
245
+ "<action_89>": 49369,
246
+ "<action_8>": 49288,
247
+ "<action_90>": 49370,
248
+ "<action_91>": 49371,
249
+ "<action_92>": 49372,
250
+ "<action_93>": 49373,
251
+ "<action_94>": 49374,
252
+ "<action_95>": 49375,
253
+ "<action_96>": 49376,
254
+ "<action_97>": 49377,
255
+ "<action_98>": 49378,
256
+ "<action_99>": 49379,
257
+ "<action_9>": 49289,
258
+ "<end_of_utterance>": 49279,
259
+ "<fake_token_around_image>": 49189,
260
+ "<global-img>": 49152,
261
+ "<image>": 49190,
262
+ "<row_1_col_1>": 49153,
263
+ "<row_1_col_2>": 49154,
264
+ "<row_1_col_3>": 49155,
265
+ "<row_1_col_4>": 49156,
266
+ "<row_1_col_5>": 49157,
267
+ "<row_1_col_6>": 49158,
268
+ "<row_2_col_1>": 49159,
269
+ "<row_2_col_2>": 49160,
270
+ "<row_2_col_3>": 49161,
271
+ "<row_2_col_4>": 49162,
272
+ "<row_2_col_5>": 49163,
273
+ "<row_2_col_6>": 49164,
274
+ "<row_3_col_1>": 49165,
275
+ "<row_3_col_2>": 49166,
276
+ "<row_3_col_3>": 49167,
277
+ "<row_3_col_4>": 49168,
278
+ "<row_3_col_5>": 49169,
279
+ "<row_3_col_6>": 49170,
280
+ "<row_4_col_1>": 49171,
281
+ "<row_4_col_2>": 49172,
282
+ "<row_4_col_3>": 49173,
283
+ "<row_4_col_4>": 49174,
284
+ "<row_4_col_5>": 49175,
285
+ "<row_4_col_6>": 49176,
286
+ "<row_5_col_1>": 49177,
287
+ "<row_5_col_2>": 49178,
288
+ "<row_5_col_3>": 49179,
289
+ "<row_5_col_4>": 49180,
290
+ "<row_5_col_5>": 49181,
291
+ "<row_5_col_6>": 49182,
292
+ "<row_6_col_1>": 49183,
293
+ "<row_6_col_2>": 49184,
294
+ "<row_6_col_3>": 49185,
295
+ "<row_6_col_4>": 49186,
296
+ "<row_6_col_5>": 49187,
297
+ "<row_6_col_6>": 49188,
298
+ "<|reserved_special_token_0|>": 49191,
299
+ "<|reserved_special_token_10|>": 49201,
300
+ "<|reserved_special_token_11|>": 49202,
301
+ "<|reserved_special_token_12|>": 49203,
302
+ "<|reserved_special_token_13|>": 49204,
303
+ "<|reserved_special_token_14|>": 49205,
304
+ "<|reserved_special_token_15|>": 49206,
305
+ "<|reserved_special_token_16|>": 49207,
306
+ "<|reserved_special_token_17|>": 49208,
307
+ "<|reserved_special_token_18|>": 49209,
308
+ "<|reserved_special_token_19|>": 49210,
309
+ "<|reserved_special_token_1|>": 49192,
310
+ "<|reserved_special_token_20|>": 49211,
311
+ "<|reserved_special_token_21|>": 49212,
312
+ "<|reserved_special_token_22|>": 49213,
313
+ "<|reserved_special_token_23|>": 49214,
314
+ "<|reserved_special_token_24|>": 49215,
315
+ "<|reserved_special_token_25|>": 49216,
316
+ "<|reserved_special_token_26|>": 49217,
317
+ "<|reserved_special_token_27|>": 49218,
318
+ "<|reserved_special_token_28|>": 49219,
319
+ "<|reserved_special_token_29|>": 49220,
320
+ "<|reserved_special_token_2|>": 49193,
321
+ "<|reserved_special_token_30|>": 49221,
322
+ "<|reserved_special_token_31|>": 49222,
323
+ "<|reserved_special_token_32|>": 49223,
324
+ "<|reserved_special_token_33|>": 49224,
325
+ "<|reserved_special_token_34|>": 49225,
326
+ "<|reserved_special_token_35|>": 49226,
327
+ "<|reserved_special_token_36|>": 49227,
328
+ "<|reserved_special_token_37|>": 49228,
329
+ "<|reserved_special_token_38|>": 49229,
330
+ "<|reserved_special_token_39|>": 49230,
331
+ "<|reserved_special_token_3|>": 49194,
332
+ "<|reserved_special_token_40|>": 49231,
333
+ "<|reserved_special_token_41|>": 49232,
334
+ "<|reserved_special_token_42|>": 49233,
335
+ "<|reserved_special_token_43|>": 49234,
336
+ "<|reserved_special_token_44|>": 49235,
337
+ "<|reserved_special_token_45|>": 49236,
338
+ "<|reserved_special_token_46|>": 49237,
339
+ "<|reserved_special_token_47|>": 49238,
340
+ "<|reserved_special_token_48|>": 49239,
341
+ "<|reserved_special_token_49|>": 49240,
342
+ "<|reserved_special_token_4|>": 49195,
343
+ "<|reserved_special_token_50|>": 49241,
344
+ "<|reserved_special_token_51|>": 49242,
345
+ "<|reserved_special_token_52|>": 49243,
346
+ "<|reserved_special_token_53|>": 49244,
347
+ "<|reserved_special_token_54|>": 49245,
348
+ "<|reserved_special_token_55|>": 49246,
349
+ "<|reserved_special_token_56|>": 49247,
350
+ "<|reserved_special_token_57|>": 49248,
351
+ "<|reserved_special_token_58|>": 49249,
352
+ "<|reserved_special_token_59|>": 49250,
353
+ "<|reserved_special_token_5|>": 49196,
354
+ "<|reserved_special_token_60|>": 49251,
355
+ "<|reserved_special_token_61|>": 49252,
356
+ "<|reserved_special_token_62|>": 49253,
357
+ "<|reserved_special_token_63|>": 49254,
358
+ "<|reserved_special_token_64|>": 49255,
359
+ "<|reserved_special_token_65|>": 49256,
360
+ "<|reserved_special_token_66|>": 49257,
361
+ "<|reserved_special_token_67|>": 49258,
362
+ "<|reserved_special_token_68|>": 49259,
363
+ "<|reserved_special_token_69|>": 49260,
364
+ "<|reserved_special_token_6|>": 49197,
365
+ "<|reserved_special_token_70|>": 49261,
366
+ "<|reserved_special_token_71|>": 49262,
367
+ "<|reserved_special_token_72|>": 49263,
368
+ "<|reserved_special_token_73|>": 49264,
369
+ "<|reserved_special_token_74|>": 49265,
370
+ "<|reserved_special_token_75|>": 49266,
371
+ "<|reserved_special_token_76|>": 49267,
372
+ "<|reserved_special_token_77|>": 49268,
373
+ "<|reserved_special_token_78|>": 49269,
374
+ "<|reserved_special_token_79|>": 49270,
375
+ "<|reserved_special_token_7|>": 49198,
376
+ "<|reserved_special_token_80|>": 49271,
377
+ "<|reserved_special_token_81|>": 49272,
378
+ "<|reserved_special_token_82|>": 49273,
379
+ "<|reserved_special_token_83|>": 49274,
380
+ "<|reserved_special_token_84|>": 49275,
381
+ "<|reserved_special_token_85|>": 49276,
382
+ "<|reserved_special_token_86|>": 49277,
383
+ "<|reserved_special_token_87|>": 49278,
384
+ "<|reserved_special_token_8|>": 49199,
385
+ "<|reserved_special_token_9|>": 49200
386
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/chat_template.jinja ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ <|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
2
+ {% endfor %}{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>\n{% endfor %}{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}"
3
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/config.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SmolVLMForConditionalGeneration"
4
+ ],
5
+ "image_token_id": 49190,
6
+ "model_type": "smolvlm",
7
+ "pad_token_id": 128002,
8
+ "scale_factor": 4,
9
+ "text_config": {
10
+ "_flash_attn_2_enabled": true,
11
+ "_name_or_path": "None",
12
+ "architectures": [
13
+ "VLlama3ForCausalLM"
14
+ ],
15
+ "head_dim": 64,
16
+ "hidden_size": 960,
17
+ "intermediate_size": 2560,
18
+ "is_llama_config": true,
19
+ "max_position_embeddings": 8192,
20
+ "model_type": "llama",
21
+ "neftune_noise_alpha": 0.0,
22
+ "num_attention_heads": 15,
23
+ "num_hidden_layers": 32,
24
+ "num_key_value_heads": 5,
25
+ "pad_token_id": 2,
26
+ "perceiver_config": {
27
+ "_attn_implementation_autoset": false,
28
+ "_name_or_path": "",
29
+ "add_cross_attention": false,
30
+ "architectures": null,
31
+ "attention_dropout": 0.0,
32
+ "bad_words_ids": null,
33
+ "begin_suppress_tokens": null,
34
+ "bos_token_id": null,
35
+ "chunk_size_feed_forward": 0,
36
+ "cross_attention_hidden_size": null,
37
+ "decoder_start_token_id": null,
38
+ "diversity_penalty": 0.0,
39
+ "do_sample": false,
40
+ "early_stopping": false,
41
+ "encoder_no_repeat_ngram_size": 0,
42
+ "eos_token_id": null,
43
+ "exponential_decay_length_penalty": null,
44
+ "finetuning_task": null,
45
+ "forced_bos_token_id": null,
46
+ "forced_eos_token_id": null,
47
+ "hidden_act": "silu",
48
+ "id2label": {
49
+ "0": "LABEL_0",
50
+ "1": "LABEL_1"
51
+ },
52
+ "is_decoder": false,
53
+ "is_encoder_decoder": false,
54
+ "label2id": {
55
+ "LABEL_0": 0,
56
+ "LABEL_1": 1
57
+ },
58
+ "length_penalty": 1.0,
59
+ "max_length": 20,
60
+ "min_length": 0,
61
+ "model_type": "vllama3",
62
+ "no_repeat_ngram_size": 0,
63
+ "num_beam_groups": 1,
64
+ "num_beams": 1,
65
+ "num_key_value_heads": 1,
66
+ "num_return_sequences": 1,
67
+ "output_attentions": false,
68
+ "output_hidden_states": false,
69
+ "output_scores": false,
70
+ "pad_token_id": null,
71
+ "prefix": null,
72
+ "problem_type": null,
73
+ "pruned_heads": {},
74
+ "qk_layer_norms_perceiver": false,
75
+ "remove_invalid_values": false,
76
+ "repetition_penalty": 1.0,
77
+ "resampler_depth": 6,
78
+ "resampler_head_dim": 96,
79
+ "resampler_n_heads": 16,
80
+ "resampler_n_latents": 64,
81
+ "return_dict": true,
82
+ "return_dict_in_generate": false,
83
+ "sep_token_id": null,
84
+ "suppress_tokens": null,
85
+ "task_specific_params": null,
86
+ "temperature": 1.0,
87
+ "tf_legacy_loss": false,
88
+ "tie_encoder_decoder": false,
89
+ "tie_word_embeddings": true,
90
+ "tokenizer_class": null,
91
+ "top_k": 50,
92
+ "top_p": 1.0,
93
+ "torch_dtype": null,
94
+ "torchscript": false,
95
+ "transformers_version": "4.46.0",
96
+ "typical_p": 1.0,
97
+ "use_bfloat16": false
98
+ },
99
+ "pixel_shuffle_factor": 4,
100
+ "qk_layer_norms": false,
101
+ "rms_norm_eps": 1e-05,
102
+ "rope_interleaved": false,
103
+ "rope_theta": 100000,
104
+ "torch_dtype": "bfloat16",
105
+ "transformers.js_config": {
106
+ "kv_cache_dtype": {
107
+ "fp16": "float16",
108
+ "q4f16": "float16"
109
+ }
110
+ },
111
+ "use_resampler": false,
112
+ "vocab_size": 49536
113
+ },
114
+ "tie_word_embeddings": false,
115
+ "torch_dtype": "float32",
116
+ "transformers.js_config": {
117
+ "kv_cache_dtype": {
118
+ "fp16": "float16",
119
+ "q4f16": "float16"
120
+ }
121
+ },
122
+ "transformers_version": "4.47.1",
123
+ "use_cache": false,
124
+ "use_reentrant_checkpointing": false,
125
+ "vision_config": {
126
+ "hidden_size": 768,
127
+ "image_size": 512,
128
+ "max_image_size": {
129
+ "longest_edge": 512
130
+ },
131
+ "model_type": "smolvlm_vision",
132
+ "num_attention_heads": 12,
133
+ "patch_size": 16,
134
+ "size": {
135
+ "longest_edge": 512
136
+ },
137
+ "tie_word_embeddings": false,
138
+ "use_base_siglip": false
139
+ },
140
+ "vocab_size": 49536
141
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 49279,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.47.1"
7
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9bfd456c9472c0acd5719d6e514c4b859891af205ee1a736552fd3497b8b0c3
3
+ size 2029990624
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8743f020606f401ffa0c0ec7a7055da6ad7518b981ecdb060b62da3a60ec45
3
+ size 1450426001
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c20111c92ecee932e85de34e0afd9341f60bb210456bc5d8a4809c055dd5bc15
3
+ size 206503532
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace259f4ff3a070cfcefc41c6b4ddaf52a5c1b988348004da13043ff4a6de5ae
3
+ size 725489385
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d0a356562c7e474012c5e383de58d44d487a517c04190d26e5891db86cb2b1f
3
+ size 365039224
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69561703c23810bf58cd4fe81b2427df95339a5b55743f62d1e767bd4a83a1e2
3
+ size 229119396
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6265f0aa571ad624f67b5f19b79f1562b4bfd4e6db812815c1794172985b3e5e
3
+ size 205328508
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5aaa12d62d43a05a4a7456864c508d0b3c17c55a654c05fb568f6da0b7d212d
3
+ size 365039344
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5aaa12d62d43a05a4a7456864c508d0b3c17c55a654c05fb568f6da0b7d212d
3
+ size 365039344
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4696831e7181d72fd7736b0c119cd53141a46c940ca24c03ce05ba623b402dd4
3
+ size 189235499
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc09c2b3264365dcc8de8cae7513b640a40624cd038ede20a0ffcac0f82ecb11
3
+ size 189235518
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc9e790f2e90ebf3b1efe84ec82530d6c12e27b0a103703fe865a5faf24f2336
3
+ size 94617986
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6d275190cd1cdf3339225b98b42116871ddc4e0c3210801f02bf2a1d2e164a
3
+ size 47309344
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc09c2b3264365dcc8de8cae7513b640a40624cd038ede20a0ffcac0f82ecb11
3
+ size 189235518
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a330cc444671c47e6a2de559405404043b2a49b015188250eca8c0c88d263b73
3
+ size 94618005
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6d275190cd1cdf3339225b98b42116871ddc4e0c3210801f02bf2a1d2e164a
3
+ size 47309344
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/embed_tokens_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6d275190cd1cdf3339225b98b42116871ddc4e0c3210801f02bf2a1d2e164a
3
+ size 47309344
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4797cdad0ba1958e7dfcc5e074d0b02ae3c4df80cce17ecab282dde37e0ddc1
3
+ size 393190822
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:749b03c76adc0311fdc2d347ace3c9f78c792816e519c3e28a17f12f27cd496d
3
+ size 60688904
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9faa8f3b4a922514a2e7773f20e99fdf7dc7b4eeeeaca1fb82cf18f7945ca23a
3
+ size 196731834
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d931eb204e21ab4d905d62976aeaefe20360076f52760ba55b84a5b35088284f
3
+ size 98966476
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb71423919f04851bdac96588267e6a9db95903ca94fcd7b7a0f51b5cd53d8a
3
+ size 66734064
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7e811e4cef2c214c80ea7572eacb102ea396a881fb9170577b20c10fe4fa485
3
+ size 57691749
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8e2950e6837a71a3d972a14eb287d8875c4c2d5e8cfd6001bf185115295cdd
3
+ size 98966521
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/onnx/vision_encoder_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8e2950e6837a71a3d972a14eb287d8875c4c2d5e8cfd6001bf185115295cdd
3
+ size 98966521
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/preprocessor_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_image_splitting": true,
4
+ "do_normalize": true,
5
+ "do_pad": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "SmolVLMImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "max_image_size": {
20
+ "longest_edge": 512
21
+ },
22
+ "processor_class": "SmolVLMProcessor",
23
+ "resample": 1,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "longest_edge": 512
27
+ },
28
+ "video_sampling": {
29
+ "fps": 1,
30
+ "max_frames": 64,
31
+ "video_size": {
32
+ "longest_edge": 512
33
+ }
34
+ }
35
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "image_seq_len": 64,
3
+ "processor_class": "SmolVLMProcessor"
4
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/special_tokens_map.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<action_250>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<action_251>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<action_252>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<action_253>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<action_254>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<action_255>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ }
45
+ ],
46
+ "bos_token": {
47
+ "content": "<|im_start|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "end_of_utterance_token": "<end_of_utterance>",
54
+ "eos_token": {
55
+ "content": "<end_of_utterance>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false
60
+ },
61
+ "fake_image_token": "<fake_token_around_image>",
62
+ "global_image_token": "<global-img>",
63
+ "image_token": "<image>",
64
+ "pad_token": {
65
+ "content": "<|im_end|>",
66
+ "lstrip": false,
67
+ "normalized": false,
68
+ "rstrip": false,
69
+ "single_word": false
70
+ },
71
+ "unk_token": {
72
+ "content": "<|endoftext|>",
73
+ "lstrip": false,
74
+ "normalized": false,
75
+ "rstrip": false,
76
+ "single_word": false
77
+ }
78
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/tokenizer_config.json ADDED
@@ -0,0 +1,3242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "49152": {
141
+ "content": "<global-img>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "49153": {
149
+ "content": "<row_1_col_1>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "49154": {
157
+ "content": "<row_1_col_2>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "49155": {
165
+ "content": "<row_1_col_3>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "49156": {
173
+ "content": "<row_1_col_4>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "49157": {
181
+ "content": "<row_1_col_5>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "49158": {
189
+ "content": "<row_1_col_6>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "49159": {
197
+ "content": "<row_2_col_1>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "49160": {
205
+ "content": "<row_2_col_2>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "49161": {
213
+ "content": "<row_2_col_3>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "49162": {
221
+ "content": "<row_2_col_4>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "49163": {
229
+ "content": "<row_2_col_5>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "49164": {
237
+ "content": "<row_2_col_6>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "49165": {
245
+ "content": "<row_3_col_1>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "49166": {
253
+ "content": "<row_3_col_2>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "49167": {
261
+ "content": "<row_3_col_3>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "49168": {
269
+ "content": "<row_3_col_4>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "49169": {
277
+ "content": "<row_3_col_5>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "49170": {
285
+ "content": "<row_3_col_6>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "49171": {
293
+ "content": "<row_4_col_1>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "49172": {
301
+ "content": "<row_4_col_2>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "49173": {
309
+ "content": "<row_4_col_3>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "49174": {
317
+ "content": "<row_4_col_4>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "49175": {
325
+ "content": "<row_4_col_5>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "49176": {
333
+ "content": "<row_4_col_6>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "49177": {
341
+ "content": "<row_5_col_1>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "49178": {
349
+ "content": "<row_5_col_2>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "49179": {
357
+ "content": "<row_5_col_3>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "49180": {
365
+ "content": "<row_5_col_4>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "49181": {
373
+ "content": "<row_5_col_5>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "49182": {
381
+ "content": "<row_5_col_6>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "49183": {
389
+ "content": "<row_6_col_1>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "49184": {
397
+ "content": "<row_6_col_2>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "49185": {
405
+ "content": "<row_6_col_3>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "49186": {
413
+ "content": "<row_6_col_4>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "49187": {
421
+ "content": "<row_6_col_5>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "49188": {
429
+ "content": "<row_6_col_6>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "49189": {
437
+ "content": "<fake_token_around_image>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "49190": {
445
+ "content": "<image>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "49191": {
453
+ "content": "<|reserved_special_token_0|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "49192": {
461
+ "content": "<|reserved_special_token_1|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "49193": {
469
+ "content": "<|reserved_special_token_2|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "49194": {
477
+ "content": "<|reserved_special_token_3|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "49195": {
485
+ "content": "<|reserved_special_token_4|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "49196": {
493
+ "content": "<|reserved_special_token_5|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "49197": {
501
+ "content": "<|reserved_special_token_6|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "49198": {
509
+ "content": "<|reserved_special_token_7|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "49199": {
517
+ "content": "<|reserved_special_token_8|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "49200": {
525
+ "content": "<|reserved_special_token_9|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "49201": {
533
+ "content": "<|reserved_special_token_10|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "49202": {
541
+ "content": "<|reserved_special_token_11|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "49203": {
549
+ "content": "<|reserved_special_token_12|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "49204": {
557
+ "content": "<|reserved_special_token_13|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "49205": {
565
+ "content": "<|reserved_special_token_14|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "49206": {
573
+ "content": "<|reserved_special_token_15|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "49207": {
581
+ "content": "<|reserved_special_token_16|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "49208": {
589
+ "content": "<|reserved_special_token_17|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "49209": {
597
+ "content": "<|reserved_special_token_18|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "49210": {
605
+ "content": "<|reserved_special_token_19|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "49211": {
613
+ "content": "<|reserved_special_token_20|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "49212": {
621
+ "content": "<|reserved_special_token_21|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "49213": {
629
+ "content": "<|reserved_special_token_22|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "49214": {
637
+ "content": "<|reserved_special_token_23|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "49215": {
645
+ "content": "<|reserved_special_token_24|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "49216": {
653
+ "content": "<|reserved_special_token_25|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "49217": {
661
+ "content": "<|reserved_special_token_26|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "49218": {
669
+ "content": "<|reserved_special_token_27|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "49219": {
677
+ "content": "<|reserved_special_token_28|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "49220": {
685
+ "content": "<|reserved_special_token_29|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "49221": {
693
+ "content": "<|reserved_special_token_30|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "49222": {
701
+ "content": "<|reserved_special_token_31|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "49223": {
709
+ "content": "<|reserved_special_token_32|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "49224": {
717
+ "content": "<|reserved_special_token_33|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "49225": {
725
+ "content": "<|reserved_special_token_34|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "49226": {
733
+ "content": "<|reserved_special_token_35|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "49227": {
741
+ "content": "<|reserved_special_token_36|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "49228": {
749
+ "content": "<|reserved_special_token_37|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "49229": {
757
+ "content": "<|reserved_special_token_38|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "49230": {
765
+ "content": "<|reserved_special_token_39|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "49231": {
773
+ "content": "<|reserved_special_token_40|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "49232": {
781
+ "content": "<|reserved_special_token_41|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "49233": {
789
+ "content": "<|reserved_special_token_42|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "49234": {
797
+ "content": "<|reserved_special_token_43|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "49235": {
805
+ "content": "<|reserved_special_token_44|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "49236": {
813
+ "content": "<|reserved_special_token_45|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "49237": {
821
+ "content": "<|reserved_special_token_46|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "49238": {
829
+ "content": "<|reserved_special_token_47|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "49239": {
837
+ "content": "<|reserved_special_token_48|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "49240": {
845
+ "content": "<|reserved_special_token_49|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "49241": {
853
+ "content": "<|reserved_special_token_50|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "49242": {
861
+ "content": "<|reserved_special_token_51|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "49243": {
869
+ "content": "<|reserved_special_token_52|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "49244": {
877
+ "content": "<|reserved_special_token_53|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "49245": {
885
+ "content": "<|reserved_special_token_54|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "49246": {
893
+ "content": "<|reserved_special_token_55|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "49247": {
901
+ "content": "<|reserved_special_token_56|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "49248": {
909
+ "content": "<|reserved_special_token_57|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "49249": {
917
+ "content": "<|reserved_special_token_58|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "49250": {
925
+ "content": "<|reserved_special_token_59|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "49251": {
933
+ "content": "<|reserved_special_token_60|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "49252": {
941
+ "content": "<|reserved_special_token_61|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "49253": {
949
+ "content": "<|reserved_special_token_62|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "49254": {
957
+ "content": "<|reserved_special_token_63|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "49255": {
965
+ "content": "<|reserved_special_token_64|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "49256": {
973
+ "content": "<|reserved_special_token_65|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "49257": {
981
+ "content": "<|reserved_special_token_66|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "49258": {
989
+ "content": "<|reserved_special_token_67|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "49259": {
997
+ "content": "<|reserved_special_token_68|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "49260": {
1005
+ "content": "<|reserved_special_token_69|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "49261": {
1013
+ "content": "<|reserved_special_token_70|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "49262": {
1021
+ "content": "<|reserved_special_token_71|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "49263": {
1029
+ "content": "<|reserved_special_token_72|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "49264": {
1037
+ "content": "<|reserved_special_token_73|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "49265": {
1045
+ "content": "<|reserved_special_token_74|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "49266": {
1053
+ "content": "<|reserved_special_token_75|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "49267": {
1061
+ "content": "<|reserved_special_token_76|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "49268": {
1069
+ "content": "<|reserved_special_token_77|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "49269": {
1077
+ "content": "<|reserved_special_token_78|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "49270": {
1085
+ "content": "<|reserved_special_token_79|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "49271": {
1093
+ "content": "<|reserved_special_token_80|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "49272": {
1101
+ "content": "<|reserved_special_token_81|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "49273": {
1109
+ "content": "<|reserved_special_token_82|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "49274": {
1117
+ "content": "<|reserved_special_token_83|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "49275": {
1125
+ "content": "<|reserved_special_token_84|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "49276": {
1133
+ "content": "<|reserved_special_token_85|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "49277": {
1141
+ "content": "<|reserved_special_token_86|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "49278": {
1149
+ "content": "<|reserved_special_token_87|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "49279": {
1157
+ "content": "<end_of_utterance>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "49280": {
1165
+ "content": "<action_0>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "49281": {
1173
+ "content": "<action_1>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "49282": {
1181
+ "content": "<action_2>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "49283": {
1189
+ "content": "<action_3>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "49284": {
1197
+ "content": "<action_4>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "49285": {
1205
+ "content": "<action_5>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "49286": {
1213
+ "content": "<action_6>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "49287": {
1221
+ "content": "<action_7>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "49288": {
1229
+ "content": "<action_8>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "49289": {
1237
+ "content": "<action_9>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "49290": {
1245
+ "content": "<action_10>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "49291": {
1253
+ "content": "<action_11>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "49292": {
1261
+ "content": "<action_12>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "49293": {
1269
+ "content": "<action_13>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "49294": {
1277
+ "content": "<action_14>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "49295": {
1285
+ "content": "<action_15>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "49296": {
1293
+ "content": "<action_16>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "49297": {
1301
+ "content": "<action_17>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "49298": {
1309
+ "content": "<action_18>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "49299": {
1317
+ "content": "<action_19>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "49300": {
1325
+ "content": "<action_20>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "49301": {
1333
+ "content": "<action_21>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "49302": {
1341
+ "content": "<action_22>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "49303": {
1349
+ "content": "<action_23>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "49304": {
1357
+ "content": "<action_24>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "49305": {
1365
+ "content": "<action_25>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "49306": {
1373
+ "content": "<action_26>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "49307": {
1381
+ "content": "<action_27>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "49308": {
1389
+ "content": "<action_28>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "49309": {
1397
+ "content": "<action_29>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "49310": {
1405
+ "content": "<action_30>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "49311": {
1413
+ "content": "<action_31>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "49312": {
1421
+ "content": "<action_32>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "49313": {
1429
+ "content": "<action_33>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "49314": {
1437
+ "content": "<action_34>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "49315": {
1445
+ "content": "<action_35>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "49316": {
1453
+ "content": "<action_36>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "49317": {
1461
+ "content": "<action_37>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "49318": {
1469
+ "content": "<action_38>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "49319": {
1477
+ "content": "<action_39>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "49320": {
1485
+ "content": "<action_40>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "49321": {
1493
+ "content": "<action_41>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "49322": {
1501
+ "content": "<action_42>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "49323": {
1509
+ "content": "<action_43>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "49324": {
1517
+ "content": "<action_44>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "49325": {
1525
+ "content": "<action_45>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "49326": {
1533
+ "content": "<action_46>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "49327": {
1541
+ "content": "<action_47>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "49328": {
1549
+ "content": "<action_48>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "49329": {
1557
+ "content": "<action_49>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "49330": {
1565
+ "content": "<action_50>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "49331": {
1573
+ "content": "<action_51>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "49332": {
1581
+ "content": "<action_52>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "49333": {
1589
+ "content": "<action_53>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "49334": {
1597
+ "content": "<action_54>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "49335": {
1605
+ "content": "<action_55>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "49336": {
1613
+ "content": "<action_56>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "49337": {
1621
+ "content": "<action_57>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "49338": {
1629
+ "content": "<action_58>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "49339": {
1637
+ "content": "<action_59>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "49340": {
1645
+ "content": "<action_60>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "49341": {
1653
+ "content": "<action_61>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "49342": {
1661
+ "content": "<action_62>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "49343": {
1669
+ "content": "<action_63>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "49344": {
1677
+ "content": "<action_64>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "49345": {
1685
+ "content": "<action_65>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "49346": {
1693
+ "content": "<action_66>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "49347": {
1701
+ "content": "<action_67>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "49348": {
1709
+ "content": "<action_68>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "49349": {
1717
+ "content": "<action_69>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "49350": {
1725
+ "content": "<action_70>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "49351": {
1733
+ "content": "<action_71>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "49352": {
1741
+ "content": "<action_72>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "49353": {
1749
+ "content": "<action_73>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "49354": {
1757
+ "content": "<action_74>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "49355": {
1765
+ "content": "<action_75>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "49356": {
1773
+ "content": "<action_76>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "49357": {
1781
+ "content": "<action_77>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "49358": {
1789
+ "content": "<action_78>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "49359": {
1797
+ "content": "<action_79>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "49360": {
1805
+ "content": "<action_80>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "49361": {
1813
+ "content": "<action_81>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "49362": {
1821
+ "content": "<action_82>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "49363": {
1829
+ "content": "<action_83>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "49364": {
1837
+ "content": "<action_84>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "49365": {
1845
+ "content": "<action_85>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "49366": {
1853
+ "content": "<action_86>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "49367": {
1861
+ "content": "<action_87>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "49368": {
1869
+ "content": "<action_88>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "49369": {
1877
+ "content": "<action_89>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "49370": {
1885
+ "content": "<action_90>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "49371": {
1893
+ "content": "<action_91>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "49372": {
1901
+ "content": "<action_92>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "49373": {
1909
+ "content": "<action_93>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "49374": {
1917
+ "content": "<action_94>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "49375": {
1925
+ "content": "<action_95>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "49376": {
1933
+ "content": "<action_96>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "49377": {
1941
+ "content": "<action_97>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "49378": {
1949
+ "content": "<action_98>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "49379": {
1957
+ "content": "<action_99>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "49380": {
1965
+ "content": "<action_100>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "49381": {
1973
+ "content": "<action_101>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "49382": {
1981
+ "content": "<action_102>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "49383": {
1989
+ "content": "<action_103>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "49384": {
1997
+ "content": "<action_104>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "49385": {
2005
+ "content": "<action_105>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "49386": {
2013
+ "content": "<action_106>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "49387": {
2021
+ "content": "<action_107>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "49388": {
2029
+ "content": "<action_108>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "49389": {
2037
+ "content": "<action_109>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "49390": {
2045
+ "content": "<action_110>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ },
2052
+ "49391": {
2053
+ "content": "<action_111>",
2054
+ "lstrip": false,
2055
+ "normalized": false,
2056
+ "rstrip": false,
2057
+ "single_word": false,
2058
+ "special": true
2059
+ },
2060
+ "49392": {
2061
+ "content": "<action_112>",
2062
+ "lstrip": false,
2063
+ "normalized": false,
2064
+ "rstrip": false,
2065
+ "single_word": false,
2066
+ "special": true
2067
+ },
2068
+ "49393": {
2069
+ "content": "<action_113>",
2070
+ "lstrip": false,
2071
+ "normalized": false,
2072
+ "rstrip": false,
2073
+ "single_word": false,
2074
+ "special": true
2075
+ },
2076
+ "49394": {
2077
+ "content": "<action_114>",
2078
+ "lstrip": false,
2079
+ "normalized": false,
2080
+ "rstrip": false,
2081
+ "single_word": false,
2082
+ "special": true
2083
+ },
2084
+ "49395": {
2085
+ "content": "<action_115>",
2086
+ "lstrip": false,
2087
+ "normalized": false,
2088
+ "rstrip": false,
2089
+ "single_word": false,
2090
+ "special": true
2091
+ },
2092
+ "49396": {
2093
+ "content": "<action_116>",
2094
+ "lstrip": false,
2095
+ "normalized": false,
2096
+ "rstrip": false,
2097
+ "single_word": false,
2098
+ "special": true
2099
+ },
2100
+ "49397": {
2101
+ "content": "<action_117>",
2102
+ "lstrip": false,
2103
+ "normalized": false,
2104
+ "rstrip": false,
2105
+ "single_word": false,
2106
+ "special": true
2107
+ },
2108
+ "49398": {
2109
+ "content": "<action_118>",
2110
+ "lstrip": false,
2111
+ "normalized": false,
2112
+ "rstrip": false,
2113
+ "single_word": false,
2114
+ "special": true
2115
+ },
2116
+ "49399": {
2117
+ "content": "<action_119>",
2118
+ "lstrip": false,
2119
+ "normalized": false,
2120
+ "rstrip": false,
2121
+ "single_word": false,
2122
+ "special": true
2123
+ },
2124
+ "49400": {
2125
+ "content": "<action_120>",
2126
+ "lstrip": false,
2127
+ "normalized": false,
2128
+ "rstrip": false,
2129
+ "single_word": false,
2130
+ "special": true
2131
+ },
2132
+ "49401": {
2133
+ "content": "<action_121>",
2134
+ "lstrip": false,
2135
+ "normalized": false,
2136
+ "rstrip": false,
2137
+ "single_word": false,
2138
+ "special": true
2139
+ },
2140
+ "49402": {
2141
+ "content": "<action_122>",
2142
+ "lstrip": false,
2143
+ "normalized": false,
2144
+ "rstrip": false,
2145
+ "single_word": false,
2146
+ "special": true
2147
+ },
2148
+ "49403": {
2149
+ "content": "<action_123>",
2150
+ "lstrip": false,
2151
+ "normalized": false,
2152
+ "rstrip": false,
2153
+ "single_word": false,
2154
+ "special": true
2155
+ },
2156
+ "49404": {
2157
+ "content": "<action_124>",
2158
+ "lstrip": false,
2159
+ "normalized": false,
2160
+ "rstrip": false,
2161
+ "single_word": false,
2162
+ "special": true
2163
+ },
2164
+ "49405": {
2165
+ "content": "<action_125>",
2166
+ "lstrip": false,
2167
+ "normalized": false,
2168
+ "rstrip": false,
2169
+ "single_word": false,
2170
+ "special": true
2171
+ },
2172
+ "49406": {
2173
+ "content": "<action_126>",
2174
+ "lstrip": false,
2175
+ "normalized": false,
2176
+ "rstrip": false,
2177
+ "single_word": false,
2178
+ "special": true
2179
+ },
2180
+ "49407": {
2181
+ "content": "<action_127>",
2182
+ "lstrip": false,
2183
+ "normalized": false,
2184
+ "rstrip": false,
2185
+ "single_word": false,
2186
+ "special": true
2187
+ },
2188
+ "49408": {
2189
+ "content": "<action_128>",
2190
+ "lstrip": false,
2191
+ "normalized": false,
2192
+ "rstrip": false,
2193
+ "single_word": false,
2194
+ "special": true
2195
+ },
2196
+ "49409": {
2197
+ "content": "<action_129>",
2198
+ "lstrip": false,
2199
+ "normalized": false,
2200
+ "rstrip": false,
2201
+ "single_word": false,
2202
+ "special": true
2203
+ },
2204
+ "49410": {
2205
+ "content": "<action_130>",
2206
+ "lstrip": false,
2207
+ "normalized": false,
2208
+ "rstrip": false,
2209
+ "single_word": false,
2210
+ "special": true
2211
+ },
2212
+ "49411": {
2213
+ "content": "<action_131>",
2214
+ "lstrip": false,
2215
+ "normalized": false,
2216
+ "rstrip": false,
2217
+ "single_word": false,
2218
+ "special": true
2219
+ },
2220
+ "49412": {
2221
+ "content": "<action_132>",
2222
+ "lstrip": false,
2223
+ "normalized": false,
2224
+ "rstrip": false,
2225
+ "single_word": false,
2226
+ "special": true
2227
+ },
2228
+ "49413": {
2229
+ "content": "<action_133>",
2230
+ "lstrip": false,
2231
+ "normalized": false,
2232
+ "rstrip": false,
2233
+ "single_word": false,
2234
+ "special": true
2235
+ },
2236
+ "49414": {
2237
+ "content": "<action_134>",
2238
+ "lstrip": false,
2239
+ "normalized": false,
2240
+ "rstrip": false,
2241
+ "single_word": false,
2242
+ "special": true
2243
+ },
2244
+ "49415": {
2245
+ "content": "<action_135>",
2246
+ "lstrip": false,
2247
+ "normalized": false,
2248
+ "rstrip": false,
2249
+ "single_word": false,
2250
+ "special": true
2251
+ },
2252
+ "49416": {
2253
+ "content": "<action_136>",
2254
+ "lstrip": false,
2255
+ "normalized": false,
2256
+ "rstrip": false,
2257
+ "single_word": false,
2258
+ "special": true
2259
+ },
2260
+ "49417": {
2261
+ "content": "<action_137>",
2262
+ "lstrip": false,
2263
+ "normalized": false,
2264
+ "rstrip": false,
2265
+ "single_word": false,
2266
+ "special": true
2267
+ },
2268
+ "49418": {
2269
+ "content": "<action_138>",
2270
+ "lstrip": false,
2271
+ "normalized": false,
2272
+ "rstrip": false,
2273
+ "single_word": false,
2274
+ "special": true
2275
+ },
2276
+ "49419": {
2277
+ "content": "<action_139>",
2278
+ "lstrip": false,
2279
+ "normalized": false,
2280
+ "rstrip": false,
2281
+ "single_word": false,
2282
+ "special": true
2283
+ },
2284
+ "49420": {
2285
+ "content": "<action_140>",
2286
+ "lstrip": false,
2287
+ "normalized": false,
2288
+ "rstrip": false,
2289
+ "single_word": false,
2290
+ "special": true
2291
+ },
2292
+ "49421": {
2293
+ "content": "<action_141>",
2294
+ "lstrip": false,
2295
+ "normalized": false,
2296
+ "rstrip": false,
2297
+ "single_word": false,
2298
+ "special": true
2299
+ },
2300
+ "49422": {
2301
+ "content": "<action_142>",
2302
+ "lstrip": false,
2303
+ "normalized": false,
2304
+ "rstrip": false,
2305
+ "single_word": false,
2306
+ "special": true
2307
+ },
2308
+ "49423": {
2309
+ "content": "<action_143>",
2310
+ "lstrip": false,
2311
+ "normalized": false,
2312
+ "rstrip": false,
2313
+ "single_word": false,
2314
+ "special": true
2315
+ },
2316
+ "49424": {
2317
+ "content": "<action_144>",
2318
+ "lstrip": false,
2319
+ "normalized": false,
2320
+ "rstrip": false,
2321
+ "single_word": false,
2322
+ "special": true
2323
+ },
2324
+ "49425": {
2325
+ "content": "<action_145>",
2326
+ "lstrip": false,
2327
+ "normalized": false,
2328
+ "rstrip": false,
2329
+ "single_word": false,
2330
+ "special": true
2331
+ },
2332
+ "49426": {
2333
+ "content": "<action_146>",
2334
+ "lstrip": false,
2335
+ "normalized": false,
2336
+ "rstrip": false,
2337
+ "single_word": false,
2338
+ "special": true
2339
+ },
2340
+ "49427": {
2341
+ "content": "<action_147>",
2342
+ "lstrip": false,
2343
+ "normalized": false,
2344
+ "rstrip": false,
2345
+ "single_word": false,
2346
+ "special": true
2347
+ },
2348
+ "49428": {
2349
+ "content": "<action_148>",
2350
+ "lstrip": false,
2351
+ "normalized": false,
2352
+ "rstrip": false,
2353
+ "single_word": false,
2354
+ "special": true
2355
+ },
2356
+ "49429": {
2357
+ "content": "<action_149>",
2358
+ "lstrip": false,
2359
+ "normalized": false,
2360
+ "rstrip": false,
2361
+ "single_word": false,
2362
+ "special": true
2363
+ },
2364
+ "49430": {
2365
+ "content": "<action_150>",
2366
+ "lstrip": false,
2367
+ "normalized": false,
2368
+ "rstrip": false,
2369
+ "single_word": false,
2370
+ "special": true
2371
+ },
2372
+ "49431": {
2373
+ "content": "<action_151>",
2374
+ "lstrip": false,
2375
+ "normalized": false,
2376
+ "rstrip": false,
2377
+ "single_word": false,
2378
+ "special": true
2379
+ },
2380
+ "49432": {
2381
+ "content": "<action_152>",
2382
+ "lstrip": false,
2383
+ "normalized": false,
2384
+ "rstrip": false,
2385
+ "single_word": false,
2386
+ "special": true
2387
+ },
2388
+ "49433": {
2389
+ "content": "<action_153>",
2390
+ "lstrip": false,
2391
+ "normalized": false,
2392
+ "rstrip": false,
2393
+ "single_word": false,
2394
+ "special": true
2395
+ },
2396
+ "49434": {
2397
+ "content": "<action_154>",
2398
+ "lstrip": false,
2399
+ "normalized": false,
2400
+ "rstrip": false,
2401
+ "single_word": false,
2402
+ "special": true
2403
+ },
2404
+ "49435": {
2405
+ "content": "<action_155>",
2406
+ "lstrip": false,
2407
+ "normalized": false,
2408
+ "rstrip": false,
2409
+ "single_word": false,
2410
+ "special": true
2411
+ },
2412
+ "49436": {
2413
+ "content": "<action_156>",
2414
+ "lstrip": false,
2415
+ "normalized": false,
2416
+ "rstrip": false,
2417
+ "single_word": false,
2418
+ "special": true
2419
+ },
2420
+ "49437": {
2421
+ "content": "<action_157>",
2422
+ "lstrip": false,
2423
+ "normalized": false,
2424
+ "rstrip": false,
2425
+ "single_word": false,
2426
+ "special": true
2427
+ },
2428
+ "49438": {
2429
+ "content": "<action_158>",
2430
+ "lstrip": false,
2431
+ "normalized": false,
2432
+ "rstrip": false,
2433
+ "single_word": false,
2434
+ "special": true
2435
+ },
2436
+ "49439": {
2437
+ "content": "<action_159>",
2438
+ "lstrip": false,
2439
+ "normalized": false,
2440
+ "rstrip": false,
2441
+ "single_word": false,
2442
+ "special": true
2443
+ },
2444
+ "49440": {
2445
+ "content": "<action_160>",
2446
+ "lstrip": false,
2447
+ "normalized": false,
2448
+ "rstrip": false,
2449
+ "single_word": false,
2450
+ "special": true
2451
+ },
2452
+ "49441": {
2453
+ "content": "<action_161>",
2454
+ "lstrip": false,
2455
+ "normalized": false,
2456
+ "rstrip": false,
2457
+ "single_word": false,
2458
+ "special": true
2459
+ },
2460
+ "49442": {
2461
+ "content": "<action_162>",
2462
+ "lstrip": false,
2463
+ "normalized": false,
2464
+ "rstrip": false,
2465
+ "single_word": false,
2466
+ "special": true
2467
+ },
2468
+ "49443": {
2469
+ "content": "<action_163>",
2470
+ "lstrip": false,
2471
+ "normalized": false,
2472
+ "rstrip": false,
2473
+ "single_word": false,
2474
+ "special": true
2475
+ },
2476
+ "49444": {
2477
+ "content": "<action_164>",
2478
+ "lstrip": false,
2479
+ "normalized": false,
2480
+ "rstrip": false,
2481
+ "single_word": false,
2482
+ "special": true
2483
+ },
2484
+ "49445": {
2485
+ "content": "<action_165>",
2486
+ "lstrip": false,
2487
+ "normalized": false,
2488
+ "rstrip": false,
2489
+ "single_word": false,
2490
+ "special": true
2491
+ },
2492
+ "49446": {
2493
+ "content": "<action_166>",
2494
+ "lstrip": false,
2495
+ "normalized": false,
2496
+ "rstrip": false,
2497
+ "single_word": false,
2498
+ "special": true
2499
+ },
2500
+ "49447": {
2501
+ "content": "<action_167>",
2502
+ "lstrip": false,
2503
+ "normalized": false,
2504
+ "rstrip": false,
2505
+ "single_word": false,
2506
+ "special": true
2507
+ },
2508
+ "49448": {
2509
+ "content": "<action_168>",
2510
+ "lstrip": false,
2511
+ "normalized": false,
2512
+ "rstrip": false,
2513
+ "single_word": false,
2514
+ "special": true
2515
+ },
2516
+ "49449": {
2517
+ "content": "<action_169>",
2518
+ "lstrip": false,
2519
+ "normalized": false,
2520
+ "rstrip": false,
2521
+ "single_word": false,
2522
+ "special": true
2523
+ },
2524
+ "49450": {
2525
+ "content": "<action_170>",
2526
+ "lstrip": false,
2527
+ "normalized": false,
2528
+ "rstrip": false,
2529
+ "single_word": false,
2530
+ "special": true
2531
+ },
2532
+ "49451": {
2533
+ "content": "<action_171>",
2534
+ "lstrip": false,
2535
+ "normalized": false,
2536
+ "rstrip": false,
2537
+ "single_word": false,
2538
+ "special": true
2539
+ },
2540
+ "49452": {
2541
+ "content": "<action_172>",
2542
+ "lstrip": false,
2543
+ "normalized": false,
2544
+ "rstrip": false,
2545
+ "single_word": false,
2546
+ "special": true
2547
+ },
2548
+ "49453": {
2549
+ "content": "<action_173>",
2550
+ "lstrip": false,
2551
+ "normalized": false,
2552
+ "rstrip": false,
2553
+ "single_word": false,
2554
+ "special": true
2555
+ },
2556
+ "49454": {
2557
+ "content": "<action_174>",
2558
+ "lstrip": false,
2559
+ "normalized": false,
2560
+ "rstrip": false,
2561
+ "single_word": false,
2562
+ "special": true
2563
+ },
2564
+ "49455": {
2565
+ "content": "<action_175>",
2566
+ "lstrip": false,
2567
+ "normalized": false,
2568
+ "rstrip": false,
2569
+ "single_word": false,
2570
+ "special": true
2571
+ },
2572
+ "49456": {
2573
+ "content": "<action_176>",
2574
+ "lstrip": false,
2575
+ "normalized": false,
2576
+ "rstrip": false,
2577
+ "single_word": false,
2578
+ "special": true
2579
+ },
2580
+ "49457": {
2581
+ "content": "<action_177>",
2582
+ "lstrip": false,
2583
+ "normalized": false,
2584
+ "rstrip": false,
2585
+ "single_word": false,
2586
+ "special": true
2587
+ },
2588
+ "49458": {
2589
+ "content": "<action_178>",
2590
+ "lstrip": false,
2591
+ "normalized": false,
2592
+ "rstrip": false,
2593
+ "single_word": false,
2594
+ "special": true
2595
+ },
2596
+ "49459": {
2597
+ "content": "<action_179>",
2598
+ "lstrip": false,
2599
+ "normalized": false,
2600
+ "rstrip": false,
2601
+ "single_word": false,
2602
+ "special": true
2603
+ },
2604
+ "49460": {
2605
+ "content": "<action_180>",
2606
+ "lstrip": false,
2607
+ "normalized": false,
2608
+ "rstrip": false,
2609
+ "single_word": false,
2610
+ "special": true
2611
+ },
2612
+ "49461": {
2613
+ "content": "<action_181>",
2614
+ "lstrip": false,
2615
+ "normalized": false,
2616
+ "rstrip": false,
2617
+ "single_word": false,
2618
+ "special": true
2619
+ },
2620
+ "49462": {
2621
+ "content": "<action_182>",
2622
+ "lstrip": false,
2623
+ "normalized": false,
2624
+ "rstrip": false,
2625
+ "single_word": false,
2626
+ "special": true
2627
+ },
2628
+ "49463": {
2629
+ "content": "<action_183>",
2630
+ "lstrip": false,
2631
+ "normalized": false,
2632
+ "rstrip": false,
2633
+ "single_word": false,
2634
+ "special": true
2635
+ },
2636
+ "49464": {
2637
+ "content": "<action_184>",
2638
+ "lstrip": false,
2639
+ "normalized": false,
2640
+ "rstrip": false,
2641
+ "single_word": false,
2642
+ "special": true
2643
+ },
2644
+ "49465": {
2645
+ "content": "<action_185>",
2646
+ "lstrip": false,
2647
+ "normalized": false,
2648
+ "rstrip": false,
2649
+ "single_word": false,
2650
+ "special": true
2651
+ },
2652
+ "49466": {
2653
+ "content": "<action_186>",
2654
+ "lstrip": false,
2655
+ "normalized": false,
2656
+ "rstrip": false,
2657
+ "single_word": false,
2658
+ "special": true
2659
+ },
2660
+ "49467": {
2661
+ "content": "<action_187>",
2662
+ "lstrip": false,
2663
+ "normalized": false,
2664
+ "rstrip": false,
2665
+ "single_word": false,
2666
+ "special": true
2667
+ },
2668
+ "49468": {
2669
+ "content": "<action_188>",
2670
+ "lstrip": false,
2671
+ "normalized": false,
2672
+ "rstrip": false,
2673
+ "single_word": false,
2674
+ "special": true
2675
+ },
2676
+ "49469": {
2677
+ "content": "<action_189>",
2678
+ "lstrip": false,
2679
+ "normalized": false,
2680
+ "rstrip": false,
2681
+ "single_word": false,
2682
+ "special": true
2683
+ },
2684
+ "49470": {
2685
+ "content": "<action_190>",
2686
+ "lstrip": false,
2687
+ "normalized": false,
2688
+ "rstrip": false,
2689
+ "single_word": false,
2690
+ "special": true
2691
+ },
2692
+ "49471": {
2693
+ "content": "<action_191>",
2694
+ "lstrip": false,
2695
+ "normalized": false,
2696
+ "rstrip": false,
2697
+ "single_word": false,
2698
+ "special": true
2699
+ },
2700
+ "49472": {
2701
+ "content": "<action_192>",
2702
+ "lstrip": false,
2703
+ "normalized": false,
2704
+ "rstrip": false,
2705
+ "single_word": false,
2706
+ "special": true
2707
+ },
2708
+ "49473": {
2709
+ "content": "<action_193>",
2710
+ "lstrip": false,
2711
+ "normalized": false,
2712
+ "rstrip": false,
2713
+ "single_word": false,
2714
+ "special": true
2715
+ },
2716
+ "49474": {
2717
+ "content": "<action_194>",
2718
+ "lstrip": false,
2719
+ "normalized": false,
2720
+ "rstrip": false,
2721
+ "single_word": false,
2722
+ "special": true
2723
+ },
2724
+ "49475": {
2725
+ "content": "<action_195>",
2726
+ "lstrip": false,
2727
+ "normalized": false,
2728
+ "rstrip": false,
2729
+ "single_word": false,
2730
+ "special": true
2731
+ },
2732
+ "49476": {
2733
+ "content": "<action_196>",
2734
+ "lstrip": false,
2735
+ "normalized": false,
2736
+ "rstrip": false,
2737
+ "single_word": false,
2738
+ "special": true
2739
+ },
2740
+ "49477": {
2741
+ "content": "<action_197>",
2742
+ "lstrip": false,
2743
+ "normalized": false,
2744
+ "rstrip": false,
2745
+ "single_word": false,
2746
+ "special": true
2747
+ },
2748
+ "49478": {
2749
+ "content": "<action_198>",
2750
+ "lstrip": false,
2751
+ "normalized": false,
2752
+ "rstrip": false,
2753
+ "single_word": false,
2754
+ "special": true
2755
+ },
2756
+ "49479": {
2757
+ "content": "<action_199>",
2758
+ "lstrip": false,
2759
+ "normalized": false,
2760
+ "rstrip": false,
2761
+ "single_word": false,
2762
+ "special": true
2763
+ },
2764
+ "49480": {
2765
+ "content": "<action_200>",
2766
+ "lstrip": false,
2767
+ "normalized": false,
2768
+ "rstrip": false,
2769
+ "single_word": false,
2770
+ "special": true
2771
+ },
2772
+ "49481": {
2773
+ "content": "<action_201>",
2774
+ "lstrip": false,
2775
+ "normalized": false,
2776
+ "rstrip": false,
2777
+ "single_word": false,
2778
+ "special": true
2779
+ },
2780
+ "49482": {
2781
+ "content": "<action_202>",
2782
+ "lstrip": false,
2783
+ "normalized": false,
2784
+ "rstrip": false,
2785
+ "single_word": false,
2786
+ "special": true
2787
+ },
2788
+ "49483": {
2789
+ "content": "<action_203>",
2790
+ "lstrip": false,
2791
+ "normalized": false,
2792
+ "rstrip": false,
2793
+ "single_word": false,
2794
+ "special": true
2795
+ },
2796
+ "49484": {
2797
+ "content": "<action_204>",
2798
+ "lstrip": false,
2799
+ "normalized": false,
2800
+ "rstrip": false,
2801
+ "single_word": false,
2802
+ "special": true
2803
+ },
2804
+ "49485": {
2805
+ "content": "<action_205>",
2806
+ "lstrip": false,
2807
+ "normalized": false,
2808
+ "rstrip": false,
2809
+ "single_word": false,
2810
+ "special": true
2811
+ },
2812
+ "49486": {
2813
+ "content": "<action_206>",
2814
+ "lstrip": false,
2815
+ "normalized": false,
2816
+ "rstrip": false,
2817
+ "single_word": false,
2818
+ "special": true
2819
+ },
2820
+ "49487": {
2821
+ "content": "<action_207>",
2822
+ "lstrip": false,
2823
+ "normalized": false,
2824
+ "rstrip": false,
2825
+ "single_word": false,
2826
+ "special": true
2827
+ },
2828
+ "49488": {
2829
+ "content": "<action_208>",
2830
+ "lstrip": false,
2831
+ "normalized": false,
2832
+ "rstrip": false,
2833
+ "single_word": false,
2834
+ "special": true
2835
+ },
2836
+ "49489": {
2837
+ "content": "<action_209>",
2838
+ "lstrip": false,
2839
+ "normalized": false,
2840
+ "rstrip": false,
2841
+ "single_word": false,
2842
+ "special": true
2843
+ },
2844
+ "49490": {
2845
+ "content": "<action_210>",
2846
+ "lstrip": false,
2847
+ "normalized": false,
2848
+ "rstrip": false,
2849
+ "single_word": false,
2850
+ "special": true
2851
+ },
2852
+ "49491": {
2853
+ "content": "<action_211>",
2854
+ "lstrip": false,
2855
+ "normalized": false,
2856
+ "rstrip": false,
2857
+ "single_word": false,
2858
+ "special": true
2859
+ },
2860
+ "49492": {
2861
+ "content": "<action_212>",
2862
+ "lstrip": false,
2863
+ "normalized": false,
2864
+ "rstrip": false,
2865
+ "single_word": false,
2866
+ "special": true
2867
+ },
2868
+ "49493": {
2869
+ "content": "<action_213>",
2870
+ "lstrip": false,
2871
+ "normalized": false,
2872
+ "rstrip": false,
2873
+ "single_word": false,
2874
+ "special": true
2875
+ },
2876
+ "49494": {
2877
+ "content": "<action_214>",
2878
+ "lstrip": false,
2879
+ "normalized": false,
2880
+ "rstrip": false,
2881
+ "single_word": false,
2882
+ "special": true
2883
+ },
2884
+ "49495": {
2885
+ "content": "<action_215>",
2886
+ "lstrip": false,
2887
+ "normalized": false,
2888
+ "rstrip": false,
2889
+ "single_word": false,
2890
+ "special": true
2891
+ },
2892
+ "49496": {
2893
+ "content": "<action_216>",
2894
+ "lstrip": false,
2895
+ "normalized": false,
2896
+ "rstrip": false,
2897
+ "single_word": false,
2898
+ "special": true
2899
+ },
2900
+ "49497": {
2901
+ "content": "<action_217>",
2902
+ "lstrip": false,
2903
+ "normalized": false,
2904
+ "rstrip": false,
2905
+ "single_word": false,
2906
+ "special": true
2907
+ },
2908
+ "49498": {
2909
+ "content": "<action_218>",
2910
+ "lstrip": false,
2911
+ "normalized": false,
2912
+ "rstrip": false,
2913
+ "single_word": false,
2914
+ "special": true
2915
+ },
2916
+ "49499": {
2917
+ "content": "<action_219>",
2918
+ "lstrip": false,
2919
+ "normalized": false,
2920
+ "rstrip": false,
2921
+ "single_word": false,
2922
+ "special": true
2923
+ },
2924
+ "49500": {
2925
+ "content": "<action_220>",
2926
+ "lstrip": false,
2927
+ "normalized": false,
2928
+ "rstrip": false,
2929
+ "single_word": false,
2930
+ "special": true
2931
+ },
2932
+ "49501": {
2933
+ "content": "<action_221>",
2934
+ "lstrip": false,
2935
+ "normalized": false,
2936
+ "rstrip": false,
2937
+ "single_word": false,
2938
+ "special": true
2939
+ },
2940
+ "49502": {
2941
+ "content": "<action_222>",
2942
+ "lstrip": false,
2943
+ "normalized": false,
2944
+ "rstrip": false,
2945
+ "single_word": false,
2946
+ "special": true
2947
+ },
2948
+ "49503": {
2949
+ "content": "<action_223>",
2950
+ "lstrip": false,
2951
+ "normalized": false,
2952
+ "rstrip": false,
2953
+ "single_word": false,
2954
+ "special": true
2955
+ },
2956
+ "49504": {
2957
+ "content": "<action_224>",
2958
+ "lstrip": false,
2959
+ "normalized": false,
2960
+ "rstrip": false,
2961
+ "single_word": false,
2962
+ "special": true
2963
+ },
2964
+ "49505": {
2965
+ "content": "<action_225>",
2966
+ "lstrip": false,
2967
+ "normalized": false,
2968
+ "rstrip": false,
2969
+ "single_word": false,
2970
+ "special": true
2971
+ },
2972
+ "49506": {
2973
+ "content": "<action_226>",
2974
+ "lstrip": false,
2975
+ "normalized": false,
2976
+ "rstrip": false,
2977
+ "single_word": false,
2978
+ "special": true
2979
+ },
2980
+ "49507": {
2981
+ "content": "<action_227>",
2982
+ "lstrip": false,
2983
+ "normalized": false,
2984
+ "rstrip": false,
2985
+ "single_word": false,
2986
+ "special": true
2987
+ },
2988
+ "49508": {
2989
+ "content": "<action_228>",
2990
+ "lstrip": false,
2991
+ "normalized": false,
2992
+ "rstrip": false,
2993
+ "single_word": false,
2994
+ "special": true
2995
+ },
2996
+ "49509": {
2997
+ "content": "<action_229>",
2998
+ "lstrip": false,
2999
+ "normalized": false,
3000
+ "rstrip": false,
3001
+ "single_word": false,
3002
+ "special": true
3003
+ },
3004
+ "49510": {
3005
+ "content": "<action_230>",
3006
+ "lstrip": false,
3007
+ "normalized": false,
3008
+ "rstrip": false,
3009
+ "single_word": false,
3010
+ "special": true
3011
+ },
3012
+ "49511": {
3013
+ "content": "<action_231>",
3014
+ "lstrip": false,
3015
+ "normalized": false,
3016
+ "rstrip": false,
3017
+ "single_word": false,
3018
+ "special": true
3019
+ },
3020
+ "49512": {
3021
+ "content": "<action_232>",
3022
+ "lstrip": false,
3023
+ "normalized": false,
3024
+ "rstrip": false,
3025
+ "single_word": false,
3026
+ "special": true
3027
+ },
3028
+ "49513": {
3029
+ "content": "<action_233>",
3030
+ "lstrip": false,
3031
+ "normalized": false,
3032
+ "rstrip": false,
3033
+ "single_word": false,
3034
+ "special": true
3035
+ },
3036
+ "49514": {
3037
+ "content": "<action_234>",
3038
+ "lstrip": false,
3039
+ "normalized": false,
3040
+ "rstrip": false,
3041
+ "single_word": false,
3042
+ "special": true
3043
+ },
3044
+ "49515": {
3045
+ "content": "<action_235>",
3046
+ "lstrip": false,
3047
+ "normalized": false,
3048
+ "rstrip": false,
3049
+ "single_word": false,
3050
+ "special": true
3051
+ },
3052
+ "49516": {
3053
+ "content": "<action_236>",
3054
+ "lstrip": false,
3055
+ "normalized": false,
3056
+ "rstrip": false,
3057
+ "single_word": false,
3058
+ "special": true
3059
+ },
3060
+ "49517": {
3061
+ "content": "<action_237>",
3062
+ "lstrip": false,
3063
+ "normalized": false,
3064
+ "rstrip": false,
3065
+ "single_word": false,
3066
+ "special": true
3067
+ },
3068
+ "49518": {
3069
+ "content": "<action_238>",
3070
+ "lstrip": false,
3071
+ "normalized": false,
3072
+ "rstrip": false,
3073
+ "single_word": false,
3074
+ "special": true
3075
+ },
3076
+ "49519": {
3077
+ "content": "<action_239>",
3078
+ "lstrip": false,
3079
+ "normalized": false,
3080
+ "rstrip": false,
3081
+ "single_word": false,
3082
+ "special": true
3083
+ },
3084
+ "49520": {
3085
+ "content": "<action_240>",
3086
+ "lstrip": false,
3087
+ "normalized": false,
3088
+ "rstrip": false,
3089
+ "single_word": false,
3090
+ "special": true
3091
+ },
3092
+ "49521": {
3093
+ "content": "<action_241>",
3094
+ "lstrip": false,
3095
+ "normalized": false,
3096
+ "rstrip": false,
3097
+ "single_word": false,
3098
+ "special": true
3099
+ },
3100
+ "49522": {
3101
+ "content": "<action_242>",
3102
+ "lstrip": false,
3103
+ "normalized": false,
3104
+ "rstrip": false,
3105
+ "single_word": false,
3106
+ "special": true
3107
+ },
3108
+ "49523": {
3109
+ "content": "<action_243>",
3110
+ "lstrip": false,
3111
+ "normalized": false,
3112
+ "rstrip": false,
3113
+ "single_word": false,
3114
+ "special": true
3115
+ },
3116
+ "49524": {
3117
+ "content": "<action_244>",
3118
+ "lstrip": false,
3119
+ "normalized": false,
3120
+ "rstrip": false,
3121
+ "single_word": false,
3122
+ "special": true
3123
+ },
3124
+ "49525": {
3125
+ "content": "<action_245>",
3126
+ "lstrip": false,
3127
+ "normalized": false,
3128
+ "rstrip": false,
3129
+ "single_word": false,
3130
+ "special": true
3131
+ },
3132
+ "49526": {
3133
+ "content": "<action_246>",
3134
+ "lstrip": false,
3135
+ "normalized": false,
3136
+ "rstrip": false,
3137
+ "single_word": false,
3138
+ "special": true
3139
+ },
3140
+ "49527": {
3141
+ "content": "<action_247>",
3142
+ "lstrip": false,
3143
+ "normalized": false,
3144
+ "rstrip": false,
3145
+ "single_word": false,
3146
+ "special": true
3147
+ },
3148
+ "49528": {
3149
+ "content": "<action_248>",
3150
+ "lstrip": false,
3151
+ "normalized": false,
3152
+ "rstrip": false,
3153
+ "single_word": false,
3154
+ "special": true
3155
+ },
3156
+ "49529": {
3157
+ "content": "<action_249>",
3158
+ "lstrip": false,
3159
+ "normalized": false,
3160
+ "rstrip": false,
3161
+ "single_word": false,
3162
+ "special": true
3163
+ },
3164
+ "49530": {
3165
+ "content": "<action_250>",
3166
+ "lstrip": false,
3167
+ "normalized": false,
3168
+ "rstrip": false,
3169
+ "single_word": false,
3170
+ "special": true
3171
+ },
3172
+ "49531": {
3173
+ "content": "<action_251>",
3174
+ "lstrip": false,
3175
+ "normalized": false,
3176
+ "rstrip": false,
3177
+ "single_word": false,
3178
+ "special": true
3179
+ },
3180
+ "49532": {
3181
+ "content": "<action_252>",
3182
+ "lstrip": false,
3183
+ "normalized": false,
3184
+ "rstrip": false,
3185
+ "single_word": false,
3186
+ "special": true
3187
+ },
3188
+ "49533": {
3189
+ "content": "<action_253>",
3190
+ "lstrip": false,
3191
+ "normalized": false,
3192
+ "rstrip": false,
3193
+ "single_word": false,
3194
+ "special": true
3195
+ },
3196
+ "49534": {
3197
+ "content": "<action_254>",
3198
+ "lstrip": false,
3199
+ "normalized": false,
3200
+ "rstrip": false,
3201
+ "single_word": false,
3202
+ "special": true
3203
+ },
3204
+ "49535": {
3205
+ "content": "<action_255>",
3206
+ "lstrip": false,
3207
+ "normalized": false,
3208
+ "rstrip": false,
3209
+ "single_word": false,
3210
+ "special": true
3211
+ }
3212
+ },
3213
+ "additional_special_tokens": [
3214
+ "<action_250>",
3215
+ "<action_251>",
3216
+ "<action_252>",
3217
+ "<action_253>",
3218
+ "<action_254>",
3219
+ "<action_255>"
3220
+ ],
3221
+ "bos_token": "<|im_start|>",
3222
+ "clean_up_tokenization_spaces": false,
3223
+ "end_of_utterance_token": "<end_of_utterance>",
3224
+ "eos_token": "<end_of_utterance>",
3225
+ "extra_special_tokens": {
3226
+ "end_of_utterance_token": "<end_of_utterance>",
3227
+ "fake_image_token": "<fake_token_around_image>",
3228
+ "global_image_token": "<global-img>",
3229
+ "image_token": "<image>"
3230
+ },
3231
+ "fake_image_token": "<fake_token_around_image>",
3232
+ "global_image_token": "<global-img>",
3233
+ "image_token": "<image>",
3234
+ "legacy": false,
3235
+ "model_max_length": 8192,
3236
+ "pad_token": "<|im_end|>",
3237
+ "processor_class": "SmolVLMProcessor",
3238
+ "tokenizer_class": "GPT2Tokenizer",
3239
+ "truncation_side": "left",
3240
+ "unk_token": "<|endoftext|>",
3241
+ "vocab_size": 49152
3242
+ }
HuggingFaceTB_SmolVLM2-500M-Video-Instruct/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ datasets:
5
+ - HuggingFaceM4/the_cauldron
6
+ - HuggingFaceM4/Docmatix
7
+ - lmms-lab/LLaVA-OneVision-Data
8
+ - lmms-lab/M4-Instruct-Data
9
+ - HuggingFaceFV/finevideo
10
+ - MAmmoTH-VL/MAmmoTH-VL-Instruct-12M
11
+ - lmms-lab/LLaVA-Video-178K
12
+ - orrzohar/Video-STaR
13
+ - Mutonix/Vript
14
+ - TIGER-Lab/VISTA-400K
15
+ - Enxin/MovieChat-1K_train
16
+ - ShareGPT4Video/ShareGPT4Video
17
+ pipeline_tag: image-text-to-text
18
+ language:
19
+ - en
20
+ base_model:
21
+ - HuggingFaceTB/SmolVLM-256M-Instruct
22
+ ---
23
+
24
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM2_banner.png" width="800" height="auto" alt="Image description">
25
+
26
+ # SmolVLM2-256M-Video
27
+
28
+ SmolVLM2-256M-Video is a lightweight multimodal model designed to analyze video content. The model processes videos, images, and text inputs to generate text outputs - whether answering questions about media files, comparing visual content, or transcribing text from images. Thanks to its compact size, it requires only 1.38GB of GPU RAM for video inference. This efficiency makes it particularly well-suited for on-device applications that require domain-specific fine-tuning and where computational resources may be limited.
29
+ ## Model Summary
30
+
31
+ - **Developed by:** Hugging Face 🤗
32
+ - **Model type:** Multi-modal model (image/multi-image/video/text)
33
+ - **Language(s) (NLP):** English
34
+ - **License:** Apache 2.0
35
+ - **Architecture:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)
36
+
37
+ ## Resources
38
+
39
+ - **Demo:** [Video Highlight Generator](https://huggingface.co/spaces/HuggingFaceTB/SmolVLM2-HighlightGenerator)
40
+ - **Blog:** [Blog post](https://huggingface.co/blog/smolvlm2)
41
+
42
+ ## Uses
43
+
44
+ SmolVLM2 can be used for inference on multimodal (video / image / text) tasks where the input consists of text queries along with video or one or more images. Text and media files can be interleaved arbitrarily, enabling tasks like captioning, visual question answering, and storytelling based on visual content. The model does not support image or video generation.
45
+
46
+ To fine-tune SmolVLM2 on a specific task, you can follow [the fine-tuning tutorial](https://github.com/huggingface/smollm/blob/main/vision/finetuning/Smol_VLM_FT.ipynb).
47
+
48
+ ## Evaluation
49
+
50
+ We evaluated the performance of the SmolVLM2 family on the following scientific benchmarks:
51
+
52
+ | Size | Video-MME | MLVU | MVBench |
53
+ |----------|-----------------|----------|---------------|
54
+ | 2.2B | 52.1 | 55.2 | 46.27 |
55
+ | 500M | 42.2 | 47.3 | 39.73 |
56
+ | 256M | 33.7 | 40.6 | 32.7 |
57
+
58
+
59
+ ### How to get started
60
+
61
+ You can use transformers to load, run inference with, and fine-tune SmolVLM. Make sure you have num2words, flash-attn, and the latest version of transformers installed.
62
+ You can load the model as follows.
63
+
64
+ ```python
65
+ from transformers import AutoProcessor, AutoModelForImageTextToText
66
+ import torch
67
+
68
+ model_path = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
69
+ processor = AutoProcessor.from_pretrained(model_path)
70
+ model = AutoModelForImageTextToText.from_pretrained(
71
+ model_path,
72
+ torch_dtype=torch.bfloat16,
73
+ _attn_implementation="flash_attention_2"
74
+ ).to("cuda")
75
+ ```
76
+
77
+ #### Simple Inference
78
+
79
+ You can preprocess your inputs directly using chat templates and pass them straight to the model.
80
+
81
+ ```python
82
+ messages = [
83
+ {
84
+ "role": "user",
85
+ "content": [
86
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
87
+ {"type": "text", "text": "Can you describe this image?"},
88
+ ]
89
+ },
90
+ ]
91
+
92
+ inputs = processor.apply_chat_template(
93
+ messages,
94
+ add_generation_prompt=True,
95
+ tokenize=True,
96
+ return_dict=True,
97
+ return_tensors="pt",
98
+ ).to(model.device, dtype=torch.bfloat16)
99
+
100
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
101
+ generated_texts = processor.batch_decode(
102
+ generated_ids,
103
+ skip_special_tokens=True,
104
+ )
105
+ print(generated_texts[0])
106
+ ```
107
+
108
+ #### Video Inference
109
+
110
+ To use SmolVLM2 for video inference, make sure you have decord installed.
111
+
112
+ ```python
113
+ messages = [
114
+ {
115
+ "role": "user",
116
+ "content": [
117
+ {"type": "video", "path": "path_to_video.mp4"},
118
+ {"type": "text", "text": "Describe this video in detail"}
119
+ ]
120
+ },
121
+ ]
122
+
123
+ inputs = processor.apply_chat_template(
124
+ messages,
125
+ add_generation_prompt=True,
126
+ tokenize=True,
127
+ return_dict=True,
128
+ return_tensors="pt",
129
+ ).to(model.device, dtype=torch.bfloat16)
130
+
131
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
132
+ generated_texts = processor.batch_decode(
133
+ generated_ids,
134
+ skip_special_tokens=True,
135
+ )
136
+
137
+ print(generated_texts[0])
138
+ ```
139
+ #### Multi-image Interleaved Inference
140
+
141
+ You can interleave multiple media with text using chat templates.
142
+
143
+ ```python
144
+ import torch
145
+
146
+
147
+ messages = [
148
+ {
149
+ "role": "user",
150
+ "content": [
151
+ {"type": "text", "text": "What is the similarity between these two images?"},
152
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
153
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"},
154
+ ]
155
+ },
156
+ ]
157
+ inputs = processor.apply_chat_template(
158
+ messages,
159
+ add_generation_prompt=True,
160
+ tokenize=True,
161
+ return_dict=True,
162
+ return_tensors="pt",
163
+ ).to(model.device, dtype=torch.bfloat16)
164
+
165
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
166
+ generated_texts = processor.batch_decode(
167
+ generated_ids,
168
+ skip_special_tokens=True,
169
+ )
170
+ print(generated_texts[0])
171
+ ```
172
+
173
+
174
+ ### Model optimizations
175
+
176
+ ## Misuse and Out-of-scope Use
177
+
178
+ SmolVLM is not intended for high-stakes scenarios or critical decision-making processes that affect an individual's well-being or livelihood. The model may produce content that appears factual but may not be accurate. Misuse includes, but is not limited to:
179
+
180
+ - Prohibited Uses:
181
+ - Evaluating or scoring individuals (e.g., in employment, education, credit)
182
+ - Critical automated decision-making
183
+ - Generating unreliable factual content
184
+ - Malicious Activities:
185
+ - Spam generation
186
+ - Disinformation campaigns
187
+ - Harassment or abuse
188
+ - Unauthorized surveillance
189
+
190
+ ### License
191
+
192
+ SmolVLM2 is built upon [SigLIP](https://huggingface.co/google/siglip-base-patch16-512) as the image encoder and [SmolLM2](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) as the text decoder.
193
+
194
+ We release the SmolVLM2 checkpoints under the Apache 2.0 license.
195
+
196
+ ## Citation information
197
+ You can cite us in the following way:
198
+ ```bibtex
199
+ @article{marafioti2025smolvlm,
200
+ title={SmolVLM: Redefining small and efficient multimodal models},
201
+ author={Andrés Marafioti and Orr Zohar and Miquel Farré and Merve Noyan and Elie Bakouch and Pedro Cuenca and Cyril Zakka and Loubna Ben Allal and Anton Lozhkov and Nouamane Tazi and Vaibhav Srivastav and Joshua Lochner and Hugo Larcher and Mathieu Morlon and Lewis Tunstall and Leandro von Werra and Thomas Wolf},
202
+ journal={arXiv preprint arXiv:2504.05299},
203
+ year={2025}
204
+ }
205
+ ```
206
+
207
+ ## Training Data
208
+ SmolVLM2 was trained on 3.3M samples drawn from ten different datasets: [LlaVa Onevision](https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data), [M4-Instruct](https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data), [Mammoth](https://huggingface.co/datasets/MAmmoTH-VL/MAmmoTH-VL-Instruct-12M), [LlaVa Video 178K](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K), [FineVideo](https://huggingface.co/datasets/HuggingFaceFV/finevideo), [VideoStar](https://huggingface.co/datasets/orrzohar/Video-STaR), [VRipt](https://huggingface.co/datasets/Mutonix/Vript), [Vista-400K](https://huggingface.co/datasets/TIGER-Lab/VISTA-400K), [MovieChat](https://huggingface.co/datasets/Enxin/MovieChat-1K_train) and [ShareGPT4Video](https://huggingface.co/datasets/ShareGPT4Video/ShareGPT4Video).
209
+ In the following plots we give a general overview of the samples across modalities and the source of those samples.
210
+ <!--
211
+ <center><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_data_split.png" width="auto" height="auto" alt="Image description">
212
+ </center>
213
+
214
+ ### Details
215
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_datadetails.png" width="auto" height="auto" alt="Image description"> -->
216
+
217
+ ## Data Split per modality
218
+
219
+ | Data Type | Percentage |
220
+ |--------------|------------|
221
+ | Image | 34.4% |
222
+ | Text | 20.2% |
223
+ | Video | 33.0% |
224
+ | Multi-image | 12.3% |
225
+
226
+
227
+ ## Granular dataset slices per modality
228
+
229
+ ### Text Datasets
230
+ | Dataset | Percentage |
231
+ |--------------------------------------------|------------|
232
+ | llava-onevision/magpie_pro_ft3_80b_mt | 6.8% |
233
+ | llava-onevision/magpie_pro_ft3_80b_tt | 6.8% |
234
+ | llava-onevision/magpie_pro_qwen2_72b_tt | 5.8% |
235
+ | llava-onevision/mathqa | 0.9% |
236
+
237
+ ### Multi-image Datasets
238
+ | Dataset | Percentage |
239
+ |--------------------------------------------|------------|
240
+ | m4-instruct-data/m4_instruct_multiimage | 10.4% |
241
+ | mammoth/multiimage-cap6 | 1.9% |
242
+
243
+ ### Image Datasets
244
+ | Dataset | Percentage |
245
+ |--------------------------------------------|------------|
246
+ | llava-onevision/other | 17.4% |
247
+ | llava-onevision/vision_flan | 3.9% |
248
+ | llava-onevision/mavis_math_metagen | 2.6% |
249
+ | llava-onevision/mavis_math_rule_geo | 2.5% |
250
+ | llava-onevision/sharegpt4o | 1.7% |
251
+ | llava-onevision/sharegpt4v_coco | 1.5% |
252
+ | llava-onevision/image_textualization | 1.3% |
253
+ | llava-onevision/sharegpt4v_llava | 0.9% |
254
+ | llava-onevision/mapqa | 0.9% |
255
+ | llava-onevision/qa | 0.8% |
256
+ | llava-onevision/textocr | 0.8% |
257
+
258
+ ### Video Datasets
259
+ | Dataset | Percentage |
260
+ |--------------------------------------------|------------|
261
+ | llava-video-178k/1-2m | 7.3% |
262
+ | llava-video-178k/2-3m | 7.0% |
263
+ | other-video/combined | 5.7% |
264
+ | llava-video-178k/hound | 4.4% |
265
+ | llava-video-178k/0-30s | 2.4% |
266
+ | video-star/starb | 2.2% |
267
+ | vista-400k/combined | 2.2% |
268
+ | vript/long | 1.0% |
269
+ | ShareGPT4Video/all | 0.8% |
SmolVLM2-500M-Video-Instruct-Action/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
SmolVLM2-500M-Video-Instruct-Action/README.md ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ datasets:
5
+ - HuggingFaceM4/the_cauldron
6
+ - HuggingFaceM4/Docmatix
7
+ - lmms-lab/LLaVA-OneVision-Data
8
+ - lmms-lab/M4-Instruct-Data
9
+ - HuggingFaceFV/finevideo
10
+ - MAmmoTH-VL/MAmmoTH-VL-Instruct-12M
11
+ - lmms-lab/LLaVA-Video-178K
12
+ - orrzohar/Video-STaR
13
+ - Mutonix/Vript
14
+ - TIGER-Lab/VISTA-400K
15
+ - Enxin/MovieChat-1K_train
16
+ - ShareGPT4Video/ShareGPT4Video
17
+ pipeline_tag: image-text-to-text
18
+ language:
19
+ - en
20
+ base_model:
21
+ - HuggingFaceTB/SmolVLM-500M-Instruct
22
+ ---
23
+
24
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM2_banner.png" width="800" height="auto" alt="Image description">
25
+
26
+ # SmolVLM2-500M-Video
27
+
28
+ SmolVLM2-500M-Video is a lightweight multimodal model designed to analyze video content. The model processes videos, images, and text inputs to generate text outputs - whether answering questions about media files, comparing visual content, or transcribing text from images. Despite its compact size, requiring only 1.8GB of GPU RAM for video inference, it delivers robust performance on complex multimodal tasks. This efficiency makes it particularly well-suited for on-device applications where computational resources may be limited.
29
+ ## Model Summary
30
+
31
+ - **Developed by:** Hugging Face 🤗
32
+ - **Model type:** Multi-modal model (image/multi-image/video/text)
33
+ - **Language(s) (NLP):** English
34
+ - **License:** Apache 2.0
35
+ - **Architecture:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)
36
+
37
+ ## Resources
38
+
39
+ - **Demo:** [Video Highlight Generator](https://huggingface.co/spaces/HuggingFaceTB/SmolVLM2-HighlightGenerator)
40
+ - **Blog:** [Blog post](https://huggingface.co/blog/smolvlm2)
41
+
42
+ ## Uses
43
+
44
+ SmolVLM2 can be used for inference on multimodal (video / image / text) tasks where the input consists of text queries along with video or one or more images. Text and media files can be interleaved arbitrarily, enabling tasks like captioning, visual question answering, and storytelling based on visual content. The model does not support image or video generation.
45
+
46
+ To fine-tune SmolVLM2 on a specific task, you can follow [the fine-tuning tutorial](https://github.com/huggingface/smollm/blob/main/vision/finetuning/Smol_VLM_FT.ipynb).
47
+
48
+ ## Evaluation
49
+
50
+ We evaluated the performance of the SmolVLM2 family on the following scientific benchmarks:
51
+
52
+ | Size | Video-MME | MLVU | MVBench |
53
+ |----------|-----------------|----------|---------------|
54
+ | 2.2B | 52.1 | 55.2 | 46.27 |
55
+ | 500M | 42.2 | 47.3 | 39.73 |
56
+ | 256M | 33.7 | 40.6 | 32.7 |
57
+
58
+
59
+ ### How to get started
60
+
61
+ You can use transformers to load, run inference with, and fine-tune SmolVLM. Make sure you have num2words, flash-attn, and the latest version of transformers installed.
62
+ You can load the model as follows.
63
+
64
+ ```python
65
+ from transformers import AutoProcessor, AutoModelForImageTextToText
66
+ import torch
67
+
68
+ model_path = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
69
+ processor = AutoProcessor.from_pretrained(model_path)
70
+ model = AutoModelForImageTextToText.from_pretrained(
71
+ model_path,
72
+ torch_dtype=torch.bfloat16,
73
+ _attn_implementation="flash_attention_2"
74
+ ).to("cuda")
75
+ ```
76
+
77
+ #### Simple Inference
78
+
79
+ You can preprocess your inputs directly using chat templates and pass them straight to the model.
80
+
81
+ ```python
82
+ messages = [
83
+ {
84
+ "role": "user",
85
+ "content": [
86
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
87
+ {"type": "text", "text": "Can you describe this image?"},
88
+ ]
89
+ },
90
+ ]
91
+
92
+ inputs = processor.apply_chat_template(
93
+ messages,
94
+ add_generation_prompt=True,
95
+ tokenize=True,
96
+ return_dict=True,
97
+ return_tensors="pt",
98
+ ).to(model.device, dtype=torch.bfloat16)
99
+
100
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
101
+ generated_texts = processor.batch_decode(
102
+ generated_ids,
103
+ skip_special_tokens=True,
104
+ )
105
+ print(generated_texts[0])
106
+ ```
107
+
108
+ #### Video Inference
109
+
110
+ To use SmolVLM2 for video inference, make sure you have decord installed.
111
+
112
+ ```python
113
+ messages = [
114
+ {
115
+ "role": "user",
116
+ "content": [
117
+ {"type": "video", "path": "path_to_video.mp4"},
118
+ {"type": "text", "text": "Describe this video in detail"}
119
+ ]
120
+ },
121
+ ]
122
+
123
+ inputs = processor.apply_chat_template(
124
+ messages,
125
+ add_generation_prompt=True,
126
+ tokenize=True,
127
+ return_dict=True,
128
+ return_tensors="pt",
129
+ ).to(model.device, dtype=torch.bfloat16)
130
+
131
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
132
+ generated_texts = processor.batch_decode(
133
+ generated_ids,
134
+ skip_special_tokens=True,
135
+ )
136
+
137
+ print(generated_texts[0])
138
+ ```
139
+ #### Multi-image Interleaved Inference
140
+
141
+ You can interleave multiple media with text using chat templates.
142
+
143
+ ```python
144
+ import torch
145
+
146
+
147
+ messages = [
148
+ {
149
+ "role": "user",
150
+ "content": [
151
+ {"type": "text", "text": "What is the similarity between these two images?"},
152
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
153
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"},
154
+ ]
155
+ },
156
+ ]
157
+
158
+ inputs = processor.apply_chat_template(
159
+ messages,
160
+ add_generation_prompt=True,
161
+ tokenize=True,
162
+ return_dict=True,
163
+ return_tensors="pt",
164
+ ).to(model.device, dtype=torch.bfloat16)
165
+
166
+ generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
167
+ generated_texts = processor.batch_decode(
168
+ generated_ids,
169
+ skip_special_tokens=True,
170
+ )
171
+ print(generated_texts[0])
172
+ ```
173
+
174
+
175
+ ### Model optimizations
176
+
177
+ ## Misuse and Out-of-scope Use
178
+
179
+ SmolVLM is not intended for high-stakes scenarios or critical decision-making processes that affect an individual's well-being or livelihood. The model may produce content that appears factual but may not be accurate. Misuse includes, but is not limited to:
180
+
181
+ - Prohibited Uses:
182
+ - Evaluating or scoring individuals (e.g., in employment, education, credit)
183
+ - Critical automated decision-making
184
+ - Generating unreliable factual content
185
+ - Malicious Activities:
186
+ - Spam generation
187
+ - Disinformation campaigns
188
+ - Harassment or abuse
189
+ - Unauthorized surveillance
190
+
191
+ ### License
192
+
193
+ SmolVLM2 is built upon [SigLIP](https://huggingface.co/google/siglip-base-patch16-512) as the image encoder and [SmolLM2](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) as the text decoder.
194
+
195
+ We release the SmolVLM2 checkpoints under the Apache 2.0 license.
196
+
197
+ ## Citation information
198
+ You can cite us in the following way:
199
+ ```bibtex
200
+ @article{marafioti2025smolvlm,
201
+ title={SmolVLM: Redefining small and efficient multimodal models},
202
+ author={Andrés Marafioti and Orr Zohar and Miquel Farré and Merve Noyan and Elie Bakouch and Pedro Cuenca and Cyril Zakka and Loubna Ben Allal and Anton Lozhkov and Nouamane Tazi and Vaibhav Srivastav and Joshua Lochner and Hugo Larcher and Mathieu Morlon and Lewis Tunstall and Leandro von Werra and Thomas Wolf},
203
+ journal={arXiv preprint arXiv:2504.05299},
204
+ year={2025}
205
+ }
206
+ ```
207
+
208
+ ## Training Data
209
+ SmolVLM2 was trained on 3.3M samples drawn from ten different datasets: [LlaVa Onevision](https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data), [M4-Instruct](https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data), [Mammoth](https://huggingface.co/datasets/MAmmoTH-VL/MAmmoTH-VL-Instruct-12M), [LlaVa Video 178K](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K), [FineVideo](https://huggingface.co/datasets/HuggingFaceFV/finevideo), [VideoStar](https://huggingface.co/datasets/orrzohar/Video-STaR), [VRipt](https://huggingface.co/datasets/Mutonix/Vript), [Vista-400K](https://huggingface.co/datasets/TIGER-Lab/VISTA-400K), [MovieChat](https://huggingface.co/datasets/Enxin/MovieChat-1K_train) and [ShareGPT4Video](https://huggingface.co/datasets/ShareGPT4Video/ShareGPT4Video).
210
+ In the following plots we give a general overview of the samples across modalities and the source of those samples.
211
+ <!--
212
+ <center><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_data_split.png" width="auto" height="auto" alt="Image description">
213
+ </center>
214
+
215
+ ### Details
216
+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolvlm2_datadetails.png" width="auto" height="auto" alt="Image description"> -->
217
+
218
+ ## Data Split per modality
219
+
220
+ | Data Type | Percentage |
221
+ |--------------|------------|
222
+ | Image | 34.4% |
223
+ | Text | 20.2% |
224
+ | Video | 33.0% |
225
+ | Multi-image | 12.3% |
226
+
227
+
228
+ ## Granular dataset slices per modality
229
+
230
+ ### Text Datasets
231
+ | Dataset | Percentage |
232
+ |--------------------------------------------|------------|
233
+ | llava-onevision/magpie_pro_ft3_80b_mt | 6.8% |
234
+ | llava-onevision/magpie_pro_ft3_80b_tt | 6.8% |
235
+ | llava-onevision/magpie_pro_qwen2_72b_tt | 5.8% |
236
+ | llava-onevision/mathqa | 0.9% |
237
+
238
+ ### Multi-image Datasets
239
+ | Dataset | Percentage |
240
+ |--------------------------------------------|------------|
241
+ | m4-instruct-data/m4_instruct_multiimage | 10.4% |
242
+ | mammoth/multiimage-cap6 | 1.9% |
243
+
244
+ ### Image Datasets
245
+ | Dataset | Percentage |
246
+ |--------------------------------------------|------------|
247
+ | llava-onevision/other | 17.4% |
248
+ | llava-onevision/vision_flan | 3.9% |
249
+ | llava-onevision/mavis_math_metagen | 2.6% |
250
+ | llava-onevision/mavis_math_rule_geo | 2.5% |
251
+ | llava-onevision/sharegpt4o | 1.7% |
252
+ | llava-onevision/sharegpt4v_coco | 1.5% |
253
+ | llava-onevision/image_textualization | 1.3% |
254
+ | llava-onevision/sharegpt4v_llava | 0.9% |
255
+ | llava-onevision/mapqa | 0.9% |
256
+ | llava-onevision/qa | 0.8% |
257
+ | llava-onevision/textocr | 0.8% |
258
+
259
+ ### Video Datasets
260
+ | Dataset | Percentage |
261
+ |--------------------------------------------|------------|
262
+ | llava-video-178k/1-2m | 7.3% |
263
+ | llava-video-178k/2-3m | 7.0% |
264
+ | other-video/combined | 5.7% |
265
+ | llava-video-178k/hound | 4.4% |
266
+ | llava-video-178k/0-30s | 2.4% |
267
+ | video-star/starb | 2.2% |
268
+ | vista-400k/combined | 2.2% |
269
+ | vript/long | 1.0% |
270
+ | ShareGPT4Video/all | 0.8% |
SmolVLM2-500M-Video-Instruct-Action/added_tokens.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<action>": 49280,
3
+ "<end_of_utterance>": 49279,
4
+ "<fake_token_around_image>": 49189,
5
+ "<global-img>": 49152,
6
+ "<image>": 49190,
7
+ "<row_1_col_1>": 49153,
8
+ "<row_1_col_2>": 49154,
9
+ "<row_1_col_3>": 49155,
10
+ "<row_1_col_4>": 49156,
11
+ "<row_1_col_5>": 49157,
12
+ "<row_1_col_6>": 49158,
13
+ "<row_2_col_1>": 49159,
14
+ "<row_2_col_2>": 49160,
15
+ "<row_2_col_3>": 49161,
16
+ "<row_2_col_4>": 49162,
17
+ "<row_2_col_5>": 49163,
18
+ "<row_2_col_6>": 49164,
19
+ "<row_3_col_1>": 49165,
20
+ "<row_3_col_2>": 49166,
21
+ "<row_3_col_3>": 49167,
22
+ "<row_3_col_4>": 49168,
23
+ "<row_3_col_5>": 49169,
24
+ "<row_3_col_6>": 49170,
25
+ "<row_4_col_1>": 49171,
26
+ "<row_4_col_2>": 49172,
27
+ "<row_4_col_3>": 49173,
28
+ "<row_4_col_4>": 49174,
29
+ "<row_4_col_5>": 49175,
30
+ "<row_4_col_6>": 49176,
31
+ "<row_5_col_1>": 49177,
32
+ "<row_5_col_2>": 49178,
33
+ "<row_5_col_3>": 49179,
34
+ "<row_5_col_4>": 49180,
35
+ "<row_5_col_5>": 49181,
36
+ "<row_5_col_6>": 49182,
37
+ "<row_6_col_1>": 49183,
38
+ "<row_6_col_2>": 49184,
39
+ "<row_6_col_3>": 49185,
40
+ "<row_6_col_4>": 49186,
41
+ "<row_6_col_5>": 49187,
42
+ "<row_6_col_6>": 49188,
43
+ "<|reserved_special_token_0|>": 49191,
44
+ "<|reserved_special_token_10|>": 49201,
45
+ "<|reserved_special_token_11|>": 49202,
46
+ "<|reserved_special_token_12|>": 49203,
47
+ "<|reserved_special_token_13|>": 49204,
48
+ "<|reserved_special_token_14|>": 49205,
49
+ "<|reserved_special_token_15|>": 49206,
50
+ "<|reserved_special_token_16|>": 49207,
51
+ "<|reserved_special_token_17|>": 49208,
52
+ "<|reserved_special_token_18|>": 49209,
53
+ "<|reserved_special_token_19|>": 49210,
54
+ "<|reserved_special_token_1|>": 49192,
55
+ "<|reserved_special_token_20|>": 49211,
56
+ "<|reserved_special_token_21|>": 49212,
57
+ "<|reserved_special_token_22|>": 49213,
58
+ "<|reserved_special_token_23|>": 49214,
59
+ "<|reserved_special_token_24|>": 49215,
60
+ "<|reserved_special_token_25|>": 49216,
61
+ "<|reserved_special_token_26|>": 49217,
62
+ "<|reserved_special_token_27|>": 49218,
63
+ "<|reserved_special_token_28|>": 49219,
64
+ "<|reserved_special_token_29|>": 49220,
65
+ "<|reserved_special_token_2|>": 49193,
66
+ "<|reserved_special_token_30|>": 49221,
67
+ "<|reserved_special_token_31|>": 49222,
68
+ "<|reserved_special_token_32|>": 49223,
69
+ "<|reserved_special_token_33|>": 49224,
70
+ "<|reserved_special_token_34|>": 49225,
71
+ "<|reserved_special_token_35|>": 49226,
72
+ "<|reserved_special_token_36|>": 49227,
73
+ "<|reserved_special_token_37|>": 49228,
74
+ "<|reserved_special_token_38|>": 49229,
75
+ "<|reserved_special_token_39|>": 49230,
76
+ "<|reserved_special_token_3|>": 49194,
77
+ "<|reserved_special_token_40|>": 49231,
78
+ "<|reserved_special_token_41|>": 49232,
79
+ "<|reserved_special_token_42|>": 49233,
80
+ "<|reserved_special_token_43|>": 49234,
81
+ "<|reserved_special_token_44|>": 49235,
82
+ "<|reserved_special_token_45|>": 49236,
83
+ "<|reserved_special_token_46|>": 49237,
84
+ "<|reserved_special_token_47|>": 49238,
85
+ "<|reserved_special_token_48|>": 49239,
86
+ "<|reserved_special_token_49|>": 49240,
87
+ "<|reserved_special_token_4|>": 49195,
88
+ "<|reserved_special_token_50|>": 49241,
89
+ "<|reserved_special_token_51|>": 49242,
90
+ "<|reserved_special_token_52|>": 49243,
91
+ "<|reserved_special_token_53|>": 49244,
92
+ "<|reserved_special_token_54|>": 49245,
93
+ "<|reserved_special_token_55|>": 49246,
94
+ "<|reserved_special_token_56|>": 49247,
95
+ "<|reserved_special_token_57|>": 49248,
96
+ "<|reserved_special_token_58|>": 49249,
97
+ "<|reserved_special_token_59|>": 49250,
98
+ "<|reserved_special_token_5|>": 49196,
99
+ "<|reserved_special_token_60|>": 49251,
100
+ "<|reserved_special_token_61|>": 49252,
101
+ "<|reserved_special_token_62|>": 49253,
102
+ "<|reserved_special_token_63|>": 49254,
103
+ "<|reserved_special_token_64|>": 49255,
104
+ "<|reserved_special_token_65|>": 49256,
105
+ "<|reserved_special_token_66|>": 49257,
106
+ "<|reserved_special_token_67|>": 49258,
107
+ "<|reserved_special_token_68|>": 49259,
108
+ "<|reserved_special_token_69|>": 49260,
109
+ "<|reserved_special_token_6|>": 49197,
110
+ "<|reserved_special_token_70|>": 49261,
111
+ "<|reserved_special_token_71|>": 49262,
112
+ "<|reserved_special_token_72|>": 49263,
113
+ "<|reserved_special_token_73|>": 49264,
114
+ "<|reserved_special_token_74|>": 49265,
115
+ "<|reserved_special_token_75|>": 49266,
116
+ "<|reserved_special_token_76|>": 49267,
117
+ "<|reserved_special_token_77|>": 49268,
118
+ "<|reserved_special_token_78|>": 49269,
119
+ "<|reserved_special_token_79|>": 49270,
120
+ "<|reserved_special_token_7|>": 49198,
121
+ "<|reserved_special_token_80|>": 49271,
122
+ "<|reserved_special_token_81|>": 49272,
123
+ "<|reserved_special_token_82|>": 49273,
124
+ "<|reserved_special_token_83|>": 49274,
125
+ "<|reserved_special_token_84|>": 49275,
126
+ "<|reserved_special_token_85|>": 49276,
127
+ "<|reserved_special_token_86|>": 49277,
128
+ "<|reserved_special_token_87|>": 49278,
129
+ "<|reserved_special_token_8|>": 49199,
130
+ "<|reserved_special_token_9|>": 49200
131
+ }
SmolVLM2-500M-Video-Instruct-Action/chat_template.jinja ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ <|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
2
+ {% endfor %}{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}
SmolVLM2-500M-Video-Instruct-Action/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>\n{% endfor %}{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}"
3
+ }
SmolVLM2-500M-Video-Instruct-Action/config.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SmolVLMForConditionalGeneration"
4
+ ],
5
+ "image_token_id": 49190,
6
+ "model_type": "smolvlm",
7
+ "pad_token_id": 128002,
8
+ "scale_factor": 4,
9
+ "text_config": {
10
+ "_flash_attn_2_enabled": true,
11
+ "_name_or_path": "None",
12
+ "architectures": [
13
+ "VLlama3ForCausalLM"
14
+ ],
15
+ "attention_bias": false,
16
+ "attention_dropout": 0.0,
17
+ "head_dim": 64,
18
+ "hidden_act": "silu",
19
+ "hidden_size": 960,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 2560,
22
+ "is_llama_config": true,
23
+ "max_position_embeddings": 8192,
24
+ "mlp_bias": false,
25
+ "model_type": "llama",
26
+ "neftune_noise_alpha": 0.0,
27
+ "num_attention_heads": 15,
28
+ "num_hidden_layers": 32,
29
+ "num_key_value_heads": 5,
30
+ "pad_token_id": 2,
31
+ "perceiver_config": {
32
+ "_name_or_path": "",
33
+ "add_cross_attention": false,
34
+ "architectures": null,
35
+ "attention_dropout": 0.0,
36
+ "bad_words_ids": null,
37
+ "begin_suppress_tokens": null,
38
+ "bos_token_id": null,
39
+ "chunk_size_feed_forward": 0,
40
+ "cross_attention_hidden_size": null,
41
+ "decoder_start_token_id": null,
42
+ "diversity_penalty": 0.0,
43
+ "do_sample": false,
44
+ "early_stopping": false,
45
+ "encoder_no_repeat_ngram_size": 0,
46
+ "eos_token_id": null,
47
+ "exponential_decay_length_penalty": null,
48
+ "finetuning_task": null,
49
+ "forced_bos_token_id": null,
50
+ "forced_eos_token_id": null,
51
+ "hidden_act": "silu",
52
+ "id2label": {
53
+ "0": "LABEL_0",
54
+ "1": "LABEL_1"
55
+ },
56
+ "is_decoder": false,
57
+ "is_encoder_decoder": false,
58
+ "label2id": {
59
+ "LABEL_0": 0,
60
+ "LABEL_1": 1
61
+ },
62
+ "length_penalty": 1.0,
63
+ "max_length": 20,
64
+ "min_length": 0,
65
+ "model_type": "vllama3",
66
+ "no_repeat_ngram_size": 0,
67
+ "num_beam_groups": 1,
68
+ "num_beams": 1,
69
+ "num_key_value_heads": 1,
70
+ "num_return_sequences": 1,
71
+ "output_attentions": false,
72
+ "output_hidden_states": false,
73
+ "output_scores": false,
74
+ "pad_token_id": null,
75
+ "prefix": null,
76
+ "problem_type": null,
77
+ "pruned_heads": {},
78
+ "qk_layer_norms_perceiver": false,
79
+ "remove_invalid_values": false,
80
+ "repetition_penalty": 1.0,
81
+ "resampler_depth": 6,
82
+ "resampler_head_dim": 96,
83
+ "resampler_n_heads": 16,
84
+ "resampler_n_latents": 64,
85
+ "return_dict": true,
86
+ "return_dict_in_generate": false,
87
+ "sep_token_id": null,
88
+ "suppress_tokens": null,
89
+ "task_specific_params": null,
90
+ "temperature": 1.0,
91
+ "tf_legacy_loss": false,
92
+ "tie_encoder_decoder": false,
93
+ "tie_word_embeddings": true,
94
+ "tokenizer_class": null,
95
+ "top_k": 50,
96
+ "top_p": 1.0,
97
+ "torch_dtype": null,
98
+ "torchscript": false,
99
+ "transformers_version": "4.46.0",
100
+ "typical_p": 1.0,
101
+ "use_bfloat16": false
102
+ },
103
+ "pixel_shuffle_factor": 4,
104
+ "pretraining_tp": 1,
105
+ "qk_layer_norms": false,
106
+ "rms_norm_eps": 1e-05,
107
+ "rope_interleaved": false,
108
+ "rope_scaling": null,
109
+ "rope_theta": 100000,
110
+ "torch_dtype": "float32",
111
+ "transformers.js_config": {
112
+ "kv_cache_dtype": {
113
+ "fp16": "float16",
114
+ "q4f16": "float16"
115
+ }
116
+ },
117
+ "use_cache": true,
118
+ "use_resampler": false,
119
+ "vocab_size": 49281
120
+ },
121
+ "tie_word_embeddings": false,
122
+ "torch_dtype": "float32",
123
+ "transformers.js_config": {
124
+ "kv_cache_dtype": {
125
+ "fp16": "float16",
126
+ "q4f16": "float16"
127
+ }
128
+ },
129
+ "transformers_version": "4.52.4",
130
+ "use_cache": false,
131
+ "use_reentrant_checkpointing": false,
132
+ "vision_config": {
133
+ "attention_dropout": 0.0,
134
+ "hidden_act": "gelu_pytorch_tanh",
135
+ "hidden_size": 768,
136
+ "image_size": 512,
137
+ "initializer_range": 0.02,
138
+ "intermediate_size": 3072,
139
+ "layer_norm_eps": 1e-06,
140
+ "max_image_size": {
141
+ "longest_edge": 512
142
+ },
143
+ "model_type": "smolvlm_vision",
144
+ "num_attention_heads": 12,
145
+ "num_channels": 3,
146
+ "num_hidden_layers": 12,
147
+ "patch_size": 16,
148
+ "size": {
149
+ "longest_edge": 512
150
+ },
151
+ "tie_word_embeddings": false,
152
+ "torch_dtype": "float32",
153
+ "use_base_siglip": false
154
+ },
155
+ "vocab_size": 49281,
156
+ "action_dim": 7
157
+ }
SmolVLM2-500M-Video-Instruct-Action/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 49279,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.52.4"
7
+ }
SmolVLM2-500M-Video-Instruct-Action/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
SmolVLM2-500M-Video-Instruct-Action/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ab8a3f316e950c0f5eba355fe24b65f69096a112d8fd9b7f49133a16e65b08
3
+ size 2029998304
SmolVLM2-500M-Video-Instruct-Action/onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8743f020606f401ffa0c0ec7a7055da6ad7518b981ecdb060b62da3a60ec45
3
+ size 1450426001