HugC committed on
Commit
30823e7
·
verified ·
1 Parent(s): 2b728f4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
37
+ teaser.png filter=lfs diff=lfs merge=lfs -text
.hfd/aria2c_urls.txt ADDED
File without changes
.hfd/last_download_command ADDED
@@ -0,0 +1 @@
 
 
1
+ REPO_ID=microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_ENDPOINT=https://hf-mirror.com REVISION=main
.hfd/repo_metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_id":"6738ab4f7250a807ef3d09d0","id":"microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned","private":false,"pipeline_tag":"zero-shot-classification","tags":["safetensors","llama","CLIP","LLM2CLIP","zero-shot-classification","custom_code","arxiv:2411.04997","license:apache-2.0","region:us"],"downloads":9501,"likes":36,"modelId":"microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned","author":"microsoft","sha":"97f7d164a1eb89a79e344b21c7a1be7eee265d2e","lastModified":"2024-11-19T16:27:34.000Z","gated":false,"disabled":false,"widgetData":[{"text":"I have a problem with my iphone that needs to be resolved asap!","candidate_labels":"urgent, not urgent, phone, tablet, computer","multi_class":true},{"text":"Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.","candidate_labels":"mobile, website, billing, account access","multi_class":false},{"text":"A new model offers an explanation for how the Galilean satellites formed around the solar system’s largest world. Konstantin Batygin did not set out to solve one of the solar system’s most puzzling mysteries when he went for a run up a hill in Nice, France. Dr. Batygin, a Caltech researcher, best known for his contributions to the search for the solar system’s missing “Planet Nine,” spotted a beer bottle. At a steep, 20 degree grade, he wondered why it wasn’t rolling down the hill. He realized there was a breeze at his back holding the bottle in place. Then he had a thought that would only pop into the mind of a theoretical astrophysicist: “Oh! This is how Europa formed.” Europa is one of Jupiter’s four large Galilean moons. And in a paper published Monday in the Astrophysical Journal, Dr. 
Batygin and a co-author, Alessandro Morbidelli, a planetary scientist at the Côte d’Azur Observatory in France, present a theory explaining how some moons form around gas giants like Jupiter and Saturn, suggesting that millimeter-sized grains of hail produced during the solar system’s formation became trapped around these massive worlds, taking shape one at a time into the potentially habitable moons we know today.","candidate_labels":"space & cosmos, scientific discovery, microbiology, robots, archeology","multi_class":true}],"model-index":null,"config":{"architectures":["LlamaEncoderModel"],"auto_map":{"AutoModel":"McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp--modeling_llama_encoder.LlamaEncoderModel"},"model_type":"llama","tokenizer_config":{"bos_token":"<|begin_of_text|>","chat_template":"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif 
%}","eos_token":"<|end_of_text|>","pad_token":"<|end_of_text|>"}},"cardData":{"license":"apache-2.0","tags":["CLIP","LLM2CLIP"],"pipeline_tag":"zero-shot-classification"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"model-00001-of-00004.safetensors"},{"rfilename":"model-00002-of-00004.safetensors"},{"rfilename":"model-00003-of-00004.safetensors"},{"rfilename":"model-00004-of-00004.safetensors"},{"rfilename":"model.safetensors.index.json"},{"rfilename":"modeling_llama_encoder.py"},{"rfilename":"special_tokens_map.json"},{"rfilename":"teaser.png"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer_config.json"}],"spaces":["BronioInt/Lake-1-Pro"],"createdAt":"2024-11-16T14:25:19.000Z","safetensors":{"parameters":{"BF16":7504924672},"total":7504924672},"usedStorage":15009881368}
README.md CHANGED
@@ -1,3 +1,131 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - CLIP
5
+ - LLM2CLIP
6
+ pipeline_tag: zero-shot-classification
7
+ ---
8
+
9
+ # Modification Notes
10
+
11
+ This is a modified version of the original Microsoft LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned model. The main modifications include:
12
+
13
+ 1. **Configuration Updates**: Updated `config.json` to load the model code from the local repo rather than from `McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp`
14
+ 2. **Model Code Adaptation**: Modified `modeling_llama_encoder.py` and added `attn_mask_utils.py` to work with newer versions of transformers
15
+ 3. **Model Weights**: The model weight files remain unchanged from the original
16
+
17
+ Original model: https://huggingface.co/microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned
18
+
19
+ ---
20
+
21
+ <div align="center">
22
+
23
+ <h2><a href="">LLM2CLIP: Extending the Capability Boundaries of CLIP through Large Language Models</a></h2>
24
+ Weiquan Huang<sup>1*</sup>, Aoqi Wu<sup>1*</sup>, Yifan Yang<sup>2†</sup>, Xufang Luo<sup>2</sup>, Yuqing Yang<sup>2</sup>, Liang Hu<sup>1</sup>, Qi Dai<sup>2</sup>, Xiyang Dai<sup>2</sup>, Dongdong Chen<sup>2</sup>, Chong Luo<sup>2</sup>, Lili Qiu<sup>2</sup>
25
+
26
+ <sup>1</sup>Tongji University, <sup>2</sup>Microsoft Corporation <br><sup>*</sup>Equal contribution <br><sup>†</sup> Corresponding to: [email protected]
27
+
28
+ <p><a rel="nofollow" href="https://github.com/microsoft/LLM2CLIP">[📂 GitHub]</a> <a rel="nofollow" href="https://microsoft.github.io/LLM2CLIP/">[🆕 Blog]</a> <a rel="nofollow" href="">[📜 LLM2CLIP]</a>
29
+ </div>
30
+
31
+
32
+ In this paper, we propose LLM2CLIP, a novel approach that embraces the power of LLMs to unlock CLIP’s potential. By fine-tuning the LLM in the caption space with contrastive learning, we extract its textual capabilities into the output embeddings, significantly improving the output layer’s textual discriminability. We then design an efficient training process where the fine-tuned LLM acts as a powerful teacher for CLIP’s visual encoder. Thanks to the LLM’s presence, we can now incorporate longer and more complex captions without being restricted by vanilla CLIP text encoder’s context window and ability limitations. Our experiments demonstrate that this approach brings substantial improvements in cross-modal tasks. Our method directly boosted the performance of the previously SOTA EVA02 model by 16.5% on both long-text and short-text retrieval tasks, transforming a CLIP model trained solely on English data into a state-of-the-art cross-lingual model. Moreover, when integrated into multimodal training with models like Llava 1.5, it consistently outperformed CLIP across nearly all benchmarks, demonstrating comprehensive performance improvements.
33
+
34
+ ## LLM2CLIP performance
35
+
36
+ <div align="center">
37
+ <img src="teaser.png" alt="summary_tab" width="85%">
38
+ </div>
39
+ **It's important to note that all results presented in the paper are evaluated using PyTorch weights. There may be differences in performance when using Hugging Face (hf) models.**
40
+
41
+ ## Model Details
42
+ - **Model Type:** vision foundation model, feature backbone
43
+ - **Pretrain Dataset:** CC3M, CC12M, YFCC15M and Recap-DataComp-1B(30M subset)
44
+
45
+
46
+ ## Usage
47
+
48
+ ### Hugging Face Version
49
+ Image Embeddings
50
+ ```python
51
+ from PIL import Image
52
+ from transformers import AutoModel
53
+ from transformers import CLIPImageProcessor
54
+ import torch
55
+
56
+ image_path = "CLIP.png"
57
+ model_name_or_path = "LLM2CLIP-Openai-L-14-336" # or /path/to/local/LLM2CLIP-Openai-L-14-336
58
+
59
+ processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
60
+ model = AutoModel.from_pretrained(
61
+ model_name_or_path,
62
+ torch_dtype=torch.float16,
63
+ trust_remote_code=True).to('cuda').eval()
64
+
65
+ image = Image.open(image_path)
66
+ input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
67
+
68
+ with torch.no_grad(), torch.cuda.amp.autocast():
69
+ outputs = model.get_image_features(input_pixels)
70
+ ```
71
+ Retrieval
72
+ ```python
73
+ from PIL import Image
74
+ from transformers import AutoModel, AutoConfig, AutoTokenizer
75
+ from transformers import CLIPImageProcessor
76
+ import torch
77
+ from llm2vec import LLM2Vec
78
+ import os
79
+
80
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
81
+
82
+ processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
83
+ model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-336" # or /path/to/local/LLM2CLIP-Openai-L-14-336
84
+ model = AutoModel.from_pretrained(
85
+ model_name_or_path,
86
+ torch_dtype=torch.bfloat16,
87
+ trust_remote_code=True).to('cuda').eval()
88
+
89
+ llm_model_name = 'microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned'
90
+ config = AutoConfig.from_pretrained(
91
+ llm_model_name, trust_remote_code=True
92
+ )
93
+ llm_model = AutoModel.from_pretrained(llm_model_name, torch_dtype=torch.bfloat16, config=config, trust_remote_code=True)
94
+ tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
95
+ llm_model.config._name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' # Workaround for LLM2VEC
96
+ l2v = LLM2Vec(llm_model, tokenizer, pooling_mode="mean", max_length=512, doc_max_length=512)
97
+
98
+ captions = ["a diagram", "a dog", "a cat"]
99
+ image_path = "CLIP.png"
100
+
101
+ image = Image.open(image_path)
102
+ input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
103
+ text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')
104
+
105
+ with torch.no_grad(), torch.cuda.amp.autocast():
106
+ image_features = model.get_image_features(input_pixels)
107
+ text_features = model.get_text_features(text_features)
108
+
109
+ image_features /= image_features.norm(dim=-1, keepdim=True)
110
+ text_features /= text_features.norm(dim=-1, keepdim=True)
111
+
112
+ text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
113
+
114
+ print("Label probs:", text_probs)
115
+
116
+ ```
117
+
118
+ ## BibTeX & Citation
119
+
120
+ ```
121
+ @misc{huang2024llm2clippowerfullanguagemodel,
122
+ title={LLM2CLIP: Powerful Language Model Unlock Richer Visual Representation},
123
+ author={Weiquan Huang and Aoqi Wu and Yifan Yang and Xufang Luo and Yuqing Yang and Liang Hu and Qi Dai and Xiyang Dai and Dongdong Chen and Chong Luo and Lili Qiu},
124
+ year={2024},
125
+ eprint={2411.04997},
126
+ archivePrefix={arXiv},
127
+ primaryClass={cs.CV},
128
+ url={https://arxiv.org/abs/2411.04997},
129
+ }
130
+
131
+ ```
attn_mask_utils.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Tuple, Union
2
+ import torch
3
+ from packaging import version
4
+ import importlib.metadata
5
+ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
6
+
7
+ from transformers.utils.import_utils import _is_package_available
8
+
9
def is_transformers_attn_greater_or_equal_4_39():
    """Report whether the installed `transformers` is version 4.39.0 or newer.

    Returns:
        bool: False when `_is_package_available` says `transformers` is not
        available; otherwise the result of comparing the installed
        distribution version against 4.39.0.
    """
    if _is_package_available("transformers"):
        installed = version.parse(importlib.metadata.version("transformers"))
        return installed >= version.parse("4.39.0")
    return False
16
+
17
def _prepare_4d_attention_mask_for_sdpa(
    attention_mask: Optional[torch.Tensor],
    input_shape: Union[torch.Size, Tuple, List],
    inputs_embeds: torch.Tensor,
    past_key_values_length: int,
    sliding_window: Optional[int] = None,
):
    """Build the mask handed to SDPA, using a non-causal `AttentionMaskConverter`.

    Args:
        attention_mask: Optional mask tensor; `None` means no mask was supplied.
        input_shape: Two-element `(batch_size, query_length)` shape.
        inputs_embeds: Only its `dtype` and `device` are read here.
        past_key_values_length: Added to the query length to get the key/value length.
        sliding_window: Forwarded unchanged to `AttentionMaskConverter`.

    Returns:
        `None` when the mask can be dropped, otherwise the mask produced by the
        converter (see the branches below).

    Raises:
        ValueError: when tracing with `torch.jit.trace`, no mask was supplied and
            the no-mask case does not dispatch to `to_causal_4d`.
    """
    converter = AttentionMaskConverter(is_causal=False, sliding_window=sliding_window)

    key_value_length = input_shape[-1] + past_key_values_length
    # Unpacking also enforces that `input_shape` has exactly two entries.
    _, query_length = input_shape

    # The `attention_mask is None` shortcut is data-dependent control flow that
    # torch.jit.trace cannot capture, so it is never taken while tracing.
    tracing = torch.jit.is_tracing()

    if attention_mask is None:
        if query_length > 1 and key_value_length != query_length:
            # SDPA's own causal mask may be wrong in this case
            # (https://github.com/pytorch/pytorch/issues/108108), so an explicit
            # mask is requested from the converter instead.
            # NOTE(review): the converter is built with is_causal=False; confirm
            # `to_causal_4d` accepts that in the targeted transformers version.
            return converter.to_causal_4d(
                input_shape[0],
                input_shape[-1],
                key_value_length,
                dtype=inputs_embeds.dtype,
                device=inputs_embeds.device,
            )
        if tracing:
            raise ValueError(
                'Attention using SDPA can not be traced with torch.jit.trace when no attention_mask is provided. To solve this issue, please either load your model with the argument `attn_implementation="eager"` or pass an attention_mask input when tracing the model.'
            )
        return None

    # An all-ones mask is dropped only for a single query token, where causal and
    # bi-directional attention coincide. The `key_value_length == query_length`
    # shortcut is intentionally not taken (batch size = 1 handling).
    if torch.all(attention_mask == 1) and not tracing and query_length == 1:
        return None

    expanded = converter.to_4d(
        attention_mask,
        input_shape[-1],
        dtype=inputs_embeds.dtype,
        key_value_length=key_value_length,
    )

    # From PyTorch 2.1 onwards the memory-efficient SDPA backend yields NaNs for
    # rows that are fully masked (https://github.com/pytorch/pytorch/issues/110213);
    # `_unmask_unattended` is called with the signature matching the installed
    # transformers version.
    if query_length > 1:
        if is_transformers_attn_greater_or_equal_4_39():
            expanded = AttentionMaskConverter._unmask_unattended(
                expanded, min_dtype=torch.finfo(inputs_embeds.dtype).min
            )
        else:
            expanded = AttentionMaskConverter._unmask_unattended(
                expanded, attention_mask, unmasked_value=0.0
            )

    return expanded
85
+
86
+
87
def _prepare_4d_attention_mask(
    attention_mask: Optional[torch.Tensor],
    input_shape: Union[torch.Size, Tuple, List],
    inputs_embeds: torch.Tensor,
    past_key_values_length: int,
    sliding_window: Optional[int] = None,
):
    """Produce the 4D mask that is passed through the layers.

    Args:
        attention_mask: Optional mask tensor; `None` selects the `to_causal_4d` path.
        input_shape: Indexed at `[0]` and `[-1]`; presumably
            `(batch_size, query_length)` (confirm against callers).
        inputs_embeds: Only its `dtype` and `device` are read here.
        past_key_values_length: Added to `input_shape[-1]` to get the key/value length.
        sliding_window: Forwarded unchanged to `AttentionMaskConverter`.

    Returns:
        The result of `AttentionMaskConverter.to_4d` when a mask is given,
        otherwise the result of `AttentionMaskConverter.to_causal_4d`.
    """
    converter = AttentionMaskConverter(is_causal=False, sliding_window=sliding_window)
    query_length = input_shape[-1]
    key_value_length = query_length + past_key_values_length

    if attention_mask is None:
        # NOTE(review): the converter is built with is_causal=False; confirm
        # `to_causal_4d` accepts that in the targeted transformers version.
        return converter.to_causal_4d(
            input_shape[0],
            query_length,
            key_value_length,
            dtype=inputs_embeds.dtype,
            device=inputs_embeds.device,
        )

    return converter.to_4d(
        attention_mask,
        query_length,
        key_value_length=key_value_length,
        dtype=inputs_embeds.dtype,
    )
109
+
110
+
111
def _prepare_4d_causal_attention_mask(
    attention_mask: Optional[torch.Tensor],
    input_shape: Union[torch.Size, Tuple, List],
    inputs_embeds: torch.Tensor,
    past_key_values_length: int,
    sliding_window: Optional[int] = None,
):
    """Produce the 4D mask that is passed through the layers.

    Despite the name, the converter is constructed with `is_causal=False`.

    Args:
        attention_mask: Optional mask tensor; `None` selects the `to_causal_4d` path.
        input_shape: Indexed at `[0]` and `[-1]`; presumably
            `(batch_size, query_length)` (confirm against callers).
        inputs_embeds: Only its `dtype` and `device` are read here.
        past_key_values_length: Added to `input_shape[-1]` to get the key/value length.
        sliding_window: Forwarded unchanged to `AttentionMaskConverter`.

    Returns:
        The result of `AttentionMaskConverter.to_4d` when a mask is given,
        otherwise the result of `AttentionMaskConverter.to_causal_4d`.
    """
    mask_builder = AttentionMaskConverter(is_causal=False, sliding_window=sliding_window)
    kv_len = input_shape[-1] + past_key_values_length
    embeds_dtype = inputs_embeds.dtype

    # NOTE(review): the no-mask branch calls `to_causal_4d` on a converter built
    # with is_causal=False; confirm this is accepted by the targeted
    # transformers version.
    mask_4d = (
        mask_builder.to_4d(
            attention_mask, input_shape[-1], key_value_length=kv_len, dtype=embeds_dtype
        )
        if attention_mask is not None
        else mask_builder.to_causal_4d(
            input_shape[0], input_shape[-1], kv_len, dtype=embeds_dtype, device=inputs_embeds.device
        )
    )
    return mask_4d
133
+
134
+
135
def _prepare_4d_causal_attention_mask_for_sdpa(
    attention_mask: Optional[torch.Tensor],
    input_shape: Union[torch.Size, Tuple, List],
    inputs_embeds: torch.Tensor,
    past_key_values_length: int,
    sliding_window: Optional[int] = None,
):
    """
    Prepares the `attn_mask` argument for `torch.nn.functional.scaled_dot_product_attention`.

    When no token is masked and `query_length == 1`, the mask is dropped (`None` is
    returned) so that SDPA is not handed a custom `attn_mask`. The analogous
    `key_value_length == query_length` shortcut is intentionally disabled in this
    file (batch size = 1 handling).

    Args:
        attention_mask: Optional mask tensor. A 4-dimensional mask is validated,
            inverted and returned directly; any other mask goes through
            `AttentionMaskConverter.to_4d`.
        input_shape: Two-element `(batch_size, query_length)` shape.
        inputs_embeds: Its `dtype` and `device` are read; a `torch.fx.Proxy`
            here is treated as tracing.
        past_key_values_length: Added to the query length to get the key/value length.
        sliding_window: Forwarded unchanged to `AttentionMaskConverter`.

    Returns:
        `None` when the mask can be dropped, otherwise the prepared mask.

    Raises:
        ValueError: if a 4D mask does not have shape
            `(batch_size, 1, query_length, key_value_length)`, or if tracing
            without an attention mask.
    """
    converter = AttentionMaskConverter(is_causal=False, sliding_window=sliding_window)

    key_value_length = input_shape[-1] + past_key_values_length
    # Unpacking also enforces that `input_shape` has exactly two entries.
    _, query_length = input_shape

    # torch.jit.trace, symbolic_trace and torchdynamo with fullgraph=True cannot
    # capture the data-dependent "drop the mask" shortcut, so it is skipped
    # whenever tracing is detected.
    tracing = torch.jit.is_tracing() or isinstance(inputs_embeds, torch.fx.Proxy)

    if attention_mask is None:
        if query_length > 1 and key_value_length != query_length:
            # SDPA's own causal mask may be wrong in this case
            # (https://github.com/pytorch/pytorch/issues/108108), so an explicit
            # mask is requested from the converter instead.
            # NOTE(review): the converter is built with is_causal=False; confirm
            # `to_causal_4d` accepts that in the targeted transformers version.
            return converter.to_causal_4d(
                input_shape[0],
                input_shape[-1],
                key_value_length,
                dtype=inputs_embeds.dtype,
                device=inputs_embeds.device,
            )
        if tracing:
            raise ValueError(
                'Attention using SDPA can not be traced with torch.jit.trace when no attention_mask is provided. To solve this issue, please either load your model with the argument `attn_implementation="eager"` or pass an attention_mask input when tracing the model.'
            )
        return None

    # A caller-supplied 4D mask bypasses the converter entirely.
    if len(attention_mask.shape) == 4:
        expected_shape = (input_shape[0], 1, input_shape[1], key_value_length)
        if tuple(attention_mask.shape) != expected_shape:
            raise ValueError(
                f"Incorrect 4D attention_mask shape: {tuple(attention_mask.shape)}; expected: {expected_shape}."
            )
        # Invert the mask and fill the non-zero positions of the inverted mask
        # with the dtype's minimum value.
        inverted = 1.0 - attention_mask.to(inputs_embeds.dtype)
        return inverted.masked_fill(
            inverted.to(torch.bool), torch.finfo(inputs_embeds.dtype).min
        )

    # An all-ones mask is dropped only for a single query token, where causal and
    # bi-directional attention coincide. The tracing check comes first so the
    # data-dependent `torch.all` is not evaluated while tracing.
    if not tracing and torch.all(attention_mask == 1) and query_length == 1:
        return None

    expanded = converter.to_4d(
        attention_mask,
        input_shape[-1],
        dtype=inputs_embeds.dtype,
        key_value_length=key_value_length,
    )

    # From PyTorch 2.1 onwards the memory-efficient SDPA backend yields NaNs for
    # rows that are fully masked (https://github.com/pytorch/pytorch/issues/110213).
    # The fix is skipped while tracing because `_unmask_unattended` has
    # data-dependent control flow that cannot be captured.
    # TODO: `_unmask_unattended` does not work with torch.compile(fullgraph=True) either.
    if query_length > 1 and not tracing:
        if is_transformers_attn_greater_or_equal_4_39():
            expanded = AttentionMaskConverter._unmask_unattended(
                expanded, min_dtype=torch.finfo(inputs_embeds.dtype).min
            )
        else:
            expanded = AttentionMaskConverter._unmask_unattended(
                expanded, attention_mask, unmasked_value=0.0
            )

    return expanded
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
3
+ "architectures": [
4
+ "LlamaEncoderModel"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0,
8
+ "auto_map": {
9
+ "AutoModel": "modeling_llama_encoder.LlamaEncoderModel"
10
+ },
11
+ "bos_token_id": 128000,
12
+ "eos_token_id": 128001,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 4096,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 14336,
17
+ "max_position_embeddings": 8192,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 500000,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.40.2",
29
+ "use_cache": true,
30
+ "vocab_size": 128256
31
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d2978cc1e862d8f020697817335f98bab2eb81c0c95e36194b0884af0bf260
3
+ size 4976698176
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781c9768fb4c8da12e93686d7f5c95d4bf2144a4c8c5ea3b06bf24070bfbe548
3
+ size 4999802096
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfbd80a5905a0c35b45bb41e976c9128e4bb2edec7066ac9aa078f9fb6cb00b
3
+ size 4915915576
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf91bee5f60a59ca899d0d6fd62e9a809cb7fca6519bea87937d664be2ec05c
3
+ size 117465520
model.safetensors.index.json ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15009849344
4
+ },
5
+ "weight_map": {
6
+ "embed_tokens.weight": "model-00001-of-00004.safetensors",
7
+ "layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
8
+ "layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
9
+ "layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
10
+ "layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
11
+ "layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
12
+ "layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
13
+ "layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
14
+ "layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
15
+ "layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
16
+ "layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
17
+ "layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
18
+ "layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
19
+ "layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
20
+ "layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
22
+ "layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
23
+ "layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
24
+ "layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
25
+ "layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
26
+ "layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
27
+ "layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
28
+ "layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
29
+ "layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
30
+ "layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
31
+ "layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
32
+ "layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
33
+ "layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
34
+ "layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
35
+ "layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
36
+ "layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
37
+ "layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
38
+ "layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
39
+ "layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
40
+ "layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
41
+ "layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
43
+ "layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
44
+ "layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
45
+ "layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
46
+ "layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
47
+ "layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
48
+ "layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
49
+ "layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
50
+ "layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
51
+ "layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
52
+ "layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
53
+ "layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
54
+ "layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
55
+ "layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
56
+ "layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
58
+ "layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
59
+ "layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
60
+ "layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
61
+ "layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
62
+ "layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
63
+ "layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
64
+ "layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
65
+ "layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
66
+ "layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
67
+ "layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
68
+ "layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
69
+ "layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
70
+ "layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
71
+ "layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
72
+ "layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
73
+ "layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
74
+ "layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
75
+ "layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
76
+ "layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
77
+ "layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
79
+ "layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
80
+ "layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
81
+ "layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
82
+ "layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
83
+ "layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
84
+ "layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
85
+ "layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
86
+ "layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
87
+ "layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
88
+ "layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
89
+ "layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
90
+ "layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
91
+ "layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
92
+ "layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
94
+ "layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
95
+ "layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
96
+ "layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
97
+ "layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
98
+ "layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
99
+ "layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
100
+ "layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
101
+ "layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
102
+ "layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
103
+ "layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
104
+ "layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
105
+ "layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
106
+ "layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
107
+ "layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
108
+ "layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
109
+ "layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
110
+ "layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
111
+ "layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
112
+ "layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
113
+ "layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
+ "layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
115
+ "layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
116
+ "layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
117
+ "layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
118
+ "layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
119
+ "layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
120
+ "layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
121
+ "layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
122
+ "layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
123
+ "layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
124
+ "layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
125
+ "layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
126
+ "layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
127
+ "layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
128
+ "layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
130
+ "layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
131
+ "layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
132
+ "layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
133
+ "layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
134
+ "layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
135
+ "layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
136
+ "layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
137
+ "layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
138
+ "layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
139
+ "layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
140
+ "layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
141
+ "layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
142
+ "layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
143
+ "layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
144
+ "layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
145
+ "layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
146
+ "layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
147
+ "layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
148
+ "layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
149
+ "layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
151
+ "layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
152
+ "layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
153
+ "layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
+ "layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
+ "layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
+ "layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
157
+ "layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
158
+ "layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
159
+ "layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
160
+ "layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
161
+ "layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
162
+ "layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
163
+ "layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
164
+ "layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
166
+ "layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
167
+ "layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
168
+ "layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
169
+ "layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
170
+ "layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
171
+ "layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
172
+ "layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
173
+ "layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
174
+ "layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
175
+ "layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
176
+ "layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
177
+ "layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
178
+ "layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
179
+ "layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
180
+ "layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
181
+ "layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
182
+ "layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
183
+ "layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
184
+ "layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
185
+ "layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
187
+ "layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
188
+ "layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
189
+ "layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
190
+ "layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
191
+ "layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
192
+ "layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
193
+ "layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
194
+ "layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
195
+ "layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
196
+ "layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
197
+ "layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
198
+ "layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
199
+ "layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
200
+ "layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
202
+ "layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
203
+ "layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
204
+ "layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
205
+ "layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
206
+ "layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
207
+ "layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
208
+ "layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
209
+ "layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
210
+ "layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
211
+ "layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
212
+ "layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
213
+ "layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
214
+ "layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
215
+ "layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
216
+ "layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
217
+ "layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
218
+ "layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
219
+ "layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
220
+ "layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
221
+ "layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
222
+ "layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
223
+ "layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
224
+ "layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
225
+ "layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
226
+ "layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
227
+ "layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
228
+ "layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
229
+ "layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
230
+ "layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
231
+ "layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
232
+ "layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
233
+ "layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
234
+ "layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
235
+ "layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
236
+ "layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
237
+ "layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
238
+ "layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
239
+ "layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
240
+ "layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
241
+ "layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
242
+ "layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
243
+ "layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
244
+ "layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
245
+ "layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
246
+ "layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
247
+ "layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
248
+ "layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
249
+ "layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
250
+ "layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
251
+ "layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
252
+ "layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
253
+ "layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
254
+ "layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
255
+ "layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
256
+ "layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
257
+ "layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
258
+ "layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
259
+ "layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
260
+ "layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
261
+ "layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
262
+ "layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
263
+ "layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
264
+ "layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
265
+ "layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
266
+ "layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
267
+ "layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
268
+ "layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
269
+ "layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
270
+ "layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
271
+ "layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
272
+ "layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
273
+ "layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
274
+ "layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
275
+ "layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
276
+ "layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
277
+ "layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
278
+ "layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
279
+ "layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
280
+ "layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
281
+ "layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
282
+ "layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
283
+ "layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
284
+ "layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
285
+ "layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
286
+ "layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
287
+ "layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
288
+ "layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
289
+ "layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
290
+ "layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
291
+ "layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
292
+ "layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
293
+ "layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
294
+ "layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
295
+ "norm.weight": "model-00004-of-00004.safetensors"
296
+ }
297
+ }
modeling_llama_encoder.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Tuple, Union
2
+ import torch
3
+ from transformers import LlamaModel, LlamaPreTrainedModel
4
+ from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaRMSNorm, LlamaConfig, LlamaMLP, LlamaAttention, LlamaRotaryEmbedding
5
+ from transformers.utils import logging
6
+ from torch import nn
7
+ import torch.nn.functional as F
8
+ from transformers.modeling_outputs import BaseModelOutputWithPast
9
+ from transformers.cache_utils import Cache, DynamicCache
10
+ from .attn_mask_utils import _prepare_4d_attention_mask_for_sdpa, _prepare_4d_attention_mask
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
class ModifiedLlamaAttention(LlamaAttention):
    """``LlamaAttention`` subclass whose ``is_causal`` attribute is forced to ``False``.

    Everything else (projections, arguments accepted by the constructor) is
    inherited unchanged from ``LlamaAttention``.
    """

    def __init__(self, *args, **kwargs):
        # Construct the stock attention module with whatever the caller passed ...
        super().__init__(*args, **kwargs)
        # ... then override the causal flag the parent constructor set.
        self.is_causal = False
19
+
20
+
21
class ModifiedLlamaDecoderLayer(LlamaDecoderLayer):
    """Decoder layer that uses ``ModifiedLlamaAttention`` for self-attention.

    The constructor calls ``nn.Module.__init__`` directly rather than
    ``super().__init__``, so ``LlamaDecoderLayer.__init__`` does not run and
    every submodule is created here.

    Args:
        config: Llama configuration; ``hidden_size`` and ``rms_norm_eps`` are read from it.
        layer_idx: Index of this layer, forwarded to the attention module.
    """

    def __init__(self, config: LlamaConfig, layer_idx: int):
        nn.Module.__init__(self)

        width = config.hidden_size
        norm_eps = config.rms_norm_eps
        self.hidden_size = width

        # Submodules are created in the same order as before so parameter
        # registration (and any random initialisation order) is unchanged.
        self.self_attn = ModifiedLlamaAttention(config=config, layer_idx=layer_idx)
        self.mlp = LlamaMLP(config)
        self.input_layernorm = LlamaRMSNorm(width, eps=norm_eps)
        self.post_attention_layernorm = LlamaRMSNorm(width, eps=norm_eps)
31
+
32
+
33
class LlamaEncoderModel(LlamaModel):
    """Llama backbone assembled from ``ModifiedLlamaDecoderLayer`` blocks.

    Each layer's attention module has ``is_causal = False``, and the attention
    mask is built with the ``_prepare_4d_attention_mask*`` helpers imported from
    ``.attn_mask_utils`` (presumably padding-only, non-causal masks -- confirm
    against that module).
    """

    def __init__(self, config):
        # Call LlamaPreTrainedModel.__init__ directly instead of super().__init__,
        # so LlamaModel.__init__ does not run and all submodules are created here.
        LlamaPreTrainedModel.__init__(self, config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList(
            [ModifiedLlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        # Remember which attention backend was requested; forward() uses these
        # flags to decide what form of attention mask to hand to the layers.
        self._use_sdpa = config._attn_implementation == "sdpa"
        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
        self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.rotary_emb = LlamaRotaryEmbedding(config=config)

        self.gradient_checkpointing = False
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Run the layer stack over token ids or pre-computed embeddings.

        Args:
            input_ids: Token ids; the first two dimensions are read as
                ``(batch_size, seq_length)``. Mutually exclusive with ``inputs_embeds``.
            attention_mask: Optional mask passed to the mask-preparation helpers
                (2D padding mask presumably -- confirm against ``attn_mask_utils``).
            position_ids: Optional positions; when omitted they are generated as
                ``arange(past_len, past_len + seq_length)`` with a leading batch dim of 1.
            past_key_values: Optional cache. When ``use_cache`` is true and this is
                not a ``Cache`` instance it is converted with
                ``DynamicCache.from_legacy_cache``.
            inputs_embeds: Pre-computed embeddings used instead of ``input_ids``.
            use_cache: Falls back to ``config.use_cache``; forced to ``False`` while
                training with gradient checkpointing.
            output_attentions: Falls back to ``config.output_attentions``.
            output_hidden_states: Falls back to ``config.output_hidden_states``.
            return_dict: Falls back to ``config.use_return_dict``. When false, a plain
                tuple of the non-``None`` outputs is returned.

        Returns:
            ``BaseModelOutputWithPast`` when ``return_dict`` is true, otherwise a tuple
            of ``(last_hidden_state, past_key_values, hidden_states, attentions)``
            with ``None`` entries dropped.

        Raises:
            ValueError: If both or neither of ``input_ids`` / ``inputs_embeds`` are given.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            batch_size, seq_length = input_ids.shape[:2]
        elif inputs_embeds is not None:
            batch_size, seq_length = inputs_embeds.shape[:2]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        past_key_values_length = 0
        if use_cache:
            # Normalise legacy (non-Cache) inputs into a DynamicCache before querying its length.
            use_legacy_cache = not isinstance(past_key_values, Cache)
            if use_legacy_cache:
                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            past_key_values_length = past_key_values.get_usable_length(seq_length)

        if position_ids is None:
            device = input_ids.device if input_ids is not None else inputs_embeds.device
            position_ids = torch.arange(
                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
            )
            position_ids = position_ids.unsqueeze(0)

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if self._use_flash_attention_2:
            # 2d mask is passed through the layers; dropped entirely when it masks nothing
            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
        elif self._use_sdpa and not output_attentions:
            # output_attentions=True can not be supported when using SDPA, so that case
            # falls through to the generic 4D mask in the else-branch below.
            attention_mask = _prepare_4d_attention_mask_for_sdpa(
                attention_mask,
                (batch_size, seq_length),
                inputs_embeds,
                past_key_values_length,
            )
        else:
            # 4d mask is passed through the layers
            attention_mask = _prepare_4d_attention_mask(
                attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
            )

        # embed positions
        hidden_states = inputs_embeds

        # create position embeddings to be shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None

        for decoder_layer in self.layers:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    attention_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    position_embeddings=position_embeddings,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    position_embeddings=position_embeddings,
                )

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = past_key_values if use_cache else None

        # Honour return_dict=False: previously the flag was resolved but ignored,
        # so callers always received a BaseModelOutputWithPast.
        if not return_dict:
            return tuple(
                v for v in (hidden_states, next_cache, all_hidden_states, all_self_attns) if v is not None
            )

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
teaser.png ADDED

Git LFS Details

  • SHA256: 96efdce73a60b5857328014a626e4a6004452e2444bc3a169c9d2b8b5f4a94ce
  • Pointer size: 131 Bytes
  • Size of remote file: 156 kB
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 1000000000000000019884624838656,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "left",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }