jaykaydg committed on
Commit
45c9eab
·
verified ·
1 Parent(s): c74cda2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +16 -0
  2. checkpoint-1000/README.md +202 -0
  3. checkpoint-1000/adapter_config.json +37 -0
  4. checkpoint-1000/adapter_model.safetensors +3 -0
  5. checkpoint-1000/optimizer.pt +3 -0
  6. checkpoint-1000/rng_state.pth +3 -0
  7. checkpoint-1000/scheduler.pt +3 -0
  8. checkpoint-1000/special_tokens_map.json +23 -0
  9. checkpoint-1000/tokenizer.json +3 -0
  10. checkpoint-1000/tokenizer_config.json +2067 -0
  11. checkpoint-1000/trainer_state.json +1451 -0
  12. checkpoint-1000/training_args.bin +3 -0
  13. checkpoint-1500/README.md +202 -0
  14. checkpoint-1500/adapter_config.json +37 -0
  15. checkpoint-1500/adapter_model.safetensors +3 -0
  16. checkpoint-1500/optimizer.pt +3 -0
  17. checkpoint-1500/rng_state.pth +3 -0
  18. checkpoint-1500/scheduler.pt +3 -0
  19. checkpoint-1500/special_tokens_map.json +23 -0
  20. checkpoint-1500/tokenizer.json +3 -0
  21. checkpoint-1500/tokenizer_config.json +2067 -0
  22. checkpoint-1500/trainer_state.json +2160 -0
  23. checkpoint-1500/training_args.bin +3 -0
  24. checkpoint-2000/README.md +202 -0
  25. checkpoint-2000/adapter_config.json +37 -0
  26. checkpoint-2000/adapter_model.safetensors +3 -0
  27. checkpoint-2000/optimizer.pt +3 -0
  28. checkpoint-2000/rng_state.pth +3 -0
  29. checkpoint-2000/scheduler.pt +3 -0
  30. checkpoint-2000/special_tokens_map.json +23 -0
  31. checkpoint-2000/tokenizer.json +3 -0
  32. checkpoint-2000/tokenizer_config.json +2067 -0
  33. checkpoint-2000/trainer_state.json +2868 -0
  34. checkpoint-2000/training_args.bin +3 -0
  35. checkpoint-2500/README.md +202 -0
  36. checkpoint-2500/adapter_config.json +37 -0
  37. checkpoint-2500/adapter_model.safetensors +3 -0
  38. checkpoint-2500/optimizer.pt +3 -0
  39. checkpoint-2500/rng_state.pth +3 -0
  40. checkpoint-2500/scheduler.pt +3 -0
  41. checkpoint-2500/special_tokens_map.json +23 -0
  42. checkpoint-2500/tokenizer.json +3 -0
  43. checkpoint-2500/tokenizer_config.json +2067 -0
  44. checkpoint-2500/trainer_state.json +3576 -0
  45. checkpoint-2500/training_args.bin +3 -0
  46. checkpoint-3000/README.md +202 -0
  47. checkpoint-3000/adapter_config.json +37 -0
  48. checkpoint-3000/adapter_model.safetensors +3 -0
  49. checkpoint-3000/optimizer.pt +3 -0
  50. checkpoint-3000/rng_state.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-3500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ checkpoint-4500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ checkpoint-5500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ checkpoint-6000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ checkpoint-6500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ checkpoint-7000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
+ checkpoint-7584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-1000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "o_proj",
27
+ "up_proj",
28
+ "q_proj",
29
+ "v_proj",
30
+ "k_proj",
31
+ "down_proj",
32
+ "gate_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40b6cf1a5705aab5c8565e7ed0b7ff124fa86e863ec0bac75311fa1ad692c001
3
+ size 48680136
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ec21c3d3cf2cc7cb3c2d48e6420cfa8f03d507f097ec80482cdb71deedae12
3
+ size 49846644
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ca74abb9af1cc9151b91103870122159c1f0b4cbbe035d58feaf102cb270d5
3
+ size 14244
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cb34dde9a5646c673426da1d510aa200cd59945601d8c338cc6b922b739cf7
3
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-1000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,2067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ }
2052
+ },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", 
\" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
2055
+ "clean_up_tokenization_spaces": true,
2056
+ "eos_token": "<|eot_id|>",
2057
+ "extra_special_tokens": {},
2058
+ "model_input_names": [
2059
+ "input_ids",
2060
+ "attention_mask"
2061
+ ],
2062
+ "model_max_length": 131072,
2063
+ "pad_token": "<|finetune_right_pad_id|>",
2064
+ "padding_side": "right",
2065
+ "tokenizer_class": "PreTrainedTokenizerFast",
2066
+ "unk_token": null
2067
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.2636609320413948,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001318304660206974,
13
+ "grad_norm": 4.59375,
14
+ "learning_rate": 0.0002,
15
+ "loss": 1.9624,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.002636609320413948,
20
+ "grad_norm": 1.7421875,
21
+ "learning_rate": 0.00019986805647183008,
22
+ "loss": 0.6513,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.003954913980620921,
27
+ "grad_norm": 1.84375,
28
+ "learning_rate": 0.00019973611294366012,
29
+ "loss": 0.1146,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.005273218640827896,
34
+ "grad_norm": 1.3203125,
35
+ "learning_rate": 0.0001996041694154902,
36
+ "loss": 0.0529,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.006591523301034869,
41
+ "grad_norm": 0.40234375,
42
+ "learning_rate": 0.00019947222588732023,
43
+ "loss": 0.1214,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.007909827961241843,
48
+ "grad_norm": 1.5390625,
49
+ "learning_rate": 0.0001993402823591503,
50
+ "loss": 0.0919,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.009228132621448816,
55
+ "grad_norm": 0.06201171875,
56
+ "learning_rate": 0.00019920833883098034,
57
+ "loss": 0.09,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.010546437281655791,
62
+ "grad_norm": 1.53125,
63
+ "learning_rate": 0.0001990763953028104,
64
+ "loss": 0.1945,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.011864741941862765,
69
+ "grad_norm": 0.2890625,
70
+ "learning_rate": 0.00019894445177464048,
71
+ "loss": 0.1259,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.013183046602069738,
76
+ "grad_norm": 0.609375,
77
+ "learning_rate": 0.00019881250824647052,
78
+ "loss": 0.027,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.014501351262276712,
83
+ "grad_norm": 0.369140625,
84
+ "learning_rate": 0.00019868056471830057,
85
+ "loss": 0.1068,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.015819655922483685,
90
+ "grad_norm": 0.34765625,
91
+ "learning_rate": 0.00019854862119013064,
92
+ "loss": 0.0542,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.01713796058269066,
97
+ "grad_norm": 0.055419921875,
98
+ "learning_rate": 0.00019841667766196068,
99
+ "loss": 0.0901,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.018456265242897632,
104
+ "grad_norm": 0.0247802734375,
105
+ "learning_rate": 0.00019828473413379075,
106
+ "loss": 0.0091,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.019774569903104607,
111
+ "grad_norm": 0.0079345703125,
112
+ "learning_rate": 0.0001981527906056208,
113
+ "loss": 0.0744,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.021092874563311582,
118
+ "grad_norm": 0.65234375,
119
+ "learning_rate": 0.00019802084707745086,
120
+ "loss": 0.1108,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.022411179223518554,
125
+ "grad_norm": 0.50390625,
126
+ "learning_rate": 0.0001978889035492809,
127
+ "loss": 0.0446,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.02372948388372553,
132
+ "grad_norm": 0.1787109375,
133
+ "learning_rate": 0.00019775696002111097,
134
+ "loss": 0.0982,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.0250477885439325,
139
+ "grad_norm": 0.490234375,
140
+ "learning_rate": 0.00019762501649294104,
141
+ "loss": 0.1035,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.026366093204139476,
146
+ "grad_norm": 0.12158203125,
147
+ "learning_rate": 0.00019749307296477108,
148
+ "loss": 0.0401,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.02768439786434645,
153
+ "grad_norm": 0.16015625,
154
+ "learning_rate": 0.00019736112943660115,
155
+ "loss": 0.0309,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.029002702524553423,
160
+ "grad_norm": 1.359375,
161
+ "learning_rate": 0.0001972291859084312,
162
+ "loss": 0.1032,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.0303210071847604,
167
+ "grad_norm": 0.52734375,
168
+ "learning_rate": 0.00019709724238026126,
169
+ "loss": 0.0811,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.03163931184496737,
174
+ "grad_norm": 0.177734375,
175
+ "learning_rate": 0.00019696529885209133,
176
+ "loss": 0.0258,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.03295761650517435,
181
+ "grad_norm": 0.234375,
182
+ "learning_rate": 0.00019683335532392137,
183
+ "loss": 0.0437,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.03427592116538132,
188
+ "grad_norm": 1.3046875,
189
+ "learning_rate": 0.00019670141179575144,
190
+ "loss": 0.0967,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.03559422582558829,
195
+ "grad_norm": 0.2734375,
196
+ "learning_rate": 0.00019656946826758148,
197
+ "loss": 0.0132,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.036912530485795264,
202
+ "grad_norm": 0.66015625,
203
+ "learning_rate": 0.00019643752473941155,
204
+ "loss": 0.0396,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.03823083514600224,
209
+ "grad_norm": 1.0546875,
210
+ "learning_rate": 0.0001963055812112416,
211
+ "loss": 0.0449,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.039549139806209214,
216
+ "grad_norm": 0.2021484375,
217
+ "learning_rate": 0.00019617363768307166,
218
+ "loss": 0.1196,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.040867444466416186,
223
+ "grad_norm": 0.5859375,
224
+ "learning_rate": 0.0001960416941549017,
225
+ "loss": 0.0588,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.042185749126623165,
230
+ "grad_norm": 0.06005859375,
231
+ "learning_rate": 0.00019590975062673175,
232
+ "loss": 0.0234,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.04350405378683014,
237
+ "grad_norm": 0.4921875,
238
+ "learning_rate": 0.00019577780709856182,
239
+ "loss": 0.0916,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.04482235844703711,
244
+ "grad_norm": 0.84375,
245
+ "learning_rate": 0.0001956458635703919,
246
+ "loss": 0.0271,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.04614066310724409,
251
+ "grad_norm": 0.8828125,
252
+ "learning_rate": 0.00019551392004222193,
253
+ "loss": 0.0175,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.04745896776745106,
258
+ "grad_norm": 0.0152587890625,
259
+ "learning_rate": 0.000195381976514052,
260
+ "loss": 0.0356,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.04877727242765803,
265
+ "grad_norm": 0.09326171875,
266
+ "learning_rate": 0.00019525003298588204,
267
+ "loss": 0.0057,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.050095577087865,
272
+ "grad_norm": 0.24609375,
273
+ "learning_rate": 0.0001951180894577121,
274
+ "loss": 0.0082,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.05141388174807198,
279
+ "grad_norm": 0.05029296875,
280
+ "learning_rate": 0.00019498614592954215,
281
+ "loss": 0.0178,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.05273218640827895,
286
+ "grad_norm": 0.0390625,
287
+ "learning_rate": 0.00019485420240137222,
288
+ "loss": 0.0789,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.054050491068485924,
293
+ "grad_norm": 0.5625,
294
+ "learning_rate": 0.0001947222588732023,
295
+ "loss": 0.0645,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.0553687957286929,
300
+ "grad_norm": 0.53515625,
301
+ "learning_rate": 0.00019459031534503233,
302
+ "loss": 0.116,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.056687100388899875,
307
+ "grad_norm": 0.55078125,
308
+ "learning_rate": 0.0001944583718168624,
309
+ "loss": 0.0516,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.058005405049106847,
314
+ "grad_norm": 0.314453125,
315
+ "learning_rate": 0.00019432642828869244,
316
+ "loss": 0.1019,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.059323709709313825,
321
+ "grad_norm": 0.1123046875,
322
+ "learning_rate": 0.0001941944847605225,
323
+ "loss": 0.0529,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.0606420143695208,
328
+ "grad_norm": 0.4921875,
329
+ "learning_rate": 0.00019406254123235256,
330
+ "loss": 0.0368,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.06196031902972777,
335
+ "grad_norm": 0.054443359375,
336
+ "learning_rate": 0.00019393059770418262,
337
+ "loss": 0.037,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.06327862368993474,
342
+ "grad_norm": 0.008544921875,
343
+ "learning_rate": 0.0001937986541760127,
344
+ "loss": 0.0324,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.06459692835014172,
349
+ "grad_norm": 1.5,
350
+ "learning_rate": 0.00019366671064784274,
351
+ "loss": 0.0334,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 0.0659152330103487,
356
+ "grad_norm": 0.2109375,
357
+ "learning_rate": 0.0001935347671196728,
358
+ "loss": 0.0671,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.06723353767055566,
363
+ "grad_norm": 2.0625,
364
+ "learning_rate": 0.00019340282359150285,
365
+ "loss": 0.1559,
366
+ "step": 255
367
+ },
368
+ {
369
+ "epoch": 0.06855184233076264,
370
+ "grad_norm": 0.7734375,
371
+ "learning_rate": 0.0001932708800633329,
372
+ "loss": 0.0198,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.06987014699096962,
377
+ "grad_norm": 0.42578125,
378
+ "learning_rate": 0.00019313893653516296,
379
+ "loss": 0.0151,
380
+ "step": 265
381
+ },
382
+ {
383
+ "epoch": 0.07118845165117658,
384
+ "grad_norm": 0.1884765625,
385
+ "learning_rate": 0.000193006993006993,
386
+ "loss": 0.0269,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.07250675631138356,
391
+ "grad_norm": 1.546875,
392
+ "learning_rate": 0.00019287504947882307,
393
+ "loss": 0.0565,
394
+ "step": 275
395
+ },
396
+ {
397
+ "epoch": 0.07382506097159053,
398
+ "grad_norm": 0.5078125,
399
+ "learning_rate": 0.0001927431059506531,
400
+ "loss": 0.0942,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.0751433656317975,
405
+ "grad_norm": 0.392578125,
406
+ "learning_rate": 0.00019261116242248318,
407
+ "loss": 0.0061,
408
+ "step": 285
409
+ },
410
+ {
411
+ "epoch": 0.07646167029200449,
412
+ "grad_norm": 1.9140625,
413
+ "learning_rate": 0.00019247921889431325,
414
+ "loss": 0.0497,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.07777997495221145,
419
+ "grad_norm": 0.08837890625,
420
+ "learning_rate": 0.0001923472753661433,
421
+ "loss": 0.0573,
422
+ "step": 295
423
+ },
424
+ {
425
+ "epoch": 0.07909827961241843,
426
+ "grad_norm": 1.046875,
427
+ "learning_rate": 0.00019221533183797336,
428
+ "loss": 0.0528,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.08041658427262541,
433
+ "grad_norm": 0.2275390625,
434
+ "learning_rate": 0.0001920833883098034,
435
+ "loss": 0.0506,
436
+ "step": 305
437
+ },
438
+ {
439
+ "epoch": 0.08173488893283237,
440
+ "grad_norm": 0.08203125,
441
+ "learning_rate": 0.00019195144478163347,
442
+ "loss": 0.0307,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.08305319359303935,
447
+ "grad_norm": 0.111328125,
448
+ "learning_rate": 0.00019181950125346354,
449
+ "loss": 0.0365,
450
+ "step": 315
451
+ },
452
+ {
453
+ "epoch": 0.08437149825324633,
454
+ "grad_norm": 1.2890625,
455
+ "learning_rate": 0.00019168755772529358,
456
+ "loss": 0.0447,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.0856898029134533,
461
+ "grad_norm": 0.6015625,
462
+ "learning_rate": 0.00019155561419712365,
463
+ "loss": 0.0605,
464
+ "step": 325
465
+ },
466
+ {
467
+ "epoch": 0.08700810757366027,
468
+ "grad_norm": 0.71875,
469
+ "learning_rate": 0.0001914236706689537,
470
+ "loss": 0.0846,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.08832641223386725,
475
+ "grad_norm": 0.1494140625,
476
+ "learning_rate": 0.00019129172714078376,
477
+ "loss": 0.0713,
478
+ "step": 335
479
+ },
480
+ {
481
+ "epoch": 0.08964471689407422,
482
+ "grad_norm": 0.1669921875,
483
+ "learning_rate": 0.0001911597836126138,
484
+ "loss": 0.0826,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.0909630215542812,
489
+ "grad_norm": 2.203125,
490
+ "learning_rate": 0.00019102784008444388,
491
+ "loss": 0.0441,
492
+ "step": 345
493
+ },
494
+ {
495
+ "epoch": 0.09228132621448817,
496
+ "grad_norm": 1.21875,
497
+ "learning_rate": 0.00019089589655627395,
498
+ "loss": 0.1378,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.09359963087469514,
503
+ "grad_norm": 3.0625,
504
+ "learning_rate": 0.00019076395302810396,
505
+ "loss": 0.1552,
506
+ "step": 355
507
+ },
508
+ {
509
+ "epoch": 0.09491793553490212,
510
+ "grad_norm": 0.232421875,
511
+ "learning_rate": 0.00019063200949993403,
512
+ "loss": 0.0458,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.0962362401951091,
517
+ "grad_norm": 0.71875,
518
+ "learning_rate": 0.0001905000659717641,
519
+ "loss": 0.0312,
520
+ "step": 365
521
+ },
522
+ {
523
+ "epoch": 0.09755454485531606,
524
+ "grad_norm": 0.0218505859375,
525
+ "learning_rate": 0.00019036812244359414,
526
+ "loss": 0.0247,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.09887284951552304,
531
+ "grad_norm": 0.064453125,
532
+ "learning_rate": 0.0001902361789154242,
533
+ "loss": 0.054,
534
+ "step": 375
535
+ },
536
+ {
537
+ "epoch": 0.10019115417573,
538
+ "grad_norm": 0.021240234375,
539
+ "learning_rate": 0.00019010423538725425,
540
+ "loss": 0.0023,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.10150945883593698,
545
+ "grad_norm": 0.0361328125,
546
+ "learning_rate": 0.00018997229185908432,
547
+ "loss": 0.0884,
548
+ "step": 385
549
+ },
550
+ {
551
+ "epoch": 0.10282776349614396,
552
+ "grad_norm": 1.703125,
553
+ "learning_rate": 0.00018984034833091436,
554
+ "loss": 0.0506,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.10414606815635093,
559
+ "grad_norm": 0.08837890625,
560
+ "learning_rate": 0.00018970840480274443,
561
+ "loss": 0.1123,
562
+ "step": 395
563
+ },
564
+ {
565
+ "epoch": 0.1054643728165579,
566
+ "grad_norm": 0.6953125,
567
+ "learning_rate": 0.0001895764612745745,
568
+ "loss": 0.0597,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.10678267747676488,
573
+ "grad_norm": 0.18359375,
574
+ "learning_rate": 0.00018944451774640454,
575
+ "loss": 0.0138,
576
+ "step": 405
577
+ },
578
+ {
579
+ "epoch": 0.10810098213697185,
580
+ "grad_norm": 0.0272216796875,
581
+ "learning_rate": 0.0001893125742182346,
582
+ "loss": 0.0249,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.10941928679717883,
587
+ "grad_norm": 0.00970458984375,
588
+ "learning_rate": 0.00018918063069006466,
589
+ "loss": 0.0084,
590
+ "step": 415
591
+ },
592
+ {
593
+ "epoch": 0.1107375914573858,
594
+ "grad_norm": 0.54296875,
595
+ "learning_rate": 0.00018904868716189472,
596
+ "loss": 0.0541,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.11205589611759277,
601
+ "grad_norm": 0.74609375,
602
+ "learning_rate": 0.00018891674363372477,
603
+ "loss": 0.007,
604
+ "step": 425
605
+ },
606
+ {
607
+ "epoch": 0.11337420077779975,
608
+ "grad_norm": 0.0211181640625,
609
+ "learning_rate": 0.00018878480010555484,
610
+ "loss": 0.0875,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.11469250543800673,
615
+ "grad_norm": 0.9296875,
616
+ "learning_rate": 0.0001886528565773849,
617
+ "loss": 0.1207,
618
+ "step": 435
619
+ },
620
+ {
621
+ "epoch": 0.11601081009821369,
622
+ "grad_norm": 1.2734375,
623
+ "learning_rate": 0.00018852091304921495,
624
+ "loss": 0.1143,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.11732911475842067,
629
+ "grad_norm": 0.6484375,
630
+ "learning_rate": 0.00018838896952104502,
631
+ "loss": 0.0393,
632
+ "step": 445
633
+ },
634
+ {
635
+ "epoch": 0.11864741941862765,
636
+ "grad_norm": 0.1552734375,
637
+ "learning_rate": 0.00018825702599287506,
638
+ "loss": 0.02,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.11996572407883462,
643
+ "grad_norm": 0.486328125,
644
+ "learning_rate": 0.0001881250824647051,
645
+ "loss": 0.0891,
646
+ "step": 455
647
+ },
648
+ {
649
+ "epoch": 0.1212840287390416,
650
+ "grad_norm": 1.0,
651
+ "learning_rate": 0.00018799313893653517,
652
+ "loss": 0.0469,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.12260233339924857,
657
+ "grad_norm": 0.2099609375,
658
+ "learning_rate": 0.0001878611954083652,
659
+ "loss": 0.019,
660
+ "step": 465
661
+ },
662
+ {
663
+ "epoch": 0.12392063805945554,
664
+ "grad_norm": 0.03857421875,
665
+ "learning_rate": 0.00018772925188019528,
666
+ "loss": 0.007,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.12523894271966252,
671
+ "grad_norm": 0.0257568359375,
672
+ "learning_rate": 0.00018759730835202532,
673
+ "loss": 0.0039,
674
+ "step": 475
675
+ },
676
+ {
677
+ "epoch": 0.12655724737986948,
678
+ "grad_norm": 0.014404296875,
679
+ "learning_rate": 0.0001874653648238554,
680
+ "loss": 0.0043,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.12787555204007647,
685
+ "grad_norm": 0.51953125,
686
+ "learning_rate": 0.00018733342129568546,
687
+ "loss": 0.1326,
688
+ "step": 485
689
+ },
690
+ {
691
+ "epoch": 0.12919385670028344,
692
+ "grad_norm": 0.99609375,
693
+ "learning_rate": 0.0001872014777675155,
694
+ "loss": 0.0369,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.1305121613604904,
699
+ "grad_norm": 0.2734375,
700
+ "learning_rate": 0.00018706953423934557,
701
+ "loss": 0.0395,
702
+ "step": 495
703
+ },
704
+ {
705
+ "epoch": 0.1318304660206974,
706
+ "grad_norm": 0.083984375,
707
+ "learning_rate": 0.00018693759071117561,
708
+ "loss": 0.0284,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.1318304660206974,
713
+ "eval_loss": 0.04542969539761543,
714
+ "eval_model_preparation_time": 0.0076,
715
+ "eval_runtime": 457.5293,
716
+ "eval_samples_per_second": 7.37,
717
+ "eval_steps_per_second": 3.685,
718
+ "step": 500
719
+ },
720
+ {
721
+ "epoch": 0.13314877068090436,
722
+ "grad_norm": 0.0291748046875,
723
+ "learning_rate": 0.00018680564718300568,
724
+ "loss": 0.0533,
725
+ "step": 505
726
+ },
727
+ {
728
+ "epoch": 0.13446707534111133,
729
+ "grad_norm": 0.71484375,
730
+ "learning_rate": 0.00018667370365483575,
731
+ "loss": 0.0183,
732
+ "step": 510
733
+ },
734
+ {
735
+ "epoch": 0.13578538000131832,
736
+ "grad_norm": 0.018798828125,
737
+ "learning_rate": 0.0001865417601266658,
738
+ "loss": 0.0473,
739
+ "step": 515
740
+ },
741
+ {
742
+ "epoch": 0.13710368466152528,
743
+ "grad_norm": 0.388671875,
744
+ "learning_rate": 0.00018640981659849586,
745
+ "loss": 0.0562,
746
+ "step": 520
747
+ },
748
+ {
749
+ "epoch": 0.13842198932173225,
750
+ "grad_norm": 0.77734375,
751
+ "learning_rate": 0.0001862778730703259,
752
+ "loss": 0.0755,
753
+ "step": 525
754
+ },
755
+ {
756
+ "epoch": 0.13974029398193924,
757
+ "grad_norm": 2.8125,
758
+ "learning_rate": 0.00018614592954215598,
759
+ "loss": 0.0422,
760
+ "step": 530
761
+ },
762
+ {
763
+ "epoch": 0.1410585986421462,
764
+ "grad_norm": 0.48828125,
765
+ "learning_rate": 0.00018601398601398602,
766
+ "loss": 0.0882,
767
+ "step": 535
768
+ },
769
+ {
770
+ "epoch": 0.14237690330235317,
771
+ "grad_norm": 0.16015625,
772
+ "learning_rate": 0.0001858820424858161,
773
+ "loss": 0.0131,
774
+ "step": 540
775
+ },
776
+ {
777
+ "epoch": 0.14369520796256013,
778
+ "grad_norm": 0.31640625,
779
+ "learning_rate": 0.00018575009895764616,
780
+ "loss": 0.03,
781
+ "step": 545
782
+ },
783
+ {
784
+ "epoch": 0.14501351262276713,
785
+ "grad_norm": 0.0120849609375,
786
+ "learning_rate": 0.0001856181554294762,
787
+ "loss": 0.0425,
788
+ "step": 550
789
+ },
790
+ {
791
+ "epoch": 0.1463318172829741,
792
+ "grad_norm": 0.390625,
793
+ "learning_rate": 0.00018548621190130624,
794
+ "loss": 0.011,
795
+ "step": 555
796
+ },
797
+ {
798
+ "epoch": 0.14765012194318106,
799
+ "grad_norm": 1.9609375,
800
+ "learning_rate": 0.0001853542683731363,
801
+ "loss": 0.0807,
802
+ "step": 560
803
+ },
804
+ {
805
+ "epoch": 0.14896842660338805,
806
+ "grad_norm": 0.609375,
807
+ "learning_rate": 0.00018522232484496635,
808
+ "loss": 0.0278,
809
+ "step": 565
810
+ },
811
+ {
812
+ "epoch": 0.150286731263595,
813
+ "grad_norm": 0.087890625,
814
+ "learning_rate": 0.00018509038131679642,
815
+ "loss": 0.0484,
816
+ "step": 570
817
+ },
818
+ {
819
+ "epoch": 0.15160503592380198,
820
+ "grad_norm": 0.5078125,
821
+ "learning_rate": 0.00018495843778862646,
822
+ "loss": 0.1277,
823
+ "step": 575
824
+ },
825
+ {
826
+ "epoch": 0.15292334058400897,
827
+ "grad_norm": 0.8125,
828
+ "learning_rate": 0.00018482649426045653,
829
+ "loss": 0.058,
830
+ "step": 580
831
+ },
832
+ {
833
+ "epoch": 0.15424164524421594,
834
+ "grad_norm": 0.22265625,
835
+ "learning_rate": 0.00018469455073228657,
836
+ "loss": 0.0259,
837
+ "step": 585
838
+ },
839
+ {
840
+ "epoch": 0.1555599499044229,
841
+ "grad_norm": 1.8984375,
842
+ "learning_rate": 0.00018456260720411664,
843
+ "loss": 0.113,
844
+ "step": 590
845
+ },
846
+ {
847
+ "epoch": 0.1568782545646299,
848
+ "grad_norm": 0.12451171875,
849
+ "learning_rate": 0.0001844306636759467,
850
+ "loss": 0.0312,
851
+ "step": 595
852
+ },
853
+ {
854
+ "epoch": 0.15819655922483686,
855
+ "grad_norm": 0.0322265625,
856
+ "learning_rate": 0.00018429872014777676,
857
+ "loss": 0.0476,
858
+ "step": 600
859
+ },
860
+ {
861
+ "epoch": 0.15951486388504382,
862
+ "grad_norm": 0.0281982421875,
863
+ "learning_rate": 0.00018416677661960682,
864
+ "loss": 0.0232,
865
+ "step": 605
866
+ },
867
+ {
868
+ "epoch": 0.16083316854525082,
869
+ "grad_norm": 0.57421875,
870
+ "learning_rate": 0.00018403483309143687,
871
+ "loss": 0.1287,
872
+ "step": 610
873
+ },
874
+ {
875
+ "epoch": 0.16215147320545778,
876
+ "grad_norm": 0.765625,
877
+ "learning_rate": 0.00018390288956326694,
878
+ "loss": 0.0991,
879
+ "step": 615
880
+ },
881
+ {
882
+ "epoch": 0.16346977786566474,
883
+ "grad_norm": 0.3125,
884
+ "learning_rate": 0.00018377094603509698,
885
+ "loss": 0.0247,
886
+ "step": 620
887
+ },
888
+ {
889
+ "epoch": 0.16478808252587174,
890
+ "grad_norm": 0.37890625,
891
+ "learning_rate": 0.00018363900250692705,
892
+ "loss": 0.0632,
893
+ "step": 625
894
+ },
895
+ {
896
+ "epoch": 0.1661063871860787,
897
+ "grad_norm": 0.1494140625,
898
+ "learning_rate": 0.00018350705897875712,
899
+ "loss": 0.0314,
900
+ "step": 630
901
+ },
902
+ {
903
+ "epoch": 0.16742469184628567,
904
+ "grad_norm": 0.0673828125,
905
+ "learning_rate": 0.00018337511545058716,
906
+ "loss": 0.0425,
907
+ "step": 635
908
+ },
909
+ {
910
+ "epoch": 0.16874299650649266,
911
+ "grad_norm": 0.396484375,
912
+ "learning_rate": 0.00018324317192241723,
913
+ "loss": 0.0613,
914
+ "step": 640
915
+ },
916
+ {
917
+ "epoch": 0.17006130116669962,
918
+ "grad_norm": 0.057373046875,
919
+ "learning_rate": 0.00018311122839424727,
920
+ "loss": 0.0569,
921
+ "step": 645
922
+ },
923
+ {
924
+ "epoch": 0.1713796058269066,
925
+ "grad_norm": 0.001373291015625,
926
+ "learning_rate": 0.00018297928486607734,
927
+ "loss": 0.007,
928
+ "step": 650
929
+ },
930
+ {
931
+ "epoch": 0.17269791048711358,
932
+ "grad_norm": 1.0859375,
933
+ "learning_rate": 0.00018284734133790738,
934
+ "loss": 0.0189,
935
+ "step": 655
936
+ },
937
+ {
938
+ "epoch": 0.17401621514732055,
939
+ "grad_norm": 0.6015625,
940
+ "learning_rate": 0.00018271539780973742,
941
+ "loss": 0.0601,
942
+ "step": 660
943
+ },
944
+ {
945
+ "epoch": 0.1753345198075275,
946
+ "grad_norm": 0.25390625,
947
+ "learning_rate": 0.0001825834542815675,
948
+ "loss": 0.0211,
949
+ "step": 665
950
+ },
951
+ {
952
+ "epoch": 0.1766528244677345,
953
+ "grad_norm": 2.6875,
954
+ "learning_rate": 0.00018245151075339753,
955
+ "loss": 0.0713,
956
+ "step": 670
957
+ },
958
+ {
959
+ "epoch": 0.17797112912794147,
960
+ "grad_norm": 1.1875,
961
+ "learning_rate": 0.0001823195672252276,
962
+ "loss": 0.0522,
963
+ "step": 675
964
+ },
965
+ {
966
+ "epoch": 0.17928943378814843,
967
+ "grad_norm": 0.025146484375,
968
+ "learning_rate": 0.00018218762369705767,
969
+ "loss": 0.0242,
970
+ "step": 680
971
+ },
972
+ {
973
+ "epoch": 0.18060773844835543,
974
+ "grad_norm": 0.048095703125,
975
+ "learning_rate": 0.00018205568016888772,
976
+ "loss": 0.0129,
977
+ "step": 685
978
+ },
979
+ {
980
+ "epoch": 0.1819260431085624,
981
+ "grad_norm": 0.04541015625,
982
+ "learning_rate": 0.00018192373664071778,
983
+ "loss": 0.0142,
984
+ "step": 690
985
+ },
986
+ {
987
+ "epoch": 0.18324434776876936,
988
+ "grad_norm": 0.00830078125,
989
+ "learning_rate": 0.00018179179311254783,
990
+ "loss": 0.0121,
991
+ "step": 695
992
+ },
993
+ {
994
+ "epoch": 0.18456265242897635,
995
+ "grad_norm": 0.53125,
996
+ "learning_rate": 0.0001816598495843779,
997
+ "loss": 0.0163,
998
+ "step": 700
999
+ },
1000
+ {
1001
+ "epoch": 0.1858809570891833,
1002
+ "grad_norm": 0.185546875,
1003
+ "learning_rate": 0.00018152790605620796,
1004
+ "loss": 0.0203,
1005
+ "step": 705
1006
+ },
1007
+ {
1008
+ "epoch": 0.18719926174939028,
1009
+ "grad_norm": 1.2578125,
1010
+ "learning_rate": 0.000181395962528038,
1011
+ "loss": 0.1548,
1012
+ "step": 710
1013
+ },
1014
+ {
1015
+ "epoch": 0.18851756640959727,
1016
+ "grad_norm": 0.0247802734375,
1017
+ "learning_rate": 0.00018126401899986808,
1018
+ "loss": 0.0543,
1019
+ "step": 715
1020
+ },
1021
+ {
1022
+ "epoch": 0.18983587106980424,
1023
+ "grad_norm": 0.07568359375,
1024
+ "learning_rate": 0.00018113207547169812,
1025
+ "loss": 0.0346,
1026
+ "step": 720
1027
+ },
1028
+ {
1029
+ "epoch": 0.1911541757300112,
1030
+ "grad_norm": 0.1318359375,
1031
+ "learning_rate": 0.0001810001319435282,
1032
+ "loss": 0.03,
1033
+ "step": 725
1034
+ },
1035
+ {
1036
+ "epoch": 0.1924724803902182,
1037
+ "grad_norm": 0.1455078125,
1038
+ "learning_rate": 0.00018086818841535823,
1039
+ "loss": 0.0796,
1040
+ "step": 730
1041
+ },
1042
+ {
1043
+ "epoch": 0.19379078505042516,
1044
+ "grad_norm": 0.09814453125,
1045
+ "learning_rate": 0.0001807362448871883,
1046
+ "loss": 0.0662,
1047
+ "step": 735
1048
+ },
1049
+ {
1050
+ "epoch": 0.19510908971063212,
1051
+ "grad_norm": 0.91015625,
1052
+ "learning_rate": 0.00018060430135901837,
1053
+ "loss": 0.0675,
1054
+ "step": 740
1055
+ },
1056
+ {
1057
+ "epoch": 0.19642739437083911,
1058
+ "grad_norm": 0.10693359375,
1059
+ "learning_rate": 0.0001804723578308484,
1060
+ "loss": 0.0377,
1061
+ "step": 745
1062
+ },
1063
+ {
1064
+ "epoch": 0.19774569903104608,
1065
+ "grad_norm": 0.95703125,
1066
+ "learning_rate": 0.00018034041430267848,
1067
+ "loss": 0.0174,
1068
+ "step": 750
1069
+ },
1070
+ {
1071
+ "epoch": 0.19906400369125304,
1072
+ "grad_norm": 1.7890625,
1073
+ "learning_rate": 0.00018020847077450852,
1074
+ "loss": 0.0278,
1075
+ "step": 755
1076
+ },
1077
+ {
1078
+ "epoch": 0.20038230835146,
1079
+ "grad_norm": 0.8515625,
1080
+ "learning_rate": 0.00018007652724633856,
1081
+ "loss": 0.0113,
1082
+ "step": 760
1083
+ },
1084
+ {
1085
+ "epoch": 0.201700613011667,
1086
+ "grad_norm": 0.016845703125,
1087
+ "learning_rate": 0.00017994458371816863,
1088
+ "loss": 0.0589,
1089
+ "step": 765
1090
+ },
1091
+ {
1092
+ "epoch": 0.20301891767187397,
1093
+ "grad_norm": 0.01043701171875,
1094
+ "learning_rate": 0.00017981264018999867,
1095
+ "loss": 0.0203,
1096
+ "step": 770
1097
+ },
1098
+ {
1099
+ "epoch": 0.20433722233208093,
1100
+ "grad_norm": 0.0242919921875,
1101
+ "learning_rate": 0.00017968069666182874,
1102
+ "loss": 0.0494,
1103
+ "step": 775
1104
+ },
1105
+ {
1106
+ "epoch": 0.20565552699228792,
1107
+ "grad_norm": 0.56640625,
1108
+ "learning_rate": 0.00017954875313365879,
1109
+ "loss": 0.0394,
1110
+ "step": 780
1111
+ },
1112
+ {
1113
+ "epoch": 0.2069738316524949,
1114
+ "grad_norm": 0.06591796875,
1115
+ "learning_rate": 0.00017941680960548886,
1116
+ "loss": 0.0848,
1117
+ "step": 785
1118
+ },
1119
+ {
1120
+ "epoch": 0.20829213631270185,
1121
+ "grad_norm": 0.40234375,
1122
+ "learning_rate": 0.00017928486607731892,
1123
+ "loss": 0.0464,
1124
+ "step": 790
1125
+ },
1126
+ {
1127
+ "epoch": 0.20961044097290885,
1128
+ "grad_norm": 0.06298828125,
1129
+ "learning_rate": 0.00017915292254914897,
1130
+ "loss": 0.0222,
1131
+ "step": 795
1132
+ },
1133
+ {
1134
+ "epoch": 0.2109287456331158,
1135
+ "grad_norm": 0.5390625,
1136
+ "learning_rate": 0.00017902097902097904,
1137
+ "loss": 0.0434,
1138
+ "step": 800
1139
+ },
1140
+ {
1141
+ "epoch": 0.21224705029332278,
1142
+ "grad_norm": 1.390625,
1143
+ "learning_rate": 0.00017888903549280908,
1144
+ "loss": 0.0222,
1145
+ "step": 805
1146
+ },
1147
+ {
1148
+ "epoch": 0.21356535495352977,
1149
+ "grad_norm": 0.0272216796875,
1150
+ "learning_rate": 0.00017875709196463915,
1151
+ "loss": 0.0099,
1152
+ "step": 810
1153
+ },
1154
+ {
1155
+ "epoch": 0.21488365961373673,
1156
+ "grad_norm": 0.10009765625,
1157
+ "learning_rate": 0.0001786251484364692,
1158
+ "loss": 0.0086,
1159
+ "step": 815
1160
+ },
1161
+ {
1162
+ "epoch": 0.2162019642739437,
1163
+ "grad_norm": 0.06396484375,
1164
+ "learning_rate": 0.00017849320490829926,
1165
+ "loss": 0.0715,
1166
+ "step": 820
1167
+ },
1168
+ {
1169
+ "epoch": 0.2175202689341507,
1170
+ "grad_norm": 0.365234375,
1171
+ "learning_rate": 0.00017836126138012933,
1172
+ "loss": 0.0642,
1173
+ "step": 825
1174
+ },
1175
+ {
1176
+ "epoch": 0.21883857359435765,
1177
+ "grad_norm": 0.01519775390625,
1178
+ "learning_rate": 0.00017822931785195937,
1179
+ "loss": 0.0111,
1180
+ "step": 830
1181
+ },
1182
+ {
1183
+ "epoch": 0.22015687825456462,
1184
+ "grad_norm": 1.1640625,
1185
+ "learning_rate": 0.00017809737432378944,
1186
+ "loss": 0.0518,
1187
+ "step": 835
1188
+ },
1189
+ {
1190
+ "epoch": 0.2214751829147716,
1191
+ "grad_norm": 0.00921630859375,
1192
+ "learning_rate": 0.00017796543079561948,
1193
+ "loss": 0.0384,
1194
+ "step": 840
1195
+ },
1196
+ {
1197
+ "epoch": 0.22279348757497858,
1198
+ "grad_norm": 0.33984375,
1199
+ "learning_rate": 0.00017783348726744955,
1200
+ "loss": 0.0204,
1201
+ "step": 845
1202
+ },
1203
+ {
1204
+ "epoch": 0.22411179223518554,
1205
+ "grad_norm": 0.294921875,
1206
+ "learning_rate": 0.00017770154373927962,
1207
+ "loss": 0.0075,
1208
+ "step": 850
1209
+ },
1210
+ {
1211
+ "epoch": 0.22543009689539253,
1212
+ "grad_norm": 0.033203125,
1213
+ "learning_rate": 0.00017756960021110963,
1214
+ "loss": 0.0895,
1215
+ "step": 855
1216
+ },
1217
+ {
1218
+ "epoch": 0.2267484015555995,
1219
+ "grad_norm": 0.08056640625,
1220
+ "learning_rate": 0.0001774376566829397,
1221
+ "loss": 0.1039,
1222
+ "step": 860
1223
+ },
1224
+ {
1225
+ "epoch": 0.22806670621580646,
1226
+ "grad_norm": 0.55078125,
1227
+ "learning_rate": 0.00017730571315476975,
1228
+ "loss": 0.0125,
1229
+ "step": 865
1230
+ },
1231
+ {
1232
+ "epoch": 0.22938501087601346,
1233
+ "grad_norm": 0.5859375,
1234
+ "learning_rate": 0.00017717376962659982,
1235
+ "loss": 0.0381,
1236
+ "step": 870
1237
+ },
1238
+ {
1239
+ "epoch": 0.23070331553622042,
1240
+ "grad_norm": 0.029052734375,
1241
+ "learning_rate": 0.00017704182609842988,
1242
+ "loss": 0.0434,
1243
+ "step": 875
1244
+ },
1245
+ {
1246
+ "epoch": 0.23202162019642739,
1247
+ "grad_norm": 0.43359375,
1248
+ "learning_rate": 0.00017690988257025993,
1249
+ "loss": 0.0799,
1250
+ "step": 880
1251
+ },
1252
+ {
1253
+ "epoch": 0.23333992485663438,
1254
+ "grad_norm": 0.04150390625,
1255
+ "learning_rate": 0.00017677793904209,
1256
+ "loss": 0.0692,
1257
+ "step": 885
1258
+ },
1259
+ {
1260
+ "epoch": 0.23465822951684134,
1261
+ "grad_norm": 0.435546875,
1262
+ "learning_rate": 0.00017664599551392004,
1263
+ "loss": 0.0544,
1264
+ "step": 890
1265
+ },
1266
+ {
1267
+ "epoch": 0.2359765341770483,
1268
+ "grad_norm": 1.171875,
1269
+ "learning_rate": 0.0001765140519857501,
1270
+ "loss": 0.0619,
1271
+ "step": 895
1272
+ },
1273
+ {
1274
+ "epoch": 0.2372948388372553,
1275
+ "grad_norm": 0.01263427734375,
1276
+ "learning_rate": 0.00017638210845758018,
1277
+ "loss": 0.0418,
1278
+ "step": 900
1279
+ },
1280
+ {
1281
+ "epoch": 0.23861314349746227,
1282
+ "grad_norm": 0.017578125,
1283
+ "learning_rate": 0.00017625016492941022,
1284
+ "loss": 0.0195,
1285
+ "step": 905
1286
+ },
1287
+ {
1288
+ "epoch": 0.23993144815766923,
1289
+ "grad_norm": 0.6171875,
1290
+ "learning_rate": 0.0001761182214012403,
1291
+ "loss": 0.067,
1292
+ "step": 910
1293
+ },
1294
+ {
1295
+ "epoch": 0.24124975281787622,
1296
+ "grad_norm": 0.59765625,
1297
+ "learning_rate": 0.00017598627787307033,
1298
+ "loss": 0.049,
1299
+ "step": 915
1300
+ },
1301
+ {
1302
+ "epoch": 0.2425680574780832,
1303
+ "grad_norm": 1.2421875,
1304
+ "learning_rate": 0.0001758543343449004,
1305
+ "loss": 0.0539,
1306
+ "step": 920
1307
+ },
1308
+ {
1309
+ "epoch": 0.24388636213829015,
1310
+ "grad_norm": 0.10302734375,
1311
+ "learning_rate": 0.00017572239081673044,
1312
+ "loss": 0.0725,
1313
+ "step": 925
1314
+ },
1315
+ {
1316
+ "epoch": 0.24520466679849715,
1317
+ "grad_norm": 0.330078125,
1318
+ "learning_rate": 0.0001755904472885605,
1319
+ "loss": 0.064,
1320
+ "step": 930
1321
+ },
1322
+ {
1323
+ "epoch": 0.2465229714587041,
1324
+ "grad_norm": 0.220703125,
1325
+ "learning_rate": 0.00017545850376039058,
1326
+ "loss": 0.0271,
1327
+ "step": 935
1328
+ },
1329
+ {
1330
+ "epoch": 0.24784127611891107,
1331
+ "grad_norm": 0.01470947265625,
1332
+ "learning_rate": 0.00017532656023222062,
1333
+ "loss": 0.0247,
1334
+ "step": 940
1335
+ },
1336
+ {
1337
+ "epoch": 0.24915958077911807,
1338
+ "grad_norm": 0.013427734375,
1339
+ "learning_rate": 0.0001751946167040507,
1340
+ "loss": 0.017,
1341
+ "step": 945
1342
+ },
1343
+ {
1344
+ "epoch": 0.25047788543932503,
1345
+ "grad_norm": 0.58984375,
1346
+ "learning_rate": 0.00017506267317588073,
1347
+ "loss": 0.0254,
1348
+ "step": 950
1349
+ },
1350
+ {
1351
+ "epoch": 0.251796190099532,
1352
+ "grad_norm": 0.412109375,
1353
+ "learning_rate": 0.00017493072964771078,
1354
+ "loss": 0.0186,
1355
+ "step": 955
1356
+ },
1357
+ {
1358
+ "epoch": 0.25311449475973896,
1359
+ "grad_norm": 0.66796875,
1360
+ "learning_rate": 0.00017479878611954084,
1361
+ "loss": 0.0617,
1362
+ "step": 960
1363
+ },
1364
+ {
1365
+ "epoch": 0.25443279941994595,
1366
+ "grad_norm": 0.322265625,
1367
+ "learning_rate": 0.00017466684259137089,
1368
+ "loss": 0.0173,
1369
+ "step": 965
1370
+ },
1371
+ {
1372
+ "epoch": 0.25575110408015295,
1373
+ "grad_norm": 0.83203125,
1374
+ "learning_rate": 0.00017453489906320096,
1375
+ "loss": 0.0512,
1376
+ "step": 970
1377
+ },
1378
+ {
1379
+ "epoch": 0.2570694087403599,
1380
+ "grad_norm": 0.08447265625,
1381
+ "learning_rate": 0.000174402955535031,
1382
+ "loss": 0.0361,
1383
+ "step": 975
1384
+ },
1385
+ {
1386
+ "epoch": 0.2583877134005669,
1387
+ "grad_norm": 0.423828125,
1388
+ "learning_rate": 0.00017427101200686107,
1389
+ "loss": 0.0175,
1390
+ "step": 980
1391
+ },
1392
+ {
1393
+ "epoch": 0.25970601806077387,
1394
+ "grad_norm": 0.77734375,
1395
+ "learning_rate": 0.00017413906847869114,
1396
+ "loss": 0.0139,
1397
+ "step": 985
1398
+ },
1399
+ {
1400
+ "epoch": 0.2610243227209808,
1401
+ "grad_norm": 0.515625,
1402
+ "learning_rate": 0.00017400712495052118,
1403
+ "loss": 0.0948,
1404
+ "step": 990
1405
+ },
1406
+ {
1407
+ "epoch": 0.2623426273811878,
1408
+ "grad_norm": 1.421875,
1409
+ "learning_rate": 0.00017387518142235125,
1410
+ "loss": 0.0406,
1411
+ "step": 995
1412
+ },
1413
+ {
1414
+ "epoch": 0.2636609320413948,
1415
+ "grad_norm": 0.058837890625,
1416
+ "learning_rate": 0.0001737432378941813,
1417
+ "loss": 0.1011,
1418
+ "step": 1000
1419
+ },
1420
+ {
1421
+ "epoch": 0.2636609320413948,
1422
+ "eval_loss": 0.045552924275398254,
1423
+ "eval_model_preparation_time": 0.0076,
1424
+ "eval_runtime": 457.6113,
1425
+ "eval_samples_per_second": 7.369,
1426
+ "eval_steps_per_second": 3.684,
1427
+ "step": 1000
1428
+ }
1429
+ ],
1430
+ "logging_steps": 5,
1431
+ "max_steps": 7584,
1432
+ "num_input_tokens_seen": 0,
1433
+ "num_train_epochs": 2,
1434
+ "save_steps": 500,
1435
+ "stateful_callbacks": {
1436
+ "TrainerControl": {
1437
+ "args": {
1438
+ "should_epoch_stop": false,
1439
+ "should_evaluate": false,
1440
+ "should_log": false,
1441
+ "should_save": true,
1442
+ "should_training_stop": false
1443
+ },
1444
+ "attributes": {}
1445
+ }
1446
+ },
1447
+ "total_flos": 8.07022836673536e+16,
1448
+ "train_batch_size": 2,
1449
+ "trial_name": null,
1450
+ "trial_params": null
1451
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cee8b68ab8cbec1968c4eec747022ea57cd350072097ab37179dd29309655d7
3
+ size 5688
checkpoint-1500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-1500/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "o_proj",
27
+ "up_proj",
28
+ "q_proj",
29
+ "v_proj",
30
+ "k_proj",
31
+ "down_proj",
32
+ "gate_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-1500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71210cede591304d9f8e8baa28687d2dcc8e875430074ed063c2323e4df27de
3
+ size 48680136
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59fe0c31f6a03193e69e338856a0c6ee57c366f05092f94f1d1272bfc4d1a42
3
+ size 49846644
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba77c4358d5913436545fc6995706402cd54ccf015646708e622eca7f93ed87
3
+ size 14244
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e488f9cd81d6d035d07c333b21224930ff58c17d27dbd21d9101e5eca05d7d
3
+ size 1064
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-1500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,2067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ }
2052
+ },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", 
\" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
2055
+ "clean_up_tokenization_spaces": true,
2056
+ "eos_token": "<|eot_id|>",
2057
+ "extra_special_tokens": {},
2058
+ "model_input_names": [
2059
+ "input_ids",
2060
+ "attention_mask"
2061
+ ],
2062
+ "model_max_length": 131072,
2063
+ "pad_token": "<|finetune_right_pad_id|>",
2064
+ "padding_side": "right",
2065
+ "tokenizer_class": "PreTrainedTokenizerFast",
2066
+ "unk_token": null
2067
+ }
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,2160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.39549139806209216,
5
+ "eval_steps": 500,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001318304660206974,
13
+ "grad_norm": 4.59375,
14
+ "learning_rate": 0.0002,
15
+ "loss": 1.9624,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.002636609320413948,
20
+ "grad_norm": 1.7421875,
21
+ "learning_rate": 0.00019986805647183008,
22
+ "loss": 0.6513,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.003954913980620921,
27
+ "grad_norm": 1.84375,
28
+ "learning_rate": 0.00019973611294366012,
29
+ "loss": 0.1146,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.005273218640827896,
34
+ "grad_norm": 1.3203125,
35
+ "learning_rate": 0.0001996041694154902,
36
+ "loss": 0.0529,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.006591523301034869,
41
+ "grad_norm": 0.40234375,
42
+ "learning_rate": 0.00019947222588732023,
43
+ "loss": 0.1214,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.007909827961241843,
48
+ "grad_norm": 1.5390625,
49
+ "learning_rate": 0.0001993402823591503,
50
+ "loss": 0.0919,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.009228132621448816,
55
+ "grad_norm": 0.06201171875,
56
+ "learning_rate": 0.00019920833883098034,
57
+ "loss": 0.09,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.010546437281655791,
62
+ "grad_norm": 1.53125,
63
+ "learning_rate": 0.0001990763953028104,
64
+ "loss": 0.1945,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.011864741941862765,
69
+ "grad_norm": 0.2890625,
70
+ "learning_rate": 0.00019894445177464048,
71
+ "loss": 0.1259,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.013183046602069738,
76
+ "grad_norm": 0.609375,
77
+ "learning_rate": 0.00019881250824647052,
78
+ "loss": 0.027,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.014501351262276712,
83
+ "grad_norm": 0.369140625,
84
+ "learning_rate": 0.00019868056471830057,
85
+ "loss": 0.1068,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.015819655922483685,
90
+ "grad_norm": 0.34765625,
91
+ "learning_rate": 0.00019854862119013064,
92
+ "loss": 0.0542,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.01713796058269066,
97
+ "grad_norm": 0.055419921875,
98
+ "learning_rate": 0.00019841667766196068,
99
+ "loss": 0.0901,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.018456265242897632,
104
+ "grad_norm": 0.0247802734375,
105
+ "learning_rate": 0.00019828473413379075,
106
+ "loss": 0.0091,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.019774569903104607,
111
+ "grad_norm": 0.0079345703125,
112
+ "learning_rate": 0.0001981527906056208,
113
+ "loss": 0.0744,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.021092874563311582,
118
+ "grad_norm": 0.65234375,
119
+ "learning_rate": 0.00019802084707745086,
120
+ "loss": 0.1108,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.022411179223518554,
125
+ "grad_norm": 0.50390625,
126
+ "learning_rate": 0.0001978889035492809,
127
+ "loss": 0.0446,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.02372948388372553,
132
+ "grad_norm": 0.1787109375,
133
+ "learning_rate": 0.00019775696002111097,
134
+ "loss": 0.0982,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.0250477885439325,
139
+ "grad_norm": 0.490234375,
140
+ "learning_rate": 0.00019762501649294104,
141
+ "loss": 0.1035,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.026366093204139476,
146
+ "grad_norm": 0.12158203125,
147
+ "learning_rate": 0.00019749307296477108,
148
+ "loss": 0.0401,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.02768439786434645,
153
+ "grad_norm": 0.16015625,
154
+ "learning_rate": 0.00019736112943660115,
155
+ "loss": 0.0309,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.029002702524553423,
160
+ "grad_norm": 1.359375,
161
+ "learning_rate": 0.0001972291859084312,
162
+ "loss": 0.1032,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.0303210071847604,
167
+ "grad_norm": 0.52734375,
168
+ "learning_rate": 0.00019709724238026126,
169
+ "loss": 0.0811,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.03163931184496737,
174
+ "grad_norm": 0.177734375,
175
+ "learning_rate": 0.00019696529885209133,
176
+ "loss": 0.0258,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.03295761650517435,
181
+ "grad_norm": 0.234375,
182
+ "learning_rate": 0.00019683335532392137,
183
+ "loss": 0.0437,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.03427592116538132,
188
+ "grad_norm": 1.3046875,
189
+ "learning_rate": 0.00019670141179575144,
190
+ "loss": 0.0967,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.03559422582558829,
195
+ "grad_norm": 0.2734375,
196
+ "learning_rate": 0.00019656946826758148,
197
+ "loss": 0.0132,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.036912530485795264,
202
+ "grad_norm": 0.66015625,
203
+ "learning_rate": 0.00019643752473941155,
204
+ "loss": 0.0396,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.03823083514600224,
209
+ "grad_norm": 1.0546875,
210
+ "learning_rate": 0.0001963055812112416,
211
+ "loss": 0.0449,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.039549139806209214,
216
+ "grad_norm": 0.2021484375,
217
+ "learning_rate": 0.00019617363768307166,
218
+ "loss": 0.1196,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.040867444466416186,
223
+ "grad_norm": 0.5859375,
224
+ "learning_rate": 0.0001960416941549017,
225
+ "loss": 0.0588,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.042185749126623165,
230
+ "grad_norm": 0.06005859375,
231
+ "learning_rate": 0.00019590975062673175,
232
+ "loss": 0.0234,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.04350405378683014,
237
+ "grad_norm": 0.4921875,
238
+ "learning_rate": 0.00019577780709856182,
239
+ "loss": 0.0916,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.04482235844703711,
244
+ "grad_norm": 0.84375,
245
+ "learning_rate": 0.0001956458635703919,
246
+ "loss": 0.0271,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.04614066310724409,
251
+ "grad_norm": 0.8828125,
252
+ "learning_rate": 0.00019551392004222193,
253
+ "loss": 0.0175,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.04745896776745106,
258
+ "grad_norm": 0.0152587890625,
259
+ "learning_rate": 0.000195381976514052,
260
+ "loss": 0.0356,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.04877727242765803,
265
+ "grad_norm": 0.09326171875,
266
+ "learning_rate": 0.00019525003298588204,
267
+ "loss": 0.0057,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.050095577087865,
272
+ "grad_norm": 0.24609375,
273
+ "learning_rate": 0.0001951180894577121,
274
+ "loss": 0.0082,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.05141388174807198,
279
+ "grad_norm": 0.05029296875,
280
+ "learning_rate": 0.00019498614592954215,
281
+ "loss": 0.0178,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.05273218640827895,
286
+ "grad_norm": 0.0390625,
287
+ "learning_rate": 0.00019485420240137222,
288
+ "loss": 0.0789,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.054050491068485924,
293
+ "grad_norm": 0.5625,
294
+ "learning_rate": 0.0001947222588732023,
295
+ "loss": 0.0645,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.0553687957286929,
300
+ "grad_norm": 0.53515625,
301
+ "learning_rate": 0.00019459031534503233,
302
+ "loss": 0.116,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.056687100388899875,
307
+ "grad_norm": 0.55078125,
308
+ "learning_rate": 0.0001944583718168624,
309
+ "loss": 0.0516,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.058005405049106847,
314
+ "grad_norm": 0.314453125,
315
+ "learning_rate": 0.00019432642828869244,
316
+ "loss": 0.1019,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.059323709709313825,
321
+ "grad_norm": 0.1123046875,
322
+ "learning_rate": 0.0001941944847605225,
323
+ "loss": 0.0529,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.0606420143695208,
328
+ "grad_norm": 0.4921875,
329
+ "learning_rate": 0.00019406254123235256,
330
+ "loss": 0.0368,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.06196031902972777,
335
+ "grad_norm": 0.054443359375,
336
+ "learning_rate": 0.00019393059770418262,
337
+ "loss": 0.037,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.06327862368993474,
342
+ "grad_norm": 0.008544921875,
343
+ "learning_rate": 0.0001937986541760127,
344
+ "loss": 0.0324,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.06459692835014172,
349
+ "grad_norm": 1.5,
350
+ "learning_rate": 0.00019366671064784274,
351
+ "loss": 0.0334,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 0.0659152330103487,
356
+ "grad_norm": 0.2109375,
357
+ "learning_rate": 0.0001935347671196728,
358
+ "loss": 0.0671,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.06723353767055566,
363
+ "grad_norm": 2.0625,
364
+ "learning_rate": 0.00019340282359150285,
365
+ "loss": 0.1559,
366
+ "step": 255
367
+ },
368
+ {
369
+ "epoch": 0.06855184233076264,
370
+ "grad_norm": 0.7734375,
371
+ "learning_rate": 0.0001932708800633329,
372
+ "loss": 0.0198,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.06987014699096962,
377
+ "grad_norm": 0.42578125,
378
+ "learning_rate": 0.00019313893653516296,
379
+ "loss": 0.0151,
380
+ "step": 265
381
+ },
382
+ {
383
+ "epoch": 0.07118845165117658,
384
+ "grad_norm": 0.1884765625,
385
+ "learning_rate": 0.000193006993006993,
386
+ "loss": 0.0269,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.07250675631138356,
391
+ "grad_norm": 1.546875,
392
+ "learning_rate": 0.00019287504947882307,
393
+ "loss": 0.0565,
394
+ "step": 275
395
+ },
396
+ {
397
+ "epoch": 0.07382506097159053,
398
+ "grad_norm": 0.5078125,
399
+ "learning_rate": 0.0001927431059506531,
400
+ "loss": 0.0942,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.0751433656317975,
405
+ "grad_norm": 0.392578125,
406
+ "learning_rate": 0.00019261116242248318,
407
+ "loss": 0.0061,
408
+ "step": 285
409
+ },
410
+ {
411
+ "epoch": 0.07646167029200449,
412
+ "grad_norm": 1.9140625,
413
+ "learning_rate": 0.00019247921889431325,
414
+ "loss": 0.0497,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.07777997495221145,
419
+ "grad_norm": 0.08837890625,
420
+ "learning_rate": 0.0001923472753661433,
421
+ "loss": 0.0573,
422
+ "step": 295
423
+ },
424
+ {
425
+ "epoch": 0.07909827961241843,
426
+ "grad_norm": 1.046875,
427
+ "learning_rate": 0.00019221533183797336,
428
+ "loss": 0.0528,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.08041658427262541,
433
+ "grad_norm": 0.2275390625,
434
+ "learning_rate": 0.0001920833883098034,
435
+ "loss": 0.0506,
436
+ "step": 305
437
+ },
438
+ {
439
+ "epoch": 0.08173488893283237,
440
+ "grad_norm": 0.08203125,
441
+ "learning_rate": 0.00019195144478163347,
442
+ "loss": 0.0307,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.08305319359303935,
447
+ "grad_norm": 0.111328125,
448
+ "learning_rate": 0.00019181950125346354,
449
+ "loss": 0.0365,
450
+ "step": 315
451
+ },
452
+ {
453
+ "epoch": 0.08437149825324633,
454
+ "grad_norm": 1.2890625,
455
+ "learning_rate": 0.00019168755772529358,
456
+ "loss": 0.0447,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.0856898029134533,
461
+ "grad_norm": 0.6015625,
462
+ "learning_rate": 0.00019155561419712365,
463
+ "loss": 0.0605,
464
+ "step": 325
465
+ },
466
+ {
467
+ "epoch": 0.08700810757366027,
468
+ "grad_norm": 0.71875,
469
+ "learning_rate": 0.0001914236706689537,
470
+ "loss": 0.0846,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.08832641223386725,
475
+ "grad_norm": 0.1494140625,
476
+ "learning_rate": 0.00019129172714078376,
477
+ "loss": 0.0713,
478
+ "step": 335
479
+ },
480
+ {
481
+ "epoch": 0.08964471689407422,
482
+ "grad_norm": 0.1669921875,
483
+ "learning_rate": 0.0001911597836126138,
484
+ "loss": 0.0826,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.0909630215542812,
489
+ "grad_norm": 2.203125,
490
+ "learning_rate": 0.00019102784008444388,
491
+ "loss": 0.0441,
492
+ "step": 345
493
+ },
494
+ {
495
+ "epoch": 0.09228132621448817,
496
+ "grad_norm": 1.21875,
497
+ "learning_rate": 0.00019089589655627395,
498
+ "loss": 0.1378,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.09359963087469514,
503
+ "grad_norm": 3.0625,
504
+ "learning_rate": 0.00019076395302810396,
505
+ "loss": 0.1552,
506
+ "step": 355
507
+ },
508
+ {
509
+ "epoch": 0.09491793553490212,
510
+ "grad_norm": 0.232421875,
511
+ "learning_rate": 0.00019063200949993403,
512
+ "loss": 0.0458,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.0962362401951091,
517
+ "grad_norm": 0.71875,
518
+ "learning_rate": 0.0001905000659717641,
519
+ "loss": 0.0312,
520
+ "step": 365
521
+ },
522
+ {
523
+ "epoch": 0.09755454485531606,
524
+ "grad_norm": 0.0218505859375,
525
+ "learning_rate": 0.00019036812244359414,
526
+ "loss": 0.0247,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.09887284951552304,
531
+ "grad_norm": 0.064453125,
532
+ "learning_rate": 0.0001902361789154242,
533
+ "loss": 0.054,
534
+ "step": 375
535
+ },
536
+ {
537
+ "epoch": 0.10019115417573,
538
+ "grad_norm": 0.021240234375,
539
+ "learning_rate": 0.00019010423538725425,
540
+ "loss": 0.0023,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.10150945883593698,
545
+ "grad_norm": 0.0361328125,
546
+ "learning_rate": 0.00018997229185908432,
547
+ "loss": 0.0884,
548
+ "step": 385
549
+ },
550
+ {
551
+ "epoch": 0.10282776349614396,
552
+ "grad_norm": 1.703125,
553
+ "learning_rate": 0.00018984034833091436,
554
+ "loss": 0.0506,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.10414606815635093,
559
+ "grad_norm": 0.08837890625,
560
+ "learning_rate": 0.00018970840480274443,
561
+ "loss": 0.1123,
562
+ "step": 395
563
+ },
564
+ {
565
+ "epoch": 0.1054643728165579,
566
+ "grad_norm": 0.6953125,
567
+ "learning_rate": 0.0001895764612745745,
568
+ "loss": 0.0597,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.10678267747676488,
573
+ "grad_norm": 0.18359375,
574
+ "learning_rate": 0.00018944451774640454,
575
+ "loss": 0.0138,
576
+ "step": 405
577
+ },
578
+ {
579
+ "epoch": 0.10810098213697185,
580
+ "grad_norm": 0.0272216796875,
581
+ "learning_rate": 0.0001893125742182346,
582
+ "loss": 0.0249,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.10941928679717883,
587
+ "grad_norm": 0.00970458984375,
588
+ "learning_rate": 0.00018918063069006466,
589
+ "loss": 0.0084,
590
+ "step": 415
591
+ },
592
+ {
593
+ "epoch": 0.1107375914573858,
594
+ "grad_norm": 0.54296875,
595
+ "learning_rate": 0.00018904868716189472,
596
+ "loss": 0.0541,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.11205589611759277,
601
+ "grad_norm": 0.74609375,
602
+ "learning_rate": 0.00018891674363372477,
603
+ "loss": 0.007,
604
+ "step": 425
605
+ },
606
+ {
607
+ "epoch": 0.11337420077779975,
608
+ "grad_norm": 0.0211181640625,
609
+ "learning_rate": 0.00018878480010555484,
610
+ "loss": 0.0875,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.11469250543800673,
615
+ "grad_norm": 0.9296875,
616
+ "learning_rate": 0.0001886528565773849,
617
+ "loss": 0.1207,
618
+ "step": 435
619
+ },
620
+ {
621
+ "epoch": 0.11601081009821369,
622
+ "grad_norm": 1.2734375,
623
+ "learning_rate": 0.00018852091304921495,
624
+ "loss": 0.1143,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.11732911475842067,
629
+ "grad_norm": 0.6484375,
630
+ "learning_rate": 0.00018838896952104502,
631
+ "loss": 0.0393,
632
+ "step": 445
633
+ },
634
+ {
635
+ "epoch": 0.11864741941862765,
636
+ "grad_norm": 0.1552734375,
637
+ "learning_rate": 0.00018825702599287506,
638
+ "loss": 0.02,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.11996572407883462,
643
+ "grad_norm": 0.486328125,
644
+ "learning_rate": 0.0001881250824647051,
645
+ "loss": 0.0891,
646
+ "step": 455
647
+ },
648
+ {
649
+ "epoch": 0.1212840287390416,
650
+ "grad_norm": 1.0,
651
+ "learning_rate": 0.00018799313893653517,
652
+ "loss": 0.0469,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.12260233339924857,
657
+ "grad_norm": 0.2099609375,
658
+ "learning_rate": 0.0001878611954083652,
659
+ "loss": 0.019,
660
+ "step": 465
661
+ },
662
+ {
663
+ "epoch": 0.12392063805945554,
664
+ "grad_norm": 0.03857421875,
665
+ "learning_rate": 0.00018772925188019528,
666
+ "loss": 0.007,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.12523894271966252,
671
+ "grad_norm": 0.0257568359375,
672
+ "learning_rate": 0.00018759730835202532,
673
+ "loss": 0.0039,
674
+ "step": 475
675
+ },
676
+ {
677
+ "epoch": 0.12655724737986948,
678
+ "grad_norm": 0.014404296875,
679
+ "learning_rate": 0.0001874653648238554,
680
+ "loss": 0.0043,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.12787555204007647,
685
+ "grad_norm": 0.51953125,
686
+ "learning_rate": 0.00018733342129568546,
687
+ "loss": 0.1326,
688
+ "step": 485
689
+ },
690
+ {
691
+ "epoch": 0.12919385670028344,
692
+ "grad_norm": 0.99609375,
693
+ "learning_rate": 0.0001872014777675155,
694
+ "loss": 0.0369,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.1305121613604904,
699
+ "grad_norm": 0.2734375,
700
+ "learning_rate": 0.00018706953423934557,
701
+ "loss": 0.0395,
702
+ "step": 495
703
+ },
704
+ {
705
+ "epoch": 0.1318304660206974,
706
+ "grad_norm": 0.083984375,
707
+ "learning_rate": 0.00018693759071117561,
708
+ "loss": 0.0284,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.1318304660206974,
713
+ "eval_loss": 0.04542969539761543,
714
+ "eval_model_preparation_time": 0.0076,
715
+ "eval_runtime": 457.5293,
716
+ "eval_samples_per_second": 7.37,
717
+ "eval_steps_per_second": 3.685,
718
+ "step": 500
719
+ },
720
+ {
721
+ "epoch": 0.13314877068090436,
722
+ "grad_norm": 0.0291748046875,
723
+ "learning_rate": 0.00018680564718300568,
724
+ "loss": 0.0533,
725
+ "step": 505
726
+ },
727
+ {
728
+ "epoch": 0.13446707534111133,
729
+ "grad_norm": 0.71484375,
730
+ "learning_rate": 0.00018667370365483575,
731
+ "loss": 0.0183,
732
+ "step": 510
733
+ },
734
+ {
735
+ "epoch": 0.13578538000131832,
736
+ "grad_norm": 0.018798828125,
737
+ "learning_rate": 0.0001865417601266658,
738
+ "loss": 0.0473,
739
+ "step": 515
740
+ },
741
+ {
742
+ "epoch": 0.13710368466152528,
743
+ "grad_norm": 0.388671875,
744
+ "learning_rate": 0.00018640981659849586,
745
+ "loss": 0.0562,
746
+ "step": 520
747
+ },
748
+ {
749
+ "epoch": 0.13842198932173225,
750
+ "grad_norm": 0.77734375,
751
+ "learning_rate": 0.0001862778730703259,
752
+ "loss": 0.0755,
753
+ "step": 525
754
+ },
755
+ {
756
+ "epoch": 0.13974029398193924,
757
+ "grad_norm": 2.8125,
758
+ "learning_rate": 0.00018614592954215598,
759
+ "loss": 0.0422,
760
+ "step": 530
761
+ },
762
+ {
763
+ "epoch": 0.1410585986421462,
764
+ "grad_norm": 0.48828125,
765
+ "learning_rate": 0.00018601398601398602,
766
+ "loss": 0.0882,
767
+ "step": 535
768
+ },
769
+ {
770
+ "epoch": 0.14237690330235317,
771
+ "grad_norm": 0.16015625,
772
+ "learning_rate": 0.0001858820424858161,
773
+ "loss": 0.0131,
774
+ "step": 540
775
+ },
776
+ {
777
+ "epoch": 0.14369520796256013,
778
+ "grad_norm": 0.31640625,
779
+ "learning_rate": 0.00018575009895764616,
780
+ "loss": 0.03,
781
+ "step": 545
782
+ },
783
+ {
784
+ "epoch": 0.14501351262276713,
785
+ "grad_norm": 0.0120849609375,
786
+ "learning_rate": 0.0001856181554294762,
787
+ "loss": 0.0425,
788
+ "step": 550
789
+ },
790
+ {
791
+ "epoch": 0.1463318172829741,
792
+ "grad_norm": 0.390625,
793
+ "learning_rate": 0.00018548621190130624,
794
+ "loss": 0.011,
795
+ "step": 555
796
+ },
797
+ {
798
+ "epoch": 0.14765012194318106,
799
+ "grad_norm": 1.9609375,
800
+ "learning_rate": 0.0001853542683731363,
801
+ "loss": 0.0807,
802
+ "step": 560
803
+ },
804
+ {
805
+ "epoch": 0.14896842660338805,
806
+ "grad_norm": 0.609375,
807
+ "learning_rate": 0.00018522232484496635,
808
+ "loss": 0.0278,
809
+ "step": 565
810
+ },
811
+ {
812
+ "epoch": 0.150286731263595,
813
+ "grad_norm": 0.087890625,
814
+ "learning_rate": 0.00018509038131679642,
815
+ "loss": 0.0484,
816
+ "step": 570
817
+ },
818
+ {
819
+ "epoch": 0.15160503592380198,
820
+ "grad_norm": 0.5078125,
821
+ "learning_rate": 0.00018495843778862646,
822
+ "loss": 0.1277,
823
+ "step": 575
824
+ },
825
+ {
826
+ "epoch": 0.15292334058400897,
827
+ "grad_norm": 0.8125,
828
+ "learning_rate": 0.00018482649426045653,
829
+ "loss": 0.058,
830
+ "step": 580
831
+ },
832
+ {
833
+ "epoch": 0.15424164524421594,
834
+ "grad_norm": 0.22265625,
835
+ "learning_rate": 0.00018469455073228657,
836
+ "loss": 0.0259,
837
+ "step": 585
838
+ },
839
+ {
840
+ "epoch": 0.1555599499044229,
841
+ "grad_norm": 1.8984375,
842
+ "learning_rate": 0.00018456260720411664,
843
+ "loss": 0.113,
844
+ "step": 590
845
+ },
846
+ {
847
+ "epoch": 0.1568782545646299,
848
+ "grad_norm": 0.12451171875,
849
+ "learning_rate": 0.0001844306636759467,
850
+ "loss": 0.0312,
851
+ "step": 595
852
+ },
853
+ {
854
+ "epoch": 0.15819655922483686,
855
+ "grad_norm": 0.0322265625,
856
+ "learning_rate": 0.00018429872014777676,
857
+ "loss": 0.0476,
858
+ "step": 600
859
+ },
860
+ {
861
+ "epoch": 0.15951486388504382,
862
+ "grad_norm": 0.0281982421875,
863
+ "learning_rate": 0.00018416677661960682,
864
+ "loss": 0.0232,
865
+ "step": 605
866
+ },
867
+ {
868
+ "epoch": 0.16083316854525082,
869
+ "grad_norm": 0.57421875,
870
+ "learning_rate": 0.00018403483309143687,
871
+ "loss": 0.1287,
872
+ "step": 610
873
+ },
874
+ {
875
+ "epoch": 0.16215147320545778,
876
+ "grad_norm": 0.765625,
877
+ "learning_rate": 0.00018390288956326694,
878
+ "loss": 0.0991,
879
+ "step": 615
880
+ },
881
+ {
882
+ "epoch": 0.16346977786566474,
883
+ "grad_norm": 0.3125,
884
+ "learning_rate": 0.00018377094603509698,
885
+ "loss": 0.0247,
886
+ "step": 620
887
+ },
888
+ {
889
+ "epoch": 0.16478808252587174,
890
+ "grad_norm": 0.37890625,
891
+ "learning_rate": 0.00018363900250692705,
892
+ "loss": 0.0632,
893
+ "step": 625
894
+ },
895
+ {
896
+ "epoch": 0.1661063871860787,
897
+ "grad_norm": 0.1494140625,
898
+ "learning_rate": 0.00018350705897875712,
899
+ "loss": 0.0314,
900
+ "step": 630
901
+ },
902
+ {
903
+ "epoch": 0.16742469184628567,
904
+ "grad_norm": 0.0673828125,
905
+ "learning_rate": 0.00018337511545058716,
906
+ "loss": 0.0425,
907
+ "step": 635
908
+ },
909
+ {
910
+ "epoch": 0.16874299650649266,
911
+ "grad_norm": 0.396484375,
912
+ "learning_rate": 0.00018324317192241723,
913
+ "loss": 0.0613,
914
+ "step": 640
915
+ },
916
+ {
917
+ "epoch": 0.17006130116669962,
918
+ "grad_norm": 0.057373046875,
919
+ "learning_rate": 0.00018311122839424727,
920
+ "loss": 0.0569,
921
+ "step": 645
922
+ },
923
+ {
924
+ "epoch": 0.1713796058269066,
925
+ "grad_norm": 0.001373291015625,
926
+ "learning_rate": 0.00018297928486607734,
927
+ "loss": 0.007,
928
+ "step": 650
929
+ },
930
+ {
931
+ "epoch": 0.17269791048711358,
932
+ "grad_norm": 1.0859375,
933
+ "learning_rate": 0.00018284734133790738,
934
+ "loss": 0.0189,
935
+ "step": 655
936
+ },
937
+ {
938
+ "epoch": 0.17401621514732055,
939
+ "grad_norm": 0.6015625,
940
+ "learning_rate": 0.00018271539780973742,
941
+ "loss": 0.0601,
942
+ "step": 660
943
+ },
944
+ {
945
+ "epoch": 0.1753345198075275,
946
+ "grad_norm": 0.25390625,
947
+ "learning_rate": 0.0001825834542815675,
948
+ "loss": 0.0211,
949
+ "step": 665
950
+ },
951
+ {
952
+ "epoch": 0.1766528244677345,
953
+ "grad_norm": 2.6875,
954
+ "learning_rate": 0.00018245151075339753,
955
+ "loss": 0.0713,
956
+ "step": 670
957
+ },
958
+ {
959
+ "epoch": 0.17797112912794147,
960
+ "grad_norm": 1.1875,
961
+ "learning_rate": 0.0001823195672252276,
962
+ "loss": 0.0522,
963
+ "step": 675
964
+ },
965
+ {
966
+ "epoch": 0.17928943378814843,
967
+ "grad_norm": 0.025146484375,
968
+ "learning_rate": 0.00018218762369705767,
969
+ "loss": 0.0242,
970
+ "step": 680
971
+ },
972
+ {
973
+ "epoch": 0.18060773844835543,
974
+ "grad_norm": 0.048095703125,
975
+ "learning_rate": 0.00018205568016888772,
976
+ "loss": 0.0129,
977
+ "step": 685
978
+ },
979
+ {
980
+ "epoch": 0.1819260431085624,
981
+ "grad_norm": 0.04541015625,
982
+ "learning_rate": 0.00018192373664071778,
983
+ "loss": 0.0142,
984
+ "step": 690
985
+ },
986
+ {
987
+ "epoch": 0.18324434776876936,
988
+ "grad_norm": 0.00830078125,
989
+ "learning_rate": 0.00018179179311254783,
990
+ "loss": 0.0121,
991
+ "step": 695
992
+ },
993
+ {
994
+ "epoch": 0.18456265242897635,
995
+ "grad_norm": 0.53125,
996
+ "learning_rate": 0.0001816598495843779,
997
+ "loss": 0.0163,
998
+ "step": 700
999
+ },
1000
+ {
1001
+ "epoch": 0.1858809570891833,
1002
+ "grad_norm": 0.185546875,
1003
+ "learning_rate": 0.00018152790605620796,
1004
+ "loss": 0.0203,
1005
+ "step": 705
1006
+ },
1007
+ {
1008
+ "epoch": 0.18719926174939028,
1009
+ "grad_norm": 1.2578125,
1010
+ "learning_rate": 0.000181395962528038,
1011
+ "loss": 0.1548,
1012
+ "step": 710
1013
+ },
1014
+ {
1015
+ "epoch": 0.18851756640959727,
1016
+ "grad_norm": 0.0247802734375,
1017
+ "learning_rate": 0.00018126401899986808,
1018
+ "loss": 0.0543,
1019
+ "step": 715
1020
+ },
1021
+ {
1022
+ "epoch": 0.18983587106980424,
1023
+ "grad_norm": 0.07568359375,
1024
+ "learning_rate": 0.00018113207547169812,
1025
+ "loss": 0.0346,
1026
+ "step": 720
1027
+ },
1028
+ {
1029
+ "epoch": 0.1911541757300112,
1030
+ "grad_norm": 0.1318359375,
1031
+ "learning_rate": 0.0001810001319435282,
1032
+ "loss": 0.03,
1033
+ "step": 725
1034
+ },
1035
+ {
1036
+ "epoch": 0.1924724803902182,
1037
+ "grad_norm": 0.1455078125,
1038
+ "learning_rate": 0.00018086818841535823,
1039
+ "loss": 0.0796,
1040
+ "step": 730
1041
+ },
1042
+ {
1043
+ "epoch": 0.19379078505042516,
1044
+ "grad_norm": 0.09814453125,
1045
+ "learning_rate": 0.0001807362448871883,
1046
+ "loss": 0.0662,
1047
+ "step": 735
1048
+ },
1049
+ {
1050
+ "epoch": 0.19510908971063212,
1051
+ "grad_norm": 0.91015625,
1052
+ "learning_rate": 0.00018060430135901837,
1053
+ "loss": 0.0675,
1054
+ "step": 740
1055
+ },
1056
+ {
1057
+ "epoch": 0.19642739437083911,
1058
+ "grad_norm": 0.10693359375,
1059
+ "learning_rate": 0.0001804723578308484,
1060
+ "loss": 0.0377,
1061
+ "step": 745
1062
+ },
1063
+ {
1064
+ "epoch": 0.19774569903104608,
1065
+ "grad_norm": 0.95703125,
1066
+ "learning_rate": 0.00018034041430267848,
1067
+ "loss": 0.0174,
1068
+ "step": 750
1069
+ },
1070
+ {
1071
+ "epoch": 0.19906400369125304,
1072
+ "grad_norm": 1.7890625,
1073
+ "learning_rate": 0.00018020847077450852,
1074
+ "loss": 0.0278,
1075
+ "step": 755
1076
+ },
1077
+ {
1078
+ "epoch": 0.20038230835146,
1079
+ "grad_norm": 0.8515625,
1080
+ "learning_rate": 0.00018007652724633856,
1081
+ "loss": 0.0113,
1082
+ "step": 760
1083
+ },
1084
+ {
1085
+ "epoch": 0.201700613011667,
1086
+ "grad_norm": 0.016845703125,
1087
+ "learning_rate": 0.00017994458371816863,
1088
+ "loss": 0.0589,
1089
+ "step": 765
1090
+ },
1091
+ {
1092
+ "epoch": 0.20301891767187397,
1093
+ "grad_norm": 0.01043701171875,
1094
+ "learning_rate": 0.00017981264018999867,
1095
+ "loss": 0.0203,
1096
+ "step": 770
1097
+ },
1098
+ {
1099
+ "epoch": 0.20433722233208093,
1100
+ "grad_norm": 0.0242919921875,
1101
+ "learning_rate": 0.00017968069666182874,
1102
+ "loss": 0.0494,
1103
+ "step": 775
1104
+ },
1105
+ {
1106
+ "epoch": 0.20565552699228792,
1107
+ "grad_norm": 0.56640625,
1108
+ "learning_rate": 0.00017954875313365879,
1109
+ "loss": 0.0394,
1110
+ "step": 780
1111
+ },
1112
+ {
1113
+ "epoch": 0.2069738316524949,
1114
+ "grad_norm": 0.06591796875,
1115
+ "learning_rate": 0.00017941680960548886,
1116
+ "loss": 0.0848,
1117
+ "step": 785
1118
+ },
1119
+ {
1120
+ "epoch": 0.20829213631270185,
1121
+ "grad_norm": 0.40234375,
1122
+ "learning_rate": 0.00017928486607731892,
1123
+ "loss": 0.0464,
1124
+ "step": 790
1125
+ },
1126
+ {
1127
+ "epoch": 0.20961044097290885,
1128
+ "grad_norm": 0.06298828125,
1129
+ "learning_rate": 0.00017915292254914897,
1130
+ "loss": 0.0222,
1131
+ "step": 795
1132
+ },
1133
+ {
1134
+ "epoch": 0.2109287456331158,
1135
+ "grad_norm": 0.5390625,
1136
+ "learning_rate": 0.00017902097902097904,
1137
+ "loss": 0.0434,
1138
+ "step": 800
1139
+ },
1140
+ {
1141
+ "epoch": 0.21224705029332278,
1142
+ "grad_norm": 1.390625,
1143
+ "learning_rate": 0.00017888903549280908,
1144
+ "loss": 0.0222,
1145
+ "step": 805
1146
+ },
1147
+ {
1148
+ "epoch": 0.21356535495352977,
1149
+ "grad_norm": 0.0272216796875,
1150
+ "learning_rate": 0.00017875709196463915,
1151
+ "loss": 0.0099,
1152
+ "step": 810
1153
+ },
1154
+ {
1155
+ "epoch": 0.21488365961373673,
1156
+ "grad_norm": 0.10009765625,
1157
+ "learning_rate": 0.0001786251484364692,
1158
+ "loss": 0.0086,
1159
+ "step": 815
1160
+ },
1161
+ {
1162
+ "epoch": 0.2162019642739437,
1163
+ "grad_norm": 0.06396484375,
1164
+ "learning_rate": 0.00017849320490829926,
1165
+ "loss": 0.0715,
1166
+ "step": 820
1167
+ },
1168
+ {
1169
+ "epoch": 0.2175202689341507,
1170
+ "grad_norm": 0.365234375,
1171
+ "learning_rate": 0.00017836126138012933,
1172
+ "loss": 0.0642,
1173
+ "step": 825
1174
+ },
1175
+ {
1176
+ "epoch": 0.21883857359435765,
1177
+ "grad_norm": 0.01519775390625,
1178
+ "learning_rate": 0.00017822931785195937,
1179
+ "loss": 0.0111,
1180
+ "step": 830
1181
+ },
1182
+ {
1183
+ "epoch": 0.22015687825456462,
1184
+ "grad_norm": 1.1640625,
1185
+ "learning_rate": 0.00017809737432378944,
1186
+ "loss": 0.0518,
1187
+ "step": 835
1188
+ },
1189
+ {
1190
+ "epoch": 0.2214751829147716,
1191
+ "grad_norm": 0.00921630859375,
1192
+ "learning_rate": 0.00017796543079561948,
1193
+ "loss": 0.0384,
1194
+ "step": 840
1195
+ },
1196
+ {
1197
+ "epoch": 0.22279348757497858,
1198
+ "grad_norm": 0.33984375,
1199
+ "learning_rate": 0.00017783348726744955,
1200
+ "loss": 0.0204,
1201
+ "step": 845
1202
+ },
1203
+ {
1204
+ "epoch": 0.22411179223518554,
1205
+ "grad_norm": 0.294921875,
1206
+ "learning_rate": 0.00017770154373927962,
1207
+ "loss": 0.0075,
1208
+ "step": 850
1209
+ },
1210
+ {
1211
+ "epoch": 0.22543009689539253,
1212
+ "grad_norm": 0.033203125,
1213
+ "learning_rate": 0.00017756960021110963,
1214
+ "loss": 0.0895,
1215
+ "step": 855
1216
+ },
1217
+ {
1218
+ "epoch": 0.2267484015555995,
1219
+ "grad_norm": 0.08056640625,
1220
+ "learning_rate": 0.0001774376566829397,
1221
+ "loss": 0.1039,
1222
+ "step": 860
1223
+ },
1224
+ {
1225
+ "epoch": 0.22806670621580646,
1226
+ "grad_norm": 0.55078125,
1227
+ "learning_rate": 0.00017730571315476975,
1228
+ "loss": 0.0125,
1229
+ "step": 865
1230
+ },
1231
+ {
1232
+ "epoch": 0.22938501087601346,
1233
+ "grad_norm": 0.5859375,
1234
+ "learning_rate": 0.00017717376962659982,
1235
+ "loss": 0.0381,
1236
+ "step": 870
1237
+ },
1238
+ {
1239
+ "epoch": 0.23070331553622042,
1240
+ "grad_norm": 0.029052734375,
1241
+ "learning_rate": 0.00017704182609842988,
1242
+ "loss": 0.0434,
1243
+ "step": 875
1244
+ },
1245
+ {
1246
+ "epoch": 0.23202162019642739,
1247
+ "grad_norm": 0.43359375,
1248
+ "learning_rate": 0.00017690988257025993,
1249
+ "loss": 0.0799,
1250
+ "step": 880
1251
+ },
1252
+ {
1253
+ "epoch": 0.23333992485663438,
1254
+ "grad_norm": 0.04150390625,
1255
+ "learning_rate": 0.00017677793904209,
1256
+ "loss": 0.0692,
1257
+ "step": 885
1258
+ },
1259
+ {
1260
+ "epoch": 0.23465822951684134,
1261
+ "grad_norm": 0.435546875,
1262
+ "learning_rate": 0.00017664599551392004,
1263
+ "loss": 0.0544,
1264
+ "step": 890
1265
+ },
1266
+ {
1267
+ "epoch": 0.2359765341770483,
1268
+ "grad_norm": 1.171875,
1269
+ "learning_rate": 0.0001765140519857501,
1270
+ "loss": 0.0619,
1271
+ "step": 895
1272
+ },
1273
+ {
1274
+ "epoch": 0.2372948388372553,
1275
+ "grad_norm": 0.01263427734375,
1276
+ "learning_rate": 0.00017638210845758018,
1277
+ "loss": 0.0418,
1278
+ "step": 900
1279
+ },
1280
+ {
1281
+ "epoch": 0.23861314349746227,
1282
+ "grad_norm": 0.017578125,
1283
+ "learning_rate": 0.00017625016492941022,
1284
+ "loss": 0.0195,
1285
+ "step": 905
1286
+ },
1287
+ {
1288
+ "epoch": 0.23993144815766923,
1289
+ "grad_norm": 0.6171875,
1290
+ "learning_rate": 0.0001761182214012403,
1291
+ "loss": 0.067,
1292
+ "step": 910
1293
+ },
1294
+ {
1295
+ "epoch": 0.24124975281787622,
1296
+ "grad_norm": 0.59765625,
1297
+ "learning_rate": 0.00017598627787307033,
1298
+ "loss": 0.049,
1299
+ "step": 915
1300
+ },
1301
+ {
1302
+ "epoch": 0.2425680574780832,
1303
+ "grad_norm": 1.2421875,
1304
+ "learning_rate": 0.0001758543343449004,
1305
+ "loss": 0.0539,
1306
+ "step": 920
1307
+ },
1308
+ {
1309
+ "epoch": 0.24388636213829015,
1310
+ "grad_norm": 0.10302734375,
1311
+ "learning_rate": 0.00017572239081673044,
1312
+ "loss": 0.0725,
1313
+ "step": 925
1314
+ },
1315
+ {
1316
+ "epoch": 0.24520466679849715,
1317
+ "grad_norm": 0.330078125,
1318
+ "learning_rate": 0.0001755904472885605,
1319
+ "loss": 0.064,
1320
+ "step": 930
1321
+ },
1322
+ {
1323
+ "epoch": 0.2465229714587041,
1324
+ "grad_norm": 0.220703125,
1325
+ "learning_rate": 0.00017545850376039058,
1326
+ "loss": 0.0271,
1327
+ "step": 935
1328
+ },
1329
+ {
1330
+ "epoch": 0.24784127611891107,
1331
+ "grad_norm": 0.01470947265625,
1332
+ "learning_rate": 0.00017532656023222062,
1333
+ "loss": 0.0247,
1334
+ "step": 940
1335
+ },
1336
+ {
1337
+ "epoch": 0.24915958077911807,
1338
+ "grad_norm": 0.013427734375,
1339
+ "learning_rate": 0.0001751946167040507,
1340
+ "loss": 0.017,
1341
+ "step": 945
1342
+ },
1343
+ {
1344
+ "epoch": 0.25047788543932503,
1345
+ "grad_norm": 0.58984375,
1346
+ "learning_rate": 0.00017506267317588073,
1347
+ "loss": 0.0254,
1348
+ "step": 950
1349
+ },
1350
+ {
1351
+ "epoch": 0.251796190099532,
1352
+ "grad_norm": 0.412109375,
1353
+ "learning_rate": 0.00017493072964771078,
1354
+ "loss": 0.0186,
1355
+ "step": 955
1356
+ },
1357
+ {
1358
+ "epoch": 0.25311449475973896,
1359
+ "grad_norm": 0.66796875,
1360
+ "learning_rate": 0.00017479878611954084,
1361
+ "loss": 0.0617,
1362
+ "step": 960
1363
+ },
1364
+ {
1365
+ "epoch": 0.25443279941994595,
1366
+ "grad_norm": 0.322265625,
1367
+ "learning_rate": 0.00017466684259137089,
1368
+ "loss": 0.0173,
1369
+ "step": 965
1370
+ },
1371
+ {
1372
+ "epoch": 0.25575110408015295,
1373
+ "grad_norm": 0.83203125,
1374
+ "learning_rate": 0.00017453489906320096,
1375
+ "loss": 0.0512,
1376
+ "step": 970
1377
+ },
1378
+ {
1379
+ "epoch": 0.2570694087403599,
1380
+ "grad_norm": 0.08447265625,
1381
+ "learning_rate": 0.000174402955535031,
1382
+ "loss": 0.0361,
1383
+ "step": 975
1384
+ },
1385
+ {
1386
+ "epoch": 0.2583877134005669,
1387
+ "grad_norm": 0.423828125,
1388
+ "learning_rate": 0.00017427101200686107,
1389
+ "loss": 0.0175,
1390
+ "step": 980
1391
+ },
1392
+ {
1393
+ "epoch": 0.25970601806077387,
1394
+ "grad_norm": 0.77734375,
1395
+ "learning_rate": 0.00017413906847869114,
1396
+ "loss": 0.0139,
1397
+ "step": 985
1398
+ },
1399
+ {
1400
+ "epoch": 0.2610243227209808,
1401
+ "grad_norm": 0.515625,
1402
+ "learning_rate": 0.00017400712495052118,
1403
+ "loss": 0.0948,
1404
+ "step": 990
1405
+ },
1406
+ {
1407
+ "epoch": 0.2623426273811878,
1408
+ "grad_norm": 1.421875,
1409
+ "learning_rate": 0.00017387518142235125,
1410
+ "loss": 0.0406,
1411
+ "step": 995
1412
+ },
1413
+ {
1414
+ "epoch": 0.2636609320413948,
1415
+ "grad_norm": 0.058837890625,
1416
+ "learning_rate": 0.0001737432378941813,
1417
+ "loss": 0.1011,
1418
+ "step": 1000
1419
+ },
1420
+ {
1421
+ "epoch": 0.2636609320413948,
1422
+ "eval_loss": 0.045552924275398254,
1423
+ "eval_model_preparation_time": 0.0076,
1424
+ "eval_runtime": 457.6113,
1425
+ "eval_samples_per_second": 7.369,
1426
+ "eval_steps_per_second": 3.684,
1427
+ "step": 1000
1428
+ },
1429
+ {
1430
+ "epoch": 0.26497923670160173,
1431
+ "grad_norm": 0.380859375,
1432
+ "learning_rate": 0.00017361129436601136,
1433
+ "loss": 0.0711,
1434
+ "step": 1005
1435
+ },
1436
+ {
1437
+ "epoch": 0.2662975413618087,
1438
+ "grad_norm": 0.0208740234375,
1439
+ "learning_rate": 0.00017347935083784143,
1440
+ "loss": 0.0218,
1441
+ "step": 1010
1442
+ },
1443
+ {
1444
+ "epoch": 0.2676158460220157,
1445
+ "grad_norm": 0.04345703125,
1446
+ "learning_rate": 0.00017334740730967147,
1447
+ "loss": 0.0301,
1448
+ "step": 1015
1449
+ },
1450
+ {
1451
+ "epoch": 0.26893415068222265,
1452
+ "grad_norm": 0.2734375,
1453
+ "learning_rate": 0.00017321546378150154,
1454
+ "loss": 0.0721,
1455
+ "step": 1020
1456
+ },
1457
+ {
1458
+ "epoch": 0.27025245534242964,
1459
+ "grad_norm": 0.25390625,
1460
+ "learning_rate": 0.00017308352025333158,
1461
+ "loss": 0.0363,
1462
+ "step": 1025
1463
+ },
1464
+ {
1465
+ "epoch": 0.27157076000263664,
1466
+ "grad_norm": 0.04345703125,
1467
+ "learning_rate": 0.00017295157672516165,
1468
+ "loss": 0.0313,
1469
+ "step": 1030
1470
+ },
1471
+ {
1472
+ "epoch": 0.2728890646628436,
1473
+ "grad_norm": 0.0211181640625,
1474
+ "learning_rate": 0.0001728196331969917,
1475
+ "loss": 0.0385,
1476
+ "step": 1035
1477
+ },
1478
+ {
1479
+ "epoch": 0.27420736932305056,
1480
+ "grad_norm": 0.00787353515625,
1481
+ "learning_rate": 0.00017268768966882176,
1482
+ "loss": 0.0405,
1483
+ "step": 1040
1484
+ },
1485
+ {
1486
+ "epoch": 0.27552567398325756,
1487
+ "grad_norm": 0.484375,
1488
+ "learning_rate": 0.00017255574614065183,
1489
+ "loss": 0.0616,
1490
+ "step": 1045
1491
+ },
1492
+ {
1493
+ "epoch": 0.2768439786434645,
1494
+ "grad_norm": 0.0908203125,
1495
+ "learning_rate": 0.00017242380261248185,
1496
+ "loss": 0.0057,
1497
+ "step": 1050
1498
+ },
1499
+ {
1500
+ "epoch": 0.2781622833036715,
1501
+ "grad_norm": 0.1904296875,
1502
+ "learning_rate": 0.00017229185908431192,
1503
+ "loss": 0.0417,
1504
+ "step": 1055
1505
+ },
1506
+ {
1507
+ "epoch": 0.2794805879638785,
1508
+ "grad_norm": 0.30078125,
1509
+ "learning_rate": 0.00017215991555614196,
1510
+ "loss": 0.0346,
1511
+ "step": 1060
1512
+ },
1513
+ {
1514
+ "epoch": 0.2807988926240854,
1515
+ "grad_norm": 0.016357421875,
1516
+ "learning_rate": 0.00017202797202797203,
1517
+ "loss": 0.0295,
1518
+ "step": 1065
1519
+ },
1520
+ {
1521
+ "epoch": 0.2821171972842924,
1522
+ "grad_norm": 0.490234375,
1523
+ "learning_rate": 0.0001718960284998021,
1524
+ "loss": 0.0448,
1525
+ "step": 1070
1526
+ },
1527
+ {
1528
+ "epoch": 0.28343550194449935,
1529
+ "grad_norm": 0.004241943359375,
1530
+ "learning_rate": 0.00017176408497163214,
1531
+ "loss": 0.0051,
1532
+ "step": 1075
1533
+ },
1534
+ {
1535
+ "epoch": 0.28475380660470634,
1536
+ "grad_norm": 0.01904296875,
1537
+ "learning_rate": 0.0001716321414434622,
1538
+ "loss": 0.0894,
1539
+ "step": 1080
1540
+ },
1541
+ {
1542
+ "epoch": 0.28607211126491333,
1543
+ "grad_norm": 0.83984375,
1544
+ "learning_rate": 0.00017150019791529225,
1545
+ "loss": 0.0288,
1546
+ "step": 1085
1547
+ },
1548
+ {
1549
+ "epoch": 0.28739041592512027,
1550
+ "grad_norm": 0.2021484375,
1551
+ "learning_rate": 0.00017136825438712232,
1552
+ "loss": 0.0222,
1553
+ "step": 1090
1554
+ },
1555
+ {
1556
+ "epoch": 0.28870872058532726,
1557
+ "grad_norm": 0.322265625,
1558
+ "learning_rate": 0.0001712363108589524,
1559
+ "loss": 0.0444,
1560
+ "step": 1095
1561
+ },
1562
+ {
1563
+ "epoch": 0.29002702524553425,
1564
+ "grad_norm": 0.408203125,
1565
+ "learning_rate": 0.00017110436733078243,
1566
+ "loss": 0.0828,
1567
+ "step": 1100
1568
+ },
1569
+ {
1570
+ "epoch": 0.2913453299057412,
1571
+ "grad_norm": 0.04052734375,
1572
+ "learning_rate": 0.0001709724238026125,
1573
+ "loss": 0.0725,
1574
+ "step": 1105
1575
+ },
1576
+ {
1577
+ "epoch": 0.2926636345659482,
1578
+ "grad_norm": 0.2578125,
1579
+ "learning_rate": 0.00017084048027444254,
1580
+ "loss": 0.0204,
1581
+ "step": 1110
1582
+ },
1583
+ {
1584
+ "epoch": 0.2939819392261552,
1585
+ "grad_norm": 0.67578125,
1586
+ "learning_rate": 0.0001707085367462726,
1587
+ "loss": 0.0503,
1588
+ "step": 1115
1589
+ },
1590
+ {
1591
+ "epoch": 0.2953002438863621,
1592
+ "grad_norm": 0.0059814453125,
1593
+ "learning_rate": 0.00017057659321810265,
1594
+ "loss": 0.0144,
1595
+ "step": 1120
1596
+ },
1597
+ {
1598
+ "epoch": 0.2966185485465691,
1599
+ "grad_norm": 0.0269775390625,
1600
+ "learning_rate": 0.00017044464968993272,
1601
+ "loss": 0.0044,
1602
+ "step": 1125
1603
+ },
1604
+ {
1605
+ "epoch": 0.2979368532067761,
1606
+ "grad_norm": 0.1396484375,
1607
+ "learning_rate": 0.0001703127061617628,
1608
+ "loss": 0.013,
1609
+ "step": 1130
1610
+ },
1611
+ {
1612
+ "epoch": 0.29925515786698303,
1613
+ "grad_norm": 0.287109375,
1614
+ "learning_rate": 0.00017018076263359283,
1615
+ "loss": 0.0245,
1616
+ "step": 1135
1617
+ },
1618
+ {
1619
+ "epoch": 0.30057346252719,
1620
+ "grad_norm": 0.26171875,
1621
+ "learning_rate": 0.0001700488191054229,
1622
+ "loss": 0.0247,
1623
+ "step": 1140
1624
+ },
1625
+ {
1626
+ "epoch": 0.301891767187397,
1627
+ "grad_norm": 0.40625,
1628
+ "learning_rate": 0.00016991687557725294,
1629
+ "loss": 0.0402,
1630
+ "step": 1145
1631
+ },
1632
+ {
1633
+ "epoch": 0.30321007184760396,
1634
+ "grad_norm": 1.2578125,
1635
+ "learning_rate": 0.000169784932049083,
1636
+ "loss": 0.0071,
1637
+ "step": 1150
1638
+ },
1639
+ {
1640
+ "epoch": 0.30452837650781095,
1641
+ "grad_norm": 0.330078125,
1642
+ "learning_rate": 0.00016965298852091306,
1643
+ "loss": 0.0177,
1644
+ "step": 1155
1645
+ },
1646
+ {
1647
+ "epoch": 0.30584668116801794,
1648
+ "grad_norm": 0.07275390625,
1649
+ "learning_rate": 0.0001695210449927431,
1650
+ "loss": 0.0029,
1651
+ "step": 1160
1652
+ },
1653
+ {
1654
+ "epoch": 0.3071649858282249,
1655
+ "grad_norm": 0.455078125,
1656
+ "learning_rate": 0.00016938910146457317,
1657
+ "loss": 0.0262,
1658
+ "step": 1165
1659
+ },
1660
+ {
1661
+ "epoch": 0.30848329048843187,
1662
+ "grad_norm": 0.002655029296875,
1663
+ "learning_rate": 0.0001692571579364032,
1664
+ "loss": 0.0346,
1665
+ "step": 1170
1666
+ },
1667
+ {
1668
+ "epoch": 0.30980159514863886,
1669
+ "grad_norm": 0.1748046875,
1670
+ "learning_rate": 0.00016912521440823328,
1671
+ "loss": 0.0494,
1672
+ "step": 1175
1673
+ },
1674
+ {
1675
+ "epoch": 0.3111198998088458,
1676
+ "grad_norm": 1.4609375,
1677
+ "learning_rate": 0.00016899327088006335,
1678
+ "loss": 0.0603,
1679
+ "step": 1180
1680
+ },
1681
+ {
1682
+ "epoch": 0.3124382044690528,
1683
+ "grad_norm": 0.1572265625,
1684
+ "learning_rate": 0.0001688613273518934,
1685
+ "loss": 0.0366,
1686
+ "step": 1185
1687
+ },
1688
+ {
1689
+ "epoch": 0.3137565091292598,
1690
+ "grad_norm": 0.01422119140625,
1691
+ "learning_rate": 0.00016872938382372346,
1692
+ "loss": 0.0678,
1693
+ "step": 1190
1694
+ },
1695
+ {
1696
+ "epoch": 0.3150748137894667,
1697
+ "grad_norm": 0.2412109375,
1698
+ "learning_rate": 0.0001685974402955535,
1699
+ "loss": 0.0359,
1700
+ "step": 1195
1701
+ },
1702
+ {
1703
+ "epoch": 0.3163931184496737,
1704
+ "grad_norm": 0.275390625,
1705
+ "learning_rate": 0.00016846549676738357,
1706
+ "loss": 0.1099,
1707
+ "step": 1200
1708
+ },
1709
+ {
1710
+ "epoch": 0.3177114231098807,
1711
+ "grad_norm": 0.212890625,
1712
+ "learning_rate": 0.00016833355323921364,
1713
+ "loss": 0.0343,
1714
+ "step": 1205
1715
+ },
1716
+ {
1717
+ "epoch": 0.31902972777008765,
1718
+ "grad_norm": 0.0302734375,
1719
+ "learning_rate": 0.00016820160971104368,
1720
+ "loss": 0.0138,
1721
+ "step": 1210
1722
+ },
1723
+ {
1724
+ "epoch": 0.32034803243029464,
1725
+ "grad_norm": 0.016845703125,
1726
+ "learning_rate": 0.00016806966618287375,
1727
+ "loss": 0.0202,
1728
+ "step": 1215
1729
+ },
1730
+ {
1731
+ "epoch": 0.32166633709050163,
1732
+ "grad_norm": 0.1474609375,
1733
+ "learning_rate": 0.0001679377226547038,
1734
+ "loss": 0.0442,
1735
+ "step": 1220
1736
+ },
1737
+ {
1738
+ "epoch": 0.32298464175070857,
1739
+ "grad_norm": 0.049072265625,
1740
+ "learning_rate": 0.00016780577912653386,
1741
+ "loss": 0.0375,
1742
+ "step": 1225
1743
+ },
1744
+ {
1745
+ "epoch": 0.32430294641091556,
1746
+ "grad_norm": 0.1337890625,
1747
+ "learning_rate": 0.0001676738355983639,
1748
+ "loss": 0.01,
1749
+ "step": 1230
1750
+ },
1751
+ {
1752
+ "epoch": 0.32562125107112255,
1753
+ "grad_norm": 0.02197265625,
1754
+ "learning_rate": 0.00016754189207019397,
1755
+ "loss": 0.0139,
1756
+ "step": 1235
1757
+ },
1758
+ {
1759
+ "epoch": 0.3269395557313295,
1760
+ "grad_norm": 0.09228515625,
1761
+ "learning_rate": 0.00016740994854202404,
1762
+ "loss": 0.014,
1763
+ "step": 1240
1764
+ },
1765
+ {
1766
+ "epoch": 0.3282578603915365,
1767
+ "grad_norm": 0.47265625,
1768
+ "learning_rate": 0.00016727800501385408,
1769
+ "loss": 0.1546,
1770
+ "step": 1245
1771
+ },
1772
+ {
1773
+ "epoch": 0.3295761650517435,
1774
+ "grad_norm": 0.02294921875,
1775
+ "learning_rate": 0.00016714606148568413,
1776
+ "loss": 0.0803,
1777
+ "step": 1250
1778
+ },
1779
+ {
1780
+ "epoch": 0.3308944697119504,
1781
+ "grad_norm": 0.185546875,
1782
+ "learning_rate": 0.00016701411795751417,
1783
+ "loss": 0.0376,
1784
+ "step": 1255
1785
+ },
1786
+ {
1787
+ "epoch": 0.3322127743721574,
1788
+ "grad_norm": 0.1123046875,
1789
+ "learning_rate": 0.00016688217442934424,
1790
+ "loss": 0.0375,
1791
+ "step": 1260
1792
+ },
1793
+ {
1794
+ "epoch": 0.3335310790323644,
1795
+ "grad_norm": 1.03125,
1796
+ "learning_rate": 0.0001667502309011743,
1797
+ "loss": 0.0442,
1798
+ "step": 1265
1799
+ },
1800
+ {
1801
+ "epoch": 0.33484938369257133,
1802
+ "grad_norm": 0.0172119140625,
1803
+ "learning_rate": 0.00016661828737300435,
1804
+ "loss": 0.0261,
1805
+ "step": 1270
1806
+ },
1807
+ {
1808
+ "epoch": 0.3361676883527783,
1809
+ "grad_norm": 0.42578125,
1810
+ "learning_rate": 0.00016648634384483442,
1811
+ "loss": 0.0553,
1812
+ "step": 1275
1813
+ },
1814
+ {
1815
+ "epoch": 0.3374859930129853,
1816
+ "grad_norm": 0.1328125,
1817
+ "learning_rate": 0.00016635440031666446,
1818
+ "loss": 0.0065,
1819
+ "step": 1280
1820
+ },
1821
+ {
1822
+ "epoch": 0.33880429767319226,
1823
+ "grad_norm": 0.263671875,
1824
+ "learning_rate": 0.00016622245678849453,
1825
+ "loss": 0.0527,
1826
+ "step": 1285
1827
+ },
1828
+ {
1829
+ "epoch": 0.34012260233339925,
1830
+ "grad_norm": 0.314453125,
1831
+ "learning_rate": 0.0001660905132603246,
1832
+ "loss": 0.0297,
1833
+ "step": 1290
1834
+ },
1835
+ {
1836
+ "epoch": 0.34144090699360624,
1837
+ "grad_norm": 0.04345703125,
1838
+ "learning_rate": 0.00016595856973215464,
1839
+ "loss": 0.0477,
1840
+ "step": 1295
1841
+ },
1842
+ {
1843
+ "epoch": 0.3427592116538132,
1844
+ "grad_norm": 0.08154296875,
1845
+ "learning_rate": 0.0001658266262039847,
1846
+ "loss": 0.0298,
1847
+ "step": 1300
1848
+ },
1849
+ {
1850
+ "epoch": 0.34407751631402017,
1851
+ "grad_norm": 0.08935546875,
1852
+ "learning_rate": 0.00016569468267581475,
1853
+ "loss": 0.0481,
1854
+ "step": 1305
1855
+ },
1856
+ {
1857
+ "epoch": 0.34539582097422716,
1858
+ "grad_norm": 0.06640625,
1859
+ "learning_rate": 0.00016556273914764482,
1860
+ "loss": 0.0153,
1861
+ "step": 1310
1862
+ },
1863
+ {
1864
+ "epoch": 0.3467141256344341,
1865
+ "grad_norm": 0.00592041015625,
1866
+ "learning_rate": 0.00016543079561947486,
1867
+ "loss": 0.0111,
1868
+ "step": 1315
1869
+ },
1870
+ {
1871
+ "epoch": 0.3480324302946411,
1872
+ "grad_norm": 0.2236328125,
1873
+ "learning_rate": 0.00016529885209130493,
1874
+ "loss": 0.0309,
1875
+ "step": 1320
1876
+ },
1877
+ {
1878
+ "epoch": 0.3493507349548481,
1879
+ "grad_norm": 0.0198974609375,
1880
+ "learning_rate": 0.000165166908563135,
1881
+ "loss": 0.0579,
1882
+ "step": 1325
1883
+ },
1884
+ {
1885
+ "epoch": 0.350669039615055,
1886
+ "grad_norm": 0.10107421875,
1887
+ "learning_rate": 0.00016503496503496504,
1888
+ "loss": 0.0055,
1889
+ "step": 1330
1890
+ },
1891
+ {
1892
+ "epoch": 0.351987344275262,
1893
+ "grad_norm": 0.71875,
1894
+ "learning_rate": 0.00016490302150679511,
1895
+ "loss": 0.0299,
1896
+ "step": 1335
1897
+ },
1898
+ {
1899
+ "epoch": 0.353305648935469,
1900
+ "grad_norm": 0.01348876953125,
1901
+ "learning_rate": 0.00016477107797862516,
1902
+ "loss": 0.0943,
1903
+ "step": 1340
1904
+ },
1905
+ {
1906
+ "epoch": 0.35462395359567594,
1907
+ "grad_norm": 0.3046875,
1908
+ "learning_rate": 0.00016463913445045523,
1909
+ "loss": 0.0216,
1910
+ "step": 1345
1911
+ },
1912
+ {
1913
+ "epoch": 0.35594225825588294,
1914
+ "grad_norm": 0.02392578125,
1915
+ "learning_rate": 0.00016450719092228527,
1916
+ "loss": 0.0265,
1917
+ "step": 1350
1918
+ },
1919
+ {
1920
+ "epoch": 0.35726056291608993,
1921
+ "grad_norm": 0.453125,
1922
+ "learning_rate": 0.0001643752473941153,
1923
+ "loss": 0.0539,
1924
+ "step": 1355
1925
+ },
1926
+ {
1927
+ "epoch": 0.35857886757629687,
1928
+ "grad_norm": 0.00823974609375,
1929
+ "learning_rate": 0.00016424330386594538,
1930
+ "loss": 0.0139,
1931
+ "step": 1360
1932
+ },
1933
+ {
1934
+ "epoch": 0.35989717223650386,
1935
+ "grad_norm": 0.55859375,
1936
+ "learning_rate": 0.00016411136033777542,
1937
+ "loss": 0.0428,
1938
+ "step": 1365
1939
+ },
1940
+ {
1941
+ "epoch": 0.36121547689671085,
1942
+ "grad_norm": 0.052734375,
1943
+ "learning_rate": 0.0001639794168096055,
1944
+ "loss": 0.0346,
1945
+ "step": 1370
1946
+ },
1947
+ {
1948
+ "epoch": 0.3625337815569178,
1949
+ "grad_norm": 0.12158203125,
1950
+ "learning_rate": 0.00016384747328143556,
1951
+ "loss": 0.0095,
1952
+ "step": 1375
1953
+ },
1954
+ {
1955
+ "epoch": 0.3638520862171248,
1956
+ "grad_norm": 0.0240478515625,
1957
+ "learning_rate": 0.0001637155297532656,
1958
+ "loss": 0.0224,
1959
+ "step": 1380
1960
+ },
1961
+ {
1962
+ "epoch": 0.3651703908773318,
1963
+ "grad_norm": 0.01318359375,
1964
+ "learning_rate": 0.00016358358622509567,
1965
+ "loss": 0.0316,
1966
+ "step": 1385
1967
+ },
1968
+ {
1969
+ "epoch": 0.3664886955375387,
1970
+ "grad_norm": 0.011962890625,
1971
+ "learning_rate": 0.0001634516426969257,
1972
+ "loss": 0.0051,
1973
+ "step": 1390
1974
+ },
1975
+ {
1976
+ "epoch": 0.3678070001977457,
1977
+ "grad_norm": 0.00396728515625,
1978
+ "learning_rate": 0.00016331969916875578,
1979
+ "loss": 0.038,
1980
+ "step": 1395
1981
+ },
1982
+ {
1983
+ "epoch": 0.3691253048579527,
1984
+ "grad_norm": 0.375,
1985
+ "learning_rate": 0.00016318775564058585,
1986
+ "loss": 0.029,
1987
+ "step": 1400
1988
+ },
1989
+ {
1990
+ "epoch": 0.37044360951815963,
1991
+ "grad_norm": 0.265625,
1992
+ "learning_rate": 0.0001630558121124159,
1993
+ "loss": 0.0072,
1994
+ "step": 1405
1995
+ },
1996
+ {
1997
+ "epoch": 0.3717619141783666,
1998
+ "grad_norm": 0.00127410888671875,
1999
+ "learning_rate": 0.00016292386858424596,
2000
+ "loss": 0.0381,
2001
+ "step": 1410
2002
+ },
2003
+ {
2004
+ "epoch": 0.3730802188385736,
2005
+ "grad_norm": 1.15625,
2006
+ "learning_rate": 0.000162791925056076,
2007
+ "loss": 0.0573,
2008
+ "step": 1415
2009
+ },
2010
+ {
2011
+ "epoch": 0.37439852349878056,
2012
+ "grad_norm": 0.0244140625,
2013
+ "learning_rate": 0.00016265998152790607,
2014
+ "loss": 0.051,
2015
+ "step": 1420
2016
+ },
2017
+ {
2018
+ "epoch": 0.37571682815898755,
2019
+ "grad_norm": 0.0015106201171875,
2020
+ "learning_rate": 0.00016252803799973612,
2021
+ "loss": 0.0239,
2022
+ "step": 1425
2023
+ },
2024
+ {
2025
+ "epoch": 0.37703513281919454,
2026
+ "grad_norm": 0.26953125,
2027
+ "learning_rate": 0.00016239609447156618,
2028
+ "loss": 0.0165,
2029
+ "step": 1430
2030
+ },
2031
+ {
2032
+ "epoch": 0.3783534374794015,
2033
+ "grad_norm": 0.006134033203125,
2034
+ "learning_rate": 0.00016226415094339625,
2035
+ "loss": 0.0071,
2036
+ "step": 1435
2037
+ },
2038
+ {
2039
+ "epoch": 0.37967174213960847,
2040
+ "grad_norm": 2.828125,
2041
+ "learning_rate": 0.0001621322074152263,
2042
+ "loss": 0.0272,
2043
+ "step": 1440
2044
+ },
2045
+ {
2046
+ "epoch": 0.38099004679981546,
2047
+ "grad_norm": 0.349609375,
2048
+ "learning_rate": 0.00016200026388705637,
2049
+ "loss": 0.0647,
2050
+ "step": 1445
2051
+ },
2052
+ {
2053
+ "epoch": 0.3823083514600224,
2054
+ "grad_norm": 0.09326171875,
2055
+ "learning_rate": 0.00016186832035888638,
2056
+ "loss": 0.0262,
2057
+ "step": 1450
2058
+ },
2059
+ {
2060
+ "epoch": 0.3836266561202294,
2061
+ "grad_norm": 0.041015625,
2062
+ "learning_rate": 0.00016173637683071645,
2063
+ "loss": 0.0576,
2064
+ "step": 1455
2065
+ },
2066
+ {
2067
+ "epoch": 0.3849449607804364,
2068
+ "grad_norm": 0.033935546875,
2069
+ "learning_rate": 0.00016160443330254652,
2070
+ "loss": 0.0142,
2071
+ "step": 1460
2072
+ },
2073
+ {
2074
+ "epoch": 0.3862632654406433,
2075
+ "grad_norm": 0.09130859375,
2076
+ "learning_rate": 0.00016147248977437656,
2077
+ "loss": 0.0348,
2078
+ "step": 1465
2079
+ },
2080
+ {
2081
+ "epoch": 0.3875815701008503,
2082
+ "grad_norm": 2.390625,
2083
+ "learning_rate": 0.00016134054624620663,
2084
+ "loss": 0.0672,
2085
+ "step": 1470
2086
+ },
2087
+ {
2088
+ "epoch": 0.3888998747610573,
2089
+ "grad_norm": 0.439453125,
2090
+ "learning_rate": 0.00016120860271803667,
2091
+ "loss": 0.0121,
2092
+ "step": 1475
2093
+ },
2094
+ {
2095
+ "epoch": 0.39021817942126424,
2096
+ "grad_norm": 0.1298828125,
2097
+ "learning_rate": 0.00016107665918986674,
2098
+ "loss": 0.0114,
2099
+ "step": 1480
2100
+ },
2101
+ {
2102
+ "epoch": 0.39153648408147124,
2103
+ "grad_norm": 0.85546875,
2104
+ "learning_rate": 0.0001609447156616968,
2105
+ "loss": 0.0968,
2106
+ "step": 1485
2107
+ },
2108
+ {
2109
+ "epoch": 0.39285478874167823,
2110
+ "grad_norm": 0.703125,
2111
+ "learning_rate": 0.00016081277213352685,
2112
+ "loss": 0.0349,
2113
+ "step": 1490
2114
+ },
2115
+ {
2116
+ "epoch": 0.39417309340188517,
2117
+ "grad_norm": 0.021728515625,
2118
+ "learning_rate": 0.00016068082860535692,
2119
+ "loss": 0.0106,
2120
+ "step": 1495
2121
+ },
2122
+ {
2123
+ "epoch": 0.39549139806209216,
2124
+ "grad_norm": 0.7265625,
2125
+ "learning_rate": 0.00016054888507718696,
2126
+ "loss": 0.0225,
2127
+ "step": 1500
2128
+ },
2129
+ {
2130
+ "epoch": 0.39549139806209216,
2131
+ "eval_loss": 0.03515048325061798,
2132
+ "eval_model_preparation_time": 0.0076,
2133
+ "eval_runtime": 457.3497,
2134
+ "eval_samples_per_second": 7.373,
2135
+ "eval_steps_per_second": 3.686,
2136
+ "step": 1500
2137
+ }
2138
+ ],
2139
+ "logging_steps": 5,
2140
+ "max_steps": 7584,
2141
+ "num_input_tokens_seen": 0,
2142
+ "num_train_epochs": 2,
2143
+ "save_steps": 500,
2144
+ "stateful_callbacks": {
2145
+ "TrainerControl": {
2146
+ "args": {
2147
+ "should_epoch_stop": false,
2148
+ "should_evaluate": false,
2149
+ "should_log": false,
2150
+ "should_save": true,
2151
+ "should_training_stop": false
2152
+ },
2153
+ "attributes": {}
2154
+ }
2155
+ },
2156
+ "total_flos": 1.210468921462825e+17,
2157
+ "train_batch_size": 2,
2158
+ "trial_name": null,
2159
+ "trial_params": null
2160
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cee8b68ab8cbec1968c4eec747022ea57cd350072097ab37179dd29309655d7
3
+ size 5688
checkpoint-2000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-2000/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "down_proj",
27
+ "gate_proj",
28
+ "q_proj",
29
+ "up_proj",
30
+ "o_proj",
31
+ "v_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-2000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7879a2046058809073bce1db45056566a43bc72d3a3abc6227d7ac61454497be
3
+ size 97307544
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f10c469d028995fe33d6f97b402322e974d8ebaffe915876f1930af0de5a7c9
3
+ size 50866370
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd55c3b412ed63dda63d944e5148e12c72c1382a50017bf4d626548cc6ff19a6
3
+ size 14244
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a02bc39f46f30e3cf91b0d3feeb294f6ed0b411fe847f9f462a0df4af85e4e0
3
+ size 1064
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-2000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,2067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ }
2052
+ },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", 
\" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
2055
+ "clean_up_tokenization_spaces": true,
2056
+ "eos_token": "<|eot_id|>",
2057
+ "extra_special_tokens": {},
2058
+ "model_input_names": [
2059
+ "input_ids",
2060
+ "attention_mask"
2061
+ ],
2062
+ "model_max_length": 131072,
2063
+ "pad_token": "<|finetune_right_pad_id|>",
2064
+ "padding_side": "right",
2065
+ "tokenizer_class": "PreTrainedTokenizer",
2066
+ "unk_token": null
2067
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,2868 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.5273218640827896,
5
+ "eval_steps": 500,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001318304660206974,
13
+ "grad_norm": 4.59375,
14
+ "learning_rate": 0.0002,
15
+ "loss": 1.9624,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.002636609320413948,
20
+ "grad_norm": 1.7421875,
21
+ "learning_rate": 0.00019986805647183008,
22
+ "loss": 0.6513,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.003954913980620921,
27
+ "grad_norm": 1.84375,
28
+ "learning_rate": 0.00019973611294366012,
29
+ "loss": 0.1146,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.005273218640827896,
34
+ "grad_norm": 1.3203125,
35
+ "learning_rate": 0.0001996041694154902,
36
+ "loss": 0.0529,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.006591523301034869,
41
+ "grad_norm": 0.40234375,
42
+ "learning_rate": 0.00019947222588732023,
43
+ "loss": 0.1214,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.007909827961241843,
48
+ "grad_norm": 1.5390625,
49
+ "learning_rate": 0.0001993402823591503,
50
+ "loss": 0.0919,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.009228132621448816,
55
+ "grad_norm": 0.06201171875,
56
+ "learning_rate": 0.00019920833883098034,
57
+ "loss": 0.09,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.010546437281655791,
62
+ "grad_norm": 1.53125,
63
+ "learning_rate": 0.0001990763953028104,
64
+ "loss": 0.1945,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.011864741941862765,
69
+ "grad_norm": 0.2890625,
70
+ "learning_rate": 0.00019894445177464048,
71
+ "loss": 0.1259,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.013183046602069738,
76
+ "grad_norm": 0.609375,
77
+ "learning_rate": 0.00019881250824647052,
78
+ "loss": 0.027,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.014501351262276712,
83
+ "grad_norm": 0.369140625,
84
+ "learning_rate": 0.00019868056471830057,
85
+ "loss": 0.1068,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.015819655922483685,
90
+ "grad_norm": 0.34765625,
91
+ "learning_rate": 0.00019854862119013064,
92
+ "loss": 0.0542,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.01713796058269066,
97
+ "grad_norm": 0.055419921875,
98
+ "learning_rate": 0.00019841667766196068,
99
+ "loss": 0.0901,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.018456265242897632,
104
+ "grad_norm": 0.0247802734375,
105
+ "learning_rate": 0.00019828473413379075,
106
+ "loss": 0.0091,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.019774569903104607,
111
+ "grad_norm": 0.0079345703125,
112
+ "learning_rate": 0.0001981527906056208,
113
+ "loss": 0.0744,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.021092874563311582,
118
+ "grad_norm": 0.65234375,
119
+ "learning_rate": 0.00019802084707745086,
120
+ "loss": 0.1108,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.022411179223518554,
125
+ "grad_norm": 0.50390625,
126
+ "learning_rate": 0.0001978889035492809,
127
+ "loss": 0.0446,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.02372948388372553,
132
+ "grad_norm": 0.1787109375,
133
+ "learning_rate": 0.00019775696002111097,
134
+ "loss": 0.0982,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.0250477885439325,
139
+ "grad_norm": 0.490234375,
140
+ "learning_rate": 0.00019762501649294104,
141
+ "loss": 0.1035,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.026366093204139476,
146
+ "grad_norm": 0.12158203125,
147
+ "learning_rate": 0.00019749307296477108,
148
+ "loss": 0.0401,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.02768439786434645,
153
+ "grad_norm": 0.16015625,
154
+ "learning_rate": 0.00019736112943660115,
155
+ "loss": 0.0309,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.029002702524553423,
160
+ "grad_norm": 1.359375,
161
+ "learning_rate": 0.0001972291859084312,
162
+ "loss": 0.1032,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.0303210071847604,
167
+ "grad_norm": 0.52734375,
168
+ "learning_rate": 0.00019709724238026126,
169
+ "loss": 0.0811,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.03163931184496737,
174
+ "grad_norm": 0.177734375,
175
+ "learning_rate": 0.00019696529885209133,
176
+ "loss": 0.0258,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.03295761650517435,
181
+ "grad_norm": 0.234375,
182
+ "learning_rate": 0.00019683335532392137,
183
+ "loss": 0.0437,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.03427592116538132,
188
+ "grad_norm": 1.3046875,
189
+ "learning_rate": 0.00019670141179575144,
190
+ "loss": 0.0967,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.03559422582558829,
195
+ "grad_norm": 0.2734375,
196
+ "learning_rate": 0.00019656946826758148,
197
+ "loss": 0.0132,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.036912530485795264,
202
+ "grad_norm": 0.66015625,
203
+ "learning_rate": 0.00019643752473941155,
204
+ "loss": 0.0396,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.03823083514600224,
209
+ "grad_norm": 1.0546875,
210
+ "learning_rate": 0.0001963055812112416,
211
+ "loss": 0.0449,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.039549139806209214,
216
+ "grad_norm": 0.2021484375,
217
+ "learning_rate": 0.00019617363768307166,
218
+ "loss": 0.1196,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.040867444466416186,
223
+ "grad_norm": 0.5859375,
224
+ "learning_rate": 0.0001960416941549017,
225
+ "loss": 0.0588,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.042185749126623165,
230
+ "grad_norm": 0.06005859375,
231
+ "learning_rate": 0.00019590975062673175,
232
+ "loss": 0.0234,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.04350405378683014,
237
+ "grad_norm": 0.4921875,
238
+ "learning_rate": 0.00019577780709856182,
239
+ "loss": 0.0916,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.04482235844703711,
244
+ "grad_norm": 0.84375,
245
+ "learning_rate": 0.0001956458635703919,
246
+ "loss": 0.0271,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.04614066310724409,
251
+ "grad_norm": 0.8828125,
252
+ "learning_rate": 0.00019551392004222193,
253
+ "loss": 0.0175,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.04745896776745106,
258
+ "grad_norm": 0.0152587890625,
259
+ "learning_rate": 0.000195381976514052,
260
+ "loss": 0.0356,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.04877727242765803,
265
+ "grad_norm": 0.09326171875,
266
+ "learning_rate": 0.00019525003298588204,
267
+ "loss": 0.0057,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.050095577087865,
272
+ "grad_norm": 0.24609375,
273
+ "learning_rate": 0.0001951180894577121,
274
+ "loss": 0.0082,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.05141388174807198,
279
+ "grad_norm": 0.05029296875,
280
+ "learning_rate": 0.00019498614592954215,
281
+ "loss": 0.0178,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.05273218640827895,
286
+ "grad_norm": 0.0390625,
287
+ "learning_rate": 0.00019485420240137222,
288
+ "loss": 0.0789,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.054050491068485924,
293
+ "grad_norm": 0.5625,
294
+ "learning_rate": 0.0001947222588732023,
295
+ "loss": 0.0645,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.0553687957286929,
300
+ "grad_norm": 0.53515625,
301
+ "learning_rate": 0.00019459031534503233,
302
+ "loss": 0.116,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.056687100388899875,
307
+ "grad_norm": 0.55078125,
308
+ "learning_rate": 0.0001944583718168624,
309
+ "loss": 0.0516,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.058005405049106847,
314
+ "grad_norm": 0.314453125,
315
+ "learning_rate": 0.00019432642828869244,
316
+ "loss": 0.1019,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.059323709709313825,
321
+ "grad_norm": 0.1123046875,
322
+ "learning_rate": 0.0001941944847605225,
323
+ "loss": 0.0529,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.0606420143695208,
328
+ "grad_norm": 0.4921875,
329
+ "learning_rate": 0.00019406254123235256,
330
+ "loss": 0.0368,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.06196031902972777,
335
+ "grad_norm": 0.054443359375,
336
+ "learning_rate": 0.00019393059770418262,
337
+ "loss": 0.037,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.06327862368993474,
342
+ "grad_norm": 0.008544921875,
343
+ "learning_rate": 0.0001937986541760127,
344
+ "loss": 0.0324,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.06459692835014172,
349
+ "grad_norm": 1.5,
350
+ "learning_rate": 0.00019366671064784274,
351
+ "loss": 0.0334,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 0.0659152330103487,
356
+ "grad_norm": 0.2109375,
357
+ "learning_rate": 0.0001935347671196728,
358
+ "loss": 0.0671,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.06723353767055566,
363
+ "grad_norm": 2.0625,
364
+ "learning_rate": 0.00019340282359150285,
365
+ "loss": 0.1559,
366
+ "step": 255
367
+ },
368
+ {
369
+ "epoch": 0.06855184233076264,
370
+ "grad_norm": 0.7734375,
371
+ "learning_rate": 0.0001932708800633329,
372
+ "loss": 0.0198,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.06987014699096962,
377
+ "grad_norm": 0.42578125,
378
+ "learning_rate": 0.00019313893653516296,
379
+ "loss": 0.0151,
380
+ "step": 265
381
+ },
382
+ {
383
+ "epoch": 0.07118845165117658,
384
+ "grad_norm": 0.1884765625,
385
+ "learning_rate": 0.000193006993006993,
386
+ "loss": 0.0269,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.07250675631138356,
391
+ "grad_norm": 1.546875,
392
+ "learning_rate": 0.00019287504947882307,
393
+ "loss": 0.0565,
394
+ "step": 275
395
+ },
396
+ {
397
+ "epoch": 0.07382506097159053,
398
+ "grad_norm": 0.5078125,
399
+ "learning_rate": 0.0001927431059506531,
400
+ "loss": 0.0942,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.0751433656317975,
405
+ "grad_norm": 0.392578125,
406
+ "learning_rate": 0.00019261116242248318,
407
+ "loss": 0.0061,
408
+ "step": 285
409
+ },
410
+ {
411
+ "epoch": 0.07646167029200449,
412
+ "grad_norm": 1.9140625,
413
+ "learning_rate": 0.00019247921889431325,
414
+ "loss": 0.0497,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.07777997495221145,
419
+ "grad_norm": 0.08837890625,
420
+ "learning_rate": 0.0001923472753661433,
421
+ "loss": 0.0573,
422
+ "step": 295
423
+ },
424
+ {
425
+ "epoch": 0.07909827961241843,
426
+ "grad_norm": 1.046875,
427
+ "learning_rate": 0.00019221533183797336,
428
+ "loss": 0.0528,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.08041658427262541,
433
+ "grad_norm": 0.2275390625,
434
+ "learning_rate": 0.0001920833883098034,
435
+ "loss": 0.0506,
436
+ "step": 305
437
+ },
438
+ {
439
+ "epoch": 0.08173488893283237,
440
+ "grad_norm": 0.08203125,
441
+ "learning_rate": 0.00019195144478163347,
442
+ "loss": 0.0307,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.08305319359303935,
447
+ "grad_norm": 0.111328125,
448
+ "learning_rate": 0.00019181950125346354,
449
+ "loss": 0.0365,
450
+ "step": 315
451
+ },
452
+ {
453
+ "epoch": 0.08437149825324633,
454
+ "grad_norm": 1.2890625,
455
+ "learning_rate": 0.00019168755772529358,
456
+ "loss": 0.0447,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.0856898029134533,
461
+ "grad_norm": 0.6015625,
462
+ "learning_rate": 0.00019155561419712365,
463
+ "loss": 0.0605,
464
+ "step": 325
465
+ },
466
+ {
467
+ "epoch": 0.08700810757366027,
468
+ "grad_norm": 0.71875,
469
+ "learning_rate": 0.0001914236706689537,
470
+ "loss": 0.0846,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.08832641223386725,
475
+ "grad_norm": 0.1494140625,
476
+ "learning_rate": 0.00019129172714078376,
477
+ "loss": 0.0713,
478
+ "step": 335
479
+ },
480
+ {
481
+ "epoch": 0.08964471689407422,
482
+ "grad_norm": 0.1669921875,
483
+ "learning_rate": 0.0001911597836126138,
484
+ "loss": 0.0826,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.0909630215542812,
489
+ "grad_norm": 2.203125,
490
+ "learning_rate": 0.00019102784008444388,
491
+ "loss": 0.0441,
492
+ "step": 345
493
+ },
494
+ {
495
+ "epoch": 0.09228132621448817,
496
+ "grad_norm": 1.21875,
497
+ "learning_rate": 0.00019089589655627395,
498
+ "loss": 0.1378,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.09359963087469514,
503
+ "grad_norm": 3.0625,
504
+ "learning_rate": 0.00019076395302810396,
505
+ "loss": 0.1552,
506
+ "step": 355
507
+ },
508
+ {
509
+ "epoch": 0.09491793553490212,
510
+ "grad_norm": 0.232421875,
511
+ "learning_rate": 0.00019063200949993403,
512
+ "loss": 0.0458,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.0962362401951091,
517
+ "grad_norm": 0.71875,
518
+ "learning_rate": 0.0001905000659717641,
519
+ "loss": 0.0312,
520
+ "step": 365
521
+ },
522
+ {
523
+ "epoch": 0.09755454485531606,
524
+ "grad_norm": 0.0218505859375,
525
+ "learning_rate": 0.00019036812244359414,
526
+ "loss": 0.0247,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.09887284951552304,
531
+ "grad_norm": 0.064453125,
532
+ "learning_rate": 0.0001902361789154242,
533
+ "loss": 0.054,
534
+ "step": 375
535
+ },
536
+ {
537
+ "epoch": 0.10019115417573,
538
+ "grad_norm": 0.021240234375,
539
+ "learning_rate": 0.00019010423538725425,
540
+ "loss": 0.0023,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.10150945883593698,
545
+ "grad_norm": 0.0361328125,
546
+ "learning_rate": 0.00018997229185908432,
547
+ "loss": 0.0884,
548
+ "step": 385
549
+ },
550
+ {
551
+ "epoch": 0.10282776349614396,
552
+ "grad_norm": 1.703125,
553
+ "learning_rate": 0.00018984034833091436,
554
+ "loss": 0.0506,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.10414606815635093,
559
+ "grad_norm": 0.08837890625,
560
+ "learning_rate": 0.00018970840480274443,
561
+ "loss": 0.1123,
562
+ "step": 395
563
+ },
564
+ {
565
+ "epoch": 0.1054643728165579,
566
+ "grad_norm": 0.6953125,
567
+ "learning_rate": 0.0001895764612745745,
568
+ "loss": 0.0597,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.10678267747676488,
573
+ "grad_norm": 0.18359375,
574
+ "learning_rate": 0.00018944451774640454,
575
+ "loss": 0.0138,
576
+ "step": 405
577
+ },
578
+ {
579
+ "epoch": 0.10810098213697185,
580
+ "grad_norm": 0.0272216796875,
581
+ "learning_rate": 0.0001893125742182346,
582
+ "loss": 0.0249,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.10941928679717883,
587
+ "grad_norm": 0.00970458984375,
588
+ "learning_rate": 0.00018918063069006466,
589
+ "loss": 0.0084,
590
+ "step": 415
591
+ },
592
+ {
593
+ "epoch": 0.1107375914573858,
594
+ "grad_norm": 0.54296875,
595
+ "learning_rate": 0.00018904868716189472,
596
+ "loss": 0.0541,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.11205589611759277,
601
+ "grad_norm": 0.74609375,
602
+ "learning_rate": 0.00018891674363372477,
603
+ "loss": 0.007,
604
+ "step": 425
605
+ },
606
+ {
607
+ "epoch": 0.11337420077779975,
608
+ "grad_norm": 0.0211181640625,
609
+ "learning_rate": 0.00018878480010555484,
610
+ "loss": 0.0875,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.11469250543800673,
615
+ "grad_norm": 0.9296875,
616
+ "learning_rate": 0.0001886528565773849,
617
+ "loss": 0.1207,
618
+ "step": 435
619
+ },
620
+ {
621
+ "epoch": 0.11601081009821369,
622
+ "grad_norm": 1.2734375,
623
+ "learning_rate": 0.00018852091304921495,
624
+ "loss": 0.1143,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.11732911475842067,
629
+ "grad_norm": 0.6484375,
630
+ "learning_rate": 0.00018838896952104502,
631
+ "loss": 0.0393,
632
+ "step": 445
633
+ },
634
+ {
635
+ "epoch": 0.11864741941862765,
636
+ "grad_norm": 0.1552734375,
637
+ "learning_rate": 0.00018825702599287506,
638
+ "loss": 0.02,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.11996572407883462,
643
+ "grad_norm": 0.486328125,
644
+ "learning_rate": 0.0001881250824647051,
645
+ "loss": 0.0891,
646
+ "step": 455
647
+ },
648
+ {
649
+ "epoch": 0.1212840287390416,
650
+ "grad_norm": 1.0,
651
+ "learning_rate": 0.00018799313893653517,
652
+ "loss": 0.0469,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.12260233339924857,
657
+ "grad_norm": 0.2099609375,
658
+ "learning_rate": 0.0001878611954083652,
659
+ "loss": 0.019,
660
+ "step": 465
661
+ },
662
+ {
663
+ "epoch": 0.12392063805945554,
664
+ "grad_norm": 0.03857421875,
665
+ "learning_rate": 0.00018772925188019528,
666
+ "loss": 0.007,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.12523894271966252,
671
+ "grad_norm": 0.0257568359375,
672
+ "learning_rate": 0.00018759730835202532,
673
+ "loss": 0.0039,
674
+ "step": 475
675
+ },
676
+ {
677
+ "epoch": 0.12655724737986948,
678
+ "grad_norm": 0.014404296875,
679
+ "learning_rate": 0.0001874653648238554,
680
+ "loss": 0.0043,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.12787555204007647,
685
+ "grad_norm": 0.51953125,
686
+ "learning_rate": 0.00018733342129568546,
687
+ "loss": 0.1326,
688
+ "step": 485
689
+ },
690
+ {
691
+ "epoch": 0.12919385670028344,
692
+ "grad_norm": 0.99609375,
693
+ "learning_rate": 0.0001872014777675155,
694
+ "loss": 0.0369,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.1305121613604904,
699
+ "grad_norm": 0.2734375,
700
+ "learning_rate": 0.00018706953423934557,
701
+ "loss": 0.0395,
702
+ "step": 495
703
+ },
704
+ {
705
+ "epoch": 0.1318304660206974,
706
+ "grad_norm": 0.083984375,
707
+ "learning_rate": 0.00018693759071117561,
708
+ "loss": 0.0284,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.1318304660206974,
713
+ "eval_loss": 0.04542969539761543,
714
+ "eval_model_preparation_time": 0.0076,
715
+ "eval_runtime": 457.5293,
716
+ "eval_samples_per_second": 7.37,
717
+ "eval_steps_per_second": 3.685,
718
+ "step": 500
719
+ },
720
+ {
721
+ "epoch": 0.13314877068090436,
722
+ "grad_norm": 0.0291748046875,
723
+ "learning_rate": 0.00018680564718300568,
724
+ "loss": 0.0533,
725
+ "step": 505
726
+ },
727
+ {
728
+ "epoch": 0.13446707534111133,
729
+ "grad_norm": 0.71484375,
730
+ "learning_rate": 0.00018667370365483575,
731
+ "loss": 0.0183,
732
+ "step": 510
733
+ },
734
+ {
735
+ "epoch": 0.13578538000131832,
736
+ "grad_norm": 0.018798828125,
737
+ "learning_rate": 0.0001865417601266658,
738
+ "loss": 0.0473,
739
+ "step": 515
740
+ },
741
+ {
742
+ "epoch": 0.13710368466152528,
743
+ "grad_norm": 0.388671875,
744
+ "learning_rate": 0.00018640981659849586,
745
+ "loss": 0.0562,
746
+ "step": 520
747
+ },
748
+ {
749
+ "epoch": 0.13842198932173225,
750
+ "grad_norm": 0.77734375,
751
+ "learning_rate": 0.0001862778730703259,
752
+ "loss": 0.0755,
753
+ "step": 525
754
+ },
755
+ {
756
+ "epoch": 0.13974029398193924,
757
+ "grad_norm": 2.8125,
758
+ "learning_rate": 0.00018614592954215598,
759
+ "loss": 0.0422,
760
+ "step": 530
761
+ },
762
+ {
763
+ "epoch": 0.1410585986421462,
764
+ "grad_norm": 0.48828125,
765
+ "learning_rate": 0.00018601398601398602,
766
+ "loss": 0.0882,
767
+ "step": 535
768
+ },
769
+ {
770
+ "epoch": 0.14237690330235317,
771
+ "grad_norm": 0.16015625,
772
+ "learning_rate": 0.0001858820424858161,
773
+ "loss": 0.0131,
774
+ "step": 540
775
+ },
776
+ {
777
+ "epoch": 0.14369520796256013,
778
+ "grad_norm": 0.31640625,
779
+ "learning_rate": 0.00018575009895764616,
780
+ "loss": 0.03,
781
+ "step": 545
782
+ },
783
+ {
784
+ "epoch": 0.14501351262276713,
785
+ "grad_norm": 0.0120849609375,
786
+ "learning_rate": 0.0001856181554294762,
787
+ "loss": 0.0425,
788
+ "step": 550
789
+ },
790
+ {
791
+ "epoch": 0.1463318172829741,
792
+ "grad_norm": 0.390625,
793
+ "learning_rate": 0.00018548621190130624,
794
+ "loss": 0.011,
795
+ "step": 555
796
+ },
797
+ {
798
+ "epoch": 0.14765012194318106,
799
+ "grad_norm": 1.9609375,
800
+ "learning_rate": 0.0001853542683731363,
801
+ "loss": 0.0807,
802
+ "step": 560
803
+ },
804
+ {
805
+ "epoch": 0.14896842660338805,
806
+ "grad_norm": 0.609375,
807
+ "learning_rate": 0.00018522232484496635,
808
+ "loss": 0.0278,
809
+ "step": 565
810
+ },
811
+ {
812
+ "epoch": 0.150286731263595,
813
+ "grad_norm": 0.087890625,
814
+ "learning_rate": 0.00018509038131679642,
815
+ "loss": 0.0484,
816
+ "step": 570
817
+ },
818
+ {
819
+ "epoch": 0.15160503592380198,
820
+ "grad_norm": 0.5078125,
821
+ "learning_rate": 0.00018495843778862646,
822
+ "loss": 0.1277,
823
+ "step": 575
824
+ },
825
+ {
826
+ "epoch": 0.15292334058400897,
827
+ "grad_norm": 0.8125,
828
+ "learning_rate": 0.00018482649426045653,
829
+ "loss": 0.058,
830
+ "step": 580
831
+ },
832
+ {
833
+ "epoch": 0.15424164524421594,
834
+ "grad_norm": 0.22265625,
835
+ "learning_rate": 0.00018469455073228657,
836
+ "loss": 0.0259,
837
+ "step": 585
838
+ },
839
+ {
840
+ "epoch": 0.1555599499044229,
841
+ "grad_norm": 1.8984375,
842
+ "learning_rate": 0.00018456260720411664,
843
+ "loss": 0.113,
844
+ "step": 590
845
+ },
846
+ {
847
+ "epoch": 0.1568782545646299,
848
+ "grad_norm": 0.12451171875,
849
+ "learning_rate": 0.0001844306636759467,
850
+ "loss": 0.0312,
851
+ "step": 595
852
+ },
853
+ {
854
+ "epoch": 0.15819655922483686,
855
+ "grad_norm": 0.0322265625,
856
+ "learning_rate": 0.00018429872014777676,
857
+ "loss": 0.0476,
858
+ "step": 600
859
+ },
860
+ {
861
+ "epoch": 0.15951486388504382,
862
+ "grad_norm": 0.0281982421875,
863
+ "learning_rate": 0.00018416677661960682,
864
+ "loss": 0.0232,
865
+ "step": 605
866
+ },
867
+ {
868
+ "epoch": 0.16083316854525082,
869
+ "grad_norm": 0.57421875,
870
+ "learning_rate": 0.00018403483309143687,
871
+ "loss": 0.1287,
872
+ "step": 610
873
+ },
874
+ {
875
+ "epoch": 0.16215147320545778,
876
+ "grad_norm": 0.765625,
877
+ "learning_rate": 0.00018390288956326694,
878
+ "loss": 0.0991,
879
+ "step": 615
880
+ },
881
+ {
882
+ "epoch": 0.16346977786566474,
883
+ "grad_norm": 0.3125,
884
+ "learning_rate": 0.00018377094603509698,
885
+ "loss": 0.0247,
886
+ "step": 620
887
+ },
888
+ {
889
+ "epoch": 0.16478808252587174,
890
+ "grad_norm": 0.37890625,
891
+ "learning_rate": 0.00018363900250692705,
892
+ "loss": 0.0632,
893
+ "step": 625
894
+ },
895
+ {
896
+ "epoch": 0.1661063871860787,
897
+ "grad_norm": 0.1494140625,
898
+ "learning_rate": 0.00018350705897875712,
899
+ "loss": 0.0314,
900
+ "step": 630
901
+ },
902
+ {
903
+ "epoch": 0.16742469184628567,
904
+ "grad_norm": 0.0673828125,
905
+ "learning_rate": 0.00018337511545058716,
906
+ "loss": 0.0425,
907
+ "step": 635
908
+ },
909
+ {
910
+ "epoch": 0.16874299650649266,
911
+ "grad_norm": 0.396484375,
912
+ "learning_rate": 0.00018324317192241723,
913
+ "loss": 0.0613,
914
+ "step": 640
915
+ },
916
+ {
917
+ "epoch": 0.17006130116669962,
918
+ "grad_norm": 0.057373046875,
919
+ "learning_rate": 0.00018311122839424727,
920
+ "loss": 0.0569,
921
+ "step": 645
922
+ },
923
+ {
924
+ "epoch": 0.1713796058269066,
925
+ "grad_norm": 0.001373291015625,
926
+ "learning_rate": 0.00018297928486607734,
927
+ "loss": 0.007,
928
+ "step": 650
929
+ },
930
+ {
931
+ "epoch": 0.17269791048711358,
932
+ "grad_norm": 1.0859375,
933
+ "learning_rate": 0.00018284734133790738,
934
+ "loss": 0.0189,
935
+ "step": 655
936
+ },
937
+ {
938
+ "epoch": 0.17401621514732055,
939
+ "grad_norm": 0.6015625,
940
+ "learning_rate": 0.00018271539780973742,
941
+ "loss": 0.0601,
942
+ "step": 660
943
+ },
944
+ {
945
+ "epoch": 0.1753345198075275,
946
+ "grad_norm": 0.25390625,
947
+ "learning_rate": 0.0001825834542815675,
948
+ "loss": 0.0211,
949
+ "step": 665
950
+ },
951
+ {
952
+ "epoch": 0.1766528244677345,
953
+ "grad_norm": 2.6875,
954
+ "learning_rate": 0.00018245151075339753,
955
+ "loss": 0.0713,
956
+ "step": 670
957
+ },
958
+ {
959
+ "epoch": 0.17797112912794147,
960
+ "grad_norm": 1.1875,
961
+ "learning_rate": 0.0001823195672252276,
962
+ "loss": 0.0522,
963
+ "step": 675
964
+ },
965
+ {
966
+ "epoch": 0.17928943378814843,
967
+ "grad_norm": 0.025146484375,
968
+ "learning_rate": 0.00018218762369705767,
969
+ "loss": 0.0242,
970
+ "step": 680
971
+ },
972
+ {
973
+ "epoch": 0.18060773844835543,
974
+ "grad_norm": 0.048095703125,
975
+ "learning_rate": 0.00018205568016888772,
976
+ "loss": 0.0129,
977
+ "step": 685
978
+ },
979
+ {
980
+ "epoch": 0.1819260431085624,
981
+ "grad_norm": 0.04541015625,
982
+ "learning_rate": 0.00018192373664071778,
983
+ "loss": 0.0142,
984
+ "step": 690
985
+ },
986
+ {
987
+ "epoch": 0.18324434776876936,
988
+ "grad_norm": 0.00830078125,
989
+ "learning_rate": 0.00018179179311254783,
990
+ "loss": 0.0121,
991
+ "step": 695
992
+ },
993
+ {
994
+ "epoch": 0.18456265242897635,
995
+ "grad_norm": 0.53125,
996
+ "learning_rate": 0.0001816598495843779,
997
+ "loss": 0.0163,
998
+ "step": 700
999
+ },
1000
+ {
1001
+ "epoch": 0.1858809570891833,
1002
+ "grad_norm": 0.185546875,
1003
+ "learning_rate": 0.00018152790605620796,
1004
+ "loss": 0.0203,
1005
+ "step": 705
1006
+ },
1007
+ {
1008
+ "epoch": 0.18719926174939028,
1009
+ "grad_norm": 1.2578125,
1010
+ "learning_rate": 0.000181395962528038,
1011
+ "loss": 0.1548,
1012
+ "step": 710
1013
+ },
1014
+ {
1015
+ "epoch": 0.18851756640959727,
1016
+ "grad_norm": 0.0247802734375,
1017
+ "learning_rate": 0.00018126401899986808,
1018
+ "loss": 0.0543,
1019
+ "step": 715
1020
+ },
1021
+ {
1022
+ "epoch": 0.18983587106980424,
1023
+ "grad_norm": 0.07568359375,
1024
+ "learning_rate": 0.00018113207547169812,
1025
+ "loss": 0.0346,
1026
+ "step": 720
1027
+ },
1028
+ {
1029
+ "epoch": 0.1911541757300112,
1030
+ "grad_norm": 0.1318359375,
1031
+ "learning_rate": 0.0001810001319435282,
1032
+ "loss": 0.03,
1033
+ "step": 725
1034
+ },
1035
+ {
1036
+ "epoch": 0.1924724803902182,
1037
+ "grad_norm": 0.1455078125,
1038
+ "learning_rate": 0.00018086818841535823,
1039
+ "loss": 0.0796,
1040
+ "step": 730
1041
+ },
1042
+ {
1043
+ "epoch": 0.19379078505042516,
1044
+ "grad_norm": 0.09814453125,
1045
+ "learning_rate": 0.0001807362448871883,
1046
+ "loss": 0.0662,
1047
+ "step": 735
1048
+ },
1049
+ {
1050
+ "epoch": 0.19510908971063212,
1051
+ "grad_norm": 0.91015625,
1052
+ "learning_rate": 0.00018060430135901837,
1053
+ "loss": 0.0675,
1054
+ "step": 740
1055
+ },
1056
+ {
1057
+ "epoch": 0.19642739437083911,
1058
+ "grad_norm": 0.10693359375,
1059
+ "learning_rate": 0.0001804723578308484,
1060
+ "loss": 0.0377,
1061
+ "step": 745
1062
+ },
1063
+ {
1064
+ "epoch": 0.19774569903104608,
1065
+ "grad_norm": 0.95703125,
1066
+ "learning_rate": 0.00018034041430267848,
1067
+ "loss": 0.0174,
1068
+ "step": 750
1069
+ },
1070
+ {
1071
+ "epoch": 0.19906400369125304,
1072
+ "grad_norm": 1.7890625,
1073
+ "learning_rate": 0.00018020847077450852,
1074
+ "loss": 0.0278,
1075
+ "step": 755
1076
+ },
1077
+ {
1078
+ "epoch": 0.20038230835146,
1079
+ "grad_norm": 0.8515625,
1080
+ "learning_rate": 0.00018007652724633856,
1081
+ "loss": 0.0113,
1082
+ "step": 760
1083
+ },
1084
+ {
1085
+ "epoch": 0.201700613011667,
1086
+ "grad_norm": 0.016845703125,
1087
+ "learning_rate": 0.00017994458371816863,
1088
+ "loss": 0.0589,
1089
+ "step": 765
1090
+ },
1091
+ {
1092
+ "epoch": 0.20301891767187397,
1093
+ "grad_norm": 0.01043701171875,
1094
+ "learning_rate": 0.00017981264018999867,
1095
+ "loss": 0.0203,
1096
+ "step": 770
1097
+ },
1098
+ {
1099
+ "epoch": 0.20433722233208093,
1100
+ "grad_norm": 0.0242919921875,
1101
+ "learning_rate": 0.00017968069666182874,
1102
+ "loss": 0.0494,
1103
+ "step": 775
1104
+ },
1105
+ {
1106
+ "epoch": 0.20565552699228792,
1107
+ "grad_norm": 0.56640625,
1108
+ "learning_rate": 0.00017954875313365879,
1109
+ "loss": 0.0394,
1110
+ "step": 780
1111
+ },
1112
+ {
1113
+ "epoch": 0.2069738316524949,
1114
+ "grad_norm": 0.06591796875,
1115
+ "learning_rate": 0.00017941680960548886,
1116
+ "loss": 0.0848,
1117
+ "step": 785
1118
+ },
1119
+ {
1120
+ "epoch": 0.20829213631270185,
1121
+ "grad_norm": 0.40234375,
1122
+ "learning_rate": 0.00017928486607731892,
1123
+ "loss": 0.0464,
1124
+ "step": 790
1125
+ },
1126
+ {
1127
+ "epoch": 0.20961044097290885,
1128
+ "grad_norm": 0.06298828125,
1129
+ "learning_rate": 0.00017915292254914897,
1130
+ "loss": 0.0222,
1131
+ "step": 795
1132
+ },
1133
+ {
1134
+ "epoch": 0.2109287456331158,
1135
+ "grad_norm": 0.5390625,
1136
+ "learning_rate": 0.00017902097902097904,
1137
+ "loss": 0.0434,
1138
+ "step": 800
1139
+ },
1140
+ {
1141
+ "epoch": 0.21224705029332278,
1142
+ "grad_norm": 1.390625,
1143
+ "learning_rate": 0.00017888903549280908,
1144
+ "loss": 0.0222,
1145
+ "step": 805
1146
+ },
1147
+ {
1148
+ "epoch": 0.21356535495352977,
1149
+ "grad_norm": 0.0272216796875,
1150
+ "learning_rate": 0.00017875709196463915,
1151
+ "loss": 0.0099,
1152
+ "step": 810
1153
+ },
1154
+ {
1155
+ "epoch": 0.21488365961373673,
1156
+ "grad_norm": 0.10009765625,
1157
+ "learning_rate": 0.0001786251484364692,
1158
+ "loss": 0.0086,
1159
+ "step": 815
1160
+ },
1161
+ {
1162
+ "epoch": 0.2162019642739437,
1163
+ "grad_norm": 0.06396484375,
1164
+ "learning_rate": 0.00017849320490829926,
1165
+ "loss": 0.0715,
1166
+ "step": 820
1167
+ },
1168
+ {
1169
+ "epoch": 0.2175202689341507,
1170
+ "grad_norm": 0.365234375,
1171
+ "learning_rate": 0.00017836126138012933,
1172
+ "loss": 0.0642,
1173
+ "step": 825
1174
+ },
1175
+ {
1176
+ "epoch": 0.21883857359435765,
1177
+ "grad_norm": 0.01519775390625,
1178
+ "learning_rate": 0.00017822931785195937,
1179
+ "loss": 0.0111,
1180
+ "step": 830
1181
+ },
1182
+ {
1183
+ "epoch": 0.22015687825456462,
1184
+ "grad_norm": 1.1640625,
1185
+ "learning_rate": 0.00017809737432378944,
1186
+ "loss": 0.0518,
1187
+ "step": 835
1188
+ },
1189
+ {
1190
+ "epoch": 0.2214751829147716,
1191
+ "grad_norm": 0.00921630859375,
1192
+ "learning_rate": 0.00017796543079561948,
1193
+ "loss": 0.0384,
1194
+ "step": 840
1195
+ },
1196
+ {
1197
+ "epoch": 0.22279348757497858,
1198
+ "grad_norm": 0.33984375,
1199
+ "learning_rate": 0.00017783348726744955,
1200
+ "loss": 0.0204,
1201
+ "step": 845
1202
+ },
1203
+ {
1204
+ "epoch": 0.22411179223518554,
1205
+ "grad_norm": 0.294921875,
1206
+ "learning_rate": 0.00017770154373927962,
1207
+ "loss": 0.0075,
1208
+ "step": 850
1209
+ },
1210
+ {
1211
+ "epoch": 0.22543009689539253,
1212
+ "grad_norm": 0.033203125,
1213
+ "learning_rate": 0.00017756960021110963,
1214
+ "loss": 0.0895,
1215
+ "step": 855
1216
+ },
1217
+ {
1218
+ "epoch": 0.2267484015555995,
1219
+ "grad_norm": 0.08056640625,
1220
+ "learning_rate": 0.0001774376566829397,
1221
+ "loss": 0.1039,
1222
+ "step": 860
1223
+ },
1224
+ {
1225
+ "epoch": 0.22806670621580646,
1226
+ "grad_norm": 0.55078125,
1227
+ "learning_rate": 0.00017730571315476975,
1228
+ "loss": 0.0125,
1229
+ "step": 865
1230
+ },
1231
+ {
1232
+ "epoch": 0.22938501087601346,
1233
+ "grad_norm": 0.5859375,
1234
+ "learning_rate": 0.00017717376962659982,
1235
+ "loss": 0.0381,
1236
+ "step": 870
1237
+ },
1238
+ {
1239
+ "epoch": 0.23070331553622042,
1240
+ "grad_norm": 0.029052734375,
1241
+ "learning_rate": 0.00017704182609842988,
1242
+ "loss": 0.0434,
1243
+ "step": 875
1244
+ },
1245
+ {
1246
+ "epoch": 0.23202162019642739,
1247
+ "grad_norm": 0.43359375,
1248
+ "learning_rate": 0.00017690988257025993,
1249
+ "loss": 0.0799,
1250
+ "step": 880
1251
+ },
1252
+ {
1253
+ "epoch": 0.23333992485663438,
1254
+ "grad_norm": 0.04150390625,
1255
+ "learning_rate": 0.00017677793904209,
1256
+ "loss": 0.0692,
1257
+ "step": 885
1258
+ },
1259
+ {
1260
+ "epoch": 0.23465822951684134,
1261
+ "grad_norm": 0.435546875,
1262
+ "learning_rate": 0.00017664599551392004,
1263
+ "loss": 0.0544,
1264
+ "step": 890
1265
+ },
1266
+ {
1267
+ "epoch": 0.2359765341770483,
1268
+ "grad_norm": 1.171875,
1269
+ "learning_rate": 0.0001765140519857501,
1270
+ "loss": 0.0619,
1271
+ "step": 895
1272
+ },
1273
+ {
1274
+ "epoch": 0.2372948388372553,
1275
+ "grad_norm": 0.01263427734375,
1276
+ "learning_rate": 0.00017638210845758018,
1277
+ "loss": 0.0418,
1278
+ "step": 900
1279
+ },
1280
+ {
1281
+ "epoch": 0.23861314349746227,
1282
+ "grad_norm": 0.017578125,
1283
+ "learning_rate": 0.00017625016492941022,
1284
+ "loss": 0.0195,
1285
+ "step": 905
1286
+ },
1287
+ {
1288
+ "epoch": 0.23993144815766923,
1289
+ "grad_norm": 0.6171875,
1290
+ "learning_rate": 0.0001761182214012403,
1291
+ "loss": 0.067,
1292
+ "step": 910
1293
+ },
1294
+ {
1295
+ "epoch": 0.24124975281787622,
1296
+ "grad_norm": 0.59765625,
1297
+ "learning_rate": 0.00017598627787307033,
1298
+ "loss": 0.049,
1299
+ "step": 915
1300
+ },
1301
+ {
1302
+ "epoch": 0.2425680574780832,
1303
+ "grad_norm": 1.2421875,
1304
+ "learning_rate": 0.0001758543343449004,
1305
+ "loss": 0.0539,
1306
+ "step": 920
1307
+ },
1308
+ {
1309
+ "epoch": 0.24388636213829015,
1310
+ "grad_norm": 0.10302734375,
1311
+ "learning_rate": 0.00017572239081673044,
1312
+ "loss": 0.0725,
1313
+ "step": 925
1314
+ },
1315
+ {
1316
+ "epoch": 0.24520466679849715,
1317
+ "grad_norm": 0.330078125,
1318
+ "learning_rate": 0.0001755904472885605,
1319
+ "loss": 0.064,
1320
+ "step": 930
1321
+ },
1322
+ {
1323
+ "epoch": 0.2465229714587041,
1324
+ "grad_norm": 0.220703125,
1325
+ "learning_rate": 0.00017545850376039058,
1326
+ "loss": 0.0271,
1327
+ "step": 935
1328
+ },
1329
+ {
1330
+ "epoch": 0.24784127611891107,
1331
+ "grad_norm": 0.01470947265625,
1332
+ "learning_rate": 0.00017532656023222062,
1333
+ "loss": 0.0247,
1334
+ "step": 940
1335
+ },
1336
+ {
1337
+ "epoch": 0.24915958077911807,
1338
+ "grad_norm": 0.013427734375,
1339
+ "learning_rate": 0.0001751946167040507,
1340
+ "loss": 0.017,
1341
+ "step": 945
1342
+ },
1343
+ {
1344
+ "epoch": 0.25047788543932503,
1345
+ "grad_norm": 0.58984375,
1346
+ "learning_rate": 0.00017506267317588073,
1347
+ "loss": 0.0254,
1348
+ "step": 950
1349
+ },
1350
+ {
1351
+ "epoch": 0.251796190099532,
1352
+ "grad_norm": 0.412109375,
1353
+ "learning_rate": 0.00017493072964771078,
1354
+ "loss": 0.0186,
1355
+ "step": 955
1356
+ },
1357
+ {
1358
+ "epoch": 0.25311449475973896,
1359
+ "grad_norm": 0.66796875,
1360
+ "learning_rate": 0.00017479878611954084,
1361
+ "loss": 0.0617,
1362
+ "step": 960
1363
+ },
1364
+ {
1365
+ "epoch": 0.25443279941994595,
1366
+ "grad_norm": 0.322265625,
1367
+ "learning_rate": 0.00017466684259137089,
1368
+ "loss": 0.0173,
1369
+ "step": 965
1370
+ },
1371
+ {
1372
+ "epoch": 0.25575110408015295,
1373
+ "grad_norm": 0.83203125,
1374
+ "learning_rate": 0.00017453489906320096,
1375
+ "loss": 0.0512,
1376
+ "step": 970
1377
+ },
1378
+ {
1379
+ "epoch": 0.2570694087403599,
1380
+ "grad_norm": 0.08447265625,
1381
+ "learning_rate": 0.000174402955535031,
1382
+ "loss": 0.0361,
1383
+ "step": 975
1384
+ },
1385
+ {
1386
+ "epoch": 0.2583877134005669,
1387
+ "grad_norm": 0.423828125,
1388
+ "learning_rate": 0.00017427101200686107,
1389
+ "loss": 0.0175,
1390
+ "step": 980
1391
+ },
1392
+ {
1393
+ "epoch": 0.25970601806077387,
1394
+ "grad_norm": 0.77734375,
1395
+ "learning_rate": 0.00017413906847869114,
1396
+ "loss": 0.0139,
1397
+ "step": 985
1398
+ },
1399
+ {
1400
+ "epoch": 0.2610243227209808,
1401
+ "grad_norm": 0.515625,
1402
+ "learning_rate": 0.00017400712495052118,
1403
+ "loss": 0.0948,
1404
+ "step": 990
1405
+ },
1406
+ {
1407
+ "epoch": 0.2623426273811878,
1408
+ "grad_norm": 1.421875,
1409
+ "learning_rate": 0.00017387518142235125,
1410
+ "loss": 0.0406,
1411
+ "step": 995
1412
+ },
1413
+ {
1414
+ "epoch": 0.2636609320413948,
1415
+ "grad_norm": 0.058837890625,
1416
+ "learning_rate": 0.0001737432378941813,
1417
+ "loss": 0.1011,
1418
+ "step": 1000
1419
+ },
1420
+ {
1421
+ "epoch": 0.2636609320413948,
1422
+ "eval_loss": 0.045552924275398254,
1423
+ "eval_model_preparation_time": 0.0076,
1424
+ "eval_runtime": 457.6113,
1425
+ "eval_samples_per_second": 7.369,
1426
+ "eval_steps_per_second": 3.684,
1427
+ "step": 1000
1428
+ },
1429
+ {
1430
+ "epoch": 0.26497923670160173,
1431
+ "grad_norm": 0.380859375,
1432
+ "learning_rate": 0.00017361129436601136,
1433
+ "loss": 0.0711,
1434
+ "step": 1005
1435
+ },
1436
+ {
1437
+ "epoch": 0.2662975413618087,
1438
+ "grad_norm": 0.0208740234375,
1439
+ "learning_rate": 0.00017347935083784143,
1440
+ "loss": 0.0218,
1441
+ "step": 1010
1442
+ },
1443
+ {
1444
+ "epoch": 0.2676158460220157,
1445
+ "grad_norm": 0.04345703125,
1446
+ "learning_rate": 0.00017334740730967147,
1447
+ "loss": 0.0301,
1448
+ "step": 1015
1449
+ },
1450
+ {
1451
+ "epoch": 0.26893415068222265,
1452
+ "grad_norm": 0.2734375,
1453
+ "learning_rate": 0.00017321546378150154,
1454
+ "loss": 0.0721,
1455
+ "step": 1020
1456
+ },
1457
+ {
1458
+ "epoch": 0.27025245534242964,
1459
+ "grad_norm": 0.25390625,
1460
+ "learning_rate": 0.00017308352025333158,
1461
+ "loss": 0.0363,
1462
+ "step": 1025
1463
+ },
1464
+ {
1465
+ "epoch": 0.27157076000263664,
1466
+ "grad_norm": 0.04345703125,
1467
+ "learning_rate": 0.00017295157672516165,
1468
+ "loss": 0.0313,
1469
+ "step": 1030
1470
+ },
1471
+ {
1472
+ "epoch": 0.2728890646628436,
1473
+ "grad_norm": 0.0211181640625,
1474
+ "learning_rate": 0.0001728196331969917,
1475
+ "loss": 0.0385,
1476
+ "step": 1035
1477
+ },
1478
+ {
1479
+ "epoch": 0.27420736932305056,
1480
+ "grad_norm": 0.00787353515625,
1481
+ "learning_rate": 0.00017268768966882176,
1482
+ "loss": 0.0405,
1483
+ "step": 1040
1484
+ },
1485
+ {
1486
+ "epoch": 0.27552567398325756,
1487
+ "grad_norm": 0.484375,
1488
+ "learning_rate": 0.00017255574614065183,
1489
+ "loss": 0.0616,
1490
+ "step": 1045
1491
+ },
1492
+ {
1493
+ "epoch": 0.2768439786434645,
1494
+ "grad_norm": 0.0908203125,
1495
+ "learning_rate": 0.00017242380261248185,
1496
+ "loss": 0.0057,
1497
+ "step": 1050
1498
+ },
1499
+ {
1500
+ "epoch": 0.2781622833036715,
1501
+ "grad_norm": 0.1904296875,
1502
+ "learning_rate": 0.00017229185908431192,
1503
+ "loss": 0.0417,
1504
+ "step": 1055
1505
+ },
1506
+ {
1507
+ "epoch": 0.2794805879638785,
1508
+ "grad_norm": 0.30078125,
1509
+ "learning_rate": 0.00017215991555614196,
1510
+ "loss": 0.0346,
1511
+ "step": 1060
1512
+ },
1513
+ {
1514
+ "epoch": 0.2807988926240854,
1515
+ "grad_norm": 0.016357421875,
1516
+ "learning_rate": 0.00017202797202797203,
1517
+ "loss": 0.0295,
1518
+ "step": 1065
1519
+ },
1520
+ {
1521
+ "epoch": 0.2821171972842924,
1522
+ "grad_norm": 0.490234375,
1523
+ "learning_rate": 0.0001718960284998021,
1524
+ "loss": 0.0448,
1525
+ "step": 1070
1526
+ },
1527
+ {
1528
+ "epoch": 0.28343550194449935,
1529
+ "grad_norm": 0.004241943359375,
1530
+ "learning_rate": 0.00017176408497163214,
1531
+ "loss": 0.0051,
1532
+ "step": 1075
1533
+ },
1534
+ {
1535
+ "epoch": 0.28475380660470634,
1536
+ "grad_norm": 0.01904296875,
1537
+ "learning_rate": 0.0001716321414434622,
1538
+ "loss": 0.0894,
1539
+ "step": 1080
1540
+ },
1541
+ {
1542
+ "epoch": 0.28607211126491333,
1543
+ "grad_norm": 0.83984375,
1544
+ "learning_rate": 0.00017150019791529225,
1545
+ "loss": 0.0288,
1546
+ "step": 1085
1547
+ },
1548
+ {
1549
+ "epoch": 0.28739041592512027,
1550
+ "grad_norm": 0.2021484375,
1551
+ "learning_rate": 0.00017136825438712232,
1552
+ "loss": 0.0222,
1553
+ "step": 1090
1554
+ },
1555
+ {
1556
+ "epoch": 0.28870872058532726,
1557
+ "grad_norm": 0.322265625,
1558
+ "learning_rate": 0.0001712363108589524,
1559
+ "loss": 0.0444,
1560
+ "step": 1095
1561
+ },
1562
+ {
1563
+ "epoch": 0.29002702524553425,
1564
+ "grad_norm": 0.408203125,
1565
+ "learning_rate": 0.00017110436733078243,
1566
+ "loss": 0.0828,
1567
+ "step": 1100
1568
+ },
1569
+ {
1570
+ "epoch": 0.2913453299057412,
1571
+ "grad_norm": 0.04052734375,
1572
+ "learning_rate": 0.0001709724238026125,
1573
+ "loss": 0.0725,
1574
+ "step": 1105
1575
+ },
1576
+ {
1577
+ "epoch": 0.2926636345659482,
1578
+ "grad_norm": 0.2578125,
1579
+ "learning_rate": 0.00017084048027444254,
1580
+ "loss": 0.0204,
1581
+ "step": 1110
1582
+ },
1583
+ {
1584
+ "epoch": 0.2939819392261552,
1585
+ "grad_norm": 0.67578125,
1586
+ "learning_rate": 0.0001707085367462726,
1587
+ "loss": 0.0503,
1588
+ "step": 1115
1589
+ },
1590
+ {
1591
+ "epoch": 0.2953002438863621,
1592
+ "grad_norm": 0.0059814453125,
1593
+ "learning_rate": 0.00017057659321810265,
1594
+ "loss": 0.0144,
1595
+ "step": 1120
1596
+ },
1597
+ {
1598
+ "epoch": 0.2966185485465691,
1599
+ "grad_norm": 0.0269775390625,
1600
+ "learning_rate": 0.00017044464968993272,
1601
+ "loss": 0.0044,
1602
+ "step": 1125
1603
+ },
1604
+ {
1605
+ "epoch": 0.2979368532067761,
1606
+ "grad_norm": 0.1396484375,
1607
+ "learning_rate": 0.0001703127061617628,
1608
+ "loss": 0.013,
1609
+ "step": 1130
1610
+ },
1611
+ {
1612
+ "epoch": 0.29925515786698303,
1613
+ "grad_norm": 0.287109375,
1614
+ "learning_rate": 0.00017018076263359283,
1615
+ "loss": 0.0245,
1616
+ "step": 1135
1617
+ },
1618
+ {
1619
+ "epoch": 0.30057346252719,
1620
+ "grad_norm": 0.26171875,
1621
+ "learning_rate": 0.0001700488191054229,
1622
+ "loss": 0.0247,
1623
+ "step": 1140
1624
+ },
1625
+ {
1626
+ "epoch": 0.301891767187397,
1627
+ "grad_norm": 0.40625,
1628
+ "learning_rate": 0.00016991687557725294,
1629
+ "loss": 0.0402,
1630
+ "step": 1145
1631
+ },
1632
+ {
1633
+ "epoch": 0.30321007184760396,
1634
+ "grad_norm": 1.2578125,
1635
+ "learning_rate": 0.000169784932049083,
1636
+ "loss": 0.0071,
1637
+ "step": 1150
1638
+ },
1639
+ {
1640
+ "epoch": 0.30452837650781095,
1641
+ "grad_norm": 0.330078125,
1642
+ "learning_rate": 0.00016965298852091306,
1643
+ "loss": 0.0177,
1644
+ "step": 1155
1645
+ },
1646
+ {
1647
+ "epoch": 0.30584668116801794,
1648
+ "grad_norm": 0.07275390625,
1649
+ "learning_rate": 0.0001695210449927431,
1650
+ "loss": 0.0029,
1651
+ "step": 1160
1652
+ },
1653
+ {
1654
+ "epoch": 0.3071649858282249,
1655
+ "grad_norm": 0.455078125,
1656
+ "learning_rate": 0.00016938910146457317,
1657
+ "loss": 0.0262,
1658
+ "step": 1165
1659
+ },
1660
+ {
1661
+ "epoch": 0.30848329048843187,
1662
+ "grad_norm": 0.002655029296875,
1663
+ "learning_rate": 0.0001692571579364032,
1664
+ "loss": 0.0346,
1665
+ "step": 1170
1666
+ },
1667
+ {
1668
+ "epoch": 0.30980159514863886,
1669
+ "grad_norm": 0.1748046875,
1670
+ "learning_rate": 0.00016912521440823328,
1671
+ "loss": 0.0494,
1672
+ "step": 1175
1673
+ },
1674
+ {
1675
+ "epoch": 0.3111198998088458,
1676
+ "grad_norm": 1.4609375,
1677
+ "learning_rate": 0.00016899327088006335,
1678
+ "loss": 0.0603,
1679
+ "step": 1180
1680
+ },
1681
+ {
1682
+ "epoch": 0.3124382044690528,
1683
+ "grad_norm": 0.1572265625,
1684
+ "learning_rate": 0.0001688613273518934,
1685
+ "loss": 0.0366,
1686
+ "step": 1185
1687
+ },
1688
+ {
1689
+ "epoch": 0.3137565091292598,
1690
+ "grad_norm": 0.01422119140625,
1691
+ "learning_rate": 0.00016872938382372346,
1692
+ "loss": 0.0678,
1693
+ "step": 1190
1694
+ },
1695
+ {
1696
+ "epoch": 0.3150748137894667,
1697
+ "grad_norm": 0.2412109375,
1698
+ "learning_rate": 0.0001685974402955535,
1699
+ "loss": 0.0359,
1700
+ "step": 1195
1701
+ },
1702
+ {
1703
+ "epoch": 0.3163931184496737,
1704
+ "grad_norm": 0.275390625,
1705
+ "learning_rate": 0.00016846549676738357,
1706
+ "loss": 0.1099,
1707
+ "step": 1200
1708
+ },
1709
+ {
1710
+ "epoch": 0.3177114231098807,
1711
+ "grad_norm": 0.212890625,
1712
+ "learning_rate": 0.00016833355323921364,
1713
+ "loss": 0.0343,
1714
+ "step": 1205
1715
+ },
1716
+ {
1717
+ "epoch": 0.31902972777008765,
1718
+ "grad_norm": 0.0302734375,
1719
+ "learning_rate": 0.00016820160971104368,
1720
+ "loss": 0.0138,
1721
+ "step": 1210
1722
+ },
1723
+ {
1724
+ "epoch": 0.32034803243029464,
1725
+ "grad_norm": 0.016845703125,
1726
+ "learning_rate": 0.00016806966618287375,
1727
+ "loss": 0.0202,
1728
+ "step": 1215
1729
+ },
1730
+ {
1731
+ "epoch": 0.32166633709050163,
1732
+ "grad_norm": 0.1474609375,
1733
+ "learning_rate": 0.0001679377226547038,
1734
+ "loss": 0.0442,
1735
+ "step": 1220
1736
+ },
1737
+ {
1738
+ "epoch": 0.32298464175070857,
1739
+ "grad_norm": 0.049072265625,
1740
+ "learning_rate": 0.00016780577912653386,
1741
+ "loss": 0.0375,
1742
+ "step": 1225
1743
+ },
1744
+ {
1745
+ "epoch": 0.32430294641091556,
1746
+ "grad_norm": 0.1337890625,
1747
+ "learning_rate": 0.0001676738355983639,
1748
+ "loss": 0.01,
1749
+ "step": 1230
1750
+ },
1751
+ {
1752
+ "epoch": 0.32562125107112255,
1753
+ "grad_norm": 0.02197265625,
1754
+ "learning_rate": 0.00016754189207019397,
1755
+ "loss": 0.0139,
1756
+ "step": 1235
1757
+ },
1758
+ {
1759
+ "epoch": 0.3269395557313295,
1760
+ "grad_norm": 0.09228515625,
1761
+ "learning_rate": 0.00016740994854202404,
1762
+ "loss": 0.014,
1763
+ "step": 1240
1764
+ },
1765
+ {
1766
+ "epoch": 0.3282578603915365,
1767
+ "grad_norm": 0.47265625,
1768
+ "learning_rate": 0.00016727800501385408,
1769
+ "loss": 0.1546,
1770
+ "step": 1245
1771
+ },
1772
+ {
1773
+ "epoch": 0.3295761650517435,
1774
+ "grad_norm": 0.02294921875,
1775
+ "learning_rate": 0.00016714606148568413,
1776
+ "loss": 0.0803,
1777
+ "step": 1250
1778
+ },
1779
+ {
1780
+ "epoch": 0.3308944697119504,
1781
+ "grad_norm": 0.185546875,
1782
+ "learning_rate": 0.00016701411795751417,
1783
+ "loss": 0.0376,
1784
+ "step": 1255
1785
+ },
1786
+ {
1787
+ "epoch": 0.3322127743721574,
1788
+ "grad_norm": 0.1123046875,
1789
+ "learning_rate": 0.00016688217442934424,
1790
+ "loss": 0.0375,
1791
+ "step": 1260
1792
+ },
1793
+ {
1794
+ "epoch": 0.3335310790323644,
1795
+ "grad_norm": 1.03125,
1796
+ "learning_rate": 0.0001667502309011743,
1797
+ "loss": 0.0442,
1798
+ "step": 1265
1799
+ },
1800
+ {
1801
+ "epoch": 0.33484938369257133,
1802
+ "grad_norm": 0.0172119140625,
1803
+ "learning_rate": 0.00016661828737300435,
1804
+ "loss": 0.0261,
1805
+ "step": 1270
1806
+ },
1807
+ {
1808
+ "epoch": 0.3361676883527783,
1809
+ "grad_norm": 0.42578125,
1810
+ "learning_rate": 0.00016648634384483442,
1811
+ "loss": 0.0553,
1812
+ "step": 1275
1813
+ },
1814
+ {
1815
+ "epoch": 0.3374859930129853,
1816
+ "grad_norm": 0.1328125,
1817
+ "learning_rate": 0.00016635440031666446,
1818
+ "loss": 0.0065,
1819
+ "step": 1280
1820
+ },
1821
+ {
1822
+ "epoch": 0.33880429767319226,
1823
+ "grad_norm": 0.263671875,
1824
+ "learning_rate": 0.00016622245678849453,
1825
+ "loss": 0.0527,
1826
+ "step": 1285
1827
+ },
1828
+ {
1829
+ "epoch": 0.34012260233339925,
1830
+ "grad_norm": 0.314453125,
1831
+ "learning_rate": 0.0001660905132603246,
1832
+ "loss": 0.0297,
1833
+ "step": 1290
1834
+ },
1835
+ {
1836
+ "epoch": 0.34144090699360624,
1837
+ "grad_norm": 0.04345703125,
1838
+ "learning_rate": 0.00016595856973215464,
1839
+ "loss": 0.0477,
1840
+ "step": 1295
1841
+ },
1842
+ {
1843
+ "epoch": 0.3427592116538132,
1844
+ "grad_norm": 0.08154296875,
1845
+ "learning_rate": 0.0001658266262039847,
1846
+ "loss": 0.0298,
1847
+ "step": 1300
1848
+ },
1849
+ {
1850
+ "epoch": 0.34407751631402017,
1851
+ "grad_norm": 0.08935546875,
1852
+ "learning_rate": 0.00016569468267581475,
1853
+ "loss": 0.0481,
1854
+ "step": 1305
1855
+ },
1856
+ {
1857
+ "epoch": 0.34539582097422716,
1858
+ "grad_norm": 0.06640625,
1859
+ "learning_rate": 0.00016556273914764482,
1860
+ "loss": 0.0153,
1861
+ "step": 1310
1862
+ },
1863
+ {
1864
+ "epoch": 0.3467141256344341,
1865
+ "grad_norm": 0.00592041015625,
1866
+ "learning_rate": 0.00016543079561947486,
1867
+ "loss": 0.0111,
1868
+ "step": 1315
1869
+ },
1870
+ {
1871
+ "epoch": 0.3480324302946411,
1872
+ "grad_norm": 0.2236328125,
1873
+ "learning_rate": 0.00016529885209130493,
1874
+ "loss": 0.0309,
1875
+ "step": 1320
1876
+ },
1877
+ {
1878
+ "epoch": 0.3493507349548481,
1879
+ "grad_norm": 0.0198974609375,
1880
+ "learning_rate": 0.000165166908563135,
1881
+ "loss": 0.0579,
1882
+ "step": 1325
1883
+ },
1884
+ {
1885
+ "epoch": 0.350669039615055,
1886
+ "grad_norm": 0.10107421875,
1887
+ "learning_rate": 0.00016503496503496504,
1888
+ "loss": 0.0055,
1889
+ "step": 1330
1890
+ },
1891
+ {
1892
+ "epoch": 0.351987344275262,
1893
+ "grad_norm": 0.71875,
1894
+ "learning_rate": 0.00016490302150679511,
1895
+ "loss": 0.0299,
1896
+ "step": 1335
1897
+ },
1898
+ {
1899
+ "epoch": 0.353305648935469,
1900
+ "grad_norm": 0.01348876953125,
1901
+ "learning_rate": 0.00016477107797862516,
1902
+ "loss": 0.0943,
1903
+ "step": 1340
1904
+ },
1905
+ {
1906
+ "epoch": 0.35462395359567594,
1907
+ "grad_norm": 0.3046875,
1908
+ "learning_rate": 0.00016463913445045523,
1909
+ "loss": 0.0216,
1910
+ "step": 1345
1911
+ },
1912
+ {
1913
+ "epoch": 0.35594225825588294,
1914
+ "grad_norm": 0.02392578125,
1915
+ "learning_rate": 0.00016450719092228527,
1916
+ "loss": 0.0265,
1917
+ "step": 1350
1918
+ },
1919
+ {
1920
+ "epoch": 0.35726056291608993,
1921
+ "grad_norm": 0.453125,
1922
+ "learning_rate": 0.0001643752473941153,
1923
+ "loss": 0.0539,
1924
+ "step": 1355
1925
+ },
1926
+ {
1927
+ "epoch": 0.35857886757629687,
1928
+ "grad_norm": 0.00823974609375,
1929
+ "learning_rate": 0.00016424330386594538,
1930
+ "loss": 0.0139,
1931
+ "step": 1360
1932
+ },
1933
+ {
1934
+ "epoch": 0.35989717223650386,
1935
+ "grad_norm": 0.55859375,
1936
+ "learning_rate": 0.00016411136033777542,
1937
+ "loss": 0.0428,
1938
+ "step": 1365
1939
+ },
1940
+ {
1941
+ "epoch": 0.36121547689671085,
1942
+ "grad_norm": 0.052734375,
1943
+ "learning_rate": 0.0001639794168096055,
1944
+ "loss": 0.0346,
1945
+ "step": 1370
1946
+ },
1947
+ {
1948
+ "epoch": 0.3625337815569178,
1949
+ "grad_norm": 0.12158203125,
1950
+ "learning_rate": 0.00016384747328143556,
1951
+ "loss": 0.0095,
1952
+ "step": 1375
1953
+ },
1954
+ {
1955
+ "epoch": 0.3638520862171248,
1956
+ "grad_norm": 0.0240478515625,
1957
+ "learning_rate": 0.0001637155297532656,
1958
+ "loss": 0.0224,
1959
+ "step": 1380
1960
+ },
1961
+ {
1962
+ "epoch": 0.3651703908773318,
1963
+ "grad_norm": 0.01318359375,
1964
+ "learning_rate": 0.00016358358622509567,
1965
+ "loss": 0.0316,
1966
+ "step": 1385
1967
+ },
1968
+ {
1969
+ "epoch": 0.3664886955375387,
1970
+ "grad_norm": 0.011962890625,
1971
+ "learning_rate": 0.0001634516426969257,
1972
+ "loss": 0.0051,
1973
+ "step": 1390
1974
+ },
1975
+ {
1976
+ "epoch": 0.3678070001977457,
1977
+ "grad_norm": 0.00396728515625,
1978
+ "learning_rate": 0.00016331969916875578,
1979
+ "loss": 0.038,
1980
+ "step": 1395
1981
+ },
1982
+ {
1983
+ "epoch": 0.3691253048579527,
1984
+ "grad_norm": 0.375,
1985
+ "learning_rate": 0.00016318775564058585,
1986
+ "loss": 0.029,
1987
+ "step": 1400
1988
+ },
1989
+ {
1990
+ "epoch": 0.37044360951815963,
1991
+ "grad_norm": 0.265625,
1992
+ "learning_rate": 0.0001630558121124159,
1993
+ "loss": 0.0072,
1994
+ "step": 1405
1995
+ },
1996
+ {
1997
+ "epoch": 0.3717619141783666,
1998
+ "grad_norm": 0.00127410888671875,
1999
+ "learning_rate": 0.00016292386858424596,
2000
+ "loss": 0.0381,
2001
+ "step": 1410
2002
+ },
2003
+ {
2004
+ "epoch": 0.3730802188385736,
2005
+ "grad_norm": 1.15625,
2006
+ "learning_rate": 0.000162791925056076,
2007
+ "loss": 0.0573,
2008
+ "step": 1415
2009
+ },
2010
+ {
2011
+ "epoch": 0.37439852349878056,
2012
+ "grad_norm": 0.0244140625,
2013
+ "learning_rate": 0.00016265998152790607,
2014
+ "loss": 0.051,
2015
+ "step": 1420
2016
+ },
2017
+ {
2018
+ "epoch": 0.37571682815898755,
2019
+ "grad_norm": 0.0015106201171875,
2020
+ "learning_rate": 0.00016252803799973612,
2021
+ "loss": 0.0239,
2022
+ "step": 1425
2023
+ },
2024
+ {
2025
+ "epoch": 0.37703513281919454,
2026
+ "grad_norm": 0.26953125,
2027
+ "learning_rate": 0.00016239609447156618,
2028
+ "loss": 0.0165,
2029
+ "step": 1430
2030
+ },
2031
+ {
2032
+ "epoch": 0.3783534374794015,
2033
+ "grad_norm": 0.006134033203125,
2034
+ "learning_rate": 0.00016226415094339625,
2035
+ "loss": 0.0071,
2036
+ "step": 1435
2037
+ },
2038
+ {
2039
+ "epoch": 0.37967174213960847,
2040
+ "grad_norm": 2.828125,
2041
+ "learning_rate": 0.0001621322074152263,
2042
+ "loss": 0.0272,
2043
+ "step": 1440
2044
+ },
2045
+ {
2046
+ "epoch": 0.38099004679981546,
2047
+ "grad_norm": 0.349609375,
2048
+ "learning_rate": 0.00016200026388705637,
2049
+ "loss": 0.0647,
2050
+ "step": 1445
2051
+ },
2052
+ {
2053
+ "epoch": 0.3823083514600224,
2054
+ "grad_norm": 0.09326171875,
2055
+ "learning_rate": 0.00016186832035888638,
2056
+ "loss": 0.0262,
2057
+ "step": 1450
2058
+ },
2059
+ {
2060
+ "epoch": 0.3836266561202294,
2061
+ "grad_norm": 0.041015625,
2062
+ "learning_rate": 0.00016173637683071645,
2063
+ "loss": 0.0576,
2064
+ "step": 1455
2065
+ },
2066
+ {
2067
+ "epoch": 0.3849449607804364,
2068
+ "grad_norm": 0.033935546875,
2069
+ "learning_rate": 0.00016160443330254652,
2070
+ "loss": 0.0142,
2071
+ "step": 1460
2072
+ },
2073
+ {
2074
+ "epoch": 0.3862632654406433,
2075
+ "grad_norm": 0.09130859375,
2076
+ "learning_rate": 0.00016147248977437656,
2077
+ "loss": 0.0348,
2078
+ "step": 1465
2079
+ },
2080
+ {
2081
+ "epoch": 0.3875815701008503,
2082
+ "grad_norm": 2.390625,
2083
+ "learning_rate": 0.00016134054624620663,
2084
+ "loss": 0.0672,
2085
+ "step": 1470
2086
+ },
2087
+ {
2088
+ "epoch": 0.3888998747610573,
2089
+ "grad_norm": 0.439453125,
2090
+ "learning_rate": 0.00016120860271803667,
2091
+ "loss": 0.0121,
2092
+ "step": 1475
2093
+ },
2094
+ {
2095
+ "epoch": 0.39021817942126424,
2096
+ "grad_norm": 0.1298828125,
2097
+ "learning_rate": 0.00016107665918986674,
2098
+ "loss": 0.0114,
2099
+ "step": 1480
2100
+ },
2101
+ {
2102
+ "epoch": 0.39153648408147124,
2103
+ "grad_norm": 0.85546875,
2104
+ "learning_rate": 0.0001609447156616968,
2105
+ "loss": 0.0968,
2106
+ "step": 1485
2107
+ },
2108
+ {
2109
+ "epoch": 0.39285478874167823,
2110
+ "grad_norm": 0.703125,
2111
+ "learning_rate": 0.00016081277213352685,
2112
+ "loss": 0.0349,
2113
+ "step": 1490
2114
+ },
2115
+ {
2116
+ "epoch": 0.39417309340188517,
2117
+ "grad_norm": 0.021728515625,
2118
+ "learning_rate": 0.00016068082860535692,
2119
+ "loss": 0.0106,
2120
+ "step": 1495
2121
+ },
2122
+ {
2123
+ "epoch": 0.39549139806209216,
2124
+ "grad_norm": 0.7265625,
2125
+ "learning_rate": 0.00016054888507718696,
2126
+ "loss": 0.0225,
2127
+ "step": 1500
2128
+ },
2129
+ {
2130
+ "epoch": 0.39549139806209216,
2131
+ "eval_loss": 0.03515048325061798,
2132
+ "eval_model_preparation_time": 0.0076,
2133
+ "eval_runtime": 457.3497,
2134
+ "eval_samples_per_second": 7.373,
2135
+ "eval_steps_per_second": 3.686,
2136
+ "step": 1500
2137
+ },
2138
+ {
2139
+ "epoch": 0.3968097027222991,
2140
+ "grad_norm": 0.016519820317626,
2141
+ "learning_rate": 0.00016041694154901703,
2142
+ "loss": 0.0202,
2143
+ "step": 1505
2144
+ },
2145
+ {
2146
+ "epoch": 0.3981280073825061,
2147
+ "grad_norm": 0.8505942225456238,
2148
+ "learning_rate": 0.00016028499802084708,
2149
+ "loss": 0.0541,
2150
+ "step": 1510
2151
+ },
2152
+ {
2153
+ "epoch": 0.3994463120427131,
2154
+ "grad_norm": 0.04163295030593872,
2155
+ "learning_rate": 0.00016015305449267714,
2156
+ "loss": 0.0037,
2157
+ "step": 1515
2158
+ },
2159
+ {
2160
+ "epoch": 0.40076461670292,
2161
+ "grad_norm": 0.011332935653626919,
2162
+ "learning_rate": 0.00016002111096450721,
2163
+ "loss": 0.0459,
2164
+ "step": 1520
2165
+ },
2166
+ {
2167
+ "epoch": 0.402082921363127,
2168
+ "grad_norm": 0.9360129833221436,
2169
+ "learning_rate": 0.00015988916743633726,
2170
+ "loss": 0.013,
2171
+ "step": 1525
2172
+ },
2173
+ {
2174
+ "epoch": 0.403401226023334,
2175
+ "grad_norm": 0.11991436779499054,
2176
+ "learning_rate": 0.00015975722390816733,
2177
+ "loss": 0.0079,
2178
+ "step": 1530
2179
+ },
2180
+ {
2181
+ "epoch": 0.40471953068354094,
2182
+ "grad_norm": 0.36911076307296753,
2183
+ "learning_rate": 0.00015962528037999737,
2184
+ "loss": 0.0638,
2185
+ "step": 1535
2186
+ },
2187
+ {
2188
+ "epoch": 0.40603783534374793,
2189
+ "grad_norm": 0.020278634503483772,
2190
+ "learning_rate": 0.00015949333685182744,
2191
+ "loss": 0.0217,
2192
+ "step": 1540
2193
+ },
2194
+ {
2195
+ "epoch": 0.4073561400039549,
2196
+ "grad_norm": 0.14263059198856354,
2197
+ "learning_rate": 0.0001593613933236575,
2198
+ "loss": 0.0495,
2199
+ "step": 1545
2200
+ },
2201
+ {
2202
+ "epoch": 0.40867444466416186,
2203
+ "grad_norm": 0.09494803845882416,
2204
+ "learning_rate": 0.00015922944979548752,
2205
+ "loss": 0.0248,
2206
+ "step": 1550
2207
+ },
2208
+ {
2209
+ "epoch": 0.40999274932436885,
2210
+ "grad_norm": 0.23064319789409637,
2211
+ "learning_rate": 0.0001590975062673176,
2212
+ "loss": 0.0285,
2213
+ "step": 1555
2214
+ },
2215
+ {
2216
+ "epoch": 0.41131105398457585,
2217
+ "grad_norm": 0.32220256328582764,
2218
+ "learning_rate": 0.00015896556273914763,
2219
+ "loss": 0.0537,
2220
+ "step": 1560
2221
+ },
2222
+ {
2223
+ "epoch": 0.4126293586447828,
2224
+ "grad_norm": 0.41208815574645996,
2225
+ "learning_rate": 0.0001588336192109777,
2226
+ "loss": 0.0453,
2227
+ "step": 1565
2228
+ },
2229
+ {
2230
+ "epoch": 0.4139476633049898,
2231
+ "grad_norm": 0.03775424137711525,
2232
+ "learning_rate": 0.00015870167568280777,
2233
+ "loss": 0.0134,
2234
+ "step": 1570
2235
+ },
2236
+ {
2237
+ "epoch": 0.41526596796519677,
2238
+ "grad_norm": 0.6526333093643188,
2239
+ "learning_rate": 0.0001585697321546378,
2240
+ "loss": 0.0329,
2241
+ "step": 1575
2242
+ },
2243
+ {
2244
+ "epoch": 0.4165842726254037,
2245
+ "grad_norm": 1.001305103302002,
2246
+ "learning_rate": 0.00015843778862646788,
2247
+ "loss": 0.0912,
2248
+ "step": 1580
2249
+ },
2250
+ {
2251
+ "epoch": 0.4179025772856107,
2252
+ "grad_norm": 0.4055219888687134,
2253
+ "learning_rate": 0.00015830584509829792,
2254
+ "loss": 0.0519,
2255
+ "step": 1585
2256
+ },
2257
+ {
2258
+ "epoch": 0.4192208819458177,
2259
+ "grad_norm": 0.035015616565942764,
2260
+ "learning_rate": 0.000158173901570128,
2261
+ "loss": 0.0191,
2262
+ "step": 1590
2263
+ },
2264
+ {
2265
+ "epoch": 0.42053918660602463,
2266
+ "grad_norm": 0.09326844662427902,
2267
+ "learning_rate": 0.00015804195804195806,
2268
+ "loss": 0.0106,
2269
+ "step": 1595
2270
+ },
2271
+ {
2272
+ "epoch": 0.4218574912662316,
2273
+ "grad_norm": 0.06223440542817116,
2274
+ "learning_rate": 0.0001579100145137881,
2275
+ "loss": 0.0113,
2276
+ "step": 1600
2277
+ },
2278
+ {
2279
+ "epoch": 0.4231757959264386,
2280
+ "grad_norm": 0.0625135526061058,
2281
+ "learning_rate": 0.00015777807098561817,
2282
+ "loss": 0.0191,
2283
+ "step": 1605
2284
+ },
2285
+ {
2286
+ "epoch": 0.42449410058664555,
2287
+ "grad_norm": 0.2645983099937439,
2288
+ "learning_rate": 0.00015764612745744822,
2289
+ "loss": 0.0829,
2290
+ "step": 1610
2291
+ },
2292
+ {
2293
+ "epoch": 0.42581240524685254,
2294
+ "grad_norm": 0.009632415138185024,
2295
+ "learning_rate": 0.00015751418392927829,
2296
+ "loss": 0.0542,
2297
+ "step": 1615
2298
+ },
2299
+ {
2300
+ "epoch": 0.42713070990705954,
2301
+ "grad_norm": 0.01979319378733635,
2302
+ "learning_rate": 0.00015738224040110833,
2303
+ "loss": 0.0517,
2304
+ "step": 1620
2305
+ },
2306
+ {
2307
+ "epoch": 0.4284490145672665,
2308
+ "grad_norm": 0.3065454065799713,
2309
+ "learning_rate": 0.0001572502968729384,
2310
+ "loss": 0.0738,
2311
+ "step": 1625
2312
+ },
2313
+ {
2314
+ "epoch": 0.42976731922747347,
2315
+ "grad_norm": 0.09581473469734192,
2316
+ "learning_rate": 0.00015711835334476847,
2317
+ "loss": 0.0571,
2318
+ "step": 1630
2319
+ },
2320
+ {
2321
+ "epoch": 0.43108562388768046,
2322
+ "grad_norm": 0.23746591806411743,
2323
+ "learning_rate": 0.0001569864098165985,
2324
+ "loss": 0.0128,
2325
+ "step": 1635
2326
+ },
2327
+ {
2328
+ "epoch": 0.4324039285478874,
2329
+ "grad_norm": 0.936278760433197,
2330
+ "learning_rate": 0.00015685446628842858,
2331
+ "loss": 0.0665,
2332
+ "step": 1640
2333
+ },
2334
+ {
2335
+ "epoch": 0.4337222332080944,
2336
+ "grad_norm": 0.18487441539764404,
2337
+ "learning_rate": 0.00015672252276025862,
2338
+ "loss": 0.0527,
2339
+ "step": 1645
2340
+ },
2341
+ {
2342
+ "epoch": 0.4350405378683014,
2343
+ "grad_norm": 0.6980624794960022,
2344
+ "learning_rate": 0.00015659057923208866,
2345
+ "loss": 0.0613,
2346
+ "step": 1650
2347
+ },
2348
+ {
2349
+ "epoch": 0.4363588425285083,
2350
+ "grad_norm": 0.4696301221847534,
2351
+ "learning_rate": 0.00015645863570391873,
2352
+ "loss": 0.0569,
2353
+ "step": 1655
2354
+ },
2355
+ {
2356
+ "epoch": 0.4376771471887153,
2357
+ "grad_norm": 0.15083105862140656,
2358
+ "learning_rate": 0.00015632669217574877,
2359
+ "loss": 0.0394,
2360
+ "step": 1660
2361
+ },
2362
+ {
2363
+ "epoch": 0.4389954518489223,
2364
+ "grad_norm": 0.44701239466667175,
2365
+ "learning_rate": 0.00015619474864757884,
2366
+ "loss": 0.0494,
2367
+ "step": 1665
2368
+ },
2369
+ {
2370
+ "epoch": 0.44031375650912924,
2371
+ "grad_norm": 0.07418403029441833,
2372
+ "learning_rate": 0.00015606280511940888,
2373
+ "loss": 0.0291,
2374
+ "step": 1670
2375
+ },
2376
+ {
2377
+ "epoch": 0.44163206116933623,
2378
+ "grad_norm": 0.02311861515045166,
2379
+ "learning_rate": 0.00015593086159123895,
2380
+ "loss": 0.0304,
2381
+ "step": 1675
2382
+ },
2383
+ {
2384
+ "epoch": 0.4429503658295432,
2385
+ "grad_norm": 0.4416038990020752,
2386
+ "learning_rate": 0.00015579891806306902,
2387
+ "loss": 0.0176,
2388
+ "step": 1680
2389
+ },
2390
+ {
2391
+ "epoch": 0.44426867048975016,
2392
+ "grad_norm": 0.5124915242195129,
2393
+ "learning_rate": 0.00015566697453489906,
2394
+ "loss": 0.0454,
2395
+ "step": 1685
2396
+ },
2397
+ {
2398
+ "epoch": 0.44558697514995715,
2399
+ "grad_norm": 0.3159286081790924,
2400
+ "learning_rate": 0.00015553503100672913,
2401
+ "loss": 0.047,
2402
+ "step": 1690
2403
+ },
2404
+ {
2405
+ "epoch": 0.44690527981016415,
2406
+ "grad_norm": 0.032126396894454956,
2407
+ "learning_rate": 0.00015540308747855918,
2408
+ "loss": 0.0151,
2409
+ "step": 1695
2410
+ },
2411
+ {
2412
+ "epoch": 0.4482235844703711,
2413
+ "grad_norm": 0.04663548618555069,
2414
+ "learning_rate": 0.00015527114395038924,
2415
+ "loss": 0.0375,
2416
+ "step": 1700
2417
+ },
2418
+ {
2419
+ "epoch": 0.4495418891305781,
2420
+ "grad_norm": 0.013753900304436684,
2421
+ "learning_rate": 0.0001551392004222193,
2422
+ "loss": 0.0485,
2423
+ "step": 1705
2424
+ },
2425
+ {
2426
+ "epoch": 0.45086019379078507,
2427
+ "grad_norm": 1.9952393770217896,
2428
+ "learning_rate": 0.00015500725689404936,
2429
+ "loss": 0.0625,
2430
+ "step": 1710
2431
+ },
2432
+ {
2433
+ "epoch": 0.452178498450992,
2434
+ "grad_norm": 0.014283270575106144,
2435
+ "learning_rate": 0.00015487531336587943,
2436
+ "loss": 0.0037,
2437
+ "step": 1715
2438
+ },
2439
+ {
2440
+ "epoch": 0.453496803111199,
2441
+ "grad_norm": 0.3897913098335266,
2442
+ "learning_rate": 0.00015474336983770947,
2443
+ "loss": 0.0304,
2444
+ "step": 1720
2445
+ },
2446
+ {
2447
+ "epoch": 0.454815107771406,
2448
+ "grad_norm": 0.3730885684490204,
2449
+ "learning_rate": 0.00015461142630953954,
2450
+ "loss": 0.0115,
2451
+ "step": 1725
2452
+ },
2453
+ {
2454
+ "epoch": 0.45613341243161293,
2455
+ "grad_norm": 0.035858724266290665,
2456
+ "learning_rate": 0.00015447948278136958,
2457
+ "loss": 0.0021,
2458
+ "step": 1730
2459
+ },
2460
+ {
2461
+ "epoch": 0.4574517170918199,
2462
+ "grad_norm": 0.20589517056941986,
2463
+ "learning_rate": 0.00015434753925319965,
2464
+ "loss": 0.0132,
2465
+ "step": 1735
2466
+ },
2467
+ {
2468
+ "epoch": 0.4587700217520269,
2469
+ "grad_norm": 0.004939342383295298,
2470
+ "learning_rate": 0.00015421559572502972,
2471
+ "loss": 0.0471,
2472
+ "step": 1740
2473
+ },
2474
+ {
2475
+ "epoch": 0.46008832641223385,
2476
+ "grad_norm": 0.03493283689022064,
2477
+ "learning_rate": 0.00015408365219685976,
2478
+ "loss": 0.0062,
2479
+ "step": 1745
2480
+ },
2481
+ {
2482
+ "epoch": 0.46140663107244084,
2483
+ "grad_norm": 0.045927103608846664,
2484
+ "learning_rate": 0.0001539517086686898,
2485
+ "loss": 0.0283,
2486
+ "step": 1750
2487
+ },
2488
+ {
2489
+ "epoch": 0.46272493573264784,
2490
+ "grad_norm": 0.012629454955458641,
2491
+ "learning_rate": 0.00015381976514051984,
2492
+ "loss": 0.0133,
2493
+ "step": 1755
2494
+ },
2495
+ {
2496
+ "epoch": 0.46404324039285477,
2497
+ "grad_norm": 0.8001697659492493,
2498
+ "learning_rate": 0.0001536878216123499,
2499
+ "loss": 0.0224,
2500
+ "step": 1760
2501
+ },
2502
+ {
2503
+ "epoch": 0.46536154505306176,
2504
+ "grad_norm": 0.002036362886428833,
2505
+ "learning_rate": 0.00015355587808417998,
2506
+ "loss": 0.0066,
2507
+ "step": 1765
2508
+ },
2509
+ {
2510
+ "epoch": 0.46667984971326876,
2511
+ "grad_norm": 1.0261330604553223,
2512
+ "learning_rate": 0.00015342393455601002,
2513
+ "loss": 0.191,
2514
+ "step": 1770
2515
+ },
2516
+ {
2517
+ "epoch": 0.4679981543734757,
2518
+ "grad_norm": 0.3033429682254791,
2519
+ "learning_rate": 0.0001532919910278401,
2520
+ "loss": 0.0222,
2521
+ "step": 1775
2522
+ },
2523
+ {
2524
+ "epoch": 0.4693164590336827,
2525
+ "grad_norm": 0.36911338567733765,
2526
+ "learning_rate": 0.00015316004749967014,
2527
+ "loss": 0.0363,
2528
+ "step": 1780
2529
+ },
2530
+ {
2531
+ "epoch": 0.4706347636938897,
2532
+ "grad_norm": 0.0406811460852623,
2533
+ "learning_rate": 0.0001530281039715002,
2534
+ "loss": 0.0283,
2535
+ "step": 1785
2536
+ },
2537
+ {
2538
+ "epoch": 0.4719530683540966,
2539
+ "grad_norm": 0.23334211111068726,
2540
+ "learning_rate": 0.00015289616044333027,
2541
+ "loss": 0.0274,
2542
+ "step": 1790
2543
+ },
2544
+ {
2545
+ "epoch": 0.4732713730143036,
2546
+ "grad_norm": 0.013081169687211514,
2547
+ "learning_rate": 0.00015276421691516032,
2548
+ "loss": 0.0221,
2549
+ "step": 1795
2550
+ },
2551
+ {
2552
+ "epoch": 0.4745896776745106,
2553
+ "grad_norm": 0.2480790615081787,
2554
+ "learning_rate": 0.00015263227338699039,
2555
+ "loss": 0.019,
2556
+ "step": 1800
2557
+ },
2558
+ {
2559
+ "epoch": 0.47590798233471754,
2560
+ "grad_norm": 0.0373196005821228,
2561
+ "learning_rate": 0.00015250032985882043,
2562
+ "loss": 0.0292,
2563
+ "step": 1805
2564
+ },
2565
+ {
2566
+ "epoch": 0.47722628699492453,
2567
+ "grad_norm": 0.004609994124621153,
2568
+ "learning_rate": 0.0001523683863306505,
2569
+ "loss": 0.0918,
2570
+ "step": 1810
2571
+ },
2572
+ {
2573
+ "epoch": 0.4785445916551315,
2574
+ "grad_norm": 0.02370987832546234,
2575
+ "learning_rate": 0.00015223644280248054,
2576
+ "loss": 0.0462,
2577
+ "step": 1815
2578
+ },
2579
+ {
2580
+ "epoch": 0.47986289631533846,
2581
+ "grad_norm": 0.05842221528291702,
2582
+ "learning_rate": 0.0001521044992743106,
2583
+ "loss": 0.0595,
2584
+ "step": 1820
2585
+ },
2586
+ {
2587
+ "epoch": 0.48118120097554545,
2588
+ "grad_norm": 0.009685276076197624,
2589
+ "learning_rate": 0.00015197255574614068,
2590
+ "loss": 0.0074,
2591
+ "step": 1825
2592
+ },
2593
+ {
2594
+ "epoch": 0.48249950563575245,
2595
+ "grad_norm": 0.8933250308036804,
2596
+ "learning_rate": 0.00015184061221797072,
2597
+ "loss": 0.0757,
2598
+ "step": 1830
2599
+ },
2600
+ {
2601
+ "epoch": 0.4838178102959594,
2602
+ "grad_norm": 0.07075401395559311,
2603
+ "learning_rate": 0.0001517086686898008,
2604
+ "loss": 0.0226,
2605
+ "step": 1835
2606
+ },
2607
+ {
2608
+ "epoch": 0.4851361149561664,
2609
+ "grad_norm": 0.732706606388092,
2610
+ "learning_rate": 0.00015157672516163083,
2611
+ "loss": 0.0161,
2612
+ "step": 1840
2613
+ },
2614
+ {
2615
+ "epoch": 0.48645441961637337,
2616
+ "grad_norm": 1.1897023916244507,
2617
+ "learning_rate": 0.0001514447816334609,
2618
+ "loss": 0.0265,
2619
+ "step": 1845
2620
+ },
2621
+ {
2622
+ "epoch": 0.4877727242765803,
2623
+ "grad_norm": 0.052572328597307205,
2624
+ "learning_rate": 0.00015131283810529094,
2625
+ "loss": 0.0094,
2626
+ "step": 1850
2627
+ },
2628
+ {
2629
+ "epoch": 0.4890910289367873,
2630
+ "grad_norm": 0.08263898640871048,
2631
+ "learning_rate": 0.00015118089457712098,
2632
+ "loss": 0.0631,
2633
+ "step": 1855
2634
+ },
2635
+ {
2636
+ "epoch": 0.4904093335969943,
2637
+ "grad_norm": 0.03225664421916008,
2638
+ "learning_rate": 0.00015104895104895105,
2639
+ "loss": 0.023,
2640
+ "step": 1860
2641
+ },
2642
+ {
2643
+ "epoch": 0.4917276382572012,
2644
+ "grad_norm": 0.007935039699077606,
2645
+ "learning_rate": 0.0001509170075207811,
2646
+ "loss": 0.0039,
2647
+ "step": 1865
2648
+ },
2649
+ {
2650
+ "epoch": 0.4930459429174082,
2651
+ "grad_norm": 0.00830796267837286,
2652
+ "learning_rate": 0.00015078506399261116,
2653
+ "loss": 0.007,
2654
+ "step": 1870
2655
+ },
2656
+ {
2657
+ "epoch": 0.4943642475776152,
2658
+ "grad_norm": 0.08042234182357788,
2659
+ "learning_rate": 0.00015065312046444123,
2660
+ "loss": 0.0366,
2661
+ "step": 1875
2662
+ },
2663
+ {
2664
+ "epoch": 0.49568255223782215,
2665
+ "grad_norm": 0.009092851541936398,
2666
+ "learning_rate": 0.00015052117693627128,
2667
+ "loss": 0.0107,
2668
+ "step": 1880
2669
+ },
2670
+ {
2671
+ "epoch": 0.49700085689802914,
2672
+ "grad_norm": 0.2674141824245453,
2673
+ "learning_rate": 0.00015038923340810135,
2674
+ "loss": 0.0076,
2675
+ "step": 1885
2676
+ },
2677
+ {
2678
+ "epoch": 0.49831916155823613,
2679
+ "grad_norm": 0.07694366574287415,
2680
+ "learning_rate": 0.0001502572898799314,
2681
+ "loss": 0.0252,
2682
+ "step": 1890
2683
+ },
2684
+ {
2685
+ "epoch": 0.49963746621844307,
2686
+ "grad_norm": 0.5699467062950134,
2687
+ "learning_rate": 0.00015012534635176146,
2688
+ "loss": 0.0487,
2689
+ "step": 1895
2690
+ },
2691
+ {
2692
+ "epoch": 0.5009557708786501,
2693
+ "grad_norm": 0.18800878524780273,
2694
+ "learning_rate": 0.0001499934028235915,
2695
+ "loss": 0.0183,
2696
+ "step": 1900
2697
+ },
2698
+ {
2699
+ "epoch": 0.5022740755388571,
2700
+ "grad_norm": 0.019469989463686943,
2701
+ "learning_rate": 0.00014986145929542157,
2702
+ "loss": 0.0268,
2703
+ "step": 1905
2704
+ },
2705
+ {
2706
+ "epoch": 0.503592380199064,
2707
+ "grad_norm": 0.01890506222844124,
2708
+ "learning_rate": 0.00014972951576725164,
2709
+ "loss": 0.0449,
2710
+ "step": 1910
2711
+ },
2712
+ {
2713
+ "epoch": 0.5049106848592709,
2714
+ "grad_norm": 0.0006314461352303624,
2715
+ "learning_rate": 0.00014959757223908168,
2716
+ "loss": 0.0056,
2717
+ "step": 1915
2718
+ },
2719
+ {
2720
+ "epoch": 0.5062289895194779,
2721
+ "grad_norm": 0.32654041051864624,
2722
+ "learning_rate": 0.00014946562871091175,
2723
+ "loss": 0.0256,
2724
+ "step": 1920
2725
+ },
2726
+ {
2727
+ "epoch": 0.5075472941796849,
2728
+ "grad_norm": 0.7803483605384827,
2729
+ "learning_rate": 0.0001493336851827418,
2730
+ "loss": 0.0374,
2731
+ "step": 1925
2732
+ },
2733
+ {
2734
+ "epoch": 0.5088655988398919,
2735
+ "grad_norm": 0.028441445901989937,
2736
+ "learning_rate": 0.00014920174165457186,
2737
+ "loss": 0.0161,
2738
+ "step": 1930
2739
+ },
2740
+ {
2741
+ "epoch": 0.5101839035000989,
2742
+ "grad_norm": 0.028379200026392937,
2743
+ "learning_rate": 0.00014906979812640193,
2744
+ "loss": 0.0151,
2745
+ "step": 1935
2746
+ },
2747
+ {
2748
+ "epoch": 0.5115022081603059,
2749
+ "grad_norm": 0.021159596741199493,
2750
+ "learning_rate": 0.00014893785459823197,
2751
+ "loss": 0.0303,
2752
+ "step": 1940
2753
+ },
2754
+ {
2755
+ "epoch": 0.5128205128205128,
2756
+ "grad_norm": 0.24903325736522675,
2757
+ "learning_rate": 0.000148805911070062,
2758
+ "loss": 0.0076,
2759
+ "step": 1945
2760
+ },
2761
+ {
2762
+ "epoch": 0.5141388174807198,
2763
+ "grad_norm": 0.007065301761031151,
2764
+ "learning_rate": 0.00014867396754189206,
2765
+ "loss": 0.022,
2766
+ "step": 1950
2767
+ },
2768
+ {
2769
+ "epoch": 0.5154571221409268,
2770
+ "grad_norm": 0.004032329190522432,
2771
+ "learning_rate": 0.00014854202401372212,
2772
+ "loss": 0.0083,
2773
+ "step": 1955
2774
+ },
2775
+ {
2776
+ "epoch": 0.5167754268011338,
2777
+ "grad_norm": 0.3045775592327118,
2778
+ "learning_rate": 0.0001484100804855522,
2779
+ "loss": 0.0113,
2780
+ "step": 1960
2781
+ },
2782
+ {
2783
+ "epoch": 0.5180937314613407,
2784
+ "grad_norm": 0.36974939703941345,
2785
+ "learning_rate": 0.00014827813695738224,
2786
+ "loss": 0.0267,
2787
+ "step": 1965
2788
+ },
2789
+ {
2790
+ "epoch": 0.5194120361215477,
2791
+ "grad_norm": 0.009729950688779354,
2792
+ "learning_rate": 0.0001481461934292123,
2793
+ "loss": 0.027,
2794
+ "step": 1970
2795
+ },
2796
+ {
2797
+ "epoch": 0.5207303407817546,
2798
+ "grad_norm": 0.0013097926275804639,
2799
+ "learning_rate": 0.00014801424990104235,
2800
+ "loss": 0.003,
2801
+ "step": 1975
2802
+ },
2803
+ {
2804
+ "epoch": 0.5220486454419616,
2805
+ "grad_norm": 0.0706263929605484,
2806
+ "learning_rate": 0.00014788230637287242,
2807
+ "loss": 0.0193,
2808
+ "step": 1980
2809
+ },
2810
+ {
2811
+ "epoch": 0.5233669501021686,
2812
+ "grad_norm": 1.435702919960022,
2813
+ "learning_rate": 0.00014775036284470249,
2814
+ "loss": 0.0647,
2815
+ "step": 1985
2816
+ },
2817
+ {
2818
+ "epoch": 0.5246852547623756,
2819
+ "grad_norm": 0.00661757867783308,
2820
+ "learning_rate": 0.00014761841931653253,
2821
+ "loss": 0.0373,
2822
+ "step": 1990
2823
+ },
2824
+ {
2825
+ "epoch": 0.5260035594225826,
2826
+ "grad_norm": 0.12014541029930115,
2827
+ "learning_rate": 0.0001474864757883626,
2828
+ "loss": 0.0178,
2829
+ "step": 1995
2830
+ },
2831
+ {
2832
+ "epoch": 0.5273218640827896,
2833
+ "grad_norm": 1.0549248456954956,
2834
+ "learning_rate": 0.00014735453226019264,
2835
+ "loss": 0.0191,
2836
+ "step": 2000
2837
+ },
2838
+ {
2839
+ "epoch": 0.5273218640827896,
2840
+ "eval_loss": 0.037292081862688065,
2841
+ "eval_runtime": 454.3033,
2842
+ "eval_samples_per_second": 7.422,
2843
+ "eval_steps_per_second": 3.711,
2844
+ "step": 2000
2845
+ }
2846
+ ],
2847
+ "logging_steps": 5,
2848
+ "max_steps": 7584,
2849
+ "num_input_tokens_seen": 0,
2850
+ "num_train_epochs": 2,
2851
+ "save_steps": 500,
2852
+ "stateful_callbacks": {
2853
+ "TrainerControl": {
2854
+ "args": {
2855
+ "should_epoch_stop": false,
2856
+ "should_evaluate": false,
2857
+ "should_log": false,
2858
+ "should_save": true,
2859
+ "should_training_stop": false
2860
+ },
2861
+ "attributes": {}
2862
+ }
2863
+ },
2864
+ "total_flos": 1.6140463556817715e+17,
2865
+ "train_batch_size": 2,
2866
+ "trial_name": null,
2867
+ "trial_params": null
2868
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79dfa687fdd0c9908ab6b63535817e7567b29b0b483ac228723218f6f5fdeec5
3
+ size 5688
checkpoint-2500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-2500/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "down_proj",
27
+ "gate_proj",
28
+ "q_proj",
29
+ "up_proj",
30
+ "o_proj",
31
+ "v_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-2500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a6211c65c5d4a0068d229acf0a297a352ad92f0b64f5164e0d585aee56e6e6d
3
+ size 97307544
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0c2fb708dde05534fb432625f17830675269b0ce06972aa78dfd143c6fdea2
3
+ size 50866370
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:301bae54f3043b54f78ee582aa05b8bf01996d027ad704f9a95f10d8be516262
3
+ size 14244
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb2463bd0f8eade5c4e011fd3af237c0b30fc7086ad97457f073fe0018b5d34c
3
+ size 1064
checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-2500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-2500/tokenizer_config.json ADDED
@@ -0,0 +1,2067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ }
2052
+ },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", 
\" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
2055
+ "clean_up_tokenization_spaces": true,
2056
+ "eos_token": "<|eot_id|>",
2057
+ "extra_special_tokens": {},
2058
+ "model_input_names": [
2059
+ "input_ids",
2060
+ "attention_mask"
2061
+ ],
2062
+ "model_max_length": 131072,
2063
+ "pad_token": "<|finetune_right_pad_id|>",
2064
+ "padding_side": "right",
2065
+ "tokenizer_class": "PreTrainedTokenizer",
2066
+ "unk_token": null
2067
+ }
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,3576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.659152330103487,
5
+ "eval_steps": 500,
6
+ "global_step": 2500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001318304660206974,
13
+ "grad_norm": 4.59375,
14
+ "learning_rate": 0.0002,
15
+ "loss": 1.9624,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.002636609320413948,
20
+ "grad_norm": 1.7421875,
21
+ "learning_rate": 0.00019986805647183008,
22
+ "loss": 0.6513,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.003954913980620921,
27
+ "grad_norm": 1.84375,
28
+ "learning_rate": 0.00019973611294366012,
29
+ "loss": 0.1146,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.005273218640827896,
34
+ "grad_norm": 1.3203125,
35
+ "learning_rate": 0.0001996041694154902,
36
+ "loss": 0.0529,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.006591523301034869,
41
+ "grad_norm": 0.40234375,
42
+ "learning_rate": 0.00019947222588732023,
43
+ "loss": 0.1214,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.007909827961241843,
48
+ "grad_norm": 1.5390625,
49
+ "learning_rate": 0.0001993402823591503,
50
+ "loss": 0.0919,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.009228132621448816,
55
+ "grad_norm": 0.06201171875,
56
+ "learning_rate": 0.00019920833883098034,
57
+ "loss": 0.09,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.010546437281655791,
62
+ "grad_norm": 1.53125,
63
+ "learning_rate": 0.0001990763953028104,
64
+ "loss": 0.1945,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.011864741941862765,
69
+ "grad_norm": 0.2890625,
70
+ "learning_rate": 0.00019894445177464048,
71
+ "loss": 0.1259,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.013183046602069738,
76
+ "grad_norm": 0.609375,
77
+ "learning_rate": 0.00019881250824647052,
78
+ "loss": 0.027,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.014501351262276712,
83
+ "grad_norm": 0.369140625,
84
+ "learning_rate": 0.00019868056471830057,
85
+ "loss": 0.1068,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.015819655922483685,
90
+ "grad_norm": 0.34765625,
91
+ "learning_rate": 0.00019854862119013064,
92
+ "loss": 0.0542,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.01713796058269066,
97
+ "grad_norm": 0.055419921875,
98
+ "learning_rate": 0.00019841667766196068,
99
+ "loss": 0.0901,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.018456265242897632,
104
+ "grad_norm": 0.0247802734375,
105
+ "learning_rate": 0.00019828473413379075,
106
+ "loss": 0.0091,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.019774569903104607,
111
+ "grad_norm": 0.0079345703125,
112
+ "learning_rate": 0.0001981527906056208,
113
+ "loss": 0.0744,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.021092874563311582,
118
+ "grad_norm": 0.65234375,
119
+ "learning_rate": 0.00019802084707745086,
120
+ "loss": 0.1108,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.022411179223518554,
125
+ "grad_norm": 0.50390625,
126
+ "learning_rate": 0.0001978889035492809,
127
+ "loss": 0.0446,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.02372948388372553,
132
+ "grad_norm": 0.1787109375,
133
+ "learning_rate": 0.00019775696002111097,
134
+ "loss": 0.0982,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.0250477885439325,
139
+ "grad_norm": 0.490234375,
140
+ "learning_rate": 0.00019762501649294104,
141
+ "loss": 0.1035,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.026366093204139476,
146
+ "grad_norm": 0.12158203125,
147
+ "learning_rate": 0.00019749307296477108,
148
+ "loss": 0.0401,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.02768439786434645,
153
+ "grad_norm": 0.16015625,
154
+ "learning_rate": 0.00019736112943660115,
155
+ "loss": 0.0309,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.029002702524553423,
160
+ "grad_norm": 1.359375,
161
+ "learning_rate": 0.0001972291859084312,
162
+ "loss": 0.1032,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.0303210071847604,
167
+ "grad_norm": 0.52734375,
168
+ "learning_rate": 0.00019709724238026126,
169
+ "loss": 0.0811,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.03163931184496737,
174
+ "grad_norm": 0.177734375,
175
+ "learning_rate": 0.00019696529885209133,
176
+ "loss": 0.0258,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.03295761650517435,
181
+ "grad_norm": 0.234375,
182
+ "learning_rate": 0.00019683335532392137,
183
+ "loss": 0.0437,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.03427592116538132,
188
+ "grad_norm": 1.3046875,
189
+ "learning_rate": 0.00019670141179575144,
190
+ "loss": 0.0967,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.03559422582558829,
195
+ "grad_norm": 0.2734375,
196
+ "learning_rate": 0.00019656946826758148,
197
+ "loss": 0.0132,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.036912530485795264,
202
+ "grad_norm": 0.66015625,
203
+ "learning_rate": 0.00019643752473941155,
204
+ "loss": 0.0396,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.03823083514600224,
209
+ "grad_norm": 1.0546875,
210
+ "learning_rate": 0.0001963055812112416,
211
+ "loss": 0.0449,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.039549139806209214,
216
+ "grad_norm": 0.2021484375,
217
+ "learning_rate": 0.00019617363768307166,
218
+ "loss": 0.1196,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.040867444466416186,
223
+ "grad_norm": 0.5859375,
224
+ "learning_rate": 0.0001960416941549017,
225
+ "loss": 0.0588,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.042185749126623165,
230
+ "grad_norm": 0.06005859375,
231
+ "learning_rate": 0.00019590975062673175,
232
+ "loss": 0.0234,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.04350405378683014,
237
+ "grad_norm": 0.4921875,
238
+ "learning_rate": 0.00019577780709856182,
239
+ "loss": 0.0916,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.04482235844703711,
244
+ "grad_norm": 0.84375,
245
+ "learning_rate": 0.0001956458635703919,
246
+ "loss": 0.0271,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.04614066310724409,
251
+ "grad_norm": 0.8828125,
252
+ "learning_rate": 0.00019551392004222193,
253
+ "loss": 0.0175,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.04745896776745106,
258
+ "grad_norm": 0.0152587890625,
259
+ "learning_rate": 0.000195381976514052,
260
+ "loss": 0.0356,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.04877727242765803,
265
+ "grad_norm": 0.09326171875,
266
+ "learning_rate": 0.00019525003298588204,
267
+ "loss": 0.0057,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.050095577087865,
272
+ "grad_norm": 0.24609375,
273
+ "learning_rate": 0.0001951180894577121,
274
+ "loss": 0.0082,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.05141388174807198,
279
+ "grad_norm": 0.05029296875,
280
+ "learning_rate": 0.00019498614592954215,
281
+ "loss": 0.0178,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.05273218640827895,
286
+ "grad_norm": 0.0390625,
287
+ "learning_rate": 0.00019485420240137222,
288
+ "loss": 0.0789,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.054050491068485924,
293
+ "grad_norm": 0.5625,
294
+ "learning_rate": 0.0001947222588732023,
295
+ "loss": 0.0645,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.0553687957286929,
300
+ "grad_norm": 0.53515625,
301
+ "learning_rate": 0.00019459031534503233,
302
+ "loss": 0.116,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.056687100388899875,
307
+ "grad_norm": 0.55078125,
308
+ "learning_rate": 0.0001944583718168624,
309
+ "loss": 0.0516,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.058005405049106847,
314
+ "grad_norm": 0.314453125,
315
+ "learning_rate": 0.00019432642828869244,
316
+ "loss": 0.1019,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.059323709709313825,
321
+ "grad_norm": 0.1123046875,
322
+ "learning_rate": 0.0001941944847605225,
323
+ "loss": 0.0529,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.0606420143695208,
328
+ "grad_norm": 0.4921875,
329
+ "learning_rate": 0.00019406254123235256,
330
+ "loss": 0.0368,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.06196031902972777,
335
+ "grad_norm": 0.054443359375,
336
+ "learning_rate": 0.00019393059770418262,
337
+ "loss": 0.037,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.06327862368993474,
342
+ "grad_norm": 0.008544921875,
343
+ "learning_rate": 0.0001937986541760127,
344
+ "loss": 0.0324,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.06459692835014172,
349
+ "grad_norm": 1.5,
350
+ "learning_rate": 0.00019366671064784274,
351
+ "loss": 0.0334,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 0.0659152330103487,
356
+ "grad_norm": 0.2109375,
357
+ "learning_rate": 0.0001935347671196728,
358
+ "loss": 0.0671,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.06723353767055566,
363
+ "grad_norm": 2.0625,
364
+ "learning_rate": 0.00019340282359150285,
365
+ "loss": 0.1559,
366
+ "step": 255
367
+ },
368
+ {
369
+ "epoch": 0.06855184233076264,
370
+ "grad_norm": 0.7734375,
371
+ "learning_rate": 0.0001932708800633329,
372
+ "loss": 0.0198,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.06987014699096962,
377
+ "grad_norm": 0.42578125,
378
+ "learning_rate": 0.00019313893653516296,
379
+ "loss": 0.0151,
380
+ "step": 265
381
+ },
382
+ {
383
+ "epoch": 0.07118845165117658,
384
+ "grad_norm": 0.1884765625,
385
+ "learning_rate": 0.000193006993006993,
386
+ "loss": 0.0269,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.07250675631138356,
391
+ "grad_norm": 1.546875,
392
+ "learning_rate": 0.00019287504947882307,
393
+ "loss": 0.0565,
394
+ "step": 275
395
+ },
396
+ {
397
+ "epoch": 0.07382506097159053,
398
+ "grad_norm": 0.5078125,
399
+ "learning_rate": 0.0001927431059506531,
400
+ "loss": 0.0942,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.0751433656317975,
405
+ "grad_norm": 0.392578125,
406
+ "learning_rate": 0.00019261116242248318,
407
+ "loss": 0.0061,
408
+ "step": 285
409
+ },
410
+ {
411
+ "epoch": 0.07646167029200449,
412
+ "grad_norm": 1.9140625,
413
+ "learning_rate": 0.00019247921889431325,
414
+ "loss": 0.0497,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.07777997495221145,
419
+ "grad_norm": 0.08837890625,
420
+ "learning_rate": 0.0001923472753661433,
421
+ "loss": 0.0573,
422
+ "step": 295
423
+ },
424
+ {
425
+ "epoch": 0.07909827961241843,
426
+ "grad_norm": 1.046875,
427
+ "learning_rate": 0.00019221533183797336,
428
+ "loss": 0.0528,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.08041658427262541,
433
+ "grad_norm": 0.2275390625,
434
+ "learning_rate": 0.0001920833883098034,
435
+ "loss": 0.0506,
436
+ "step": 305
437
+ },
438
+ {
439
+ "epoch": 0.08173488893283237,
440
+ "grad_norm": 0.08203125,
441
+ "learning_rate": 0.00019195144478163347,
442
+ "loss": 0.0307,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.08305319359303935,
447
+ "grad_norm": 0.111328125,
448
+ "learning_rate": 0.00019181950125346354,
449
+ "loss": 0.0365,
450
+ "step": 315
451
+ },
452
+ {
453
+ "epoch": 0.08437149825324633,
454
+ "grad_norm": 1.2890625,
455
+ "learning_rate": 0.00019168755772529358,
456
+ "loss": 0.0447,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.0856898029134533,
461
+ "grad_norm": 0.6015625,
462
+ "learning_rate": 0.00019155561419712365,
463
+ "loss": 0.0605,
464
+ "step": 325
465
+ },
466
+ {
467
+ "epoch": 0.08700810757366027,
468
+ "grad_norm": 0.71875,
469
+ "learning_rate": 0.0001914236706689537,
470
+ "loss": 0.0846,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.08832641223386725,
475
+ "grad_norm": 0.1494140625,
476
+ "learning_rate": 0.00019129172714078376,
477
+ "loss": 0.0713,
478
+ "step": 335
479
+ },
480
+ {
481
+ "epoch": 0.08964471689407422,
482
+ "grad_norm": 0.1669921875,
483
+ "learning_rate": 0.0001911597836126138,
484
+ "loss": 0.0826,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.0909630215542812,
489
+ "grad_norm": 2.203125,
490
+ "learning_rate": 0.00019102784008444388,
491
+ "loss": 0.0441,
492
+ "step": 345
493
+ },
494
+ {
495
+ "epoch": 0.09228132621448817,
496
+ "grad_norm": 1.21875,
497
+ "learning_rate": 0.00019089589655627395,
498
+ "loss": 0.1378,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.09359963087469514,
503
+ "grad_norm": 3.0625,
504
+ "learning_rate": 0.00019076395302810396,
505
+ "loss": 0.1552,
506
+ "step": 355
507
+ },
508
+ {
509
+ "epoch": 0.09491793553490212,
510
+ "grad_norm": 0.232421875,
511
+ "learning_rate": 0.00019063200949993403,
512
+ "loss": 0.0458,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.0962362401951091,
517
+ "grad_norm": 0.71875,
518
+ "learning_rate": 0.0001905000659717641,
519
+ "loss": 0.0312,
520
+ "step": 365
521
+ },
522
+ {
523
+ "epoch": 0.09755454485531606,
524
+ "grad_norm": 0.0218505859375,
525
+ "learning_rate": 0.00019036812244359414,
526
+ "loss": 0.0247,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.09887284951552304,
531
+ "grad_norm": 0.064453125,
532
+ "learning_rate": 0.0001902361789154242,
533
+ "loss": 0.054,
534
+ "step": 375
535
+ },
536
+ {
537
+ "epoch": 0.10019115417573,
538
+ "grad_norm": 0.021240234375,
539
+ "learning_rate": 0.00019010423538725425,
540
+ "loss": 0.0023,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.10150945883593698,
545
+ "grad_norm": 0.0361328125,
546
+ "learning_rate": 0.00018997229185908432,
547
+ "loss": 0.0884,
548
+ "step": 385
549
+ },
550
+ {
551
+ "epoch": 0.10282776349614396,
552
+ "grad_norm": 1.703125,
553
+ "learning_rate": 0.00018984034833091436,
554
+ "loss": 0.0506,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.10414606815635093,
559
+ "grad_norm": 0.08837890625,
560
+ "learning_rate": 0.00018970840480274443,
561
+ "loss": 0.1123,
562
+ "step": 395
563
+ },
564
+ {
565
+ "epoch": 0.1054643728165579,
566
+ "grad_norm": 0.6953125,
567
+ "learning_rate": 0.0001895764612745745,
568
+ "loss": 0.0597,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.10678267747676488,
573
+ "grad_norm": 0.18359375,
574
+ "learning_rate": 0.00018944451774640454,
575
+ "loss": 0.0138,
576
+ "step": 405
577
+ },
578
+ {
579
+ "epoch": 0.10810098213697185,
580
+ "grad_norm": 0.0272216796875,
581
+ "learning_rate": 0.0001893125742182346,
582
+ "loss": 0.0249,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.10941928679717883,
587
+ "grad_norm": 0.00970458984375,
588
+ "learning_rate": 0.00018918063069006466,
589
+ "loss": 0.0084,
590
+ "step": 415
591
+ },
592
+ {
593
+ "epoch": 0.1107375914573858,
594
+ "grad_norm": 0.54296875,
595
+ "learning_rate": 0.00018904868716189472,
596
+ "loss": 0.0541,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.11205589611759277,
601
+ "grad_norm": 0.74609375,
602
+ "learning_rate": 0.00018891674363372477,
603
+ "loss": 0.007,
604
+ "step": 425
605
+ },
606
+ {
607
+ "epoch": 0.11337420077779975,
608
+ "grad_norm": 0.0211181640625,
609
+ "learning_rate": 0.00018878480010555484,
610
+ "loss": 0.0875,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.11469250543800673,
615
+ "grad_norm": 0.9296875,
616
+ "learning_rate": 0.0001886528565773849,
617
+ "loss": 0.1207,
618
+ "step": 435
619
+ },
620
+ {
621
+ "epoch": 0.11601081009821369,
622
+ "grad_norm": 1.2734375,
623
+ "learning_rate": 0.00018852091304921495,
624
+ "loss": 0.1143,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.11732911475842067,
629
+ "grad_norm": 0.6484375,
630
+ "learning_rate": 0.00018838896952104502,
631
+ "loss": 0.0393,
632
+ "step": 445
633
+ },
634
+ {
635
+ "epoch": 0.11864741941862765,
636
+ "grad_norm": 0.1552734375,
637
+ "learning_rate": 0.00018825702599287506,
638
+ "loss": 0.02,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.11996572407883462,
643
+ "grad_norm": 0.486328125,
644
+ "learning_rate": 0.0001881250824647051,
645
+ "loss": 0.0891,
646
+ "step": 455
647
+ },
648
+ {
649
+ "epoch": 0.1212840287390416,
650
+ "grad_norm": 1.0,
651
+ "learning_rate": 0.00018799313893653517,
652
+ "loss": 0.0469,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.12260233339924857,
657
+ "grad_norm": 0.2099609375,
658
+ "learning_rate": 0.0001878611954083652,
659
+ "loss": 0.019,
660
+ "step": 465
661
+ },
662
+ {
663
+ "epoch": 0.12392063805945554,
664
+ "grad_norm": 0.03857421875,
665
+ "learning_rate": 0.00018772925188019528,
666
+ "loss": 0.007,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.12523894271966252,
671
+ "grad_norm": 0.0257568359375,
672
+ "learning_rate": 0.00018759730835202532,
673
+ "loss": 0.0039,
674
+ "step": 475
675
+ },
676
+ {
677
+ "epoch": 0.12655724737986948,
678
+ "grad_norm": 0.014404296875,
679
+ "learning_rate": 0.0001874653648238554,
680
+ "loss": 0.0043,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.12787555204007647,
685
+ "grad_norm": 0.51953125,
686
+ "learning_rate": 0.00018733342129568546,
687
+ "loss": 0.1326,
688
+ "step": 485
689
+ },
690
+ {
691
+ "epoch": 0.12919385670028344,
692
+ "grad_norm": 0.99609375,
693
+ "learning_rate": 0.0001872014777675155,
694
+ "loss": 0.0369,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.1305121613604904,
699
+ "grad_norm": 0.2734375,
700
+ "learning_rate": 0.00018706953423934557,
701
+ "loss": 0.0395,
702
+ "step": 495
703
+ },
704
+ {
705
+ "epoch": 0.1318304660206974,
706
+ "grad_norm": 0.083984375,
707
+ "learning_rate": 0.00018693759071117561,
708
+ "loss": 0.0284,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.1318304660206974,
713
+ "eval_loss": 0.04542969539761543,
714
+ "eval_model_preparation_time": 0.0076,
715
+ "eval_runtime": 457.5293,
716
+ "eval_samples_per_second": 7.37,
717
+ "eval_steps_per_second": 3.685,
718
+ "step": 500
719
+ },
720
+ {
721
+ "epoch": 0.13314877068090436,
722
+ "grad_norm": 0.0291748046875,
723
+ "learning_rate": 0.00018680564718300568,
724
+ "loss": 0.0533,
725
+ "step": 505
726
+ },
727
+ {
728
+ "epoch": 0.13446707534111133,
729
+ "grad_norm": 0.71484375,
730
+ "learning_rate": 0.00018667370365483575,
731
+ "loss": 0.0183,
732
+ "step": 510
733
+ },
734
+ {
735
+ "epoch": 0.13578538000131832,
736
+ "grad_norm": 0.018798828125,
737
+ "learning_rate": 0.0001865417601266658,
738
+ "loss": 0.0473,
739
+ "step": 515
740
+ },
741
+ {
742
+ "epoch": 0.13710368466152528,
743
+ "grad_norm": 0.388671875,
744
+ "learning_rate": 0.00018640981659849586,
745
+ "loss": 0.0562,
746
+ "step": 520
747
+ },
748
+ {
749
+ "epoch": 0.13842198932173225,
750
+ "grad_norm": 0.77734375,
751
+ "learning_rate": 0.0001862778730703259,
752
+ "loss": 0.0755,
753
+ "step": 525
754
+ },
755
+ {
756
+ "epoch": 0.13974029398193924,
757
+ "grad_norm": 2.8125,
758
+ "learning_rate": 0.00018614592954215598,
759
+ "loss": 0.0422,
760
+ "step": 530
761
+ },
762
+ {
763
+ "epoch": 0.1410585986421462,
764
+ "grad_norm": 0.48828125,
765
+ "learning_rate": 0.00018601398601398602,
766
+ "loss": 0.0882,
767
+ "step": 535
768
+ },
769
+ {
770
+ "epoch": 0.14237690330235317,
771
+ "grad_norm": 0.16015625,
772
+ "learning_rate": 0.0001858820424858161,
773
+ "loss": 0.0131,
774
+ "step": 540
775
+ },
776
+ {
777
+ "epoch": 0.14369520796256013,
778
+ "grad_norm": 0.31640625,
779
+ "learning_rate": 0.00018575009895764616,
780
+ "loss": 0.03,
781
+ "step": 545
782
+ },
783
+ {
784
+ "epoch": 0.14501351262276713,
785
+ "grad_norm": 0.0120849609375,
786
+ "learning_rate": 0.0001856181554294762,
787
+ "loss": 0.0425,
788
+ "step": 550
789
+ },
790
+ {
791
+ "epoch": 0.1463318172829741,
792
+ "grad_norm": 0.390625,
793
+ "learning_rate": 0.00018548621190130624,
794
+ "loss": 0.011,
795
+ "step": 555
796
+ },
797
+ {
798
+ "epoch": 0.14765012194318106,
799
+ "grad_norm": 1.9609375,
800
+ "learning_rate": 0.0001853542683731363,
801
+ "loss": 0.0807,
802
+ "step": 560
803
+ },
804
+ {
805
+ "epoch": 0.14896842660338805,
806
+ "grad_norm": 0.609375,
807
+ "learning_rate": 0.00018522232484496635,
808
+ "loss": 0.0278,
809
+ "step": 565
810
+ },
811
+ {
812
+ "epoch": 0.150286731263595,
813
+ "grad_norm": 0.087890625,
814
+ "learning_rate": 0.00018509038131679642,
815
+ "loss": 0.0484,
816
+ "step": 570
817
+ },
818
+ {
819
+ "epoch": 0.15160503592380198,
820
+ "grad_norm": 0.5078125,
821
+ "learning_rate": 0.00018495843778862646,
822
+ "loss": 0.1277,
823
+ "step": 575
824
+ },
825
+ {
826
+ "epoch": 0.15292334058400897,
827
+ "grad_norm": 0.8125,
828
+ "learning_rate": 0.00018482649426045653,
829
+ "loss": 0.058,
830
+ "step": 580
831
+ },
832
+ {
833
+ "epoch": 0.15424164524421594,
834
+ "grad_norm": 0.22265625,
835
+ "learning_rate": 0.00018469455073228657,
836
+ "loss": 0.0259,
837
+ "step": 585
838
+ },
839
+ {
840
+ "epoch": 0.1555599499044229,
841
+ "grad_norm": 1.8984375,
842
+ "learning_rate": 0.00018456260720411664,
843
+ "loss": 0.113,
844
+ "step": 590
845
+ },
846
+ {
847
+ "epoch": 0.1568782545646299,
848
+ "grad_norm": 0.12451171875,
849
+ "learning_rate": 0.0001844306636759467,
850
+ "loss": 0.0312,
851
+ "step": 595
852
+ },
853
+ {
854
+ "epoch": 0.15819655922483686,
855
+ "grad_norm": 0.0322265625,
856
+ "learning_rate": 0.00018429872014777676,
857
+ "loss": 0.0476,
858
+ "step": 600
859
+ },
860
+ {
861
+ "epoch": 0.15951486388504382,
862
+ "grad_norm": 0.0281982421875,
863
+ "learning_rate": 0.00018416677661960682,
864
+ "loss": 0.0232,
865
+ "step": 605
866
+ },
867
+ {
868
+ "epoch": 0.16083316854525082,
869
+ "grad_norm": 0.57421875,
870
+ "learning_rate": 0.00018403483309143687,
871
+ "loss": 0.1287,
872
+ "step": 610
873
+ },
874
+ {
875
+ "epoch": 0.16215147320545778,
876
+ "grad_norm": 0.765625,
877
+ "learning_rate": 0.00018390288956326694,
878
+ "loss": 0.0991,
879
+ "step": 615
880
+ },
881
+ {
882
+ "epoch": 0.16346977786566474,
883
+ "grad_norm": 0.3125,
884
+ "learning_rate": 0.00018377094603509698,
885
+ "loss": 0.0247,
886
+ "step": 620
887
+ },
888
+ {
889
+ "epoch": 0.16478808252587174,
890
+ "grad_norm": 0.37890625,
891
+ "learning_rate": 0.00018363900250692705,
892
+ "loss": 0.0632,
893
+ "step": 625
894
+ },
895
+ {
896
+ "epoch": 0.1661063871860787,
897
+ "grad_norm": 0.1494140625,
898
+ "learning_rate": 0.00018350705897875712,
899
+ "loss": 0.0314,
900
+ "step": 630
901
+ },
902
+ {
903
+ "epoch": 0.16742469184628567,
904
+ "grad_norm": 0.0673828125,
905
+ "learning_rate": 0.00018337511545058716,
906
+ "loss": 0.0425,
907
+ "step": 635
908
+ },
909
+ {
910
+ "epoch": 0.16874299650649266,
911
+ "grad_norm": 0.396484375,
912
+ "learning_rate": 0.00018324317192241723,
913
+ "loss": 0.0613,
914
+ "step": 640
915
+ },
916
+ {
917
+ "epoch": 0.17006130116669962,
918
+ "grad_norm": 0.057373046875,
919
+ "learning_rate": 0.00018311122839424727,
920
+ "loss": 0.0569,
921
+ "step": 645
922
+ },
923
+ {
924
+ "epoch": 0.1713796058269066,
925
+ "grad_norm": 0.001373291015625,
926
+ "learning_rate": 0.00018297928486607734,
927
+ "loss": 0.007,
928
+ "step": 650
929
+ },
930
+ {
931
+ "epoch": 0.17269791048711358,
932
+ "grad_norm": 1.0859375,
933
+ "learning_rate": 0.00018284734133790738,
934
+ "loss": 0.0189,
935
+ "step": 655
936
+ },
937
+ {
938
+ "epoch": 0.17401621514732055,
939
+ "grad_norm": 0.6015625,
940
+ "learning_rate": 0.00018271539780973742,
941
+ "loss": 0.0601,
942
+ "step": 660
943
+ },
944
+ {
945
+ "epoch": 0.1753345198075275,
946
+ "grad_norm": 0.25390625,
947
+ "learning_rate": 0.0001825834542815675,
948
+ "loss": 0.0211,
949
+ "step": 665
950
+ },
951
+ {
952
+ "epoch": 0.1766528244677345,
953
+ "grad_norm": 2.6875,
954
+ "learning_rate": 0.00018245151075339753,
955
+ "loss": 0.0713,
956
+ "step": 670
957
+ },
958
+ {
959
+ "epoch": 0.17797112912794147,
960
+ "grad_norm": 1.1875,
961
+ "learning_rate": 0.0001823195672252276,
962
+ "loss": 0.0522,
963
+ "step": 675
964
+ },
965
+ {
966
+ "epoch": 0.17928943378814843,
967
+ "grad_norm": 0.025146484375,
968
+ "learning_rate": 0.00018218762369705767,
969
+ "loss": 0.0242,
970
+ "step": 680
971
+ },
972
+ {
973
+ "epoch": 0.18060773844835543,
974
+ "grad_norm": 0.048095703125,
975
+ "learning_rate": 0.00018205568016888772,
976
+ "loss": 0.0129,
977
+ "step": 685
978
+ },
979
+ {
980
+ "epoch": 0.1819260431085624,
981
+ "grad_norm": 0.04541015625,
982
+ "learning_rate": 0.00018192373664071778,
983
+ "loss": 0.0142,
984
+ "step": 690
985
+ },
986
+ {
987
+ "epoch": 0.18324434776876936,
988
+ "grad_norm": 0.00830078125,
989
+ "learning_rate": 0.00018179179311254783,
990
+ "loss": 0.0121,
991
+ "step": 695
992
+ },
993
+ {
994
+ "epoch": 0.18456265242897635,
995
+ "grad_norm": 0.53125,
996
+ "learning_rate": 0.0001816598495843779,
997
+ "loss": 0.0163,
998
+ "step": 700
999
+ },
1000
+ {
1001
+ "epoch": 0.1858809570891833,
1002
+ "grad_norm": 0.185546875,
1003
+ "learning_rate": 0.00018152790605620796,
1004
+ "loss": 0.0203,
1005
+ "step": 705
1006
+ },
1007
+ {
1008
+ "epoch": 0.18719926174939028,
1009
+ "grad_norm": 1.2578125,
1010
+ "learning_rate": 0.000181395962528038,
1011
+ "loss": 0.1548,
1012
+ "step": 710
1013
+ },
1014
+ {
1015
+ "epoch": 0.18851756640959727,
1016
+ "grad_norm": 0.0247802734375,
1017
+ "learning_rate": 0.00018126401899986808,
1018
+ "loss": 0.0543,
1019
+ "step": 715
1020
+ },
1021
+ {
1022
+ "epoch": 0.18983587106980424,
1023
+ "grad_norm": 0.07568359375,
1024
+ "learning_rate": 0.00018113207547169812,
1025
+ "loss": 0.0346,
1026
+ "step": 720
1027
+ },
1028
+ {
1029
+ "epoch": 0.1911541757300112,
1030
+ "grad_norm": 0.1318359375,
1031
+ "learning_rate": 0.0001810001319435282,
1032
+ "loss": 0.03,
1033
+ "step": 725
1034
+ },
1035
+ {
1036
+ "epoch": 0.1924724803902182,
1037
+ "grad_norm": 0.1455078125,
1038
+ "learning_rate": 0.00018086818841535823,
1039
+ "loss": 0.0796,
1040
+ "step": 730
1041
+ },
1042
+ {
1043
+ "epoch": 0.19379078505042516,
1044
+ "grad_norm": 0.09814453125,
1045
+ "learning_rate": 0.0001807362448871883,
1046
+ "loss": 0.0662,
1047
+ "step": 735
1048
+ },
1049
+ {
1050
+ "epoch": 0.19510908971063212,
1051
+ "grad_norm": 0.91015625,
1052
+ "learning_rate": 0.00018060430135901837,
1053
+ "loss": 0.0675,
1054
+ "step": 740
1055
+ },
1056
+ {
1057
+ "epoch": 0.19642739437083911,
1058
+ "grad_norm": 0.10693359375,
1059
+ "learning_rate": 0.0001804723578308484,
1060
+ "loss": 0.0377,
1061
+ "step": 745
1062
+ },
1063
+ {
1064
+ "epoch": 0.19774569903104608,
1065
+ "grad_norm": 0.95703125,
1066
+ "learning_rate": 0.00018034041430267848,
1067
+ "loss": 0.0174,
1068
+ "step": 750
1069
+ },
1070
+ {
1071
+ "epoch": 0.19906400369125304,
1072
+ "grad_norm": 1.7890625,
1073
+ "learning_rate": 0.00018020847077450852,
1074
+ "loss": 0.0278,
1075
+ "step": 755
1076
+ },
1077
+ {
1078
+ "epoch": 0.20038230835146,
1079
+ "grad_norm": 0.8515625,
1080
+ "learning_rate": 0.00018007652724633856,
1081
+ "loss": 0.0113,
1082
+ "step": 760
1083
+ },
1084
+ {
1085
+ "epoch": 0.201700613011667,
1086
+ "grad_norm": 0.016845703125,
1087
+ "learning_rate": 0.00017994458371816863,
1088
+ "loss": 0.0589,
1089
+ "step": 765
1090
+ },
1091
+ {
1092
+ "epoch": 0.20301891767187397,
1093
+ "grad_norm": 0.01043701171875,
1094
+ "learning_rate": 0.00017981264018999867,
1095
+ "loss": 0.0203,
1096
+ "step": 770
1097
+ },
1098
+ {
1099
+ "epoch": 0.20433722233208093,
1100
+ "grad_norm": 0.0242919921875,
1101
+ "learning_rate": 0.00017968069666182874,
1102
+ "loss": 0.0494,
1103
+ "step": 775
1104
+ },
1105
+ {
1106
+ "epoch": 0.20565552699228792,
1107
+ "grad_norm": 0.56640625,
1108
+ "learning_rate": 0.00017954875313365879,
1109
+ "loss": 0.0394,
1110
+ "step": 780
1111
+ },
1112
+ {
1113
+ "epoch": 0.2069738316524949,
1114
+ "grad_norm": 0.06591796875,
1115
+ "learning_rate": 0.00017941680960548886,
1116
+ "loss": 0.0848,
1117
+ "step": 785
1118
+ },
1119
+ {
1120
+ "epoch": 0.20829213631270185,
1121
+ "grad_norm": 0.40234375,
1122
+ "learning_rate": 0.00017928486607731892,
1123
+ "loss": 0.0464,
1124
+ "step": 790
1125
+ },
1126
+ {
1127
+ "epoch": 0.20961044097290885,
1128
+ "grad_norm": 0.06298828125,
1129
+ "learning_rate": 0.00017915292254914897,
1130
+ "loss": 0.0222,
1131
+ "step": 795
1132
+ },
1133
+ {
1134
+ "epoch": 0.2109287456331158,
1135
+ "grad_norm": 0.5390625,
1136
+ "learning_rate": 0.00017902097902097904,
1137
+ "loss": 0.0434,
1138
+ "step": 800
1139
+ },
1140
+ {
1141
+ "epoch": 0.21224705029332278,
1142
+ "grad_norm": 1.390625,
1143
+ "learning_rate": 0.00017888903549280908,
1144
+ "loss": 0.0222,
1145
+ "step": 805
1146
+ },
1147
+ {
1148
+ "epoch": 0.21356535495352977,
1149
+ "grad_norm": 0.0272216796875,
1150
+ "learning_rate": 0.00017875709196463915,
1151
+ "loss": 0.0099,
1152
+ "step": 810
1153
+ },
1154
+ {
1155
+ "epoch": 0.21488365961373673,
1156
+ "grad_norm": 0.10009765625,
1157
+ "learning_rate": 0.0001786251484364692,
1158
+ "loss": 0.0086,
1159
+ "step": 815
1160
+ },
1161
+ {
1162
+ "epoch": 0.2162019642739437,
1163
+ "grad_norm": 0.06396484375,
1164
+ "learning_rate": 0.00017849320490829926,
1165
+ "loss": 0.0715,
1166
+ "step": 820
1167
+ },
1168
+ {
1169
+ "epoch": 0.2175202689341507,
1170
+ "grad_norm": 0.365234375,
1171
+ "learning_rate": 0.00017836126138012933,
1172
+ "loss": 0.0642,
1173
+ "step": 825
1174
+ },
1175
+ {
1176
+ "epoch": 0.21883857359435765,
1177
+ "grad_norm": 0.01519775390625,
1178
+ "learning_rate": 0.00017822931785195937,
1179
+ "loss": 0.0111,
1180
+ "step": 830
1181
+ },
1182
+ {
1183
+ "epoch": 0.22015687825456462,
1184
+ "grad_norm": 1.1640625,
1185
+ "learning_rate": 0.00017809737432378944,
1186
+ "loss": 0.0518,
1187
+ "step": 835
1188
+ },
1189
+ {
1190
+ "epoch": 0.2214751829147716,
1191
+ "grad_norm": 0.00921630859375,
1192
+ "learning_rate": 0.00017796543079561948,
1193
+ "loss": 0.0384,
1194
+ "step": 840
1195
+ },
1196
+ {
1197
+ "epoch": 0.22279348757497858,
1198
+ "grad_norm": 0.33984375,
1199
+ "learning_rate": 0.00017783348726744955,
1200
+ "loss": 0.0204,
1201
+ "step": 845
1202
+ },
1203
+ {
1204
+ "epoch": 0.22411179223518554,
1205
+ "grad_norm": 0.294921875,
1206
+ "learning_rate": 0.00017770154373927962,
1207
+ "loss": 0.0075,
1208
+ "step": 850
1209
+ },
1210
+ {
1211
+ "epoch": 0.22543009689539253,
1212
+ "grad_norm": 0.033203125,
1213
+ "learning_rate": 0.00017756960021110963,
1214
+ "loss": 0.0895,
1215
+ "step": 855
1216
+ },
1217
+ {
1218
+ "epoch": 0.2267484015555995,
1219
+ "grad_norm": 0.08056640625,
1220
+ "learning_rate": 0.0001774376566829397,
1221
+ "loss": 0.1039,
1222
+ "step": 860
1223
+ },
1224
+ {
1225
+ "epoch": 0.22806670621580646,
1226
+ "grad_norm": 0.55078125,
1227
+ "learning_rate": 0.00017730571315476975,
1228
+ "loss": 0.0125,
1229
+ "step": 865
1230
+ },
1231
+ {
1232
+ "epoch": 0.22938501087601346,
1233
+ "grad_norm": 0.5859375,
1234
+ "learning_rate": 0.00017717376962659982,
1235
+ "loss": 0.0381,
1236
+ "step": 870
1237
+ },
1238
+ {
1239
+ "epoch": 0.23070331553622042,
1240
+ "grad_norm": 0.029052734375,
1241
+ "learning_rate": 0.00017704182609842988,
1242
+ "loss": 0.0434,
1243
+ "step": 875
1244
+ },
1245
+ {
1246
+ "epoch": 0.23202162019642739,
1247
+ "grad_norm": 0.43359375,
1248
+ "learning_rate": 0.00017690988257025993,
1249
+ "loss": 0.0799,
1250
+ "step": 880
1251
+ },
1252
+ {
1253
+ "epoch": 0.23333992485663438,
1254
+ "grad_norm": 0.04150390625,
1255
+ "learning_rate": 0.00017677793904209,
1256
+ "loss": 0.0692,
1257
+ "step": 885
1258
+ },
1259
+ {
1260
+ "epoch": 0.23465822951684134,
1261
+ "grad_norm": 0.435546875,
1262
+ "learning_rate": 0.00017664599551392004,
1263
+ "loss": 0.0544,
1264
+ "step": 890
1265
+ },
1266
+ {
1267
+ "epoch": 0.2359765341770483,
1268
+ "grad_norm": 1.171875,
1269
+ "learning_rate": 0.0001765140519857501,
1270
+ "loss": 0.0619,
1271
+ "step": 895
1272
+ },
1273
+ {
1274
+ "epoch": 0.2372948388372553,
1275
+ "grad_norm": 0.01263427734375,
1276
+ "learning_rate": 0.00017638210845758018,
1277
+ "loss": 0.0418,
1278
+ "step": 900
1279
+ },
1280
+ {
1281
+ "epoch": 0.23861314349746227,
1282
+ "grad_norm": 0.017578125,
1283
+ "learning_rate": 0.00017625016492941022,
1284
+ "loss": 0.0195,
1285
+ "step": 905
1286
+ },
1287
+ {
1288
+ "epoch": 0.23993144815766923,
1289
+ "grad_norm": 0.6171875,
1290
+ "learning_rate": 0.0001761182214012403,
1291
+ "loss": 0.067,
1292
+ "step": 910
1293
+ },
1294
+ {
1295
+ "epoch": 0.24124975281787622,
1296
+ "grad_norm": 0.59765625,
1297
+ "learning_rate": 0.00017598627787307033,
1298
+ "loss": 0.049,
1299
+ "step": 915
1300
+ },
1301
+ {
1302
+ "epoch": 0.2425680574780832,
1303
+ "grad_norm": 1.2421875,
1304
+ "learning_rate": 0.0001758543343449004,
1305
+ "loss": 0.0539,
1306
+ "step": 920
1307
+ },
1308
+ {
1309
+ "epoch": 0.24388636213829015,
1310
+ "grad_norm": 0.10302734375,
1311
+ "learning_rate": 0.00017572239081673044,
1312
+ "loss": 0.0725,
1313
+ "step": 925
1314
+ },
1315
+ {
1316
+ "epoch": 0.24520466679849715,
1317
+ "grad_norm": 0.330078125,
1318
+ "learning_rate": 0.0001755904472885605,
1319
+ "loss": 0.064,
1320
+ "step": 930
1321
+ },
1322
+ {
1323
+ "epoch": 0.2465229714587041,
1324
+ "grad_norm": 0.220703125,
1325
+ "learning_rate": 0.00017545850376039058,
1326
+ "loss": 0.0271,
1327
+ "step": 935
1328
+ },
1329
+ {
1330
+ "epoch": 0.24784127611891107,
1331
+ "grad_norm": 0.01470947265625,
1332
+ "learning_rate": 0.00017532656023222062,
1333
+ "loss": 0.0247,
1334
+ "step": 940
1335
+ },
1336
+ {
1337
+ "epoch": 0.24915958077911807,
1338
+ "grad_norm": 0.013427734375,
1339
+ "learning_rate": 0.0001751946167040507,
1340
+ "loss": 0.017,
1341
+ "step": 945
1342
+ },
1343
+ {
1344
+ "epoch": 0.25047788543932503,
1345
+ "grad_norm": 0.58984375,
1346
+ "learning_rate": 0.00017506267317588073,
1347
+ "loss": 0.0254,
1348
+ "step": 950
1349
+ },
1350
+ {
1351
+ "epoch": 0.251796190099532,
1352
+ "grad_norm": 0.412109375,
1353
+ "learning_rate": 0.00017493072964771078,
1354
+ "loss": 0.0186,
1355
+ "step": 955
1356
+ },
1357
+ {
1358
+ "epoch": 0.25311449475973896,
1359
+ "grad_norm": 0.66796875,
1360
+ "learning_rate": 0.00017479878611954084,
1361
+ "loss": 0.0617,
1362
+ "step": 960
1363
+ },
1364
+ {
1365
+ "epoch": 0.25443279941994595,
1366
+ "grad_norm": 0.322265625,
1367
+ "learning_rate": 0.00017466684259137089,
1368
+ "loss": 0.0173,
1369
+ "step": 965
1370
+ },
1371
+ {
1372
+ "epoch": 0.25575110408015295,
1373
+ "grad_norm": 0.83203125,
1374
+ "learning_rate": 0.00017453489906320096,
1375
+ "loss": 0.0512,
1376
+ "step": 970
1377
+ },
1378
+ {
1379
+ "epoch": 0.2570694087403599,
1380
+ "grad_norm": 0.08447265625,
1381
+ "learning_rate": 0.000174402955535031,
1382
+ "loss": 0.0361,
1383
+ "step": 975
1384
+ },
1385
+ {
1386
+ "epoch": 0.2583877134005669,
1387
+ "grad_norm": 0.423828125,
1388
+ "learning_rate": 0.00017427101200686107,
1389
+ "loss": 0.0175,
1390
+ "step": 980
1391
+ },
1392
+ {
1393
+ "epoch": 0.25970601806077387,
1394
+ "grad_norm": 0.77734375,
1395
+ "learning_rate": 0.00017413906847869114,
1396
+ "loss": 0.0139,
1397
+ "step": 985
1398
+ },
1399
+ {
1400
+ "epoch": 0.2610243227209808,
1401
+ "grad_norm": 0.515625,
1402
+ "learning_rate": 0.00017400712495052118,
1403
+ "loss": 0.0948,
1404
+ "step": 990
1405
+ },
1406
+ {
1407
+ "epoch": 0.2623426273811878,
1408
+ "grad_norm": 1.421875,
1409
+ "learning_rate": 0.00017387518142235125,
1410
+ "loss": 0.0406,
1411
+ "step": 995
1412
+ },
1413
+ {
1414
+ "epoch": 0.2636609320413948,
1415
+ "grad_norm": 0.058837890625,
1416
+ "learning_rate": 0.0001737432378941813,
1417
+ "loss": 0.1011,
1418
+ "step": 1000
1419
+ },
1420
+ {
1421
+ "epoch": 0.2636609320413948,
1422
+ "eval_loss": 0.045552924275398254,
1423
+ "eval_model_preparation_time": 0.0076,
1424
+ "eval_runtime": 457.6113,
1425
+ "eval_samples_per_second": 7.369,
1426
+ "eval_steps_per_second": 3.684,
1427
+ "step": 1000
1428
+ },
1429
+ {
1430
+ "epoch": 0.26497923670160173,
1431
+ "grad_norm": 0.380859375,
1432
+ "learning_rate": 0.00017361129436601136,
1433
+ "loss": 0.0711,
1434
+ "step": 1005
1435
+ },
1436
+ {
1437
+ "epoch": 0.2662975413618087,
1438
+ "grad_norm": 0.0208740234375,
1439
+ "learning_rate": 0.00017347935083784143,
1440
+ "loss": 0.0218,
1441
+ "step": 1010
1442
+ },
1443
+ {
1444
+ "epoch": 0.2676158460220157,
1445
+ "grad_norm": 0.04345703125,
1446
+ "learning_rate": 0.00017334740730967147,
1447
+ "loss": 0.0301,
1448
+ "step": 1015
1449
+ },
1450
+ {
1451
+ "epoch": 0.26893415068222265,
1452
+ "grad_norm": 0.2734375,
1453
+ "learning_rate": 0.00017321546378150154,
1454
+ "loss": 0.0721,
1455
+ "step": 1020
1456
+ },
1457
+ {
1458
+ "epoch": 0.27025245534242964,
1459
+ "grad_norm": 0.25390625,
1460
+ "learning_rate": 0.00017308352025333158,
1461
+ "loss": 0.0363,
1462
+ "step": 1025
1463
+ },
1464
+ {
1465
+ "epoch": 0.27157076000263664,
1466
+ "grad_norm": 0.04345703125,
1467
+ "learning_rate": 0.00017295157672516165,
1468
+ "loss": 0.0313,
1469
+ "step": 1030
1470
+ },
1471
+ {
1472
+ "epoch": 0.2728890646628436,
1473
+ "grad_norm": 0.0211181640625,
1474
+ "learning_rate": 0.0001728196331969917,
1475
+ "loss": 0.0385,
1476
+ "step": 1035
1477
+ },
1478
+ {
1479
+ "epoch": 0.27420736932305056,
1480
+ "grad_norm": 0.00787353515625,
1481
+ "learning_rate": 0.00017268768966882176,
1482
+ "loss": 0.0405,
1483
+ "step": 1040
1484
+ },
1485
+ {
1486
+ "epoch": 0.27552567398325756,
1487
+ "grad_norm": 0.484375,
1488
+ "learning_rate": 0.00017255574614065183,
1489
+ "loss": 0.0616,
1490
+ "step": 1045
1491
+ },
1492
+ {
1493
+ "epoch": 0.2768439786434645,
1494
+ "grad_norm": 0.0908203125,
1495
+ "learning_rate": 0.00017242380261248185,
1496
+ "loss": 0.0057,
1497
+ "step": 1050
1498
+ },
1499
+ {
1500
+ "epoch": 0.2781622833036715,
1501
+ "grad_norm": 0.1904296875,
1502
+ "learning_rate": 0.00017229185908431192,
1503
+ "loss": 0.0417,
1504
+ "step": 1055
1505
+ },
1506
+ {
1507
+ "epoch": 0.2794805879638785,
1508
+ "grad_norm": 0.30078125,
1509
+ "learning_rate": 0.00017215991555614196,
1510
+ "loss": 0.0346,
1511
+ "step": 1060
1512
+ },
1513
+ {
1514
+ "epoch": 0.2807988926240854,
1515
+ "grad_norm": 0.016357421875,
1516
+ "learning_rate": 0.00017202797202797203,
1517
+ "loss": 0.0295,
1518
+ "step": 1065
1519
+ },
1520
+ {
1521
+ "epoch": 0.2821171972842924,
1522
+ "grad_norm": 0.490234375,
1523
+ "learning_rate": 0.0001718960284998021,
1524
+ "loss": 0.0448,
1525
+ "step": 1070
1526
+ },
1527
+ {
1528
+ "epoch": 0.28343550194449935,
1529
+ "grad_norm": 0.004241943359375,
1530
+ "learning_rate": 0.00017176408497163214,
1531
+ "loss": 0.0051,
1532
+ "step": 1075
1533
+ },
1534
+ {
1535
+ "epoch": 0.28475380660470634,
1536
+ "grad_norm": 0.01904296875,
1537
+ "learning_rate": 0.0001716321414434622,
1538
+ "loss": 0.0894,
1539
+ "step": 1080
1540
+ },
1541
+ {
1542
+ "epoch": 0.28607211126491333,
1543
+ "grad_norm": 0.83984375,
1544
+ "learning_rate": 0.00017150019791529225,
1545
+ "loss": 0.0288,
1546
+ "step": 1085
1547
+ },
1548
+ {
1549
+ "epoch": 0.28739041592512027,
1550
+ "grad_norm": 0.2021484375,
1551
+ "learning_rate": 0.00017136825438712232,
1552
+ "loss": 0.0222,
1553
+ "step": 1090
1554
+ },
1555
+ {
1556
+ "epoch": 0.28870872058532726,
1557
+ "grad_norm": 0.322265625,
1558
+ "learning_rate": 0.0001712363108589524,
1559
+ "loss": 0.0444,
1560
+ "step": 1095
1561
+ },
1562
+ {
1563
+ "epoch": 0.29002702524553425,
1564
+ "grad_norm": 0.408203125,
1565
+ "learning_rate": 0.00017110436733078243,
1566
+ "loss": 0.0828,
1567
+ "step": 1100
1568
+ },
1569
+ {
1570
+ "epoch": 0.2913453299057412,
1571
+ "grad_norm": 0.04052734375,
1572
+ "learning_rate": 0.0001709724238026125,
1573
+ "loss": 0.0725,
1574
+ "step": 1105
1575
+ },
1576
+ {
1577
+ "epoch": 0.2926636345659482,
1578
+ "grad_norm": 0.2578125,
1579
+ "learning_rate": 0.00017084048027444254,
1580
+ "loss": 0.0204,
1581
+ "step": 1110
1582
+ },
1583
+ {
1584
+ "epoch": 0.2939819392261552,
1585
+ "grad_norm": 0.67578125,
1586
+ "learning_rate": 0.0001707085367462726,
1587
+ "loss": 0.0503,
1588
+ "step": 1115
1589
+ },
1590
+ {
1591
+ "epoch": 0.2953002438863621,
1592
+ "grad_norm": 0.0059814453125,
1593
+ "learning_rate": 0.00017057659321810265,
1594
+ "loss": 0.0144,
1595
+ "step": 1120
1596
+ },
1597
+ {
1598
+ "epoch": 0.2966185485465691,
1599
+ "grad_norm": 0.0269775390625,
1600
+ "learning_rate": 0.00017044464968993272,
1601
+ "loss": 0.0044,
1602
+ "step": 1125
1603
+ },
1604
+ {
1605
+ "epoch": 0.2979368532067761,
1606
+ "grad_norm": 0.1396484375,
1607
+ "learning_rate": 0.0001703127061617628,
1608
+ "loss": 0.013,
1609
+ "step": 1130
1610
+ },
1611
+ {
1612
+ "epoch": 0.29925515786698303,
1613
+ "grad_norm": 0.287109375,
1614
+ "learning_rate": 0.00017018076263359283,
1615
+ "loss": 0.0245,
1616
+ "step": 1135
1617
+ },
1618
+ {
1619
+ "epoch": 0.30057346252719,
1620
+ "grad_norm": 0.26171875,
1621
+ "learning_rate": 0.0001700488191054229,
1622
+ "loss": 0.0247,
1623
+ "step": 1140
1624
+ },
1625
+ {
1626
+ "epoch": 0.301891767187397,
1627
+ "grad_norm": 0.40625,
1628
+ "learning_rate": 0.00016991687557725294,
1629
+ "loss": 0.0402,
1630
+ "step": 1145
1631
+ },
1632
+ {
1633
+ "epoch": 0.30321007184760396,
1634
+ "grad_norm": 1.2578125,
1635
+ "learning_rate": 0.000169784932049083,
1636
+ "loss": 0.0071,
1637
+ "step": 1150
1638
+ },
1639
+ {
1640
+ "epoch": 0.30452837650781095,
1641
+ "grad_norm": 0.330078125,
1642
+ "learning_rate": 0.00016965298852091306,
1643
+ "loss": 0.0177,
1644
+ "step": 1155
1645
+ },
1646
+ {
1647
+ "epoch": 0.30584668116801794,
1648
+ "grad_norm": 0.07275390625,
1649
+ "learning_rate": 0.0001695210449927431,
1650
+ "loss": 0.0029,
1651
+ "step": 1160
1652
+ },
1653
+ {
1654
+ "epoch": 0.3071649858282249,
1655
+ "grad_norm": 0.455078125,
1656
+ "learning_rate": 0.00016938910146457317,
1657
+ "loss": 0.0262,
1658
+ "step": 1165
1659
+ },
1660
+ {
1661
+ "epoch": 0.30848329048843187,
1662
+ "grad_norm": 0.002655029296875,
1663
+ "learning_rate": 0.0001692571579364032,
1664
+ "loss": 0.0346,
1665
+ "step": 1170
1666
+ },
1667
+ {
1668
+ "epoch": 0.30980159514863886,
1669
+ "grad_norm": 0.1748046875,
1670
+ "learning_rate": 0.00016912521440823328,
1671
+ "loss": 0.0494,
1672
+ "step": 1175
1673
+ },
1674
+ {
1675
+ "epoch": 0.3111198998088458,
1676
+ "grad_norm": 1.4609375,
1677
+ "learning_rate": 0.00016899327088006335,
1678
+ "loss": 0.0603,
1679
+ "step": 1180
1680
+ },
1681
+ {
1682
+ "epoch": 0.3124382044690528,
1683
+ "grad_norm": 0.1572265625,
1684
+ "learning_rate": 0.0001688613273518934,
1685
+ "loss": 0.0366,
1686
+ "step": 1185
1687
+ },
1688
+ {
1689
+ "epoch": 0.3137565091292598,
1690
+ "grad_norm": 0.01422119140625,
1691
+ "learning_rate": 0.00016872938382372346,
1692
+ "loss": 0.0678,
1693
+ "step": 1190
1694
+ },
1695
+ {
1696
+ "epoch": 0.3150748137894667,
1697
+ "grad_norm": 0.2412109375,
1698
+ "learning_rate": 0.0001685974402955535,
1699
+ "loss": 0.0359,
1700
+ "step": 1195
1701
+ },
1702
+ {
1703
+ "epoch": 0.3163931184496737,
1704
+ "grad_norm": 0.275390625,
1705
+ "learning_rate": 0.00016846549676738357,
1706
+ "loss": 0.1099,
1707
+ "step": 1200
1708
+ },
1709
+ {
1710
+ "epoch": 0.3177114231098807,
1711
+ "grad_norm": 0.212890625,
1712
+ "learning_rate": 0.00016833355323921364,
1713
+ "loss": 0.0343,
1714
+ "step": 1205
1715
+ },
1716
+ {
1717
+ "epoch": 0.31902972777008765,
1718
+ "grad_norm": 0.0302734375,
1719
+ "learning_rate": 0.00016820160971104368,
1720
+ "loss": 0.0138,
1721
+ "step": 1210
1722
+ },
1723
+ {
1724
+ "epoch": 0.32034803243029464,
1725
+ "grad_norm": 0.016845703125,
1726
+ "learning_rate": 0.00016806966618287375,
1727
+ "loss": 0.0202,
1728
+ "step": 1215
1729
+ },
1730
+ {
1731
+ "epoch": 0.32166633709050163,
1732
+ "grad_norm": 0.1474609375,
1733
+ "learning_rate": 0.0001679377226547038,
1734
+ "loss": 0.0442,
1735
+ "step": 1220
1736
+ },
1737
+ {
1738
+ "epoch": 0.32298464175070857,
1739
+ "grad_norm": 0.049072265625,
1740
+ "learning_rate": 0.00016780577912653386,
1741
+ "loss": 0.0375,
1742
+ "step": 1225
1743
+ },
1744
+ {
1745
+ "epoch": 0.32430294641091556,
1746
+ "grad_norm": 0.1337890625,
1747
+ "learning_rate": 0.0001676738355983639,
1748
+ "loss": 0.01,
1749
+ "step": 1230
1750
+ },
1751
+ {
1752
+ "epoch": 0.32562125107112255,
1753
+ "grad_norm": 0.02197265625,
1754
+ "learning_rate": 0.00016754189207019397,
1755
+ "loss": 0.0139,
1756
+ "step": 1235
1757
+ },
1758
+ {
1759
+ "epoch": 0.3269395557313295,
1760
+ "grad_norm": 0.09228515625,
1761
+ "learning_rate": 0.00016740994854202404,
1762
+ "loss": 0.014,
1763
+ "step": 1240
1764
+ },
1765
+ {
1766
+ "epoch": 0.3282578603915365,
1767
+ "grad_norm": 0.47265625,
1768
+ "learning_rate": 0.00016727800501385408,
1769
+ "loss": 0.1546,
1770
+ "step": 1245
1771
+ },
1772
+ {
1773
+ "epoch": 0.3295761650517435,
1774
+ "grad_norm": 0.02294921875,
1775
+ "learning_rate": 0.00016714606148568413,
1776
+ "loss": 0.0803,
1777
+ "step": 1250
1778
+ },
1779
+ {
1780
+ "epoch": 0.3308944697119504,
1781
+ "grad_norm": 0.185546875,
1782
+ "learning_rate": 0.00016701411795751417,
1783
+ "loss": 0.0376,
1784
+ "step": 1255
1785
+ },
1786
+ {
1787
+ "epoch": 0.3322127743721574,
1788
+ "grad_norm": 0.1123046875,
1789
+ "learning_rate": 0.00016688217442934424,
1790
+ "loss": 0.0375,
1791
+ "step": 1260
1792
+ },
1793
+ {
1794
+ "epoch": 0.3335310790323644,
1795
+ "grad_norm": 1.03125,
1796
+ "learning_rate": 0.0001667502309011743,
1797
+ "loss": 0.0442,
1798
+ "step": 1265
1799
+ },
1800
+ {
1801
+ "epoch": 0.33484938369257133,
1802
+ "grad_norm": 0.0172119140625,
1803
+ "learning_rate": 0.00016661828737300435,
1804
+ "loss": 0.0261,
1805
+ "step": 1270
1806
+ },
1807
+ {
1808
+ "epoch": 0.3361676883527783,
1809
+ "grad_norm": 0.42578125,
1810
+ "learning_rate": 0.00016648634384483442,
1811
+ "loss": 0.0553,
1812
+ "step": 1275
1813
+ },
1814
+ {
1815
+ "epoch": 0.3374859930129853,
1816
+ "grad_norm": 0.1328125,
1817
+ "learning_rate": 0.00016635440031666446,
1818
+ "loss": 0.0065,
1819
+ "step": 1280
1820
+ },
1821
+ {
1822
+ "epoch": 0.33880429767319226,
1823
+ "grad_norm": 0.263671875,
1824
+ "learning_rate": 0.00016622245678849453,
1825
+ "loss": 0.0527,
1826
+ "step": 1285
1827
+ },
1828
+ {
1829
+ "epoch": 0.34012260233339925,
1830
+ "grad_norm": 0.314453125,
1831
+ "learning_rate": 0.0001660905132603246,
1832
+ "loss": 0.0297,
1833
+ "step": 1290
1834
+ },
1835
+ {
1836
+ "epoch": 0.34144090699360624,
1837
+ "grad_norm": 0.04345703125,
1838
+ "learning_rate": 0.00016595856973215464,
1839
+ "loss": 0.0477,
1840
+ "step": 1295
1841
+ },
1842
+ {
1843
+ "epoch": 0.3427592116538132,
1844
+ "grad_norm": 0.08154296875,
1845
+ "learning_rate": 0.0001658266262039847,
1846
+ "loss": 0.0298,
1847
+ "step": 1300
1848
+ },
1849
+ {
1850
+ "epoch": 0.34407751631402017,
1851
+ "grad_norm": 0.08935546875,
1852
+ "learning_rate": 0.00016569468267581475,
1853
+ "loss": 0.0481,
1854
+ "step": 1305
1855
+ },
1856
+ {
1857
+ "epoch": 0.34539582097422716,
1858
+ "grad_norm": 0.06640625,
1859
+ "learning_rate": 0.00016556273914764482,
1860
+ "loss": 0.0153,
1861
+ "step": 1310
1862
+ },
1863
+ {
1864
+ "epoch": 0.3467141256344341,
1865
+ "grad_norm": 0.00592041015625,
1866
+ "learning_rate": 0.00016543079561947486,
1867
+ "loss": 0.0111,
1868
+ "step": 1315
1869
+ },
1870
+ {
1871
+ "epoch": 0.3480324302946411,
1872
+ "grad_norm": 0.2236328125,
1873
+ "learning_rate": 0.00016529885209130493,
1874
+ "loss": 0.0309,
1875
+ "step": 1320
1876
+ },
1877
+ {
1878
+ "epoch": 0.3493507349548481,
1879
+ "grad_norm": 0.0198974609375,
1880
+ "learning_rate": 0.000165166908563135,
1881
+ "loss": 0.0579,
1882
+ "step": 1325
1883
+ },
1884
+ {
1885
+ "epoch": 0.350669039615055,
1886
+ "grad_norm": 0.10107421875,
1887
+ "learning_rate": 0.00016503496503496504,
1888
+ "loss": 0.0055,
1889
+ "step": 1330
1890
+ },
1891
+ {
1892
+ "epoch": 0.351987344275262,
1893
+ "grad_norm": 0.71875,
1894
+ "learning_rate": 0.00016490302150679511,
1895
+ "loss": 0.0299,
1896
+ "step": 1335
1897
+ },
1898
+ {
1899
+ "epoch": 0.353305648935469,
1900
+ "grad_norm": 0.01348876953125,
1901
+ "learning_rate": 0.00016477107797862516,
1902
+ "loss": 0.0943,
1903
+ "step": 1340
1904
+ },
1905
+ {
1906
+ "epoch": 0.35462395359567594,
1907
+ "grad_norm": 0.3046875,
1908
+ "learning_rate": 0.00016463913445045523,
1909
+ "loss": 0.0216,
1910
+ "step": 1345
1911
+ },
1912
+ {
1913
+ "epoch": 0.35594225825588294,
1914
+ "grad_norm": 0.02392578125,
1915
+ "learning_rate": 0.00016450719092228527,
1916
+ "loss": 0.0265,
1917
+ "step": 1350
1918
+ },
1919
+ {
1920
+ "epoch": 0.35726056291608993,
1921
+ "grad_norm": 0.453125,
1922
+ "learning_rate": 0.0001643752473941153,
1923
+ "loss": 0.0539,
1924
+ "step": 1355
1925
+ },
1926
+ {
1927
+ "epoch": 0.35857886757629687,
1928
+ "grad_norm": 0.00823974609375,
1929
+ "learning_rate": 0.00016424330386594538,
1930
+ "loss": 0.0139,
1931
+ "step": 1360
1932
+ },
1933
+ {
1934
+ "epoch": 0.35989717223650386,
1935
+ "grad_norm": 0.55859375,
1936
+ "learning_rate": 0.00016411136033777542,
1937
+ "loss": 0.0428,
1938
+ "step": 1365
1939
+ },
1940
+ {
1941
+ "epoch": 0.36121547689671085,
1942
+ "grad_norm": 0.052734375,
1943
+ "learning_rate": 0.0001639794168096055,
1944
+ "loss": 0.0346,
1945
+ "step": 1370
1946
+ },
1947
+ {
1948
+ "epoch": 0.3625337815569178,
1949
+ "grad_norm": 0.12158203125,
1950
+ "learning_rate": 0.00016384747328143556,
1951
+ "loss": 0.0095,
1952
+ "step": 1375
1953
+ },
1954
+ {
1955
+ "epoch": 0.3638520862171248,
1956
+ "grad_norm": 0.0240478515625,
1957
+ "learning_rate": 0.0001637155297532656,
1958
+ "loss": 0.0224,
1959
+ "step": 1380
1960
+ },
1961
+ {
1962
+ "epoch": 0.3651703908773318,
1963
+ "grad_norm": 0.01318359375,
1964
+ "learning_rate": 0.00016358358622509567,
1965
+ "loss": 0.0316,
1966
+ "step": 1385
1967
+ },
1968
+ {
1969
+ "epoch": 0.3664886955375387,
1970
+ "grad_norm": 0.011962890625,
1971
+ "learning_rate": 0.0001634516426969257,
1972
+ "loss": 0.0051,
1973
+ "step": 1390
1974
+ },
1975
+ {
1976
+ "epoch": 0.3678070001977457,
1977
+ "grad_norm": 0.00396728515625,
1978
+ "learning_rate": 0.00016331969916875578,
1979
+ "loss": 0.038,
1980
+ "step": 1395
1981
+ },
1982
+ {
1983
+ "epoch": 0.3691253048579527,
1984
+ "grad_norm": 0.375,
1985
+ "learning_rate": 0.00016318775564058585,
1986
+ "loss": 0.029,
1987
+ "step": 1400
1988
+ },
1989
+ {
1990
+ "epoch": 0.37044360951815963,
1991
+ "grad_norm": 0.265625,
1992
+ "learning_rate": 0.0001630558121124159,
1993
+ "loss": 0.0072,
1994
+ "step": 1405
1995
+ },
1996
+ {
1997
+ "epoch": 0.3717619141783666,
1998
+ "grad_norm": 0.00127410888671875,
1999
+ "learning_rate": 0.00016292386858424596,
2000
+ "loss": 0.0381,
2001
+ "step": 1410
2002
+ },
2003
+ {
2004
+ "epoch": 0.3730802188385736,
2005
+ "grad_norm": 1.15625,
2006
+ "learning_rate": 0.000162791925056076,
2007
+ "loss": 0.0573,
2008
+ "step": 1415
2009
+ },
2010
+ {
2011
+ "epoch": 0.37439852349878056,
2012
+ "grad_norm": 0.0244140625,
2013
+ "learning_rate": 0.00016265998152790607,
2014
+ "loss": 0.051,
2015
+ "step": 1420
2016
+ },
2017
+ {
2018
+ "epoch": 0.37571682815898755,
2019
+ "grad_norm": 0.0015106201171875,
2020
+ "learning_rate": 0.00016252803799973612,
2021
+ "loss": 0.0239,
2022
+ "step": 1425
2023
+ },
2024
+ {
2025
+ "epoch": 0.37703513281919454,
2026
+ "grad_norm": 0.26953125,
2027
+ "learning_rate": 0.00016239609447156618,
2028
+ "loss": 0.0165,
2029
+ "step": 1430
2030
+ },
2031
+ {
2032
+ "epoch": 0.3783534374794015,
2033
+ "grad_norm": 0.006134033203125,
2034
+ "learning_rate": 0.00016226415094339625,
2035
+ "loss": 0.0071,
2036
+ "step": 1435
2037
+ },
2038
+ {
2039
+ "epoch": 0.37967174213960847,
2040
+ "grad_norm": 2.828125,
2041
+ "learning_rate": 0.0001621322074152263,
2042
+ "loss": 0.0272,
2043
+ "step": 1440
2044
+ },
2045
+ {
2046
+ "epoch": 0.38099004679981546,
2047
+ "grad_norm": 0.349609375,
2048
+ "learning_rate": 0.00016200026388705637,
2049
+ "loss": 0.0647,
2050
+ "step": 1445
2051
+ },
2052
+ {
2053
+ "epoch": 0.3823083514600224,
2054
+ "grad_norm": 0.09326171875,
2055
+ "learning_rate": 0.00016186832035888638,
2056
+ "loss": 0.0262,
2057
+ "step": 1450
2058
+ },
2059
+ {
2060
+ "epoch": 0.3836266561202294,
2061
+ "grad_norm": 0.041015625,
2062
+ "learning_rate": 0.00016173637683071645,
2063
+ "loss": 0.0576,
2064
+ "step": 1455
2065
+ },
2066
+ {
2067
+ "epoch": 0.3849449607804364,
2068
+ "grad_norm": 0.033935546875,
2069
+ "learning_rate": 0.00016160443330254652,
2070
+ "loss": 0.0142,
2071
+ "step": 1460
2072
+ },
2073
+ {
2074
+ "epoch": 0.3862632654406433,
2075
+ "grad_norm": 0.09130859375,
2076
+ "learning_rate": 0.00016147248977437656,
2077
+ "loss": 0.0348,
2078
+ "step": 1465
2079
+ },
2080
+ {
2081
+ "epoch": 0.3875815701008503,
2082
+ "grad_norm": 2.390625,
2083
+ "learning_rate": 0.00016134054624620663,
2084
+ "loss": 0.0672,
2085
+ "step": 1470
2086
+ },
2087
+ {
2088
+ "epoch": 0.3888998747610573,
2089
+ "grad_norm": 0.439453125,
2090
+ "learning_rate": 0.00016120860271803667,
2091
+ "loss": 0.0121,
2092
+ "step": 1475
2093
+ },
2094
+ {
2095
+ "epoch": 0.39021817942126424,
2096
+ "grad_norm": 0.1298828125,
2097
+ "learning_rate": 0.00016107665918986674,
2098
+ "loss": 0.0114,
2099
+ "step": 1480
2100
+ },
2101
+ {
2102
+ "epoch": 0.39153648408147124,
2103
+ "grad_norm": 0.85546875,
2104
+ "learning_rate": 0.0001609447156616968,
2105
+ "loss": 0.0968,
2106
+ "step": 1485
2107
+ },
2108
+ {
2109
+ "epoch": 0.39285478874167823,
2110
+ "grad_norm": 0.703125,
2111
+ "learning_rate": 0.00016081277213352685,
2112
+ "loss": 0.0349,
2113
+ "step": 1490
2114
+ },
2115
+ {
2116
+ "epoch": 0.39417309340188517,
2117
+ "grad_norm": 0.021728515625,
2118
+ "learning_rate": 0.00016068082860535692,
2119
+ "loss": 0.0106,
2120
+ "step": 1495
2121
+ },
2122
+ {
2123
+ "epoch": 0.39549139806209216,
2124
+ "grad_norm": 0.7265625,
2125
+ "learning_rate": 0.00016054888507718696,
2126
+ "loss": 0.0225,
2127
+ "step": 1500
2128
+ },
2129
+ {
2130
+ "epoch": 0.39549139806209216,
2131
+ "eval_loss": 0.03515048325061798,
2132
+ "eval_model_preparation_time": 0.0076,
2133
+ "eval_runtime": 457.3497,
2134
+ "eval_samples_per_second": 7.373,
2135
+ "eval_steps_per_second": 3.686,
2136
+ "step": 1500
2137
+ },
2138
+ {
2139
+ "epoch": 0.3968097027222991,
2140
+ "grad_norm": 0.016519820317626,
2141
+ "learning_rate": 0.00016041694154901703,
2142
+ "loss": 0.0202,
2143
+ "step": 1505
2144
+ },
2145
+ {
2146
+ "epoch": 0.3981280073825061,
2147
+ "grad_norm": 0.8505942225456238,
2148
+ "learning_rate": 0.00016028499802084708,
2149
+ "loss": 0.0541,
2150
+ "step": 1510
2151
+ },
2152
+ {
2153
+ "epoch": 0.3994463120427131,
2154
+ "grad_norm": 0.04163295030593872,
2155
+ "learning_rate": 0.00016015305449267714,
2156
+ "loss": 0.0037,
2157
+ "step": 1515
2158
+ },
2159
+ {
2160
+ "epoch": 0.40076461670292,
2161
+ "grad_norm": 0.011332935653626919,
2162
+ "learning_rate": 0.00016002111096450721,
2163
+ "loss": 0.0459,
2164
+ "step": 1520
2165
+ },
2166
+ {
2167
+ "epoch": 0.402082921363127,
2168
+ "grad_norm": 0.9360129833221436,
2169
+ "learning_rate": 0.00015988916743633726,
2170
+ "loss": 0.013,
2171
+ "step": 1525
2172
+ },
2173
+ {
2174
+ "epoch": 0.403401226023334,
2175
+ "grad_norm": 0.11991436779499054,
2176
+ "learning_rate": 0.00015975722390816733,
2177
+ "loss": 0.0079,
2178
+ "step": 1530
2179
+ },
2180
+ {
2181
+ "epoch": 0.40471953068354094,
2182
+ "grad_norm": 0.36911076307296753,
2183
+ "learning_rate": 0.00015962528037999737,
2184
+ "loss": 0.0638,
2185
+ "step": 1535
2186
+ },
2187
+ {
2188
+ "epoch": 0.40603783534374793,
2189
+ "grad_norm": 0.020278634503483772,
2190
+ "learning_rate": 0.00015949333685182744,
2191
+ "loss": 0.0217,
2192
+ "step": 1540
2193
+ },
2194
+ {
2195
+ "epoch": 0.4073561400039549,
2196
+ "grad_norm": 0.14263059198856354,
2197
+ "learning_rate": 0.0001593613933236575,
2198
+ "loss": 0.0495,
2199
+ "step": 1545
2200
+ },
2201
+ {
2202
+ "epoch": 0.40867444466416186,
2203
+ "grad_norm": 0.09494803845882416,
2204
+ "learning_rate": 0.00015922944979548752,
2205
+ "loss": 0.0248,
2206
+ "step": 1550
2207
+ },
2208
+ {
2209
+ "epoch": 0.40999274932436885,
2210
+ "grad_norm": 0.23064319789409637,
2211
+ "learning_rate": 0.0001590975062673176,
2212
+ "loss": 0.0285,
2213
+ "step": 1555
2214
+ },
2215
+ {
2216
+ "epoch": 0.41131105398457585,
2217
+ "grad_norm": 0.32220256328582764,
2218
+ "learning_rate": 0.00015896556273914763,
2219
+ "loss": 0.0537,
2220
+ "step": 1560
2221
+ },
2222
+ {
2223
+ "epoch": 0.4126293586447828,
2224
+ "grad_norm": 0.41208815574645996,
2225
+ "learning_rate": 0.0001588336192109777,
2226
+ "loss": 0.0453,
2227
+ "step": 1565
2228
+ },
2229
+ {
2230
+ "epoch": 0.4139476633049898,
2231
+ "grad_norm": 0.03775424137711525,
2232
+ "learning_rate": 0.00015870167568280777,
2233
+ "loss": 0.0134,
2234
+ "step": 1570
2235
+ },
2236
+ {
2237
+ "epoch": 0.41526596796519677,
2238
+ "grad_norm": 0.6526333093643188,
2239
+ "learning_rate": 0.0001585697321546378,
2240
+ "loss": 0.0329,
2241
+ "step": 1575
2242
+ },
2243
+ {
2244
+ "epoch": 0.4165842726254037,
2245
+ "grad_norm": 1.001305103302002,
2246
+ "learning_rate": 0.00015843778862646788,
2247
+ "loss": 0.0912,
2248
+ "step": 1580
2249
+ },
2250
+ {
2251
+ "epoch": 0.4179025772856107,
2252
+ "grad_norm": 0.4055219888687134,
2253
+ "learning_rate": 0.00015830584509829792,
2254
+ "loss": 0.0519,
2255
+ "step": 1585
2256
+ },
2257
+ {
2258
+ "epoch": 0.4192208819458177,
2259
+ "grad_norm": 0.035015616565942764,
2260
+ "learning_rate": 0.000158173901570128,
2261
+ "loss": 0.0191,
2262
+ "step": 1590
2263
+ },
2264
+ {
2265
+ "epoch": 0.42053918660602463,
2266
+ "grad_norm": 0.09326844662427902,
2267
+ "learning_rate": 0.00015804195804195806,
2268
+ "loss": 0.0106,
2269
+ "step": 1595
2270
+ },
2271
+ {
2272
+ "epoch": 0.4218574912662316,
2273
+ "grad_norm": 0.06223440542817116,
2274
+ "learning_rate": 0.0001579100145137881,
2275
+ "loss": 0.0113,
2276
+ "step": 1600
2277
+ },
2278
+ {
2279
+ "epoch": 0.4231757959264386,
2280
+ "grad_norm": 0.0625135526061058,
2281
+ "learning_rate": 0.00015777807098561817,
2282
+ "loss": 0.0191,
2283
+ "step": 1605
2284
+ },
2285
+ {
2286
+ "epoch": 0.42449410058664555,
2287
+ "grad_norm": 0.2645983099937439,
2288
+ "learning_rate": 0.00015764612745744822,
2289
+ "loss": 0.0829,
2290
+ "step": 1610
2291
+ },
2292
+ {
2293
+ "epoch": 0.42581240524685254,
2294
+ "grad_norm": 0.009632415138185024,
2295
+ "learning_rate": 0.00015751418392927829,
2296
+ "loss": 0.0542,
2297
+ "step": 1615
2298
+ },
2299
+ {
2300
+ "epoch": 0.42713070990705954,
2301
+ "grad_norm": 0.01979319378733635,
2302
+ "learning_rate": 0.00015738224040110833,
2303
+ "loss": 0.0517,
2304
+ "step": 1620
2305
+ },
2306
+ {
2307
+ "epoch": 0.4284490145672665,
2308
+ "grad_norm": 0.3065454065799713,
2309
+ "learning_rate": 0.0001572502968729384,
2310
+ "loss": 0.0738,
2311
+ "step": 1625
2312
+ },
2313
+ {
2314
+ "epoch": 0.42976731922747347,
2315
+ "grad_norm": 0.09581473469734192,
2316
+ "learning_rate": 0.00015711835334476847,
2317
+ "loss": 0.0571,
2318
+ "step": 1630
2319
+ },
2320
+ {
2321
+ "epoch": 0.43108562388768046,
2322
+ "grad_norm": 0.23746591806411743,
2323
+ "learning_rate": 0.0001569864098165985,
2324
+ "loss": 0.0128,
2325
+ "step": 1635
2326
+ },
2327
+ {
2328
+ "epoch": 0.4324039285478874,
2329
+ "grad_norm": 0.936278760433197,
2330
+ "learning_rate": 0.00015685446628842858,
2331
+ "loss": 0.0665,
2332
+ "step": 1640
2333
+ },
2334
+ {
2335
+ "epoch": 0.4337222332080944,
2336
+ "grad_norm": 0.18487441539764404,
2337
+ "learning_rate": 0.00015672252276025862,
2338
+ "loss": 0.0527,
2339
+ "step": 1645
2340
+ },
2341
+ {
2342
+ "epoch": 0.4350405378683014,
2343
+ "grad_norm": 0.6980624794960022,
2344
+ "learning_rate": 0.00015659057923208866,
2345
+ "loss": 0.0613,
2346
+ "step": 1650
2347
+ },
2348
+ {
2349
+ "epoch": 0.4363588425285083,
2350
+ "grad_norm": 0.4696301221847534,
2351
+ "learning_rate": 0.00015645863570391873,
2352
+ "loss": 0.0569,
2353
+ "step": 1655
2354
+ },
2355
+ {
2356
+ "epoch": 0.4376771471887153,
2357
+ "grad_norm": 0.15083105862140656,
2358
+ "learning_rate": 0.00015632669217574877,
2359
+ "loss": 0.0394,
2360
+ "step": 1660
2361
+ },
2362
+ {
2363
+ "epoch": 0.4389954518489223,
2364
+ "grad_norm": 0.44701239466667175,
2365
+ "learning_rate": 0.00015619474864757884,
2366
+ "loss": 0.0494,
2367
+ "step": 1665
2368
+ },
2369
+ {
2370
+ "epoch": 0.44031375650912924,
2371
+ "grad_norm": 0.07418403029441833,
2372
+ "learning_rate": 0.00015606280511940888,
2373
+ "loss": 0.0291,
2374
+ "step": 1670
2375
+ },
2376
+ {
2377
+ "epoch": 0.44163206116933623,
2378
+ "grad_norm": 0.02311861515045166,
2379
+ "learning_rate": 0.00015593086159123895,
2380
+ "loss": 0.0304,
2381
+ "step": 1675
2382
+ },
2383
+ {
2384
+ "epoch": 0.4429503658295432,
2385
+ "grad_norm": 0.4416038990020752,
2386
+ "learning_rate": 0.00015579891806306902,
2387
+ "loss": 0.0176,
2388
+ "step": 1680
2389
+ },
2390
+ {
2391
+ "epoch": 0.44426867048975016,
2392
+ "grad_norm": 0.5124915242195129,
2393
+ "learning_rate": 0.00015566697453489906,
2394
+ "loss": 0.0454,
2395
+ "step": 1685
2396
+ },
2397
+ {
2398
+ "epoch": 0.44558697514995715,
2399
+ "grad_norm": 0.3159286081790924,
2400
+ "learning_rate": 0.00015553503100672913,
2401
+ "loss": 0.047,
2402
+ "step": 1690
2403
+ },
2404
+ {
2405
+ "epoch": 0.44690527981016415,
2406
+ "grad_norm": 0.032126396894454956,
2407
+ "learning_rate": 0.00015540308747855918,
2408
+ "loss": 0.0151,
2409
+ "step": 1695
2410
+ },
2411
+ {
2412
+ "epoch": 0.4482235844703711,
2413
+ "grad_norm": 0.04663548618555069,
2414
+ "learning_rate": 0.00015527114395038924,
2415
+ "loss": 0.0375,
2416
+ "step": 1700
2417
+ },
2418
+ {
2419
+ "epoch": 0.4495418891305781,
2420
+ "grad_norm": 0.013753900304436684,
2421
+ "learning_rate": 0.0001551392004222193,
2422
+ "loss": 0.0485,
2423
+ "step": 1705
2424
+ },
2425
+ {
2426
+ "epoch": 0.45086019379078507,
2427
+ "grad_norm": 1.9952393770217896,
2428
+ "learning_rate": 0.00015500725689404936,
2429
+ "loss": 0.0625,
2430
+ "step": 1710
2431
+ },
2432
+ {
2433
+ "epoch": 0.452178498450992,
2434
+ "grad_norm": 0.014283270575106144,
2435
+ "learning_rate": 0.00015487531336587943,
2436
+ "loss": 0.0037,
2437
+ "step": 1715
2438
+ },
2439
+ {
2440
+ "epoch": 0.453496803111199,
2441
+ "grad_norm": 0.3897913098335266,
2442
+ "learning_rate": 0.00015474336983770947,
2443
+ "loss": 0.0304,
2444
+ "step": 1720
2445
+ },
2446
+ {
2447
+ "epoch": 0.454815107771406,
2448
+ "grad_norm": 0.3730885684490204,
2449
+ "learning_rate": 0.00015461142630953954,
2450
+ "loss": 0.0115,
2451
+ "step": 1725
2452
+ },
2453
+ {
2454
+ "epoch": 0.45613341243161293,
2455
+ "grad_norm": 0.035858724266290665,
2456
+ "learning_rate": 0.00015447948278136958,
2457
+ "loss": 0.0021,
2458
+ "step": 1730
2459
+ },
2460
+ {
2461
+ "epoch": 0.4574517170918199,
2462
+ "grad_norm": 0.20589517056941986,
2463
+ "learning_rate": 0.00015434753925319965,
2464
+ "loss": 0.0132,
2465
+ "step": 1735
2466
+ },
2467
+ {
2468
+ "epoch": 0.4587700217520269,
2469
+ "grad_norm": 0.004939342383295298,
2470
+ "learning_rate": 0.00015421559572502972,
2471
+ "loss": 0.0471,
2472
+ "step": 1740
2473
+ },
2474
+ {
2475
+ "epoch": 0.46008832641223385,
2476
+ "grad_norm": 0.03493283689022064,
2477
+ "learning_rate": 0.00015408365219685976,
2478
+ "loss": 0.0062,
2479
+ "step": 1745
2480
+ },
2481
+ {
2482
+ "epoch": 0.46140663107244084,
2483
+ "grad_norm": 0.045927103608846664,
2484
+ "learning_rate": 0.0001539517086686898,
2485
+ "loss": 0.0283,
2486
+ "step": 1750
2487
+ },
2488
+ {
2489
+ "epoch": 0.46272493573264784,
2490
+ "grad_norm": 0.012629454955458641,
2491
+ "learning_rate": 0.00015381976514051984,
2492
+ "loss": 0.0133,
2493
+ "step": 1755
2494
+ },
2495
+ {
2496
+ "epoch": 0.46404324039285477,
2497
+ "grad_norm": 0.8001697659492493,
2498
+ "learning_rate": 0.0001536878216123499,
2499
+ "loss": 0.0224,
2500
+ "step": 1760
2501
+ },
2502
+ {
2503
+ "epoch": 0.46536154505306176,
2504
+ "grad_norm": 0.002036362886428833,
2505
+ "learning_rate": 0.00015355587808417998,
2506
+ "loss": 0.0066,
2507
+ "step": 1765
2508
+ },
2509
+ {
2510
+ "epoch": 0.46667984971326876,
2511
+ "grad_norm": 1.0261330604553223,
2512
+ "learning_rate": 0.00015342393455601002,
2513
+ "loss": 0.191,
2514
+ "step": 1770
2515
+ },
2516
+ {
2517
+ "epoch": 0.4679981543734757,
2518
+ "grad_norm": 0.3033429682254791,
2519
+ "learning_rate": 0.0001532919910278401,
2520
+ "loss": 0.0222,
2521
+ "step": 1775
2522
+ },
2523
+ {
2524
+ "epoch": 0.4693164590336827,
2525
+ "grad_norm": 0.36911338567733765,
2526
+ "learning_rate": 0.00015316004749967014,
2527
+ "loss": 0.0363,
2528
+ "step": 1780
2529
+ },
2530
+ {
2531
+ "epoch": 0.4706347636938897,
2532
+ "grad_norm": 0.0406811460852623,
2533
+ "learning_rate": 0.0001530281039715002,
2534
+ "loss": 0.0283,
2535
+ "step": 1785
2536
+ },
2537
+ {
2538
+ "epoch": 0.4719530683540966,
2539
+ "grad_norm": 0.23334211111068726,
2540
+ "learning_rate": 0.00015289616044333027,
2541
+ "loss": 0.0274,
2542
+ "step": 1790
2543
+ },
2544
+ {
2545
+ "epoch": 0.4732713730143036,
2546
+ "grad_norm": 0.013081169687211514,
2547
+ "learning_rate": 0.00015276421691516032,
2548
+ "loss": 0.0221,
2549
+ "step": 1795
2550
+ },
2551
+ {
2552
+ "epoch": 0.4745896776745106,
2553
+ "grad_norm": 0.2480790615081787,
2554
+ "learning_rate": 0.00015263227338699039,
2555
+ "loss": 0.019,
2556
+ "step": 1800
2557
+ },
2558
+ {
2559
+ "epoch": 0.47590798233471754,
2560
+ "grad_norm": 0.0373196005821228,
2561
+ "learning_rate": 0.00015250032985882043,
2562
+ "loss": 0.0292,
2563
+ "step": 1805
2564
+ },
2565
+ {
2566
+ "epoch": 0.47722628699492453,
2567
+ "grad_norm": 0.004609994124621153,
2568
+ "learning_rate": 0.0001523683863306505,
2569
+ "loss": 0.0918,
2570
+ "step": 1810
2571
+ },
2572
+ {
2573
+ "epoch": 0.4785445916551315,
2574
+ "grad_norm": 0.02370987832546234,
2575
+ "learning_rate": 0.00015223644280248054,
2576
+ "loss": 0.0462,
2577
+ "step": 1815
2578
+ },
2579
+ {
2580
+ "epoch": 0.47986289631533846,
2581
+ "grad_norm": 0.05842221528291702,
2582
+ "learning_rate": 0.0001521044992743106,
2583
+ "loss": 0.0595,
2584
+ "step": 1820
2585
+ },
2586
+ {
2587
+ "epoch": 0.48118120097554545,
2588
+ "grad_norm": 0.009685276076197624,
2589
+ "learning_rate": 0.00015197255574614068,
2590
+ "loss": 0.0074,
2591
+ "step": 1825
2592
+ },
2593
+ {
2594
+ "epoch": 0.48249950563575245,
2595
+ "grad_norm": 0.8933250308036804,
2596
+ "learning_rate": 0.00015184061221797072,
2597
+ "loss": 0.0757,
2598
+ "step": 1830
2599
+ },
2600
+ {
2601
+ "epoch": 0.4838178102959594,
2602
+ "grad_norm": 0.07075401395559311,
2603
+ "learning_rate": 0.0001517086686898008,
2604
+ "loss": 0.0226,
2605
+ "step": 1835
2606
+ },
2607
+ {
2608
+ "epoch": 0.4851361149561664,
2609
+ "grad_norm": 0.732706606388092,
2610
+ "learning_rate": 0.00015157672516163083,
2611
+ "loss": 0.0161,
2612
+ "step": 1840
2613
+ },
2614
+ {
2615
+ "epoch": 0.48645441961637337,
2616
+ "grad_norm": 1.1897023916244507,
2617
+ "learning_rate": 0.0001514447816334609,
2618
+ "loss": 0.0265,
2619
+ "step": 1845
2620
+ },
2621
+ {
2622
+ "epoch": 0.4877727242765803,
2623
+ "grad_norm": 0.052572328597307205,
2624
+ "learning_rate": 0.00015131283810529094,
2625
+ "loss": 0.0094,
2626
+ "step": 1850
2627
+ },
2628
+ {
2629
+ "epoch": 0.4890910289367873,
2630
+ "grad_norm": 0.08263898640871048,
2631
+ "learning_rate": 0.00015118089457712098,
2632
+ "loss": 0.0631,
2633
+ "step": 1855
2634
+ },
2635
+ {
2636
+ "epoch": 0.4904093335969943,
2637
+ "grad_norm": 0.03225664421916008,
2638
+ "learning_rate": 0.00015104895104895105,
2639
+ "loss": 0.023,
2640
+ "step": 1860
2641
+ },
2642
+ {
2643
+ "epoch": 0.4917276382572012,
2644
+ "grad_norm": 0.007935039699077606,
2645
+ "learning_rate": 0.0001509170075207811,
2646
+ "loss": 0.0039,
2647
+ "step": 1865
2648
+ },
2649
+ {
2650
+ "epoch": 0.4930459429174082,
2651
+ "grad_norm": 0.00830796267837286,
2652
+ "learning_rate": 0.00015078506399261116,
2653
+ "loss": 0.007,
2654
+ "step": 1870
2655
+ },
2656
+ {
2657
+ "epoch": 0.4943642475776152,
2658
+ "grad_norm": 0.08042234182357788,
2659
+ "learning_rate": 0.00015065312046444123,
2660
+ "loss": 0.0366,
2661
+ "step": 1875
2662
+ },
2663
+ {
2664
+ "epoch": 0.49568255223782215,
2665
+ "grad_norm": 0.009092851541936398,
2666
+ "learning_rate": 0.00015052117693627128,
2667
+ "loss": 0.0107,
2668
+ "step": 1880
2669
+ },
2670
+ {
2671
+ "epoch": 0.49700085689802914,
2672
+ "grad_norm": 0.2674141824245453,
2673
+ "learning_rate": 0.00015038923340810135,
2674
+ "loss": 0.0076,
2675
+ "step": 1885
2676
+ },
2677
+ {
2678
+ "epoch": 0.49831916155823613,
2679
+ "grad_norm": 0.07694366574287415,
2680
+ "learning_rate": 0.0001502572898799314,
2681
+ "loss": 0.0252,
2682
+ "step": 1890
2683
+ },
2684
+ {
2685
+ "epoch": 0.49963746621844307,
2686
+ "grad_norm": 0.5699467062950134,
2687
+ "learning_rate": 0.00015012534635176146,
2688
+ "loss": 0.0487,
2689
+ "step": 1895
2690
+ },
2691
+ {
2692
+ "epoch": 0.5009557708786501,
2693
+ "grad_norm": 0.18800878524780273,
2694
+ "learning_rate": 0.0001499934028235915,
2695
+ "loss": 0.0183,
2696
+ "step": 1900
2697
+ },
2698
+ {
2699
+ "epoch": 0.5022740755388571,
2700
+ "grad_norm": 0.019469989463686943,
2701
+ "learning_rate": 0.00014986145929542157,
2702
+ "loss": 0.0268,
2703
+ "step": 1905
2704
+ },
2705
+ {
2706
+ "epoch": 0.503592380199064,
2707
+ "grad_norm": 0.01890506222844124,
2708
+ "learning_rate": 0.00014972951576725164,
2709
+ "loss": 0.0449,
2710
+ "step": 1910
2711
+ },
2712
+ {
2713
+ "epoch": 0.5049106848592709,
2714
+ "grad_norm": 0.0006314461352303624,
2715
+ "learning_rate": 0.00014959757223908168,
2716
+ "loss": 0.0056,
2717
+ "step": 1915
2718
+ },
2719
+ {
2720
+ "epoch": 0.5062289895194779,
2721
+ "grad_norm": 0.32654041051864624,
2722
+ "learning_rate": 0.00014946562871091175,
2723
+ "loss": 0.0256,
2724
+ "step": 1920
2725
+ },
2726
+ {
2727
+ "epoch": 0.5075472941796849,
2728
+ "grad_norm": 0.7803483605384827,
2729
+ "learning_rate": 0.0001493336851827418,
2730
+ "loss": 0.0374,
2731
+ "step": 1925
2732
+ },
2733
+ {
2734
+ "epoch": 0.5088655988398919,
2735
+ "grad_norm": 0.028441445901989937,
2736
+ "learning_rate": 0.00014920174165457186,
2737
+ "loss": 0.0161,
2738
+ "step": 1930
2739
+ },
2740
+ {
2741
+ "epoch": 0.5101839035000989,
2742
+ "grad_norm": 0.028379200026392937,
2743
+ "learning_rate": 0.00014906979812640193,
2744
+ "loss": 0.0151,
2745
+ "step": 1935
2746
+ },
2747
+ {
2748
+ "epoch": 0.5115022081603059,
2749
+ "grad_norm": 0.021159596741199493,
2750
+ "learning_rate": 0.00014893785459823197,
2751
+ "loss": 0.0303,
2752
+ "step": 1940
2753
+ },
2754
+ {
2755
+ "epoch": 0.5128205128205128,
2756
+ "grad_norm": 0.24903325736522675,
2757
+ "learning_rate": 0.000148805911070062,
2758
+ "loss": 0.0076,
2759
+ "step": 1945
2760
+ },
2761
+ {
2762
+ "epoch": 0.5141388174807198,
2763
+ "grad_norm": 0.007065301761031151,
2764
+ "learning_rate": 0.00014867396754189206,
2765
+ "loss": 0.022,
2766
+ "step": 1950
2767
+ },
2768
+ {
2769
+ "epoch": 0.5154571221409268,
2770
+ "grad_norm": 0.004032329190522432,
2771
+ "learning_rate": 0.00014854202401372212,
2772
+ "loss": 0.0083,
2773
+ "step": 1955
2774
+ },
2775
+ {
2776
+ "epoch": 0.5167754268011338,
2777
+ "grad_norm": 0.3045775592327118,
2778
+ "learning_rate": 0.0001484100804855522,
2779
+ "loss": 0.0113,
2780
+ "step": 1960
2781
+ },
2782
+ {
2783
+ "epoch": 0.5180937314613407,
2784
+ "grad_norm": 0.36974939703941345,
2785
+ "learning_rate": 0.00014827813695738224,
2786
+ "loss": 0.0267,
2787
+ "step": 1965
2788
+ },
2789
+ {
2790
+ "epoch": 0.5194120361215477,
2791
+ "grad_norm": 0.009729950688779354,
2792
+ "learning_rate": 0.0001481461934292123,
2793
+ "loss": 0.027,
2794
+ "step": 1970
2795
+ },
2796
+ {
2797
+ "epoch": 0.5207303407817546,
2798
+ "grad_norm": 0.0013097926275804639,
2799
+ "learning_rate": 0.00014801424990104235,
2800
+ "loss": 0.003,
2801
+ "step": 1975
2802
+ },
2803
+ {
2804
+ "epoch": 0.5220486454419616,
2805
+ "grad_norm": 0.0706263929605484,
2806
+ "learning_rate": 0.00014788230637287242,
2807
+ "loss": 0.0193,
2808
+ "step": 1980
2809
+ },
2810
+ {
2811
+ "epoch": 0.5233669501021686,
2812
+ "grad_norm": 1.435702919960022,
2813
+ "learning_rate": 0.00014775036284470249,
2814
+ "loss": 0.0647,
2815
+ "step": 1985
2816
+ },
2817
+ {
2818
+ "epoch": 0.5246852547623756,
2819
+ "grad_norm": 0.00661757867783308,
2820
+ "learning_rate": 0.00014761841931653253,
2821
+ "loss": 0.0373,
2822
+ "step": 1990
2823
+ },
2824
+ {
2825
+ "epoch": 0.5260035594225826,
2826
+ "grad_norm": 0.12014541029930115,
2827
+ "learning_rate": 0.0001474864757883626,
2828
+ "loss": 0.0178,
2829
+ "step": 1995
2830
+ },
2831
+ {
2832
+ "epoch": 0.5273218640827896,
2833
+ "grad_norm": 1.0549248456954956,
2834
+ "learning_rate": 0.00014735453226019264,
2835
+ "loss": 0.0191,
2836
+ "step": 2000
2837
+ },
2838
+ {
2839
+ "epoch": 0.5273218640827896,
2840
+ "eval_loss": 0.037292081862688065,
2841
+ "eval_runtime": 454.3033,
2842
+ "eval_samples_per_second": 7.422,
2843
+ "eval_steps_per_second": 3.711,
2844
+ "step": 2000
2845
+ },
2846
+ {
2847
+ "epoch": 0.5286401687429965,
2848
+ "grad_norm": 0.47634151577949524,
2849
+ "learning_rate": 0.0001472225887320227,
2850
+ "loss": 0.0404,
2851
+ "step": 2005
2852
+ },
2853
+ {
2854
+ "epoch": 0.5299584734032035,
2855
+ "grad_norm": 0.006752463988959789,
2856
+ "learning_rate": 0.00014709064520385275,
2857
+ "loss": 0.034,
2858
+ "step": 2010
2859
+ },
2860
+ {
2861
+ "epoch": 0.5312767780634104,
2862
+ "grad_norm": 0.20780125260353088,
2863
+ "learning_rate": 0.00014695870167568282,
2864
+ "loss": 0.0421,
2865
+ "step": 2015
2866
+ },
2867
+ {
2868
+ "epoch": 0.5325950827236174,
2869
+ "grad_norm": 0.010941066779196262,
2870
+ "learning_rate": 0.0001468267581475129,
2871
+ "loss": 0.0086,
2872
+ "step": 2020
2873
+ },
2874
+ {
2875
+ "epoch": 0.5339133873838244,
2876
+ "grad_norm": 0.3439581096172333,
2877
+ "learning_rate": 0.00014669481461934293,
2878
+ "loss": 0.0187,
2879
+ "step": 2025
2880
+ },
2881
+ {
2882
+ "epoch": 0.5352316920440314,
2883
+ "grad_norm": 0.14961636066436768,
2884
+ "learning_rate": 0.000146562871091173,
2885
+ "loss": 0.0504,
2886
+ "step": 2030
2887
+ },
2888
+ {
2889
+ "epoch": 0.5365499967042383,
2890
+ "grad_norm": 0.0044641937129199505,
2891
+ "learning_rate": 0.00014643092756300304,
2892
+ "loss": 0.0134,
2893
+ "step": 2035
2894
+ },
2895
+ {
2896
+ "epoch": 0.5378683013644453,
2897
+ "grad_norm": 0.14088386297225952,
2898
+ "learning_rate": 0.0001462989840348331,
2899
+ "loss": 0.0096,
2900
+ "step": 2040
2901
+ },
2902
+ {
2903
+ "epoch": 0.5391866060246523,
2904
+ "grad_norm": 0.48116979002952576,
2905
+ "learning_rate": 0.00014616704050666315,
2906
+ "loss": 0.0124,
2907
+ "step": 2045
2908
+ },
2909
+ {
2910
+ "epoch": 0.5405049106848593,
2911
+ "grad_norm": 0.3688766360282898,
2912
+ "learning_rate": 0.0001460350969784932,
2913
+ "loss": 0.0226,
2914
+ "step": 2050
2915
+ },
2916
+ {
2917
+ "epoch": 0.5418232153450663,
2918
+ "grad_norm": 0.002938181860372424,
2919
+ "learning_rate": 0.00014590315345032326,
2920
+ "loss": 0.0267,
2921
+ "step": 2055
2922
+ },
2923
+ {
2924
+ "epoch": 0.5431415200052733,
2925
+ "grad_norm": 0.3335214853286743,
2926
+ "learning_rate": 0.0001457712099221533,
2927
+ "loss": 0.0367,
2928
+ "step": 2060
2929
+ },
2930
+ {
2931
+ "epoch": 0.5444598246654802,
2932
+ "grad_norm": 0.004644686821848154,
2933
+ "learning_rate": 0.00014563926639398338,
2934
+ "loss": 0.0121,
2935
+ "step": 2065
2936
+ },
2937
+ {
2938
+ "epoch": 0.5457781293256871,
2939
+ "grad_norm": 0.19505545496940613,
2940
+ "learning_rate": 0.00014550732286581345,
2941
+ "loss": 0.0591,
2942
+ "step": 2070
2943
+ },
2944
+ {
2945
+ "epoch": 0.5470964339858941,
2946
+ "grad_norm": 0.018028756603598595,
2947
+ "learning_rate": 0.0001453753793376435,
2948
+ "loss": 0.0131,
2949
+ "step": 2075
2950
+ },
2951
+ {
2952
+ "epoch": 0.5484147386461011,
2953
+ "grad_norm": 0.045639291405677795,
2954
+ "learning_rate": 0.00014524343580947356,
2955
+ "loss": 0.0443,
2956
+ "step": 2080
2957
+ },
2958
+ {
2959
+ "epoch": 0.5497330433063081,
2960
+ "grad_norm": 0.727981686592102,
2961
+ "learning_rate": 0.0001451114922813036,
2962
+ "loss": 0.0205,
2963
+ "step": 2085
2964
+ },
2965
+ {
2966
+ "epoch": 0.5510513479665151,
2967
+ "grad_norm": 0.03766491636633873,
2968
+ "learning_rate": 0.00014497954875313367,
2969
+ "loss": 0.0067,
2970
+ "step": 2090
2971
+ },
2972
+ {
2973
+ "epoch": 0.552369652626722,
2974
+ "grad_norm": 0.1911504715681076,
2975
+ "learning_rate": 0.0001448476052249637,
2976
+ "loss": 0.0397,
2977
+ "step": 2095
2978
+ },
2979
+ {
2980
+ "epoch": 0.553687957286929,
2981
+ "grad_norm": 0.08238353580236435,
2982
+ "learning_rate": 0.00014471566169679378,
2983
+ "loss": 0.0513,
2984
+ "step": 2100
2985
+ },
2986
+ {
2987
+ "epoch": 0.555006261947136,
2988
+ "grad_norm": 0.06317206472158432,
2989
+ "learning_rate": 0.00014458371816862385,
2990
+ "loss": 0.0178,
2991
+ "step": 2105
2992
+ },
2993
+ {
2994
+ "epoch": 0.556324566607343,
2995
+ "grad_norm": 0.0652734637260437,
2996
+ "learning_rate": 0.0001444517746404539,
2997
+ "loss": 0.0184,
2998
+ "step": 2110
2999
+ },
3000
+ {
3001
+ "epoch": 0.55764287126755,
3002
+ "grad_norm": 0.05471858009696007,
3003
+ "learning_rate": 0.00014431983111228396,
3004
+ "loss": 0.0089,
3005
+ "step": 2115
3006
+ },
3007
+ {
3008
+ "epoch": 0.558961175927757,
3009
+ "grad_norm": 0.005062670446932316,
3010
+ "learning_rate": 0.000144187887584114,
3011
+ "loss": 0.0052,
3012
+ "step": 2120
3013
+ },
3014
+ {
3015
+ "epoch": 0.5602794805879638,
3016
+ "grad_norm": 0.06337414681911469,
3017
+ "learning_rate": 0.00014405594405594407,
3018
+ "loss": 0.053,
3019
+ "step": 2125
3020
+ },
3021
+ {
3022
+ "epoch": 0.5615977852481708,
3023
+ "grad_norm": 0.33745357394218445,
3024
+ "learning_rate": 0.00014392400052777414,
3025
+ "loss": 0.0166,
3026
+ "step": 2130
3027
+ },
3028
+ {
3029
+ "epoch": 0.5629160899083778,
3030
+ "grad_norm": 0.7382741570472717,
3031
+ "learning_rate": 0.00014379205699960418,
3032
+ "loss": 0.0191,
3033
+ "step": 2135
3034
+ },
3035
+ {
3036
+ "epoch": 0.5642343945685848,
3037
+ "grad_norm": 0.007551972754299641,
3038
+ "learning_rate": 0.00014366011347143425,
3039
+ "loss": 0.0022,
3040
+ "step": 2140
3041
+ },
3042
+ {
3043
+ "epoch": 0.5655526992287918,
3044
+ "grad_norm": 0.6260896921157837,
3045
+ "learning_rate": 0.00014352816994326427,
3046
+ "loss": 0.0095,
3047
+ "step": 2145
3048
+ },
3049
+ {
3050
+ "epoch": 0.5668710038889987,
3051
+ "grad_norm": 0.11619322001934052,
3052
+ "learning_rate": 0.00014339622641509434,
3053
+ "loss": 0.015,
3054
+ "step": 2150
3055
+ },
3056
+ {
3057
+ "epoch": 0.5681893085492057,
3058
+ "grad_norm": 1.1440670490264893,
3059
+ "learning_rate": 0.0001432642828869244,
3060
+ "loss": 0.1343,
3061
+ "step": 2155
3062
+ },
3063
+ {
3064
+ "epoch": 0.5695076132094127,
3065
+ "grad_norm": 1.1793878078460693,
3066
+ "learning_rate": 0.00014313233935875445,
3067
+ "loss": 0.0968,
3068
+ "step": 2160
3069
+ },
3070
+ {
3071
+ "epoch": 0.5708259178696197,
3072
+ "grad_norm": 0.6865736842155457,
3073
+ "learning_rate": 0.00014300039583058452,
3074
+ "loss": 0.0195,
3075
+ "step": 2165
3076
+ },
3077
+ {
3078
+ "epoch": 0.5721442225298267,
3079
+ "grad_norm": 0.140816792845726,
3080
+ "learning_rate": 0.00014286845230241456,
3081
+ "loss": 0.0761,
3082
+ "step": 2170
3083
+ },
3084
+ {
3085
+ "epoch": 0.5734625271900337,
3086
+ "grad_norm": 0.04071786254644394,
3087
+ "learning_rate": 0.00014273650877424463,
3088
+ "loss": 0.0193,
3089
+ "step": 2175
3090
+ },
3091
+ {
3092
+ "epoch": 0.5747808318502405,
3093
+ "grad_norm": 0.044617727398872375,
3094
+ "learning_rate": 0.0001426045652460747,
3095
+ "loss": 0.0112,
3096
+ "step": 2180
3097
+ },
3098
+ {
3099
+ "epoch": 0.5760991365104475,
3100
+ "grad_norm": 0.11001799255609512,
3101
+ "learning_rate": 0.00014247262171790474,
3102
+ "loss": 0.0039,
3103
+ "step": 2185
3104
+ },
3105
+ {
3106
+ "epoch": 0.5774174411706545,
3107
+ "grad_norm": 0.0036315324250608683,
3108
+ "learning_rate": 0.0001423406781897348,
3109
+ "loss": 0.0038,
3110
+ "step": 2190
3111
+ },
3112
+ {
3113
+ "epoch": 0.5787357458308615,
3114
+ "grad_norm": 0.9866570830345154,
3115
+ "learning_rate": 0.00014220873466156485,
3116
+ "loss": 0.025,
3117
+ "step": 2195
3118
+ },
3119
+ {
3120
+ "epoch": 0.5800540504910685,
3121
+ "grad_norm": 0.023570384830236435,
3122
+ "learning_rate": 0.00014207679113339492,
3123
+ "loss": 0.0468,
3124
+ "step": 2200
3125
+ },
3126
+ {
3127
+ "epoch": 0.5813723551512755,
3128
+ "grad_norm": 0.20010559260845184,
3129
+ "learning_rate": 0.00014194484760522496,
3130
+ "loss": 0.0198,
3131
+ "step": 2205
3132
+ },
3133
+ {
3134
+ "epoch": 0.5826906598114824,
3135
+ "grad_norm": 0.06153270602226257,
3136
+ "learning_rate": 0.00014181290407705503,
3137
+ "loss": 0.0764,
3138
+ "step": 2210
3139
+ },
3140
+ {
3141
+ "epoch": 0.5840089644716894,
3142
+ "grad_norm": 0.033162448555231094,
3143
+ "learning_rate": 0.0001416809605488851,
3144
+ "loss": 0.028,
3145
+ "step": 2215
3146
+ },
3147
+ {
3148
+ "epoch": 0.5853272691318964,
3149
+ "grad_norm": 0.428382933139801,
3150
+ "learning_rate": 0.00014154901702071514,
3151
+ "loss": 0.0652,
3152
+ "step": 2220
3153
+ },
3154
+ {
3155
+ "epoch": 0.5866455737921034,
3156
+ "grad_norm": 0.25004762411117554,
3157
+ "learning_rate": 0.0001414170734925452,
3158
+ "loss": 0.0411,
3159
+ "step": 2225
3160
+ },
3161
+ {
3162
+ "epoch": 0.5879638784523104,
3163
+ "grad_norm": 0.22649863362312317,
3164
+ "learning_rate": 0.00014128512996437525,
3165
+ "loss": 0.0517,
3166
+ "step": 2230
3167
+ },
3168
+ {
3169
+ "epoch": 0.5892821831125173,
3170
+ "grad_norm": 0.035932112485170364,
3171
+ "learning_rate": 0.00014115318643620532,
3172
+ "loss": 0.015,
3173
+ "step": 2235
3174
+ },
3175
+ {
3176
+ "epoch": 0.5906004877727242,
3177
+ "grad_norm": 0.3800172507762909,
3178
+ "learning_rate": 0.00014102124290803536,
3179
+ "loss": 0.0324,
3180
+ "step": 2240
3181
+ },
3182
+ {
3183
+ "epoch": 0.5919187924329312,
3184
+ "grad_norm": 0.6974118947982788,
3185
+ "learning_rate": 0.0001408892993798654,
3186
+ "loss": 0.0216,
3187
+ "step": 2245
3188
+ },
3189
+ {
3190
+ "epoch": 0.5932370970931382,
3191
+ "grad_norm": 0.15472032129764557,
3192
+ "learning_rate": 0.00014075735585169548,
3193
+ "loss": 0.0164,
3194
+ "step": 2250
3195
+ },
3196
+ {
3197
+ "epoch": 0.5945554017533452,
3198
+ "grad_norm": 0.015000814571976662,
3199
+ "learning_rate": 0.00014062541232352552,
3200
+ "loss": 0.0395,
3201
+ "step": 2255
3202
+ },
3203
+ {
3204
+ "epoch": 0.5958737064135522,
3205
+ "grad_norm": 0.052086081355810165,
3206
+ "learning_rate": 0.0001404934687953556,
3207
+ "loss": 0.0032,
3208
+ "step": 2260
3209
+ },
3210
+ {
3211
+ "epoch": 0.5971920110737592,
3212
+ "grad_norm": 0.004600350745022297,
3213
+ "learning_rate": 0.00014036152526718566,
3214
+ "loss": 0.0056,
3215
+ "step": 2265
3216
+ },
3217
+ {
3218
+ "epoch": 0.5985103157339661,
3219
+ "grad_norm": 0.4940958321094513,
3220
+ "learning_rate": 0.0001402295817390157,
3221
+ "loss": 0.0206,
3222
+ "step": 2270
3223
+ },
3224
+ {
3225
+ "epoch": 0.5998286203941731,
3226
+ "grad_norm": 0.09658394008874893,
3227
+ "learning_rate": 0.00014009763821084577,
3228
+ "loss": 0.0052,
3229
+ "step": 2275
3230
+ },
3231
+ {
3232
+ "epoch": 0.60114692505438,
3233
+ "grad_norm": 0.00020539117394946516,
3234
+ "learning_rate": 0.0001399656946826758,
3235
+ "loss": 0.087,
3236
+ "step": 2280
3237
+ },
3238
+ {
3239
+ "epoch": 0.602465229714587,
3240
+ "grad_norm": 0.1871018409729004,
3241
+ "learning_rate": 0.00013983375115450588,
3242
+ "loss": 0.0812,
3243
+ "step": 2285
3244
+ },
3245
+ {
3246
+ "epoch": 0.603783534374794,
3247
+ "grad_norm": 0.02583954855799675,
3248
+ "learning_rate": 0.00013970180762633592,
3249
+ "loss": 0.0232,
3250
+ "step": 2290
3251
+ },
3252
+ {
3253
+ "epoch": 0.605101839035001,
3254
+ "grad_norm": 1.2103784084320068,
3255
+ "learning_rate": 0.000139569864098166,
3256
+ "loss": 0.0151,
3257
+ "step": 2295
3258
+ },
3259
+ {
3260
+ "epoch": 0.6064201436952079,
3261
+ "grad_norm": 0.023514943197369576,
3262
+ "learning_rate": 0.00013943792056999606,
3263
+ "loss": 0.0193,
3264
+ "step": 2300
3265
+ },
3266
+ {
3267
+ "epoch": 0.6077384483554149,
3268
+ "grad_norm": 0.0076395305804908276,
3269
+ "learning_rate": 0.0001393059770418261,
3270
+ "loss": 0.0379,
3271
+ "step": 2305
3272
+ },
3273
+ {
3274
+ "epoch": 0.6090567530156219,
3275
+ "grad_norm": 0.12412039190530777,
3276
+ "learning_rate": 0.00013917403351365617,
3277
+ "loss": 0.0095,
3278
+ "step": 2310
3279
+ },
3280
+ {
3281
+ "epoch": 0.6103750576758289,
3282
+ "grad_norm": 0.021904783323407173,
3283
+ "learning_rate": 0.0001390420899854862,
3284
+ "loss": 0.0166,
3285
+ "step": 2315
3286
+ },
3287
+ {
3288
+ "epoch": 0.6116933623360359,
3289
+ "grad_norm": 0.004012851510196924,
3290
+ "learning_rate": 0.00013891014645731628,
3291
+ "loss": 0.0103,
3292
+ "step": 2320
3293
+ },
3294
+ {
3295
+ "epoch": 0.6130116669962429,
3296
+ "grad_norm": 0.007267913781106472,
3297
+ "learning_rate": 0.00013877820292914635,
3298
+ "loss": 0.0708,
3299
+ "step": 2325
3300
+ },
3301
+ {
3302
+ "epoch": 0.6143299716564498,
3303
+ "grad_norm": 0.10363642126321793,
3304
+ "learning_rate": 0.0001386462594009764,
3305
+ "loss": 0.0473,
3306
+ "step": 2330
3307
+ },
3308
+ {
3309
+ "epoch": 0.6156482763166568,
3310
+ "grad_norm": 0.04899830371141434,
3311
+ "learning_rate": 0.00013851431587280646,
3312
+ "loss": 0.0283,
3313
+ "step": 2335
3314
+ },
3315
+ {
3316
+ "epoch": 0.6169665809768637,
3317
+ "grad_norm": 0.39460498094558716,
3318
+ "learning_rate": 0.0001383823723446365,
3319
+ "loss": 0.0597,
3320
+ "step": 2340
3321
+ },
3322
+ {
3323
+ "epoch": 0.6182848856370707,
3324
+ "grad_norm": 0.04092290997505188,
3325
+ "learning_rate": 0.00013825042881646655,
3326
+ "loss": 0.0167,
3327
+ "step": 2345
3328
+ },
3329
+ {
3330
+ "epoch": 0.6196031902972777,
3331
+ "grad_norm": 0.2781132161617279,
3332
+ "learning_rate": 0.00013811848528829662,
3333
+ "loss": 0.0097,
3334
+ "step": 2350
3335
+ },
3336
+ {
3337
+ "epoch": 0.6209214949574847,
3338
+ "grad_norm": 0.041443537920713425,
3339
+ "learning_rate": 0.00013798654176012666,
3340
+ "loss": 0.0226,
3341
+ "step": 2355
3342
+ },
3343
+ {
3344
+ "epoch": 0.6222397996176916,
3345
+ "grad_norm": 0.1242462694644928,
3346
+ "learning_rate": 0.00013785459823195673,
3347
+ "loss": 0.0055,
3348
+ "step": 2360
3349
+ },
3350
+ {
3351
+ "epoch": 0.6235581042778986,
3352
+ "grad_norm": 0.4440467357635498,
3353
+ "learning_rate": 0.00013772265470378677,
3354
+ "loss": 0.049,
3355
+ "step": 2365
3356
+ },
3357
+ {
3358
+ "epoch": 0.6248764089381056,
3359
+ "grad_norm": 0.014354427345097065,
3360
+ "learning_rate": 0.00013759071117561684,
3361
+ "loss": 0.0327,
3362
+ "step": 2370
3363
+ },
3364
+ {
3365
+ "epoch": 0.6261947135983126,
3366
+ "grad_norm": 0.011539973318576813,
3367
+ "learning_rate": 0.0001374587676474469,
3368
+ "loss": 0.0222,
3369
+ "step": 2375
3370
+ },
3371
+ {
3372
+ "epoch": 0.6275130182585196,
3373
+ "grad_norm": 0.23539051413536072,
3374
+ "learning_rate": 0.00013732682411927695,
3375
+ "loss": 0.0816,
3376
+ "step": 2380
3377
+ },
3378
+ {
3379
+ "epoch": 0.6288313229187266,
3380
+ "grad_norm": 0.26793941855430603,
3381
+ "learning_rate": 0.00013719488059110702,
3382
+ "loss": 0.0325,
3383
+ "step": 2385
3384
+ },
3385
+ {
3386
+ "epoch": 0.6301496275789334,
3387
+ "grad_norm": 0.01662217453122139,
3388
+ "learning_rate": 0.00013706293706293706,
3389
+ "loss": 0.0221,
3390
+ "step": 2390
3391
+ },
3392
+ {
3393
+ "epoch": 0.6314679322391404,
3394
+ "grad_norm": 0.30669671297073364,
3395
+ "learning_rate": 0.00013693099353476713,
3396
+ "loss": 0.026,
3397
+ "step": 2395
3398
+ },
3399
+ {
3400
+ "epoch": 0.6327862368993474,
3401
+ "grad_norm": 0.03350894898176193,
3402
+ "learning_rate": 0.00013679905000659717,
3403
+ "loss": 0.0072,
3404
+ "step": 2400
3405
+ },
3406
+ {
3407
+ "epoch": 0.6341045415595544,
3408
+ "grad_norm": 0.014983875676989555,
3409
+ "learning_rate": 0.00013666710647842724,
3410
+ "loss": 0.049,
3411
+ "step": 2405
3412
+ },
3413
+ {
3414
+ "epoch": 0.6354228462197614,
3415
+ "grad_norm": 1.8989384174346924,
3416
+ "learning_rate": 0.0001365351629502573,
3417
+ "loss": 0.0335,
3418
+ "step": 2410
3419
+ },
3420
+ {
3421
+ "epoch": 0.6367411508799684,
3422
+ "grad_norm": 0.030135562643408775,
3423
+ "learning_rate": 0.00013640321942208735,
3424
+ "loss": 0.0051,
3425
+ "step": 2415
3426
+ },
3427
+ {
3428
+ "epoch": 0.6380594555401753,
3429
+ "grad_norm": 0.02079075388610363,
3430
+ "learning_rate": 0.00013627127589391742,
3431
+ "loss": 0.0138,
3432
+ "step": 2420
3433
+ },
3434
+ {
3435
+ "epoch": 0.6393777602003823,
3436
+ "grad_norm": 0.06065403297543526,
3437
+ "learning_rate": 0.00013613933236574746,
3438
+ "loss": 0.0357,
3439
+ "step": 2425
3440
+ },
3441
+ {
3442
+ "epoch": 0.6406960648605893,
3443
+ "grad_norm": 0.2980937659740448,
3444
+ "learning_rate": 0.00013600738883757753,
3445
+ "loss": 0.0138,
3446
+ "step": 2430
3447
+ },
3448
+ {
3449
+ "epoch": 0.6420143695207963,
3450
+ "grad_norm": 0.4820438623428345,
3451
+ "learning_rate": 0.00013587544530940758,
3452
+ "loss": 0.01,
3453
+ "step": 2435
3454
+ },
3455
+ {
3456
+ "epoch": 0.6433326741810033,
3457
+ "grad_norm": 0.005618259310722351,
3458
+ "learning_rate": 0.00013574350178123765,
3459
+ "loss": 0.0052,
3460
+ "step": 2440
3461
+ },
3462
+ {
3463
+ "epoch": 0.6446509788412103,
3464
+ "grad_norm": 0.7173821926116943,
3465
+ "learning_rate": 0.0001356115582530677,
3466
+ "loss": 0.0133,
3467
+ "step": 2445
3468
+ },
3469
+ {
3470
+ "epoch": 0.6459692835014171,
3471
+ "grad_norm": 0.0053142281249165535,
3472
+ "learning_rate": 0.00013547961472489773,
3473
+ "loss": 0.0045,
3474
+ "step": 2450
3475
+ },
3476
+ {
3477
+ "epoch": 0.6472875881616241,
3478
+ "grad_norm": 0.06118829548358917,
3479
+ "learning_rate": 0.0001353476711967278,
3480
+ "loss": 0.056,
3481
+ "step": 2455
3482
+ },
3483
+ {
3484
+ "epoch": 0.6486058928218311,
3485
+ "grad_norm": 3.5878078937530518,
3486
+ "learning_rate": 0.00013521572766855787,
3487
+ "loss": 0.0232,
3488
+ "step": 2460
3489
+ },
3490
+ {
3491
+ "epoch": 0.6499241974820381,
3492
+ "grad_norm": 0.004911276511847973,
3493
+ "learning_rate": 0.0001350837841403879,
3494
+ "loss": 0.0074,
3495
+ "step": 2465
3496
+ },
3497
+ {
3498
+ "epoch": 0.6512425021422451,
3499
+ "grad_norm": 0.0028026222717016935,
3500
+ "learning_rate": 0.00013495184061221798,
3501
+ "loss": 0.0782,
3502
+ "step": 2470
3503
+ },
3504
+ {
3505
+ "epoch": 0.6525608068024521,
3506
+ "grad_norm": 0.7317615747451782,
3507
+ "learning_rate": 0.00013481989708404802,
3508
+ "loss": 0.0222,
3509
+ "step": 2475
3510
+ },
3511
+ {
3512
+ "epoch": 0.653879111462659,
3513
+ "grad_norm": 0.01835751160979271,
3514
+ "learning_rate": 0.0001346879535558781,
3515
+ "loss": 0.0661,
3516
+ "step": 2480
3517
+ },
3518
+ {
3519
+ "epoch": 0.655197416122866,
3520
+ "grad_norm": 0.03598962351679802,
3521
+ "learning_rate": 0.00013455601002770813,
3522
+ "loss": 0.0395,
3523
+ "step": 2485
3524
+ },
3525
+ {
3526
+ "epoch": 0.656515720783073,
3527
+ "grad_norm": 0.013886351138353348,
3528
+ "learning_rate": 0.0001344240664995382,
3529
+ "loss": 0.0156,
3530
+ "step": 2490
3531
+ },
3532
+ {
3533
+ "epoch": 0.65783402544328,
3534
+ "grad_norm": 5.741530895233154,
3535
+ "learning_rate": 0.00013429212297136827,
3536
+ "loss": 0.0317,
3537
+ "step": 2495
3538
+ },
3539
+ {
3540
+ "epoch": 0.659152330103487,
3541
+ "grad_norm": 0.20793496072292328,
3542
+ "learning_rate": 0.0001341601794431983,
3543
+ "loss": 0.0072,
3544
+ "step": 2500
3545
+ },
3546
+ {
3547
+ "epoch": 0.659152330103487,
3548
+ "eval_loss": 0.0300898440182209,
3549
+ "eval_runtime": 453.0554,
3550
+ "eval_samples_per_second": 7.443,
3551
+ "eval_steps_per_second": 3.721,
3552
+ "step": 2500
3553
+ }
3554
+ ],
3555
+ "logging_steps": 5,
3556
+ "max_steps": 7584,
3557
+ "num_input_tokens_seen": 0,
3558
+ "num_train_epochs": 2,
3559
+ "save_steps": 500,
3560
+ "stateful_callbacks": {
3561
+ "TrainerControl": {
3562
+ "args": {
3563
+ "should_epoch_stop": false,
3564
+ "should_evaluate": false,
3565
+ "should_log": false,
3566
+ "should_save": true,
3567
+ "should_training_stop": false
3568
+ },
3569
+ "attributes": {}
3570
+ }
3571
+ },
3572
+ "total_flos": 2.0176108255414272e+17,
3573
+ "train_batch_size": 2,
3574
+ "trial_name": null,
3575
+ "trial_params": null
3576
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79dfa687fdd0c9908ab6b63535817e7567b29b0b483ac228723218f6f5fdeec5
3
+ size 5688
checkpoint-3000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-3000/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "down_proj",
27
+ "gate_proj",
28
+ "q_proj",
29
+ "up_proj",
30
+ "o_proj",
31
+ "v_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-3000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0f44795d8575a3c49edf9d1d7c450bed2a65e0cb892185ee78c4885650e54f
3
+ size 97307544
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41fb29aec65b50a5d99c079cd25509fe2a31b0a42659ed697257bb037877391
3
+ size 50866370
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cf932362a907148c2c99f7826e21fa7280b5016d990a6358e60ec3cec98b016
3
+ size 14244