aseratus1 committed
Commit f80c787 · verified · 1 Parent(s): 5e6ad35

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c834c94ccd6327a6e0489d64b97849b6d81c33c5c4bd07105bf49ae53b004bd8
+ oid sha256:227708832b49ac7061c8c8cedfec3f38b21c76d72436fca62de0d18c0032ab53
  size 671149168
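
These checkpoint artifacts are tracked with Git LFS, so each diff only touches the pointer file: the `oid sha256:` line and, when the payload changes size, the `size` line. A minimal sketch (the local path and the idea of re-hashing a downloaded copy are assumptions, not part of this commit) for confirming that a fetched file matches the oid recorded in its pointer:

```python
import hashlib
from pathlib import Path

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the updated adapter weights.
local_file = "last-checkpoint/adapter_model.safetensors"
expected_oid = "227708832b49ac7061c8c8cedfec3f38b21c76d72436fca62de0d18c0032ab53"

print("size:", Path(local_file).stat().st_size)      # expected 671149168 per the pointer
print("match:", sha256_of(local_file) == expected_oid)
```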
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:abda3127c9ade52926ee1f9e398cf3e4081ef61c32ee845d52ad38f8184b8f26
- size 341314196
+ oid sha256:7d9fb08196de88b0882b7cc0b21ec7bbcdafdbe26d855d24029b88bdd529d41c
+ size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5504ed3905f70a4dc8b693243a3717452b706115306b2472b12a161c8cfc3819
+ oid sha256:4430b9a08c075060ef6b7ad7a7977beb00c91a14854ffdc791c60cb3093cb1e9
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6848a8859edc1a631d95dd7c6bbe41d62f8deb9b0c380a9e73b539cd4782f1cd
+ oid sha256:3c5a8c7855b3cc55dff44b95db370c984be8d56bad23c2aea8770dee5814ed88
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.6163225173950195,
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
- "epoch": 0.05687203791469194,
+ "best_metric": 0.5615507960319519,
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
+ "epoch": 0.11374407582938388,
  "eval_steps": 150,
- "global_step": 150,
+ "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -128,6 +128,119 @@
  "eval_samples_per_second": 21.015,
  "eval_steps_per_second": 5.256,
  "step": 150
+ },
+ {
+ "epoch": 0.06066350710900474,
+ "grad_norm": 0.9639042019844055,
+ "learning_rate": 9.955490656615086e-05,
+ "loss": 0.9907,
+ "step": 160
+ },
+ {
+ "epoch": 0.06445497630331753,
+ "grad_norm": 1.0571763515472412,
+ "learning_rate": 9.947045166170315e-05,
+ "loss": 0.6059,
+ "step": 170
+ },
+ {
+ "epoch": 0.06824644549763033,
+ "grad_norm": 0.8203420639038086,
+ "learning_rate": 9.937870702220684e-05,
+ "loss": 0.5407,
+ "step": 180
+ },
+ {
+ "epoch": 0.07203791469194312,
+ "grad_norm": 0.7809204459190369,
+ "learning_rate": 9.927968616672416e-05,
+ "loss": 0.4592,
+ "step": 190
+ },
+ {
+ "epoch": 0.07582938388625593,
+ "grad_norm": 0.6434981226921082,
+ "learning_rate": 9.917340368650657e-05,
+ "loss": 0.3295,
+ "step": 200
+ },
+ {
+ "epoch": 0.07962085308056872,
+ "grad_norm": 0.8934921026229858,
+ "learning_rate": 9.905987524284471e-05,
+ "loss": 0.9521,
+ "step": 210
+ },
+ {
+ "epoch": 0.08341232227488152,
+ "grad_norm": 0.8254252672195435,
+ "learning_rate": 9.89391175647606e-05,
+ "loss": 0.5738,
+ "step": 220
+ },
+ {
+ "epoch": 0.08720379146919431,
+ "grad_norm": 0.840071976184845,
+ "learning_rate": 9.881114844654249e-05,
+ "loss": 0.5222,
+ "step": 230
+ },
+ {
+ "epoch": 0.0909952606635071,
+ "grad_norm": 0.8142471313476562,
+ "learning_rate": 9.867598674512288e-05,
+ "loss": 0.4142,
+ "step": 240
+ },
+ {
+ "epoch": 0.0947867298578199,
+ "grad_norm": 0.6625562310218811,
+ "learning_rate": 9.853365237729976e-05,
+ "loss": 0.3158,
+ "step": 250
+ },
+ {
+ "epoch": 0.0985781990521327,
+ "grad_norm": 0.9738134145736694,
+ "learning_rate": 9.838416631680176e-05,
+ "loss": 0.9377,
+ "step": 260
+ },
+ {
+ "epoch": 0.1023696682464455,
+ "grad_norm": 0.919395387172699,
+ "learning_rate": 9.822755059119765e-05,
+ "loss": 0.5472,
+ "step": 270
+ },
+ {
+ "epoch": 0.1061611374407583,
+ "grad_norm": 0.9126551151275635,
+ "learning_rate": 9.806382827865035e-05,
+ "loss": 0.4959,
+ "step": 280
+ },
+ {
+ "epoch": 0.10995260663507109,
+ "grad_norm": 0.7662134766578674,
+ "learning_rate": 9.78930235045163e-05,
+ "loss": 0.4471,
+ "step": 290
+ },
+ {
+ "epoch": 0.11374407582938388,
+ "grad_norm": 0.6991143226623535,
+ "learning_rate": 9.771516143779049e-05,
+ "loss": 0.345,
+ "step": 300
+ },
+ {
+ "epoch": 0.11374407582938388,
+ "eval_loss": 0.5615507960319519,
+ "eval_runtime": 210.9515,
+ "eval_samples_per_second": 21.057,
+ "eval_steps_per_second": 5.267,
+ "step": 300
  }
  ],
  "logging_steps": 10,
@@ -156,7 +269,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.3274919492963533e+17,
+ "total_flos": 2.6549838985927066e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null