jeo053 commited on
Commit
7a2951a
·
verified ·
1 Parent(s): 4a0308b

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afe01e300f8643a64c61c95d5bd2ad7a24d0656449a61c59adbba0be3df400bb
3
  size 959732416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e226bfabb4fe37953d03ddbd4d68cb6331b76339a65c4652c13e68da7c732e
3
  size 959732416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:722396805f72f69e17d77d25f49c1ffd5e62d9bd873d12c24dc1a795814b13a4
3
  size 1915006400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f46b965df64512eadca15157f363bb6b9d956cdb921d1cf19f79f3e3de5b89f3
3
  size 1915006400
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a021a0be63196a5c519dcf5db6eb93af128af867e8fa23972d45d6a295260aef
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f10ca53587facccfd8ce6c38c8ee3d38f84b1cd52bf1939f663991a4aa3ab223
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a606e680bc86afd847529dd70a95fba19d7ed0518faff7d75b81a55c5474456c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6a37679919f879e26f31535286e088a72dcdededa90c5c4ca97536707dfcd97
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5155123260103975,
5
  "eval_steps": 500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -243,6 +243,89 @@
243
  "learning_rate": 2.4844876739896027e-05,
244
  "loss": 0.069,
245
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  }
247
  ],
248
  "logging_steps": 500,
@@ -262,7 +345,7 @@
262
  "attributes": {}
263
  }
264
  },
265
- "total_flos": 5.4135558058216e+18,
266
  "train_batch_size": 2,
267
  "trial_name": null,
268
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.35401643468053,
5
  "eval_steps": 500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
243
  "learning_rate": 2.4844876739896027e-05,
244
  "loss": 0.069,
245
  "step": 15000
246
+ },
247
+ {
248
+ "epoch": 2.599362736877411,
249
+ "grad_norm": 0.22130186855793,
250
+ "learning_rate": 2.4006372631225894e-05,
251
+ "loss": 0.0732,
252
+ "step": 15500
253
+ },
254
+ {
255
+ "epoch": 2.683213147744424,
256
+ "grad_norm": 0.7159423828125,
257
+ "learning_rate": 2.3167868522555762e-05,
258
+ "loss": 0.0665,
259
+ "step": 16000
260
+ },
261
+ {
262
+ "epoch": 2.7670635586114374,
263
+ "grad_norm": 0.7721897959709167,
264
+ "learning_rate": 2.232936441388563e-05,
265
+ "loss": 0.065,
266
+ "step": 16500
267
+ },
268
+ {
269
+ "epoch": 2.8509139694784507,
270
+ "grad_norm": 0.2992958426475525,
271
+ "learning_rate": 2.1490860305215497e-05,
272
+ "loss": 0.0635,
273
+ "step": 17000
274
+ },
275
+ {
276
+ "epoch": 2.934764380345464,
277
+ "grad_norm": 0.4838363528251648,
278
+ "learning_rate": 2.0652356196545364e-05,
279
+ "loss": 0.0659,
280
+ "step": 17500
281
+ },
282
+ {
283
+ "epoch": 3.0,
284
+ "eval_gen_len": 100.0,
285
+ "eval_loss": 0.0834273174405098,
286
+ "eval_rouge1": 1.8169,
287
+ "eval_rouge2": 1.2573,
288
+ "eval_rougeL": 1.8423,
289
+ "eval_rougeLsum": 1.8571,
290
+ "eval_runtime": 1629.4126,
291
+ "eval_samples_per_second": 0.814,
292
+ "eval_steps_per_second": 0.407,
293
+ "step": 17889
294
+ },
295
+ {
296
+ "epoch": 3.0186147912124768,
297
+ "grad_norm": 0.36135250329971313,
298
+ "learning_rate": 1.9813852087875232e-05,
299
+ "loss": 0.0577,
300
+ "step": 18000
301
+ },
302
+ {
303
+ "epoch": 3.10246520207949,
304
+ "grad_norm": 0.4394136667251587,
305
+ "learning_rate": 1.89753479792051e-05,
306
+ "loss": 0.0466,
307
+ "step": 18500
308
+ },
309
+ {
310
+ "epoch": 3.1863156129465033,
311
+ "grad_norm": 0.38663652539253235,
312
+ "learning_rate": 1.8136843870534967e-05,
313
+ "loss": 0.0488,
314
+ "step": 19000
315
+ },
316
+ {
317
+ "epoch": 3.2701660238135166,
318
+ "grad_norm": 0.6833186745643616,
319
+ "learning_rate": 1.7298339761864835e-05,
320
+ "loss": 0.0426,
321
+ "step": 19500
322
+ },
323
+ {
324
+ "epoch": 3.35401643468053,
325
+ "grad_norm": 0.2934403121471405,
326
+ "learning_rate": 1.6459835653194702e-05,
327
+ "loss": 0.0453,
328
+ "step": 20000
329
  }
330
  ],
331
  "logging_steps": 500,
 
345
  "attributes": {}
346
  }
347
  },
348
+ "total_flos": 7.21801425313176e+18,
349
  "train_batch_size": 2,
350
  "trial_name": null,
351
  "trial_params": null