aseratus1 commited on
Commit
75645ec
·
verified ·
1 Parent(s): 8133982

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54920ba191c3897d93abfae0a14a21bd7a86a9692f46148ebd173663fb27535f
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a54da16fe5b7240520a8f0594ca1a306a0a1ecaaf0a8669778d4b19f6f436fe0
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9701a6fcd0bec7a7a691c63dc3f672f22d9831364b8c114a2763218f599fac02
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5682899f455e6629f68ca1f9cdb66fc03fe34158aef39f8de70b66a8ec81423
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ab18e1d59252106afdf4da18e343f22032a2767a4c2723009e6558803b0e96
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820de53dfaf045d5a737950c45788f808d1be0eca4d5a60d4c9e76874bc164e2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f915568f86cbbb4ec27d1f37808bebd339a27f0f49c65b6033fc00a9a70a87ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f713db28527f6f7f6479d56becbe3f5b41c00c9e5161b1a032f149ce03e9872
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.53005450963974,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
- "epoch": 0.17061611374407584,
5
  "eval_steps": 150,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -354,6 +354,119 @@
354
  "eval_samples_per_second": 21.047,
355
  "eval_steps_per_second": 5.264,
356
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  }
358
  ],
359
  "logging_steps": 10,
@@ -382,7 +495,7 @@
382
  "attributes": {}
383
  }
384
  },
385
- "total_flos": 3.9862471318359245e+17,
386
  "train_batch_size": 8,
387
  "trial_name": null,
388
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5122374892234802,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 0.22748815165876776,
5
  "eval_steps": 150,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
354
  "eval_samples_per_second": 21.047,
355
  "eval_steps_per_second": 5.264,
356
  "step": 450
357
+ },
358
+ {
359
+ "epoch": 0.17440758293838862,
360
+ "grad_norm": 0.840986430644989,
361
+ "learning_rate": 9.393409622681559e-05,
362
+ "loss": 0.8218,
363
+ "step": 460
364
+ },
365
+ {
366
+ "epoch": 0.17819905213270143,
367
+ "grad_norm": 0.8100599050521851,
368
+ "learning_rate": 9.364110200687738e-05,
369
+ "loss": 0.5342,
370
+ "step": 470
371
+ },
372
+ {
373
+ "epoch": 0.1819905213270142,
374
+ "grad_norm": 0.8114942908287048,
375
+ "learning_rate": 9.33416770376702e-05,
376
+ "loss": 0.4595,
377
+ "step": 480
378
+ },
379
+ {
380
+ "epoch": 0.18578199052132702,
381
+ "grad_norm": 0.6884726285934448,
382
+ "learning_rate": 9.303586544106115e-05,
383
+ "loss": 0.411,
384
+ "step": 490
385
+ },
386
+ {
387
+ "epoch": 0.1895734597156398,
388
+ "grad_norm": 0.8265155553817749,
389
+ "learning_rate": 9.272371228002091e-05,
390
+ "loss": 0.3095,
391
+ "step": 500
392
+ },
393
+ {
394
+ "epoch": 0.1933649289099526,
395
+ "grad_norm": 0.8282197117805481,
396
+ "learning_rate": 9.240526355198353e-05,
397
+ "loss": 0.8212,
398
+ "step": 510
399
+ },
400
+ {
401
+ "epoch": 0.1971563981042654,
402
+ "grad_norm": 0.783984899520874,
403
+ "learning_rate": 9.208056618206853e-05,
404
+ "loss": 0.5332,
405
+ "step": 520
406
+ },
407
+ {
408
+ "epoch": 0.2009478672985782,
409
+ "grad_norm": 0.7088342905044556,
410
+ "learning_rate": 9.174966801616603e-05,
411
+ "loss": 0.459,
412
+ "step": 530
413
+ },
414
+ {
415
+ "epoch": 0.204739336492891,
416
+ "grad_norm": 0.7358261942863464,
417
+ "learning_rate": 9.141261781388664e-05,
418
+ "loss": 0.3989,
419
+ "step": 540
420
+ },
421
+ {
422
+ "epoch": 0.20853080568720378,
423
+ "grad_norm": 0.686541736125946,
424
+ "learning_rate": 9.10694652413763e-05,
425
+ "loss": 0.3132,
426
+ "step": 550
427
+ },
428
+ {
429
+ "epoch": 0.2123222748815166,
430
+ "grad_norm": 0.8187770247459412,
431
+ "learning_rate": 9.072026086399777e-05,
432
+ "loss": 0.8469,
433
+ "step": 560
434
+ },
435
+ {
436
+ "epoch": 0.2161137440758294,
437
+ "grad_norm": 0.8380711674690247,
438
+ "learning_rate": 9.03650561388796e-05,
439
+ "loss": 0.5158,
440
+ "step": 570
441
+ },
442
+ {
443
+ "epoch": 0.21990521327014217,
444
+ "grad_norm": 0.7505501508712769,
445
+ "learning_rate": 9.000390340733353e-05,
446
+ "loss": 0.4408,
447
+ "step": 580
448
+ },
449
+ {
450
+ "epoch": 0.22369668246445498,
451
+ "grad_norm": 0.7203567028045654,
452
+ "learning_rate": 8.963685588714185e-05,
453
+ "loss": 0.3953,
454
+ "step": 590
455
+ },
456
+ {
457
+ "epoch": 0.22748815165876776,
458
+ "grad_norm": 0.6150539517402649,
459
+ "learning_rate": 8.926396766471537e-05,
460
+ "loss": 0.2903,
461
+ "step": 600
462
+ },
463
+ {
464
+ "epoch": 0.22748815165876776,
465
+ "eval_loss": 0.5122374892234802,
466
+ "eval_runtime": 211.7681,
467
+ "eval_samples_per_second": 20.976,
468
+ "eval_steps_per_second": 5.246,
469
+ "step": 600
470
  }
471
  ],
472
  "logging_steps": 10,
 
495
  "attributes": {}
496
  }
497
  },
498
+ "total_flos": 5.31562472310571e+17,
499
  "train_batch_size": 8,
500
  "trial_name": null,
501
  "trial_params": null