aseratus1 committed
Commit 18d7f75 · verified · 1 Parent(s): 75645ec

Training in progress, step 750, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a54da16fe5b7240520a8f0594ca1a306a0a1ecaaf0a8669778d4b19f6f436fe0
+oid sha256:e6a5e7e2e9841d8d40165440a3b114fc6d3a87902db66ffbd1b617c4aa2a1f92
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5682899f455e6629f68ca1f9cdb66fc03fe34158aef39f8de70b66a8ec81423
+oid sha256:c836f87dcb2db7f41a447900b41dd00eae6cdbaa06e92c32e9807a9f578ad1e6
 size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:820de53dfaf045d5a737950c45788f808d1be0eca4d5a60d4c9e76874bc164e2
+oid sha256:b06ba8d87a0f7bcf601543861884286b2d36e86437be80b16b850d3463989780
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f713db28527f6f7f6479d56becbe3f5b41c00c9e5161b1a032f149ce03e9872
+oid sha256:66d2088500e6ca21e61da50d6c34e788ca76aa5deb1c73d3c503a66913944d14
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5122374892234802,
-  "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 0.22748815165876776,
+  "best_metric": 0.49101725220680237,
+  "best_model_checkpoint": "miner_id_24/checkpoint-750",
+  "epoch": 0.2843601895734597,
   "eval_steps": 150,
-  "global_step": 600,
+  "global_step": 750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -467,6 +467,119 @@
       "eval_samples_per_second": 20.976,
       "eval_steps_per_second": 5.246,
       "step": 600
+    },
+    {
+      "epoch": 0.23127962085308057,
+      "grad_norm": 0.8199161291122437,
+      "learning_rate": 8.888529368712357e-05,
+      "loss": 0.8378,
+      "step": 610
+    },
+    {
+      "epoch": 0.23507109004739338,
+      "grad_norm": 0.7916120886802673,
+      "learning_rate": 8.850088975399781e-05,
+      "loss": 0.5298,
+      "step": 620
+    },
+    {
+      "epoch": 0.23886255924170616,
+      "grad_norm": 0.836801290512085,
+      "learning_rate": 8.811081250930902e-05,
+      "loss": 0.4505,
+      "step": 630
+    },
+    {
+      "epoch": 0.24265402843601896,
+      "grad_norm": 0.688791036605835,
+      "learning_rate": 8.771511943302079e-05,
+      "loss": 0.4163,
+      "step": 640
+    },
+    {
+      "epoch": 0.24644549763033174,
+      "grad_norm": 0.7080439925193787,
+      "learning_rate": 8.731386883261952e-05,
+      "loss": 0.3036,
+      "step": 650
+    },
+    {
+      "epoch": 0.2502369668246445,
+      "grad_norm": 0.7296505570411682,
+      "learning_rate": 8.690711983452243e-05,
+      "loss": 0.8379,
+      "step": 660
+    },
+    {
+      "epoch": 0.25402843601895736,
+      "grad_norm": 0.7905020117759705,
+      "learning_rate": 8.649493237536499e-05,
+      "loss": 0.5306,
+      "step": 670
+    },
+    {
+      "epoch": 0.25781990521327014,
+      "grad_norm": 0.8950763940811157,
+      "learning_rate": 8.60773671931689e-05,
+      "loss": 0.46,
+      "step": 680
+    },
+    {
+      "epoch": 0.2616113744075829,
+      "grad_norm": 0.7092387080192566,
+      "learning_rate": 8.56544858183921e-05,
+      "loss": 0.3971,
+      "step": 690
+    },
+    {
+      "epoch": 0.26540284360189575,
+      "grad_norm": 0.7552494406700134,
+      "learning_rate": 8.522635056486181e-05,
+      "loss": 0.2735,
+      "step": 700
+    },
+    {
+      "epoch": 0.26919431279620853,
+      "grad_norm": 0.7642441391944885,
+      "learning_rate": 8.479302452059238e-05,
+      "loss": 0.7899,
+      "step": 710
+    },
+    {
+      "epoch": 0.2729857819905213,
+      "grad_norm": 0.773184061050415,
+      "learning_rate": 8.435457153848887e-05,
+      "loss": 0.5101,
+      "step": 720
+    },
+    {
+      "epoch": 0.27677725118483415,
+      "grad_norm": 0.8441540002822876,
+      "learning_rate": 8.391105622693793e-05,
+      "loss": 0.4299,
+      "step": 730
+    },
+    {
+      "epoch": 0.28056872037914693,
+      "grad_norm": 0.6840046048164368,
+      "learning_rate": 8.346254394028754e-05,
+      "loss": 0.3795,
+      "step": 740
+    },
+    {
+      "epoch": 0.2843601895734597,
+      "grad_norm": 0.5915653705596924,
+      "learning_rate": 8.30091007692166e-05,
+      "loss": 0.2805,
+      "step": 750
+    },
+    {
+      "epoch": 0.2843601895734597,
+      "eval_loss": 0.49101725220680237,
+      "eval_runtime": 211.5623,
+      "eval_samples_per_second": 20.996,
+      "eval_steps_per_second": 5.251,
+      "step": 750
     }
   ],
   "logging_steps": 10,
@@ -495,7 +608,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.31562472310571e+17,
+  "total_flos": 6.652544882269225e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null