aseratus1 committed on
Commit
01d60a8
·
verified ·
1 Parent(s): b44d578

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6a5e7e2e9841d8d40165440a3b114fc6d3a87902db66ffbd1b617c4aa2a1f92
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2773d103adc0c8ead46e0d1c9d2b732e43a4d7f35c1549395924e393990949aa
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c836f87dcb2db7f41a447900b41dd00eae6cdbaa06e92c32e9807a9f578ad1e6
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa61b3f79af01e7d3e5def7cc983009509b1ffb854e28f10d1b53c286d3e8339
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b06ba8d87a0f7bcf601543861884286b2d36e86437be80b16b850d3463989780
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b38bdd189b57d19eb835886fb130c8330a50be14e952a10893bb9397bd28e4c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d2088500e6ca21e61da50d6c34e788ca76aa5deb1c73d3c503a66913944d14
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718698456f8ff8ad30f575ae2e26668e12a9063b8bcc9333e9c52723f1413da8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.49101725220680237,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-750",
4
- "epoch": 0.2843601895734597,
5
  "eval_steps": 150,
6
- "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -580,6 +580,119 @@
580
  "eval_samples_per_second": 20.996,
581
  "eval_steps_per_second": 5.251,
582
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  }
584
  ],
585
  "logging_steps": 10,
@@ -608,7 +721,7 @@
608
  "attributes": {}
609
  }
610
  },
611
- "total_flos": 6.652544882269225e+17,
612
  "train_batch_size": 8,
613
  "trial_name": null,
614
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.48085248470306396,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
+ "epoch": 0.3412322274881517,
5
  "eval_steps": 150,
6
+ "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
580
  "eval_samples_per_second": 20.996,
581
  "eval_steps_per_second": 5.251,
582
  "step": 750
583
+ },
584
+ {
585
+ "epoch": 0.2881516587677725,
586
+ "grad_norm": 0.8108296394348145,
587
+ "learning_rate": 8.255079353099611e-05,
588
+ "loss": 0.7564,
589
+ "step": 760
590
+ },
591
+ {
592
+ "epoch": 0.2919431279620853,
593
+ "grad_norm": 0.778976559638977,
594
+ "learning_rate": 8.208768975964338e-05,
595
+ "loss": 0.5116,
596
+ "step": 770
597
+ },
598
+ {
599
+ "epoch": 0.2957345971563981,
600
+ "grad_norm": 0.7423689961433411,
601
+ "learning_rate": 8.161985769597045e-05,
602
+ "loss": 0.4358,
603
+ "step": 780
604
+ },
605
+ {
606
+ "epoch": 0.2995260663507109,
607
+ "grad_norm": 0.7508371472358704,
608
+ "learning_rate": 8.114736627752846e-05,
609
+ "loss": 0.3686,
610
+ "step": 790
611
+ },
612
+ {
613
+ "epoch": 0.3033175355450237,
614
+ "grad_norm": 0.5939842462539673,
615
+ "learning_rate": 8.067028512844929e-05,
616
+ "loss": 0.2847,
617
+ "step": 800
618
+ },
619
+ {
620
+ "epoch": 0.3071090047393365,
621
+ "grad_norm": 0.8010008931159973,
622
+ "learning_rate": 8.018868454918627e-05,
623
+ "loss": 0.7743,
624
+ "step": 810
625
+ },
626
+ {
627
+ "epoch": 0.3109004739336493,
628
+ "grad_norm": 0.8660693764686584,
629
+ "learning_rate": 7.970263550615469e-05,
630
+ "loss": 0.5334,
631
+ "step": 820
632
+ },
633
+ {
634
+ "epoch": 0.31469194312796206,
635
+ "grad_norm": 0.708128035068512,
636
+ "learning_rate": 7.921220962127487e-05,
637
+ "loss": 0.4534,
638
+ "step": 830
639
+ },
640
+ {
641
+ "epoch": 0.3184834123222749,
642
+ "grad_norm": 0.7724855542182922,
643
+ "learning_rate": 7.871747916141808e-05,
644
+ "loss": 0.3683,
645
+ "step": 840
646
+ },
647
+ {
648
+ "epoch": 0.3222748815165877,
649
+ "grad_norm": 0.6242368817329407,
650
+ "learning_rate": 7.821851702775765e-05,
651
+ "loss": 0.2945,
652
+ "step": 850
653
+ },
654
+ {
655
+ "epoch": 0.32606635071090045,
656
+ "grad_norm": 0.8044713139533997,
657
+ "learning_rate": 7.771539674502667e-05,
658
+ "loss": 0.7826,
659
+ "step": 860
660
+ },
661
+ {
662
+ "epoch": 0.3298578199052133,
663
+ "grad_norm": 0.7477179765701294,
664
+ "learning_rate": 7.720819245068368e-05,
665
+ "loss": 0.4976,
666
+ "step": 870
667
+ },
668
+ {
669
+ "epoch": 0.33364928909952607,
670
+ "grad_norm": 0.7895752191543579,
671
+ "learning_rate": 7.669697888398812e-05,
672
+ "loss": 0.4432,
673
+ "step": 880
674
+ },
675
+ {
676
+ "epoch": 0.33744075829383885,
677
+ "grad_norm": 0.7435291409492493,
678
+ "learning_rate": 7.618183137498709e-05,
679
+ "loss": 0.3796,
680
+ "step": 890
681
+ },
682
+ {
683
+ "epoch": 0.3412322274881517,
684
+ "grad_norm": 0.8892961144447327,
685
+ "learning_rate": 7.56628258334151e-05,
686
+ "loss": 0.2694,
687
+ "step": 900
688
+ },
689
+ {
690
+ "epoch": 0.3412322274881517,
691
+ "eval_loss": 0.48085248470306396,
692
+ "eval_runtime": 211.2971,
693
+ "eval_samples_per_second": 21.023,
694
+ "eval_steps_per_second": 5.258,
695
+ "step": 900
696
  }
697
  ],
698
  "logging_steps": 10,
 
721
  "attributes": {}
722
  }
723
  },
724
+ "total_flos": 7.983808115512443e+17,
725
  "train_batch_size": 8,
726
  "trial_name": null,
727
  "trial_params": null