aseratus1 commited on
Commit
0af757a
·
verified ·
1 Parent(s): 8bd69d0

Training in progress, step 1050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2773d103adc0c8ead46e0d1c9d2b732e43a4d7f35c1549395924e393990949aa
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d50e369477786043c5b430851002961dd9f9caa36844616c46cc591eef997815
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa61b3f79af01e7d3e5def7cc983009509b1ffb854e28f10d1b53c286d3e8339
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7f08252a0254378dc4f06a78893a75baf46281ea927e52f1363d5c2a38aa73
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b38bdd189b57d19eb835886fb130c8330a50be14e952a10893bb9397bd28e4c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d03f439ef2470796b8838956fc6c9abc3635668ab5869b079253aa045a6d43
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:718698456f8ff8ad30f575ae2e26668e12a9063b8bcc9333e9c52723f1413da8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac137486d76d1388893f1df68d737ba62f99dd1ad4017b78eda04d51748bd9d2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.48085248470306396,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
- "epoch": 0.3412322274881517,
5
  "eval_steps": 150,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -693,6 +693,119 @@
693
  "eval_samples_per_second": 21.023,
694
  "eval_steps_per_second": 5.258,
695
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
  }
697
  ],
698
  "logging_steps": 10,
@@ -721,7 +834,7 @@
721
  "attributes": {}
722
  }
723
  },
724
- "total_flos": 7.983808115512443e+17,
725
  "train_batch_size": 8,
726
  "trial_name": null,
727
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.4649004638195038,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1050",
4
+ "epoch": 0.3981042654028436,
5
  "eval_steps": 150,
6
+ "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
693
  "eval_samples_per_second": 21.023,
694
  "eval_steps_per_second": 5.258,
695
  "step": 900
696
+ },
697
+ {
698
+ "epoch": 0.34502369668246446,
699
+ "grad_norm": 0.7861023545265198,
700
+ "learning_rate": 7.514003873750836e-05,
701
+ "loss": 0.7591,
702
+ "step": 910
703
+ },
704
+ {
705
+ "epoch": 0.34881516587677724,
706
+ "grad_norm": 0.7269836068153381,
707
+ "learning_rate": 7.461354712273526e-05,
708
+ "loss": 0.502,
709
+ "step": 920
710
+ },
711
+ {
712
+ "epoch": 0.35260663507109,
713
+ "grad_norm": 0.7629136443138123,
714
+ "learning_rate": 7.408342857044484e-05,
715
+ "loss": 0.4215,
716
+ "step": 930
717
+ },
718
+ {
719
+ "epoch": 0.35639810426540286,
720
+ "grad_norm": 0.668658435344696,
721
+ "learning_rate": 7.354976119643472e-05,
722
+ "loss": 0.3744,
723
+ "step": 940
724
+ },
725
+ {
726
+ "epoch": 0.36018957345971564,
727
+ "grad_norm": 0.6049548387527466,
728
+ "learning_rate": 7.301262363944035e-05,
729
+ "loss": 0.2709,
730
+ "step": 950
731
+ },
732
+ {
733
+ "epoch": 0.3639810426540284,
734
+ "grad_norm": 0.8917579650878906,
735
+ "learning_rate": 7.247209504954715e-05,
736
+ "loss": 0.7532,
737
+ "step": 960
738
+ },
739
+ {
740
+ "epoch": 0.36777251184834125,
741
+ "grad_norm": 0.7818393111228943,
742
+ "learning_rate": 7.192825507652734e-05,
743
+ "loss": 0.4742,
744
+ "step": 970
745
+ },
746
+ {
747
+ "epoch": 0.37156398104265403,
748
+ "grad_norm": 0.7396854162216187,
749
+ "learning_rate": 7.138118385810313e-05,
750
+ "loss": 0.4385,
751
+ "step": 980
752
+ },
753
+ {
754
+ "epoch": 0.3753554502369668,
755
+ "grad_norm": 0.8227097392082214,
756
+ "learning_rate": 7.083096200813794e-05,
757
+ "loss": 0.3656,
758
+ "step": 990
759
+ },
760
+ {
761
+ "epoch": 0.3791469194312796,
762
+ "grad_norm": 0.6364562511444092,
763
+ "learning_rate": 7.027767060475764e-05,
764
+ "loss": 0.2728,
765
+ "step": 1000
766
+ },
767
+ {
768
+ "epoch": 0.38293838862559243,
769
+ "grad_norm": 0.7405619025230408,
770
+ "learning_rate": 6.972139117840307e-05,
771
+ "loss": 0.7329,
772
+ "step": 1010
773
+ },
774
+ {
775
+ "epoch": 0.3867298578199052,
776
+ "grad_norm": 0.761043131351471,
777
+ "learning_rate": 6.91622056998163e-05,
778
+ "loss": 0.4928,
779
+ "step": 1020
780
+ },
781
+ {
782
+ "epoch": 0.390521327014218,
783
+ "grad_norm": 0.7283722758293152,
784
+ "learning_rate": 6.860019656796163e-05,
785
+ "loss": 0.4324,
786
+ "step": 1030
787
+ },
788
+ {
789
+ "epoch": 0.3943127962085308,
790
+ "grad_norm": 0.6700026392936707,
791
+ "learning_rate": 6.80354465978838e-05,
792
+ "loss": 0.371,
793
+ "step": 1040
794
+ },
795
+ {
796
+ "epoch": 0.3981042654028436,
797
+ "grad_norm": 0.6622222065925598,
798
+ "learning_rate": 6.746803900850462e-05,
799
+ "loss": 0.2729,
800
+ "step": 1050
801
+ },
802
+ {
803
+ "epoch": 0.3981042654028436,
804
+ "eval_loss": 0.4649004638195038,
805
+ "eval_runtime": 211.8681,
806
+ "eval_samples_per_second": 20.966,
807
+ "eval_steps_per_second": 5.244,
808
+ "step": 1050
809
  }
810
  ],
811
  "logging_steps": 10,
 
834
  "attributes": {}
835
  }
836
  },
837
+ "total_flos": 9.315071348755661e+17,
838
  "train_batch_size": 8,
839
  "trial_name": null,
840
  "trial_params": null