Commit 88ddef5 (verified) · committed by lesso
1 parent: d840126

Training in progress, step 119, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f757c3e1b1a28ac8f73b2218643ddc385adca6f4a8a0dd016f0c4f246d712b3e
+oid sha256:e3dd7a21a4e569c749409d50ac89148aab852b4313f89d44feb028807cb75774
 size 231448
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:497ce543c979f2ce6f4ec604b6dbb0c254acac3c6ca06459b03a6c85dc28ab22
+oid sha256:ea48cc6707d7cc34eaad72e155840be4cdb3ccaf4dc2f287b343caf2c3cf839e
 size 254576
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82b356d78178e7a2ae49b0fb5f7a237878fdd3cbd73389518882f29c15bba89a
+oid sha256:c442b1586b35f38f53dc754b0acf3319f62d976794dea33e04f32e9234f79ce1
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e635241d528538f9bb04a27dca5fb22e408ffea565ee1b598b71a97a8d2e276e
+oid sha256:927e2892925a2a06cf03905ae7d4eaefc90931850a6059ce86cd9ecd4cda1939
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5b09a32341207947bc8fe88055fd4f4b39f334a2b00e666668ba1609a1ced5d
+oid sha256:760224d83793ba66736eaf062fd2ac4a49d7aacae3a6be37752aec8874bdd256
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de5a2676afef28fc154aabe5bdf74e4a7892b71f71996a710d1917fc7ca1b511
+oid sha256:3803ebbc7c0d6617f66382fc25c4c8883331a70a6b685548b808faceb8317259
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a6e11399356e19cdcb0f1309056bbc4f1328deb0ce732adfdc3024d8890e09c
+oid sha256:b2d9855d560c6215cd274a4aa0b1d0ae171e1a930f52123b698de04b2a488e0a
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b48b6dc79365b72e57891463df2fe10a703a10748598155ce0cfda494a19b72
+oid sha256:ee8ca76bc58688c439e07fb502a1e5217f6281b386c5dfdc0f2e522ced398a81
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1aa459af208bd13482a6f15758e0637d9fd1b243c7b5e2c58379848399e3547
+oid sha256:d7382f055d15bf806fc789a57b301a409008c8d57878842462ba7e76cbdbf2ee
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7091f6691296f0113046371fd5a02f74d9f53e794634f79726eeceeb49c779c1
+oid sha256:88a43a63ce63d2508c50e86ec058ecc0e3cd162a22083ad025498042ec775277
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d0cd91d59507c28bc18bd9b01764d373ca8bb673def2d9bd5d155d2ea42c3fa
+oid sha256:daeaa5730100adbc7e999120bcfde2edc17529df09920a58e9651489db9d4fe9
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 10.305556297302246,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.5283018867924527,
+  "epoch": 3.0125786163522013,
   "eval_steps": 50,
-  "global_step": 100,
+  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,139 @@
       "eval_samples_per_second": 3010.845,
       "eval_steps_per_second": 95.851,
       "step": 100
+    },
+    {
+      "epoch": 2.5534591194968552,
+      "grad_norm": 0.1466299146413803,
+      "learning_rate": 1.667889908256881e-05,
+      "loss": 10.3158,
+      "step": 101
+    },
+    {
+      "epoch": 2.5786163522012577,
+      "grad_norm": 0.17850784957408905,
+      "learning_rate": 1.5752293577981652e-05,
+      "loss": 10.2993,
+      "step": 102
+    },
+    {
+      "epoch": 2.6037735849056602,
+      "grad_norm": 0.2629879117012024,
+      "learning_rate": 1.4825688073394497e-05,
+      "loss": 10.2901,
+      "step": 103
+    },
+    {
+      "epoch": 2.6289308176100628,
+      "grad_norm": 0.3722081184387207,
+      "learning_rate": 1.389908256880734e-05,
+      "loss": 10.2754,
+      "step": 104
+    },
+    {
+      "epoch": 2.6540880503144653,
+      "grad_norm": 0.17005372047424316,
+      "learning_rate": 1.2972477064220185e-05,
+      "loss": 10.3255,
+      "step": 105
+    },
+    {
+      "epoch": 2.6792452830188678,
+      "grad_norm": 0.14880169928073883,
+      "learning_rate": 1.2045871559633028e-05,
+      "loss": 10.3339,
+      "step": 106
+    },
+    {
+      "epoch": 2.7044025157232703,
+      "grad_norm": 0.18930739164352417,
+      "learning_rate": 1.1119266055045872e-05,
+      "loss": 10.3178,
+      "step": 107
+    },
+    {
+      "epoch": 2.729559748427673,
+      "grad_norm": 0.18744437396526337,
+      "learning_rate": 1.0192660550458717e-05,
+      "loss": 10.2987,
+      "step": 108
+    },
+    {
+      "epoch": 2.7547169811320753,
+      "grad_norm": 0.27432578802108765,
+      "learning_rate": 9.26605504587156e-06,
+      "loss": 10.2927,
+      "step": 109
+    },
+    {
+      "epoch": 2.779874213836478,
+      "grad_norm": 0.31490325927734375,
+      "learning_rate": 8.339449541284405e-06,
+      "loss": 10.2855,
+      "step": 110
+    },
+    {
+      "epoch": 2.8050314465408803,
+      "grad_norm": 0.17064593732357025,
+      "learning_rate": 7.412844036697248e-06,
+      "loss": 10.3155,
+      "step": 111
+    },
+    {
+      "epoch": 2.830188679245283,
+      "grad_norm": 0.15425540506839752,
+      "learning_rate": 6.4862385321100925e-06,
+      "loss": 10.3282,
+      "step": 112
+    },
+    {
+      "epoch": 2.8553459119496853,
+      "grad_norm": 0.14992254972457886,
+      "learning_rate": 5.559633027522936e-06,
+      "loss": 10.3263,
+      "step": 113
+    },
+    {
+      "epoch": 2.880503144654088,
+      "grad_norm": 0.1558142602443695,
+      "learning_rate": 4.63302752293578e-06,
+      "loss": 10.3093,
+      "step": 114
+    },
+    {
+      "epoch": 2.9056603773584904,
+      "grad_norm": 0.2051534205675125,
+      "learning_rate": 3.706422018348624e-06,
+      "loss": 10.2948,
+      "step": 115
+    },
+    {
+      "epoch": 2.930817610062893,
+      "grad_norm": 0.2544957101345062,
+      "learning_rate": 2.779816513761468e-06,
+      "loss": 10.2848,
+      "step": 116
+    },
+    {
+      "epoch": 2.9559748427672954,
+      "grad_norm": 0.13796408474445343,
+      "learning_rate": 1.853211009174312e-06,
+      "loss": 10.3145,
+      "step": 117
+    },
+    {
+      "epoch": 2.981132075471698,
+      "grad_norm": 0.12653085589408875,
+      "learning_rate": 9.26605504587156e-07,
+      "loss": 10.3105,
+      "step": 118
+    },
+    {
+      "epoch": 3.0125786163522013,
+      "grad_norm": 0.185125932097435,
+      "learning_rate": 0.0,
+      "loss": 16.0677,
+      "step": 119
     }
   ],
   "logging_steps": 1,
@@ -754,12 +887,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
      "attributes": {}
     }
   },
-  "total_flos": 173649218764800.0,
+  "total_flos": 206642570330112.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null