Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fca193fe7b104c0f0dfd3c674d38342c338601da5551637cbc39bacdc8d7584
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:131dd2d88aeb27e5f3d1fa05d91b64b1a8057641bbc289d41968986c9d15a241
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7618f27af9009fc2a7e1369d91b5811921c629d938d28fb409b7d91a60a1be7
 size 71878612

 version https://git-lfs.github.com/spec/v1
+oid sha256:6476f9bbd3a12e0da116c4c9e5e75afc2bf08982a238af22556d70a043fd56f2
 size 71878612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20e7d0334d41869ee6242e9de52890289490908cecc65991bc3eb474e746522b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c1b72e884a5b1bec71ff8b94eddbb2ed4f1afe844e239447c6ce3d66de319f0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.5829623937606812,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.3412969283276451,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 37.171,
       "eval_steps_per_second": 9.293,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.930188487340851e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5781207084655762,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4550625711035267,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 37.171,
       "eval_steps_per_second": 9.293,
       "step": 150
+    },
+    {
+      "epoch": 0.3435722411831627,
+      "grad_norm": 0.5254620313644409,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 1.6864,
+      "step": 151
+    },
+    {
+      "epoch": 0.34584755403868034,
+      "grad_norm": 0.5362634658813477,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 1.6429,
+      "step": 152
+    },
+    {
+      "epoch": 0.34812286689419797,
+      "grad_norm": 0.554410457611084,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 1.7406,
+      "step": 153
+    },
+    {
+      "epoch": 0.3503981797497156,
+      "grad_norm": 0.5366057753562927,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 1.7129,
+      "step": 154
+    },
+    {
+      "epoch": 0.35267349260523323,
+      "grad_norm": 0.5138299465179443,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 1.7155,
+      "step": 155
+    },
+    {
+      "epoch": 0.35494880546075086,
+      "grad_norm": 0.5121486783027649,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 1.742,
+      "step": 156
+    },
+    {
+      "epoch": 0.3572241183162685,
+      "grad_norm": 0.5009651780128479,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 1.6497,
+      "step": 157
+    },
+    {
+      "epoch": 0.3594994311717861,
+      "grad_norm": 0.4928452670574188,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 1.7092,
+      "step": 158
+    },
+    {
+      "epoch": 0.36177474402730375,
+      "grad_norm": 0.48998475074768066,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 1.6682,
+      "step": 159
+    },
+    {
+      "epoch": 0.3640500568828214,
+      "grad_norm": 0.4646508991718292,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 1.7273,
+      "step": 160
+    },
+    {
+      "epoch": 0.366325369738339,
+      "grad_norm": 0.47496816515922546,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 1.6381,
+      "step": 161
+    },
+    {
+      "epoch": 0.36860068259385664,
+      "grad_norm": 0.4742555022239685,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 1.7534,
+      "step": 162
+    },
+    {
+      "epoch": 0.3708759954493743,
+      "grad_norm": 0.4708501994609833,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 1.6269,
+      "step": 163
+    },
+    {
+      "epoch": 0.3731513083048919,
+      "grad_norm": 0.46369269490242004,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 1.6726,
+      "step": 164
+    },
+    {
+      "epoch": 0.37542662116040953,
+      "grad_norm": 0.4433726668357849,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 1.6271,
+      "step": 165
+    },
+    {
+      "epoch": 0.37770193401592717,
+      "grad_norm": 0.46605101227760315,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 1.7106,
+      "step": 166
+    },
+    {
+      "epoch": 0.3799772468714448,
+      "grad_norm": 0.4526192247867584,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 1.6717,
+      "step": 167
+    },
+    {
+      "epoch": 0.3822525597269625,
+      "grad_norm": 0.4626869261264801,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 1.5921,
+      "step": 168
+    },
+    {
+      "epoch": 0.3845278725824801,
+      "grad_norm": 0.45658981800079346,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 1.6207,
+      "step": 169
+    },
+    {
+      "epoch": 0.38680318543799774,
+      "grad_norm": 0.4643127918243408,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 1.6378,
+      "step": 170
+    },
+    {
+      "epoch": 0.3890784982935154,
+      "grad_norm": 0.462409645318985,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 1.6626,
+      "step": 171
+    },
+    {
+      "epoch": 0.391353811149033,
+      "grad_norm": 0.5124365091323853,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 1.649,
+      "step": 172
+    },
+    {
+      "epoch": 0.39362912400455063,
+      "grad_norm": 0.49918651580810547,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 1.6939,
+      "step": 173
+    },
+    {
+      "epoch": 0.39590443686006827,
+      "grad_norm": 0.5136593580245972,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 1.7328,
+      "step": 174
+    },
+    {
+      "epoch": 0.3981797497155859,
+      "grad_norm": 0.49986395239830017,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 1.6338,
+      "step": 175
+    },
+    {
+      "epoch": 0.4004550625711035,
+      "grad_norm": 0.4834483563899994,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 1.5782,
+      "step": 176
+    },
+    {
+      "epoch": 0.40273037542662116,
+      "grad_norm": 0.5294801592826843,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 1.6917,
+      "step": 177
+    },
+    {
+      "epoch": 0.4050056882821388,
+      "grad_norm": 0.5083182454109192,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 1.7105,
+      "step": 178
+    },
+    {
+      "epoch": 0.4072810011376564,
+      "grad_norm": 0.5304492115974426,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 1.711,
+      "step": 179
+    },
+    {
+      "epoch": 0.40955631399317405,
+      "grad_norm": 0.5188761949539185,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 1.6797,
+      "step": 180
+    },
+    {
+      "epoch": 0.4118316268486917,
+      "grad_norm": 0.5140330195426941,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 1.6917,
+      "step": 181
+    },
+    {
+      "epoch": 0.4141069397042093,
+      "grad_norm": 0.5246240496635437,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 1.702,
+      "step": 182
+    },
+    {
+      "epoch": 0.41638225255972694,
+      "grad_norm": 0.5261593461036682,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 1.6699,
+      "step": 183
+    },
+    {
+      "epoch": 0.41865756541524457,
+      "grad_norm": 0.5666885375976562,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 1.736,
+      "step": 184
+    },
+    {
+      "epoch": 0.42093287827076226,
+      "grad_norm": 0.5484257936477661,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 1.637,
+      "step": 185
+    },
+    {
+      "epoch": 0.4232081911262799,
+      "grad_norm": 0.5620837807655334,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 1.6758,
+      "step": 186
+    },
+    {
+      "epoch": 0.4254835039817975,
+      "grad_norm": 0.5988110303878784,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 1.8052,
+      "step": 187
+    },
+    {
+      "epoch": 0.42775881683731515,
+      "grad_norm": 0.5587230920791626,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 1.6675,
+      "step": 188
+    },
+    {
+      "epoch": 0.4300341296928328,
+      "grad_norm": 0.5831708908081055,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 1.6906,
+      "step": 189
+    },
+    {
+      "epoch": 0.4323094425483504,
+      "grad_norm": 0.5833969116210938,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 1.5982,
+      "step": 190
+    },
+    {
+      "epoch": 0.43458475540386804,
+      "grad_norm": 0.5912043452262878,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 1.6195,
+      "step": 191
+    },
+    {
+      "epoch": 0.43686006825938567,
+      "grad_norm": 0.6026055216789246,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 1.5465,
+      "step": 192
+    },
+    {
+      "epoch": 0.4391353811149033,
+      "grad_norm": 0.637148380279541,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 1.6555,
+      "step": 193
+    },
+    {
+      "epoch": 0.44141069397042093,
+      "grad_norm": 0.5973919034004211,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 1.4016,
+      "step": 194
+    },
+    {
+      "epoch": 0.44368600682593856,
+      "grad_norm": 0.5609529614448547,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 1.098,
+      "step": 195
+    },
+    {
+      "epoch": 0.4459613196814562,
+      "grad_norm": 0.5903803706169128,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 1.0209,
+      "step": 196
+    },
+    {
+      "epoch": 0.4482366325369738,
+      "grad_norm": 0.5936239957809448,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 1.008,
+      "step": 197
+    },
+    {
+      "epoch": 0.45051194539249145,
+      "grad_norm": 0.6110945343971252,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 0.9535,
+      "step": 198
+    },
+    {
+      "epoch": 0.4527872582480091,
+      "grad_norm": 0.7129843235015869,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 0.9507,
+      "step": 199
+    },
+    {
+      "epoch": 0.4550625711035267,
+      "grad_norm": 1.5045645236968994,
+      "learning_rate": 0.0,
+      "loss": 1.4004,
+      "step": 200
+    },
+    {
+      "epoch": 0.4550625711035267,
+      "eval_loss": 1.5781207084655762,
+      "eval_runtime": 19.9818,
+      "eval_samples_per_second": 37.034,
+      "eval_steps_per_second": 9.258,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.5658561673560064e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null