Training in progress, step 200, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ef0a893d2304ec2613a1078060e716f10f486497b39ac7d6098a58b1273563c
 size 231448

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0da31e174f14b582413d238f092823f82409ac74e15fa6e4c1e6206e1b1560f
 size 231448

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3707403520a4d70acd85e81fc7294957debd06341784f510c951231f7d02f6aa
 size 254576

 version https://git-lfs.github.com/spec/v1
+oid sha256:b359f1f4456fc66e8af7bd6af10b44ef389cdde081cd3d67e2b08e1b3e921f33
 size 254576

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c863c613739f451b760af9bcf169c99631b803046ad1df995c2add5e78ed709a
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:611fca5a82812c7bf7c0a091240a103959a433ead0308056e898bc62b8c9fbdd
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43096e812f7650a4b695340f3eaf966d60dd0759abf6f09fa67052dee3fe7d99
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5a2d44d83e73859366b99e73d612be6557265dcc5de9f969cbf19f618c3c8ce
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10dbbac7bda18fc1cc7964704f9f64c12e7ede8f17e11ce2b0c4645c3c6d41c8
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8279a5d0480e72cd82abac26ef8bbb4b3ea84e29cd00bb2e8cb04037ecdb6dc
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acc25acb48e0988288290c80fa9710dd6f5e396ee3272850234d6acd812b31c9
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:bae1f2d1b05a13e3d9d7174173a91262a36a0116d043a0090d590a0b0222d664
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68f2710d27d5470572cb666389ac3253ce6bd2f8baf3a72931f38a2f0798d9a5
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:c869cce088ba4dd9dfd0f0e58d0da60479fc388bc53e70941b100ddad2afc086
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efb49070d441143366f4d6ebfdcbb8e0f3f4e28c5c8117de597dab70c018f87c
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:61fc0ca314c33e82c0809352bc2be36cc8b5692e46265c358b2f4772a328143d
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a00dc5014660d73dadd088e3fb70a41125098421b6e220dd2bad84d7f1f2270
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b77c83b6bdd0d80895f4e0994d6455635cada32b7bb73ad8d3d964e4a7615606
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:154dd7f69382b8ce674aa6438ae9886cf534911695e2fb575e3f2f8afeb04408
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:8aeb1aa09d0638b89c02d026f94ae7283dea7fac42be93be421d08bbc2804afc
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba4af3b1b4fa156d60adeec70df709d1741ac2f3147c676ab2805007313fc707
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.31015396118164,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.05447117566954154,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 3362.23,
       "eval_steps_per_second": 105.118,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 260528093528064.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.308185577392578,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.07262823422605538,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3362.23,
       "eval_steps_per_second": 105.118,
       "step": 150
+    },
+    {
+      "epoch": 0.05483431684067181,
+      "grad_norm": 0.09872455149888992,
+      "learning_rate": 2.6047368421052634e-05,
+      "loss": 10.2997,
+      "step": 151
+    },
+    {
+      "epoch": 0.055197458011802085,
+      "grad_norm": 0.06669897586107254,
+      "learning_rate": 2.5515789473684213e-05,
+      "loss": 10.3051,
+      "step": 152
+    },
+    {
+      "epoch": 0.055560599182932366,
+      "grad_norm": 0.05202397704124451,
+      "learning_rate": 2.4984210526315788e-05,
+      "loss": 10.3089,
+      "step": 153
+    },
+    {
+      "epoch": 0.05592374035406264,
+      "grad_norm": 0.05739379674196243,
+      "learning_rate": 2.445263157894737e-05,
+      "loss": 10.3133,
+      "step": 154
+    },
+    {
+      "epoch": 0.05628688152519292,
+      "grad_norm": 0.07251626998186111,
+      "learning_rate": 2.3921052631578946e-05,
+      "loss": 10.3164,
+      "step": 155
+    },
+    {
+      "epoch": 0.056650022696323195,
+      "grad_norm": 0.09823988378047943,
+      "learning_rate": 2.3389473684210528e-05,
+      "loss": 10.3158,
+      "step": 156
+    },
+    {
+      "epoch": 0.057013163867453476,
+      "grad_norm": 0.1004052385687828,
+      "learning_rate": 2.2857894736842106e-05,
+      "loss": 10.296,
+      "step": 157
+    },
+    {
+      "epoch": 0.05737630503858375,
+      "grad_norm": 0.0734814703464508,
+      "learning_rate": 2.2326315789473685e-05,
+      "loss": 10.3039,
+      "step": 158
+    },
+    {
+      "epoch": 0.057739446209714024,
+      "grad_norm": 0.04906293377280235,
+      "learning_rate": 2.1794736842105264e-05,
+      "loss": 10.3087,
+      "step": 159
+    },
+    {
+      "epoch": 0.058102587380844305,
+      "grad_norm": 0.053951047360897064,
+      "learning_rate": 2.1263157894736842e-05,
+      "loss": 10.3102,
+      "step": 160
+    },
+    {
+      "epoch": 0.05846572855197458,
+      "grad_norm": 0.07190736383199692,
+      "learning_rate": 2.073157894736842e-05,
+      "loss": 10.3167,
+      "step": 161
+    },
+    {
+      "epoch": 0.05882886972310486,
+      "grad_norm": 0.09854470193386078,
+      "learning_rate": 2.0200000000000003e-05,
+      "loss": 10.3165,
+      "step": 162
+    },
+    {
+      "epoch": 0.059192010894235134,
+      "grad_norm": 0.07515429705381393,
+      "learning_rate": 1.966842105263158e-05,
+      "loss": 10.3002,
+      "step": 163
+    },
+    {
+      "epoch": 0.05955515206536541,
+      "grad_norm": 0.06888720393180847,
+      "learning_rate": 1.913684210526316e-05,
+      "loss": 10.3024,
+      "step": 164
+    },
+    {
+      "epoch": 0.05991829323649569,
+      "grad_norm": 0.054647717624902725,
+      "learning_rate": 1.8605263157894736e-05,
+      "loss": 10.3061,
+      "step": 165
+    },
+    {
+      "epoch": 0.06028143440762596,
+      "grad_norm": 0.05850168317556381,
+      "learning_rate": 1.8073684210526318e-05,
+      "loss": 10.3109,
+      "step": 166
+    },
+    {
+      "epoch": 0.060644575578756243,
+      "grad_norm": 0.05970611050724983,
+      "learning_rate": 1.7542105263157897e-05,
+      "loss": 10.3116,
+      "step": 167
+    },
+    {
+      "epoch": 0.06100771674988652,
+      "grad_norm": 0.094508096575737,
+      "learning_rate": 1.7010526315789475e-05,
+      "loss": 10.3123,
+      "step": 168
+    },
+    {
+      "epoch": 0.0613708579210168,
+      "grad_norm": 0.060756973922252655,
+      "learning_rate": 1.6478947368421054e-05,
+      "loss": 10.3036,
+      "step": 169
+    },
+    {
+      "epoch": 0.06173399909214707,
+      "grad_norm": 0.07302611321210861,
+      "learning_rate": 1.5947368421052633e-05,
+      "loss": 10.3019,
+      "step": 170
+    },
+    {
+      "epoch": 0.062097140263277346,
+      "grad_norm": 0.06162986904382706,
+      "learning_rate": 1.541578947368421e-05,
+      "loss": 10.3047,
+      "step": 171
+    },
+    {
+      "epoch": 0.06246028143440763,
+      "grad_norm": 0.05356194078922272,
+      "learning_rate": 1.4884210526315788e-05,
+      "loss": 10.3094,
+      "step": 172
+    },
+    {
+      "epoch": 0.0628234226055379,
+      "grad_norm": 0.06249609589576721,
+      "learning_rate": 1.4352631578947369e-05,
+      "loss": 10.309,
+      "step": 173
+    },
+    {
+      "epoch": 0.06318656377666818,
+      "grad_norm": 0.06981988996267319,
+      "learning_rate": 1.3821052631578949e-05,
+      "loss": 10.3136,
+      "step": 174
+    },
+    {
+      "epoch": 0.06354970494779846,
+      "grad_norm": 0.12152791023254395,
+      "learning_rate": 1.3289473684210526e-05,
+      "loss": 10.3166,
+      "step": 175
+    },
+    {
+      "epoch": 0.06391284611892874,
+      "grad_norm": 0.09238287806510925,
+      "learning_rate": 1.2757894736842106e-05,
+      "loss": 10.2989,
+      "step": 176
+    },
+    {
+      "epoch": 0.06427598729005901,
+      "grad_norm": 0.05485982820391655,
+      "learning_rate": 1.2226315789473685e-05,
+      "loss": 10.3074,
+      "step": 177
+    },
+    {
+      "epoch": 0.06463912846118929,
+      "grad_norm": 0.050571322441101074,
+      "learning_rate": 1.1694736842105264e-05,
+      "loss": 10.3061,
+      "step": 178
+    },
+    {
+      "epoch": 0.06500226963231956,
+      "grad_norm": 0.05679386109113693,
+      "learning_rate": 1.1163157894736842e-05,
+      "loss": 10.3071,
+      "step": 179
+    },
+    {
+      "epoch": 0.06536541080344985,
+      "grad_norm": 0.08567273616790771,
+      "learning_rate": 1.0631578947368421e-05,
+      "loss": 10.3117,
+      "step": 180
+    },
+    {
+      "epoch": 0.06572855197458012,
+      "grad_norm": 0.09831085801124573,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 10.3135,
+      "step": 181
+    },
+    {
+      "epoch": 0.0660916931457104,
+      "grad_norm": 0.08691138029098511,
+      "learning_rate": 9.56842105263158e-06,
+      "loss": 10.2966,
+      "step": 182
+    },
+    {
+      "epoch": 0.06645483431684067,
+      "grad_norm": 0.06285746395587921,
+      "learning_rate": 9.036842105263159e-06,
+      "loss": 10.3026,
+      "step": 183
+    },
+    {
+      "epoch": 0.06681797548797094,
+      "grad_norm": 0.056860584765672684,
+      "learning_rate": 8.505263157894738e-06,
+      "loss": 10.3051,
+      "step": 184
+    },
+    {
+      "epoch": 0.06718111665910123,
+      "grad_norm": 0.056947801262140274,
+      "learning_rate": 7.973684210526316e-06,
+      "loss": 10.309,
+      "step": 185
+    },
+    {
+      "epoch": 0.0675442578302315,
+      "grad_norm": 0.07250034064054489,
+      "learning_rate": 7.442105263157894e-06,
+      "loss": 10.3137,
+      "step": 186
+    },
+    {
+      "epoch": 0.06790739900136178,
+      "grad_norm": 0.09779293835163116,
+      "learning_rate": 6.9105263157894745e-06,
+      "loss": 10.3132,
+      "step": 187
+    },
+    {
+      "epoch": 0.06827054017249205,
+      "grad_norm": 0.08150464296340942,
+      "learning_rate": 6.378947368421053e-06,
+      "loss": 10.2982,
+      "step": 188
+    },
+    {
+      "epoch": 0.06863368134362233,
+      "grad_norm": 0.06894616782665253,
+      "learning_rate": 5.847368421052632e-06,
+      "loss": 10.3014,
+      "step": 189
+    },
+    {
+      "epoch": 0.06899682251475261,
+      "grad_norm": 0.05844898149371147,
+      "learning_rate": 5.315789473684211e-06,
+      "loss": 10.3091,
+      "step": 190
+    },
+    {
+      "epoch": 0.06935996368588289,
+      "grad_norm": 0.05523189157247543,
+      "learning_rate": 4.78421052631579e-06,
+      "loss": 10.3084,
+      "step": 191
+    },
+    {
+      "epoch": 0.06972310485701316,
+      "grad_norm": 0.06870152801275253,
+      "learning_rate": 4.252631578947369e-06,
+      "loss": 10.3094,
+      "step": 192
+    },
+    {
+      "epoch": 0.07008624602814344,
+      "grad_norm": 0.08776228874921799,
+      "learning_rate": 3.721052631578947e-06,
+      "loss": 10.3117,
+      "step": 193
+    },
+    {
+      "epoch": 0.07044938719927371,
+      "grad_norm": 0.061438314616680145,
+      "learning_rate": 3.1894736842105266e-06,
+      "loss": 10.3034,
+      "step": 194
+    },
+    {
+      "epoch": 0.070812528370404,
+      "grad_norm": 0.08862266689538956,
+      "learning_rate": 2.6578947368421053e-06,
+      "loss": 10.2994,
+      "step": 195
+    },
+    {
+      "epoch": 0.07117566954153427,
+      "grad_norm": 0.05210663750767708,
+      "learning_rate": 2.1263157894736844e-06,
+      "loss": 10.3042,
+      "step": 196
+    },
+    {
+      "epoch": 0.07153881071266455,
+      "grad_norm": 0.04864677041769028,
+      "learning_rate": 1.5947368421052633e-06,
+      "loss": 10.3095,
+      "step": 197
+    },
+    {
+      "epoch": 0.07190195188379482,
+      "grad_norm": 0.08212526142597198,
+      "learning_rate": 1.0631578947368422e-06,
+      "loss": 10.315,
+      "step": 198
+    },
+    {
+      "epoch": 0.07226509305492511,
+      "grad_norm": 0.07653167098760605,
+      "learning_rate": 5.315789473684211e-07,
+      "loss": 10.3105,
+      "step": 199
+    },
+    {
+      "epoch": 0.07262823422605538,
+      "grad_norm": 0.14368408918380737,
+      "learning_rate": 0.0,
+      "loss": 10.3169,
+      "step": 200
+    },
+    {
+      "epoch": 0.07262823422605538,
+      "eval_loss": 10.308185577392578,
+      "eval_runtime": 11.1132,
+      "eval_samples_per_second": 3338.635,
+      "eval_steps_per_second": 104.38,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 347352702910464.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null