Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99e476de7fc3ca4868fd5eae80b84e0903be39638f8a8ebb8fd02e2f8c7227e1
 size 116744

 version https://git-lfs.github.com/spec/v1
+oid sha256:20c1defbf9cb8bb9061d5cd9451312819bf97730c7c855f575a0a3a296a14d14
 size 116744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd24ea3e243fa4c8a3a8b325d411f57281c932b2546b460930320236ab9f6dba
 size 243310

 version https://git-lfs.github.com/spec/v1
+oid sha256:89cb96a05afc3919ba3385b96babce3e79e4b8f0822aa7d692571e4b7d8eaa08
 size 243310

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aae9162b7b6cfc5a6dde2c04ec421fc367b844176dc01f24381cc0633d308ed9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1db38d4d71c180f6859dfd303173bae0844495efbfd738d53ef2bbee50518f2
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a7a095e53e84dd91b67168fdcea1539877f19960bfcdf91881c4dc40167d8e3
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ced451b3d641ed4ba8750e512a9d0efbe44b80c77c15afe8e2f39f64ee32acc5
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:272ed26fb67f823155c3c4160d3975031050db726f39c09eba586496f8122d54
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:53cc2eab132aa4ff8e190950e82eddfe26b268169dd563b19a82af4ae7e07604
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ea3fce82b0302f044ef1eb45af3a9199ae2d9a8c7e761f2060629acb685a16a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c4b1dc007c13fcb99df5d7b9f12f92d0796bbe65567c5dedaedd6e8156e358e
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.354594230651855,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.037362226788716604,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 620.216,
       "eval_steps_per_second": 77.596,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 42311303823360.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.346304893493652,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.07472445357743321,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 620.216,
       "eval_steps_per_second": 77.596,
       "step": 25
+    },
+    {
+      "epoch": 0.03885671586026527,
+      "grad_norm": 0.28412336111068726,
+      "learning_rate": 5e-05,
+      "loss": 10.3612,
+      "step": 26
+    },
+    {
+      "epoch": 0.04035120493181393,
+      "grad_norm": 0.32069140672683716,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 10.3569,
+      "step": 27
+    },
+    {
+      "epoch": 0.0418456940033626,
+      "grad_norm": 0.305347204208374,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 10.3546,
+      "step": 28
+    },
+    {
+      "epoch": 0.04334018307491126,
+      "grad_norm": 0.29833194613456726,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 10.3515,
+      "step": 29
+    },
+    {
+      "epoch": 0.04483467214645993,
+      "grad_norm": 0.3060224652290344,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 10.3536,
+      "step": 30
+    },
+    {
+      "epoch": 0.04632916121800859,
+      "grad_norm": 0.3351181447505951,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 10.3527,
+      "step": 31
+    },
+    {
+      "epoch": 0.047823650289557255,
+      "grad_norm": 0.3166193962097168,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 10.3527,
+      "step": 32
+    },
+    {
+      "epoch": 0.04931813936110592,
+      "grad_norm": 0.3276638984680176,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 10.3493,
+      "step": 33
+    },
+    {
+      "epoch": 0.050812628432654584,
+      "grad_norm": 0.3759688436985016,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 10.3423,
+      "step": 34
+    },
+    {
+      "epoch": 0.05230711750420325,
+      "grad_norm": 0.378872275352478,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 10.3447,
+      "step": 35
+    },
+    {
+      "epoch": 0.05380160657575191,
+      "grad_norm": 0.3691338896751404,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 10.34,
+      "step": 36
+    },
+    {
+      "epoch": 0.05529609564730058,
+      "grad_norm": 0.3872483968734741,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 10.3469,
+      "step": 37
+    },
+    {
+      "epoch": 0.05679058471884924,
+      "grad_norm": 0.26413607597351074,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 10.3527,
+      "step": 38
+    },
+    {
+      "epoch": 0.05828507379039791,
+      "grad_norm": 0.2704392075538635,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 10.3529,
+      "step": 39
+    },
+    {
+      "epoch": 0.05977956286194657,
+      "grad_norm": 0.29490724205970764,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 10.348,
+      "step": 40
+    },
+    {
+      "epoch": 0.061274051933495235,
+      "grad_norm": 0.2891983091831207,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 10.3511,
+      "step": 41
+    },
+    {
+      "epoch": 0.0627685410050439,
+      "grad_norm": 0.28217384219169617,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 10.3498,
+      "step": 42
+    },
+    {
+      "epoch": 0.06426303007659256,
+      "grad_norm": 0.2808740735054016,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 10.3472,
+      "step": 43
+    },
+    {
+      "epoch": 0.06575751914814124,
+      "grad_norm": 0.343799352645874,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 10.3441,
+      "step": 44
+    },
+    {
+      "epoch": 0.0672520082196899,
+      "grad_norm": 0.33633652329444885,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 10.3453,
+      "step": 45
+    },
+    {
+      "epoch": 0.06874649729123856,
+      "grad_norm": 0.334368497133255,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 10.3479,
+      "step": 46
+    },
+    {
+      "epoch": 0.07024098636278722,
+      "grad_norm": 0.32918912172317505,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 10.3446,
+      "step": 47
+    },
+    {
+      "epoch": 0.07173547543433588,
+      "grad_norm": 0.35251981019973755,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 10.3454,
+      "step": 48
+    },
+    {
+      "epoch": 0.07322996450588455,
+      "grad_norm": 0.4148394465446472,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 10.3424,
+      "step": 49
+    },
+    {
+      "epoch": 0.07472445357743321,
+      "grad_norm": 0.43232181668281555,
+      "learning_rate": 0.0,
+      "loss": 10.3421,
+      "step": 50
+    },
+    {
+      "epoch": 0.07472445357743321,
+      "eval_loss": 10.346304893493652,
+      "eval_runtime": 7.2666,
+      "eval_samples_per_second": 620.373,
+      "eval_steps_per_second": 77.615,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 84596179599360.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null