cimol committed
Commit 7039bb1 · verified · 1 Parent(s): f18b315

Training in progress, step 100, checkpoint

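Context note: the training script itself is not part of this commit, but the fields visible in the trainer_state.json diff below (eval_steps 50, logging_steps 5, train_batch_size 2, a run that stops at global_step 100, checkpoints under miner_id_24/) are consistent with a transformers TrainingArguments setup roughly like the sketch that follows. Anything not logged in trainer_state.json (learning_rate, warmup_steps, save_steps, output_dir, strategy names) is inferred or assumed, not read from the commit.

# Hypothetical reconstruction, not the author's actual configuration.
# Values marked "logged" appear in trainer_state.json in this commit;
# everything else is an assumption or an inference from the logged numbers.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",        # assumed from "best_model_checkpoint": "miner_id_24/checkpoint-100"
    per_device_train_batch_size=2,   # logged: "train_batch_size": 2
    max_steps=100,                   # logged: "should_training_stop" flips to true at step 100
    logging_steps=5,                 # logged: "logging_steps": 5
    eval_strategy="steps",           # spelled evaluation_strategy in older transformers releases
    eval_steps=50,                   # logged: "eval_steps": 50
    save_strategy="steps",
    save_steps=50,                   # assumed: both checkpoint-50 and checkpoint-100 exist
    lr_scheduler_type="cosine",      # inferred from the learning-rate values logged below
    learning_rate=7e-5,              # inferred: the logged LR at step 55 is exactly 3.5e-5
    warmup_steps=10,                 # inferred: the cosine tail fits a 10-step warmup
)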
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe2ada04e1b192dc4b6d8ad3c4b523c2af0bce420c134c25c3687d18cee6a771
+oid sha256:758d8936028f971c6a1dbfd6ce41c233b7bb12c2f09f96a6ae802c17eebd542e
 size 231448
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e410464659180c0d09d9999217f9fc4a4f42fa12177a77632fa7586713ce6b
+oid sha256:42e3466f52f2dcc5a641090d4992a8bbdbfb0e594ba8e245aebbcabd2a9514d0
 size 254576
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4fa8cff5400f5c41601ed4e23d86bd647b9ecd79cbccf4a05ab9b3c387518cc
+oid sha256:8e9d350457f4e3fa2819a39d91d2925669e3fb708dae5b483527de8c6e49d2d3
 size 14448
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08a7d2c17ed9294e1527e5a9bde2ad23f23e2f25d5162f4a4bdc1d38d3e802d7
+oid sha256:e99c7bce084b3b5f753ff6dd6c4c3531fa235b1767de5e6ba78e4fc50f95a755
 size 14448
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c77ba85d98911ad9cf0002f91e1d1e8461a37560e5cecd2480c44d2118b826e
+oid sha256:cee25becd0d944312ef6a737028fa37979bb421c6508dd51dcf308113ec99459
 size 1064
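Each of the five files above is tracked with Git LFS, so the commit only rewrites the pointer file (its "oid sha256" and "size" fields); the binary blobs themselves live in LFS storage. Below is a minimal sketch for checking a downloaded blob against its new pointer, assuming the file has already been pulled to the local path shown (the path and usage are illustrative, only the pointer semantics are standard).

import hashlib, os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    # The LFS "oid sha256:" field is the SHA-256 of the file contents,
    # and "size" is its length in bytes.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# New pointer values for last-checkpoint/adapter_model.safetensors in this commit:
print(verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "758d8936028f971c6a1dbfd6ce41c233b7bb12c2f09f96a6ae802c17eebd542e",
    231448,
))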
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.318482398986816,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0016684463427656167,
+  "best_metric": 10.305120468139648,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0033368926855312335,
   "eval_steps": 50,
-  "global_step": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -93,6 +93,84 @@
       "eval_samples_per_second": 140.32,
       "eval_steps_per_second": 17.548,
       "step": 50
+    },
+    {
+      "epoch": 0.0018352909770421783,
+      "grad_norm": 0.3079063296318054,
+      "learning_rate": 3.5e-05,
+      "loss": 10.3132,
+      "step": 55
+    },
+    {
+      "epoch": 0.00200213561131874,
+      "grad_norm": 0.28845730423927307,
+      "learning_rate": 2.8922313781657438e-05,
+      "loss": 10.3096,
+      "step": 60
+    },
+    {
+      "epoch": 0.0021689802455953015,
+      "grad_norm": 0.25424379110336304,
+      "learning_rate": 2.3029294983601594e-05,
+      "loss": 10.3097,
+      "step": 65
+    },
+    {
+      "epoch": 0.0023358248798718635,
+      "grad_norm": 0.24924302101135254,
+      "learning_rate": 1.7500000000000005e-05,
+      "loss": 10.3121,
+      "step": 70
+    },
+    {
+      "epoch": 0.002502669514148425,
+      "grad_norm": 0.2865857481956482,
+      "learning_rate": 1.2502433660971121e-05,
+      "loss": 10.3141,
+      "step": 75
+    },
+    {
+      "epoch": 0.0026695141484249867,
+      "grad_norm": 0.24225687980651855,
+      "learning_rate": 8.188444490835772e-06,
+      "loss": 10.3006,
+      "step": 80
+    },
+    {
+      "epoch": 0.0028363587827015483,
+      "grad_norm": 0.2341487854719162,
+      "learning_rate": 4.689110867544645e-06,
+      "loss": 10.3027,
+      "step": 85
+    },
+    {
+      "epoch": 0.00300320341697811,
+      "grad_norm": 0.23641282320022583,
+      "learning_rate": 2.1107582724932087e-06,
+      "loss": 10.3043,
+      "step": 90
+    },
+    {
+      "epoch": 0.0031700480512546715,
+      "grad_norm": 0.23910833895206451,
+      "learning_rate": 5.317286445727193e-07,
+      "loss": 10.3077,
+      "step": 95
+    },
+    {
+      "epoch": 0.0033368926855312335,
+      "grad_norm": 0.2778906524181366,
+      "learning_rate": 0.0,
+      "loss": 10.3015,
+      "step": 100
+    },
+    {
+      "epoch": 0.0033368926855312335,
+      "eval_loss": 10.305120468139648,
+      "eval_runtime": 90.3868,
+      "eval_samples_per_second": 139.6,
+      "eval_steps_per_second": 17.458,
+      "step": 100
     }
   ],
   "logging_steps": 5,
@@ -116,12 +194,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4870317932544.0,
+  "total_flos": 9699936829440.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null