Training in progress, step 260, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d469916d9bc99ab8690c2ad5963f520f6c7aebca89a0f0fac62a70421794a8e3
 size 48679352

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b28c46bd32ec7687d71dcd8f92b07ccf8622a23ced323d5d17a73cc1d2d7c90
 size 48679352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:061e88c1730d0f3889058eaef8233b4d1a8ec193e7d2cf0deaf2c21b4bce6960
-size 25152500

 version https://git-lfs.github.com/spec/v1
+oid sha256:50f836283767652e9ac7d754244fd5074d2f19b6e5aa2726705730c0809d5111
+size 25152884

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa476934231ac34b5459f284f02dd625d474b4000e9d8f8d25797737ca876934
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f8d57acea1e2f327c0a900d89663e7e6946a8381191b0de303acbea96b525fd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3373dd77cba0e4415af66a32e16afdc4a8cd4fd649cb8e376d68b5558a8a0e8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:17d1b21cc84b70a525916759b0723a1fe102004e6a7697742d2db971361ed1d7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1843817787418655,
   "eval_steps": 692,
-  "global_step": 255,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1800,6 +1800,41 @@
       "learning_rate": 1.997448376186836e-05,
       "loss": 1.1043,
       "step": 255
     }
   ],
   "logging_steps": 1,
@@ -1819,7 +1854,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.677100052774912e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.18799710773680406,
   "eval_steps": 692,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.997448376186836e-05,
       "loss": 1.1043,
       "step": 255
+    },
+    {
+      "epoch": 0.18510484454085321,
+      "grad_norm": 0.28662002086639404,
+      "learning_rate": 1.997415360037498e-05,
+      "loss": 1.0743,
+      "step": 256
+    },
+    {
+      "epoch": 0.18582791033984092,
+      "grad_norm": 0.266493022441864,
+      "learning_rate": 1.9973821319329625e-05,
+      "loss": 0.9767,
+      "step": 257
+    },
+    {
+      "epoch": 0.18655097613882862,
+      "grad_norm": 0.34035131335258484,
+      "learning_rate": 1.9973486918802912e-05,
+      "loss": 1.2844,
+      "step": 258
+    },
+    {
+      "epoch": 0.18727404193781635,
+      "grad_norm": 0.34165364503860474,
+      "learning_rate": 1.9973150398865908e-05,
+      "loss": 1.2808,
+      "step": 259
+    },
+    {
+      "epoch": 0.18799710773680406,
+      "grad_norm": 0.3018459975719452,
+      "learning_rate": 1.9972811759590117e-05,
+      "loss": 0.9891,
+      "step": 260
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.788415740084224e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null