Training in progress, step 1195, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d39b1e21f3feada5c1b4652ab3d2137deffc495fa5d265ab79f5d70e3379fc5
 size 48679352

 version https://git-lfs.github.com/spec/v1
+oid sha256:500e9c9e1d7b596a8cca4dbaf726ce7ac5c499d70f4d5e39fd8d7f4485b97b49
 size 48679352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29f75c62ea7fc0bc937b54e580a820e0409a98ae4816731931927ec3dec1e6f4
 size 25152884

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f80d0b366876eb9e2c09d97613798a58867f011d2312f7817d74e241e3fc20c
 size 25152884

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9625d93660225e7863c4343da041564a4016a9f3193757e35d26b1769e153e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a30509d9f5bd92b9af4da026d6adfbd726d761c3c97e7eadffdef2bb65cca2a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b79581c7e761535c7ec00db2e96bb32098b0e5b88df85ce558fc4a2493d296f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d84400fe1b0b865c8e287c3d02c0e1f3d1300d295a0643ce657dd581b4b9eabb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8604483007953724,
   "eval_steps": 692,
-  "global_step": 1190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8353,6 +8353,41 @@
       "learning_rate": 1.8763954970638628e-05,
       "loss": 1.0355,
       "step": 1190
     }
   ],
   "logging_steps": 1,
@@ -8372,7 +8407,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.6493133579616256e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8640636297903109,
   "eval_steps": 692,
+  "global_step": 1195,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.8763954970638628e-05,
       "loss": 1.0355,
       "step": 1190
+    },
+    {
+      "epoch": 0.8611713665943601,
+      "grad_norm": 0.45647570490837097,
+      "learning_rate": 1.8761733986490892e-05,
+      "loss": 1.0201,
+      "step": 1191
+    },
+    {
+      "epoch": 0.8618944323933478,
+      "grad_norm": 0.3569079637527466,
+      "learning_rate": 1.8759511140435744e-05,
+      "loss": 1.1432,
+      "step": 1192
+    },
+    {
+      "epoch": 0.8626174981923355,
+      "grad_norm": 0.4410318434238434,
+      "learning_rate": 1.875728643294555e-05,
+      "loss": 1.0172,
+      "step": 1193
+    },
+    {
+      "epoch": 0.8633405639913232,
+      "grad_norm": 0.4239175319671631,
+      "learning_rate": 1.8755059864493065e-05,
+      "loss": 1.0473,
+      "step": 1194
+    },
+    {
+      "epoch": 0.8640636297903109,
+      "grad_norm": 0.48534345626831055,
+      "learning_rate": 1.875283143555145e-05,
+      "loss": 1.065,
+      "step": 1195
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.660444926692557e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null