Training in progress, step 25000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +86 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18e226bfabb4fe37953d03ddbd4d68cb6331b76339a65c4652c13e68da7c732e
 size 959732416

 version https://git-lfs.github.com/spec/v1
+oid sha256:2808ef6d7c1ec76f205bfc3bf60e8896b8d012108d053710f2b9d7309d7f42b2
 size 959732416

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f46b965df64512eadca15157f363bb6b9d956cdb921d1cf19f79f3e3de5b89f3
 size 1915006400

 version https://git-lfs.github.com/spec/v1
+oid sha256:93f4aafd5e55920f58c0febce193ec74760cdf950639df7ad73eefccd9da8ec1
 size 1915006400

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f10ca53587facccfd8ce6c38c8ee3d38f84b1cd52bf1939f663991a4aa3ab223
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1ca75804c729ecd6274811b801cee592417281624e7bdb93722530ee68ca62
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6a37679919f879e26f31535286e088a72dcdededa90c5c4ca97536707dfcd97
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:348961f7e743fe4e2fc3f96e9872ae30fee7c5dae2b7050fcbf673c342e559b9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.35401643468053,
   "eval_steps": 500,
-  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -326,6 +326,89 @@
       "learning_rate": 1.6459835653194702e-05,
       "loss": 0.0453,
       "step": 20000
     }
   ],
   "logging_steps": 500,
@@ -345,7 +428,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.21801425313176e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.192520543350662,
   "eval_steps": 500,
+  "global_step": 25000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.6459835653194702e-05,
       "loss": 0.0453,
       "step": 20000
+    },
+    {
+      "epoch": 3.437866845547543,
+      "grad_norm": 0.4968201816082001,
+      "learning_rate": 1.562133154452457e-05,
+      "loss": 0.0443,
+      "step": 20500
+    },
+    {
+      "epoch": 3.5217172564145565,
+      "grad_norm": 0.4099849760532379,
+      "learning_rate": 1.4782827435854438e-05,
+      "loss": 0.0468,
+      "step": 21000
+    },
+    {
+      "epoch": 3.6055676672815697,
+      "grad_norm": 0.5295602083206177,
+      "learning_rate": 1.3944323327184303e-05,
+      "loss": 0.0473,
+      "step": 21500
+    },
+    {
+      "epoch": 3.689418078148583,
+      "grad_norm": 0.5218081474304199,
+      "learning_rate": 1.3105819218514171e-05,
+      "loss": 0.0488,
+      "step": 22000
+    },
+    {
+      "epoch": 3.7732684890155963,
+      "grad_norm": 0.2999401390552521,
+      "learning_rate": 1.226731510984404e-05,
+      "loss": 0.0418,
+      "step": 22500
+    },
+    {
+      "epoch": 3.8571188998826096,
+      "grad_norm": 0.6840993762016296,
+      "learning_rate": 1.1428811001173906e-05,
+      "loss": 0.0431,
+      "step": 23000
+    },
+    {
+      "epoch": 3.9409693107496224,
+      "grad_norm": 0.33584555983543396,
+      "learning_rate": 1.0590306892503774e-05,
+      "loss": 0.0417,
+      "step": 23500
+    },
+    {
+      "epoch": 4.0,
+      "eval_gen_len": 100.0,
+      "eval_loss": 0.07122652977705002,
+      "eval_rouge1": 2.9034,
+      "eval_rouge2": 1.9182,
+      "eval_rougeL": 2.9223,
+      "eval_rougeLsum": 2.9168,
+      "eval_runtime": 1624.6424,
+      "eval_samples_per_second": 0.816,
+      "eval_steps_per_second": 0.408,
+      "step": 23852
+    },
+    {
+      "epoch": 4.024819721616636,
+      "grad_norm": 0.2943115532398224,
+      "learning_rate": 9.751802783833641e-06,
+      "loss": 0.0393,
+      "step": 24000
+    },
+    {
+      "epoch": 4.108670132483649,
+      "grad_norm": 0.4018648564815521,
+      "learning_rate": 8.913298675163509e-06,
+      "loss": 0.0321,
+      "step": 24500
+    },
+    {
+      "epoch": 4.192520543350662,
+      "grad_norm": 0.31552907824516296,
+      "learning_rate": 8.074794566493377e-06,
+      "loss": 0.0309,
+      "step": 25000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 9.02247270044192e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null