Training in progress, step 6

Files changed (9) hide show

all_results.json ADDED Viewed

+{
+    "epoch": 3.0,
+    "total_flos": 247280173056.0,
+    "train_loss": 0.6713302036126455,
+    "train_runtime": 144.6833,
+    "train_samples_per_second": 0.083,
+    "train_steps_per_second": 0.021
+}

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:180f7e7c4491b24d1a8a415fda7da061c052a85a6af1850cebf4ffcdbca0fad8
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fc19435d964ece00dc5ecddecc6e9659ddeb5d628400682a47b36750f1c82a3
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cafaa206a66393a43fe3ffaccb4f04bdac272195255d4738bab5723f2d7ea551
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:d81da1fa8c36103e482a541ff95c4e867538c24e46bba3e99a34a3dfdf58ea61
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:665df71fd3aa45c78e72c6f03b5fa1482f476dc0171979caa81cc696cb9ee37b
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b4a494933112df9c624f87d536954a3280178756e5122dcbf85e65796277728
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ba542d4cb3b649d09a7da1d4ed42124b3cb9463ee1e541eec1046789004143f
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8e23c7a39bc2fabfa11b06e0899ff21ded71385791371dd14ced0766c8d96d6
 size 1089994880

train_results.json ADDED Viewed

+{
+    "epoch": 3.0,
+    "total_flos": 247280173056.0,
+    "train_loss": 0.6713302036126455,
+    "train_runtime": 144.6833,
+    "train_samples_per_second": 0.083,
+    "train_steps_per_second": 0.021
+}

trainer_log.jsonl CHANGED Viewed

@@ -2,3 +2,7 @@
 {"current_steps": 2, "total_steps": 3, "loss": 0.7883, "lr": 1e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:00:55", "remaining_time": "0:00:27"}
 {"current_steps": 3, "total_steps": 3, "loss": 0.4374, "lr": 5e-06, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:01:22", "remaining_time": "0:00:00"}
 {"current_steps": 3, "total_steps": 3, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:02:23", "remaining_time": "0:00:00"}

 {"current_steps": 2, "total_steps": 3, "loss": 0.7883, "lr": 1e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:00:55", "remaining_time": "0:00:27"}
 {"current_steps": 3, "total_steps": 3, "loss": 0.4374, "lr": 5e-06, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:01:22", "remaining_time": "0:00:00"}
 {"current_steps": 3, "total_steps": 3, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:02:23", "remaining_time": "0:00:00"}
+{"current_steps": 4, "total_steps": 6, "loss": 0.4374, "lr": 0.0, "epoch": 4.0, "percentage": 66.67, "elapsed_time": "0:00:29", "remaining_time": "0:00:14"}
+{"current_steps": 5, "total_steps": 6, "loss": 0.4374, "lr": 3.4549150281252635e-06, "epoch": 5.0, "percentage": 83.33, "elapsed_time": "0:00:56", "remaining_time": "0:00:11"}
+{"current_steps": 6, "total_steps": 6, "loss": 0.3766, "lr": 9.549150281252633e-07, "epoch": 6.0, "percentage": 100.0, "elapsed_time": "0:01:20", "remaining_time": "0:00:00"}
+{"current_steps": 6, "total_steps": 6, "epoch": 6.0, "percentage": 100.0, "elapsed_time": "0:02:45", "remaining_time": "0:00:00"}

trainer_state.json ADDED Viewed

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 3,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0,
+      "loss": 0.7883,
+      "step": 1
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 24.690817322034423,
+      "learning_rate": 1e-05,
+      "loss": 0.7883,
+      "step": 2
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 24.690817322034423,
+      "learning_rate": 5e-06,
+      "loss": 0.4374,
+      "step": 3
+    },
+    {
+      "epoch": 3.0,
+      "step": 3,
+      "total_flos": 247280173056.0,
+      "train_loss": 0.6713302036126455,
+      "train_runtime": 144.6833,
+      "train_samples_per_second": 0.083,
+      "train_steps_per_second": 0.021
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 3,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 247280173056.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d591d2aa4195669d37dacb8b8f12633557e9b648cde625fc3583b530205fb8c
 size 7608

 version https://git-lfs.github.com/spec/v1
+oid sha256:20b8c9e2a763a087603937d1ab764f1900fe3afa1cd3155f36f65d2dd76037bb
 size 7608