Training in progress, step 980, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 48679352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9945e31b638334bf494fdbd6bddbd7efb17b80e2cb934406af8bd6e740a52c13
|
3 |
size 48679352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 25152884
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2faad1ff615b4367ac8f493089d35aa61ad18cd058aa6a59301eee8776decdbe
|
3 |
size 25152884
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a563e473225d388aa85a62aecbc6bdee80f72d3795da30b7a19c51113a1af02
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cdd00c1b48b23a3fb0db3f88bb5f4c0c6d9b16cd39a52129b3151d7da108592
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 692,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6848,6 +6848,41 @@
|
|
6848 |
"learning_rate": 1.9197476297098868e-05,
|
6849 |
"loss": 1.2061,
|
6850 |
"step": 975
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6851 |
}
|
6852 |
],
|
6853 |
"logging_steps": 1,
|
@@ -6867,7 +6902,7 @@
|
|
6867 |
"attributes": {}
|
6868 |
}
|
6869 |
},
|
6870 |
-
"total_flos": 2.
|
6871 |
"train_batch_size": 4,
|
6872 |
"trial_name": null,
|
6873 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7086044830079538,
|
5 |
"eval_steps": 692,
|
6 |
+
"global_step": 980,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6848 |
"learning_rate": 1.9197476297098868e-05,
|
6849 |
"loss": 1.2061,
|
6850 |
"step": 975
|
6851 |
+
},
|
6852 |
+
{
|
6853 |
+
"epoch": 0.7057122198120029,
|
6854 |
+
"grad_norm": 0.49111953377723694,
|
6855 |
+
"learning_rate": 1.9195665918116068e-05,
|
6856 |
+
"loss": 1.0294,
|
6857 |
+
"step": 976
|
6858 |
+
},
|
6859 |
+
{
|
6860 |
+
"epoch": 0.7064352856109906,
|
6861 |
+
"grad_norm": 0.4833580255508423,
|
6862 |
+
"learning_rate": 1.919385358501339e-05,
|
6863 |
+
"loss": 1.0781,
|
6864 |
+
"step": 977
|
6865 |
+
},
|
6866 |
+
{
|
6867 |
+
"epoch": 0.7071583514099783,
|
6868 |
+
"grad_norm": 0.42100799083709717,
|
6869 |
+
"learning_rate": 1.9192039298175965e-05,
|
6870 |
+
"loss": 1.03,
|
6871 |
+
"step": 978
|
6872 |
+
},
|
6873 |
+
{
|
6874 |
+
"epoch": 0.707881417208966,
|
6875 |
+
"grad_norm": 0.5994098782539368,
|
6876 |
+
"learning_rate": 1.9190223057989337e-05,
|
6877 |
+
"loss": 0.8802,
|
6878 |
+
"step": 979
|
6879 |
+
},
|
6880 |
+
{
|
6881 |
+
"epoch": 0.7086044830079538,
|
6882 |
+
"grad_norm": 0.4303410053253174,
|
6883 |
+
"learning_rate": 1.9188404864839465e-05,
|
6884 |
+
"loss": 1.1212,
|
6885 |
+
"step": 980
|
6886 |
}
|
6887 |
],
|
6888 |
"logging_steps": 1,
|
|
|
6902 |
"attributes": {}
|
6903 |
}
|
6904 |
},
|
6905 |
+
"total_flos": 2.1817874712625152e+18,
|
6906 |
"train_batch_size": 4,
|
6907 |
"trial_name": null,
|
6908 |
"trial_params": null
|