Training in progress, step 1195, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 48679352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:500e9c9e1d7b596a8cca4dbaf726ce7ac5c499d70f4d5e39fd8d7f4485b97b49
|
3 |
size 48679352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 25152884
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f80d0b366876eb9e2c09d97613798a58867f011d2312f7817d74e241e3fc20c
|
3 |
size 25152884
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a30509d9f5bd92b9af4da026d6adfbd726d761c3c97e7eadffdef2bb65cca2a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d84400fe1b0b865c8e287c3d02c0e1f3d1300d295a0643ce657dd581b4b9eabb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 692,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8353,6 +8353,41 @@
|
|
8353 |
"learning_rate": 1.8763954970638628e-05,
|
8354 |
"loss": 1.0355,
|
8355 |
"step": 1190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8356 |
}
|
8357 |
],
|
8358 |
"logging_steps": 1,
|
@@ -8372,7 +8407,7 @@
|
|
8372 |
"attributes": {}
|
8373 |
}
|
8374 |
},
|
8375 |
-
"total_flos": 2.
|
8376 |
"train_batch_size": 4,
|
8377 |
"trial_name": null,
|
8378 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8640636297903109,
|
5 |
"eval_steps": 692,
|
6 |
+
"global_step": 1195,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8353 |
"learning_rate": 1.8763954970638628e-05,
|
8354 |
"loss": 1.0355,
|
8355 |
"step": 1190
|
8356 |
+
},
|
8357 |
+
{
|
8358 |
+
"epoch": 0.8611713665943601,
|
8359 |
+
"grad_norm": 0.45647570490837097,
|
8360 |
+
"learning_rate": 1.8761733986490892e-05,
|
8361 |
+
"loss": 1.0201,
|
8362 |
+
"step": 1191
|
8363 |
+
},
|
8364 |
+
{
|
8365 |
+
"epoch": 0.8618944323933478,
|
8366 |
+
"grad_norm": 0.3569079637527466,
|
8367 |
+
"learning_rate": 1.8759511140435744e-05,
|
8368 |
+
"loss": 1.1432,
|
8369 |
+
"step": 1192
|
8370 |
+
},
|
8371 |
+
{
|
8372 |
+
"epoch": 0.8626174981923355,
|
8373 |
+
"grad_norm": 0.4410318434238434,
|
8374 |
+
"learning_rate": 1.875728643294555e-05,
|
8375 |
+
"loss": 1.0172,
|
8376 |
+
"step": 1193
|
8377 |
+
},
|
8378 |
+
{
|
8379 |
+
"epoch": 0.8633405639913232,
|
8380 |
+
"grad_norm": 0.4239175319671631,
|
8381 |
+
"learning_rate": 1.8755059864493065e-05,
|
8382 |
+
"loss": 1.0473,
|
8383 |
+
"step": 1194
|
8384 |
+
},
|
8385 |
+
{
|
8386 |
+
"epoch": 0.8640636297903109,
|
8387 |
+
"grad_norm": 0.48534345626831055,
|
8388 |
+
"learning_rate": 1.875283143555145e-05,
|
8389 |
+
"loss": 1.065,
|
8390 |
+
"step": 1195
|
8391 |
}
|
8392 |
],
|
8393 |
"logging_steps": 1,
|
|
|
8407 |
"attributes": {}
|
8408 |
}
|
8409 |
},
|
8410 |
+
"total_flos": 2.660444926692557e+18,
|
8411 |
"train_batch_size": 4,
|
8412 |
"trial_name": null,
|
8413 |
"trial_params": null
|