Training in progress, step 7170, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1623800
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7a4ea1a45e75153ffc20b849fd0f67baf44b9e333ad68b95de3d2463f5c5694
|
3 |
size 1623800
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3255543
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f6c71fb7d10555fde02594837f1b5b1c72e8adfae4621fe35a1c33a68bddeb8
|
3 |
size 3255543
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae9da71b074f9b06c32d4d58a28d832767dfab2dd4cba0eca26064fe80f0ba0c
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55a73bde68c0da7bbe678db55f0d085fc9f5de86ed520bfa6e44439f3fa9f996
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -48524,6 +48524,1679 @@
|
|
48524 |
"learning_rate": 7.284705227585676e-07,
|
48525 |
"loss": 46.0076,
|
48526 |
"step": 6931
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48527 |
}
|
48528 |
],
|
48529 |
"logging_steps": 1,
|
@@ -48543,7 +50216,7 @@
|
|
48543 |
"attributes": {}
|
48544 |
}
|
48545 |
},
|
48546 |
-
"total_flos":
|
48547 |
"train_batch_size": 4,
|
48548 |
"trial_name": null,
|
48549 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9789064099938563,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 7170,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
48524 |
"learning_rate": 7.284705227585676e-07,
|
48525 |
"loss": 46.0076,
|
48526 |
"step": 6931
|
48527 |
+
},
|
48528 |
+
{
|
48529 |
+
"epoch": 0.946412724418049,
|
48530 |
+
"grad_norm": 0.08758164197206497,
|
48531 |
+
"learning_rate": 7.247769813048644e-07,
|
48532 |
+
"loss": 46.0109,
|
48533 |
+
"step": 6932
|
48534 |
+
},
|
48535 |
+
{
|
48536 |
+
"epoch": 0.9465492525087037,
|
48537 |
+
"grad_norm": 0.058164868503808975,
|
48538 |
+
"learning_rate": 7.210927589273552e-07,
|
48539 |
+
"loss": 46.0074,
|
48540 |
+
"step": 6933
|
48541 |
+
},
|
48542 |
+
{
|
48543 |
+
"epoch": 0.9466857805993584,
|
48544 |
+
"grad_norm": 0.259356290102005,
|
48545 |
+
"learning_rate": 7.174178563228051e-07,
|
48546 |
+
"loss": 46.0097,
|
48547 |
+
"step": 6934
|
48548 |
+
},
|
48549 |
+
{
|
48550 |
+
"epoch": 0.9468223086900129,
|
48551 |
+
"grad_norm": 0.17507486045360565,
|
48552 |
+
"learning_rate": 7.137522741862246e-07,
|
48553 |
+
"loss": 46.0082,
|
48554 |
+
"step": 6935
|
48555 |
+
},
|
48556 |
+
{
|
48557 |
+
"epoch": 0.9469588367806676,
|
48558 |
+
"grad_norm": 0.09167280048131943,
|
48559 |
+
"learning_rate": 7.100960132108648e-07,
|
48560 |
+
"loss": 46.0009,
|
48561 |
+
"step": 6936
|
48562 |
+
},
|
48563 |
+
{
|
48564 |
+
"epoch": 0.9470953648713223,
|
48565 |
+
"grad_norm": 0.09322277456521988,
|
48566 |
+
"learning_rate": 7.064490740882057e-07,
|
48567 |
+
"loss": 46.0123,
|
48568 |
+
"step": 6937
|
48569 |
+
},
|
48570 |
+
{
|
48571 |
+
"epoch": 0.947231892961977,
|
48572 |
+
"grad_norm": 0.20074842870235443,
|
48573 |
+
"learning_rate": 7.02811457507968e-07,
|
48574 |
+
"loss": 46.0051,
|
48575 |
+
"step": 6938
|
48576 |
+
},
|
48577 |
+
{
|
48578 |
+
"epoch": 0.9473684210526315,
|
48579 |
+
"grad_norm": 0.07635287940502167,
|
48580 |
+
"learning_rate": 6.991831641581015e-07,
|
48581 |
+
"loss": 46.005,
|
48582 |
+
"step": 6939
|
48583 |
+
},
|
48584 |
+
{
|
48585 |
+
"epoch": 0.9475049491432862,
|
48586 |
+
"grad_norm": 0.030790025368332863,
|
48587 |
+
"learning_rate": 6.955641947248126e-07,
|
48588 |
+
"loss": 46.0069,
|
48589 |
+
"step": 6940
|
48590 |
+
},
|
48591 |
+
{
|
48592 |
+
"epoch": 0.9476414772339409,
|
48593 |
+
"grad_norm": 0.19854773581027985,
|
48594 |
+
"learning_rate": 6.919545498925206e-07,
|
48595 |
+
"loss": 46.003,
|
48596 |
+
"step": 6941
|
48597 |
+
},
|
48598 |
+
{
|
48599 |
+
"epoch": 0.9477780053245956,
|
48600 |
+
"grad_norm": 0.09742925316095352,
|
48601 |
+
"learning_rate": 6.883542303438962e-07,
|
48602 |
+
"loss": 46.01,
|
48603 |
+
"step": 6942
|
48604 |
+
},
|
48605 |
+
{
|
48606 |
+
"epoch": 0.9479145334152502,
|
48607 |
+
"grad_norm": 0.06210726127028465,
|
48608 |
+
"learning_rate": 6.847632367598389e-07,
|
48609 |
+
"loss": 46.0103,
|
48610 |
+
"step": 6943
|
48611 |
+
},
|
48612 |
+
{
|
48613 |
+
"epoch": 0.9480510615059048,
|
48614 |
+
"grad_norm": 0.1092444583773613,
|
48615 |
+
"learning_rate": 6.811815698194945e-07,
|
48616 |
+
"loss": 46.0021,
|
48617 |
+
"step": 6944
|
48618 |
+
},
|
48619 |
+
{
|
48620 |
+
"epoch": 0.9481875895965595,
|
48621 |
+
"grad_norm": 0.04307679831981659,
|
48622 |
+
"learning_rate": 6.776092302002323e-07,
|
48623 |
+
"loss": 46.0046,
|
48624 |
+
"step": 6945
|
48625 |
+
},
|
48626 |
+
{
|
48627 |
+
"epoch": 0.9483241176872141,
|
48628 |
+
"grad_norm": 0.140605628490448,
|
48629 |
+
"learning_rate": 6.74046218577673e-07,
|
48630 |
+
"loss": 46.0059,
|
48631 |
+
"step": 6946
|
48632 |
+
},
|
48633 |
+
{
|
48634 |
+
"epoch": 0.9484606457778688,
|
48635 |
+
"grad_norm": 0.08600953966379166,
|
48636 |
+
"learning_rate": 6.704925356256553e-07,
|
48637 |
+
"loss": 46.0012,
|
48638 |
+
"step": 6947
|
48639 |
+
},
|
48640 |
+
{
|
48641 |
+
"epoch": 0.9485971738685235,
|
48642 |
+
"grad_norm": 0.2193940430879593,
|
48643 |
+
"learning_rate": 6.669481820162638e-07,
|
48644 |
+
"loss": 46.0036,
|
48645 |
+
"step": 6948
|
48646 |
+
},
|
48647 |
+
{
|
48648 |
+
"epoch": 0.948733701959178,
|
48649 |
+
"grad_norm": 0.09651878476142883,
|
48650 |
+
"learning_rate": 6.634131584198122e-07,
|
48651 |
+
"loss": 46.0042,
|
48652 |
+
"step": 6949
|
48653 |
+
},
|
48654 |
+
{
|
48655 |
+
"epoch": 0.9488702300498327,
|
48656 |
+
"grad_norm": 0.11122124642133713,
|
48657 |
+
"learning_rate": 6.598874655048714e-07,
|
48658 |
+
"loss": 46.002,
|
48659 |
+
"step": 6950
|
48660 |
+
},
|
48661 |
+
{
|
48662 |
+
"epoch": 0.9490067581404874,
|
48663 |
+
"grad_norm": 0.06454525142908096,
|
48664 |
+
"learning_rate": 6.563711039382137e-07,
|
48665 |
+
"loss": 46.0054,
|
48666 |
+
"step": 6951
|
48667 |
+
},
|
48668 |
+
{
|
48669 |
+
"epoch": 0.9491432862311421,
|
48670 |
+
"grad_norm": 0.17408327758312225,
|
48671 |
+
"learning_rate": 6.52864074384868e-07,
|
48672 |
+
"loss": 46.0034,
|
48673 |
+
"step": 6952
|
48674 |
+
},
|
48675 |
+
{
|
48676 |
+
"epoch": 0.9492798143217968,
|
48677 |
+
"grad_norm": 0.057936087250709534,
|
48678 |
+
"learning_rate": 6.493663775080982e-07,
|
48679 |
+
"loss": 46.003,
|
48680 |
+
"step": 6953
|
48681 |
+
},
|
48682 |
+
{
|
48683 |
+
"epoch": 0.9494163424124513,
|
48684 |
+
"grad_norm": 0.05345294252038002,
|
48685 |
+
"learning_rate": 6.458780139694032e-07,
|
48686 |
+
"loss": 46.0013,
|
48687 |
+
"step": 6954
|
48688 |
+
},
|
48689 |
+
{
|
48690 |
+
"epoch": 0.949552870503106,
|
48691 |
+
"grad_norm": 0.08148936182260513,
|
48692 |
+
"learning_rate": 6.42398984428505e-07,
|
48693 |
+
"loss": 46.0027,
|
48694 |
+
"step": 6955
|
48695 |
+
},
|
48696 |
+
{
|
48697 |
+
"epoch": 0.9496893985937607,
|
48698 |
+
"grad_norm": 0.11465345323085785,
|
48699 |
+
"learning_rate": 6.389292895433607e-07,
|
48700 |
+
"loss": 46.0035,
|
48701 |
+
"step": 6956
|
48702 |
+
},
|
48703 |
+
{
|
48704 |
+
"epoch": 0.9498259266844153,
|
48705 |
+
"grad_norm": 0.35127905011177063,
|
48706 |
+
"learning_rate": 6.354689299701844e-07,
|
48707 |
+
"loss": 46.009,
|
48708 |
+
"step": 6957
|
48709 |
+
},
|
48710 |
+
{
|
48711 |
+
"epoch": 0.94996245477507,
|
48712 |
+
"grad_norm": 0.18658971786499023,
|
48713 |
+
"learning_rate": 6.320179063634024e-07,
|
48714 |
+
"loss": 46.0028,
|
48715 |
+
"step": 6958
|
48716 |
+
},
|
48717 |
+
{
|
48718 |
+
"epoch": 0.9500989828657246,
|
48719 |
+
"grad_norm": 0.10089928656816483,
|
48720 |
+
"learning_rate": 6.285762193756817e-07,
|
48721 |
+
"loss": 46.0003,
|
48722 |
+
"step": 6959
|
48723 |
+
},
|
48724 |
+
{
|
48725 |
+
"epoch": 0.9502355109563793,
|
48726 |
+
"grad_norm": 0.15687218308448792,
|
48727 |
+
"learning_rate": 6.251438696579293e-07,
|
48728 |
+
"loss": 46.0024,
|
48729 |
+
"step": 6960
|
48730 |
+
},
|
48731 |
+
{
|
48732 |
+
"epoch": 0.9503720390470339,
|
48733 |
+
"grad_norm": 0.05431222543120384,
|
48734 |
+
"learning_rate": 6.217208578592759e-07,
|
48735 |
+
"loss": 46.0119,
|
48736 |
+
"step": 6961
|
48737 |
+
},
|
48738 |
+
{
|
48739 |
+
"epoch": 0.9505085671376886,
|
48740 |
+
"grad_norm": 0.10426490008831024,
|
48741 |
+
"learning_rate": 6.183071846270983e-07,
|
48742 |
+
"loss": 46.0037,
|
48743 |
+
"step": 6962
|
48744 |
+
},
|
48745 |
+
{
|
48746 |
+
"epoch": 0.9506450952283433,
|
48747 |
+
"grad_norm": 0.04488224536180496,
|
48748 |
+
"learning_rate": 6.149028506069909e-07,
|
48749 |
+
"loss": 46.0085,
|
48750 |
+
"step": 6963
|
48751 |
+
},
|
48752 |
+
{
|
48753 |
+
"epoch": 0.9507816233189978,
|
48754 |
+
"grad_norm": 0.11251979321241379,
|
48755 |
+
"learning_rate": 6.115078564427945e-07,
|
48756 |
+
"loss": 46.01,
|
48757 |
+
"step": 6964
|
48758 |
+
},
|
48759 |
+
{
|
48760 |
+
"epoch": 0.9509181514096525,
|
48761 |
+
"grad_norm": 0.09327413141727448,
|
48762 |
+
"learning_rate": 6.081222027765843e-07,
|
48763 |
+
"loss": 46.0048,
|
48764 |
+
"step": 6965
|
48765 |
+
},
|
48766 |
+
{
|
48767 |
+
"epoch": 0.9510546795003072,
|
48768 |
+
"grad_norm": 0.0498911589384079,
|
48769 |
+
"learning_rate": 6.047458902486647e-07,
|
48770 |
+
"loss": 46.0036,
|
48771 |
+
"step": 6966
|
48772 |
+
},
|
48773 |
+
{
|
48774 |
+
"epoch": 0.9511912075909619,
|
48775 |
+
"grad_norm": 0.05903641879558563,
|
48776 |
+
"learning_rate": 6.013789194975749e-07,
|
48777 |
+
"loss": 46.006,
|
48778 |
+
"step": 6967
|
48779 |
+
},
|
48780 |
+
{
|
48781 |
+
"epoch": 0.9513277356816164,
|
48782 |
+
"grad_norm": 0.03856969624757767,
|
48783 |
+
"learning_rate": 5.980212911600836e-07,
|
48784 |
+
"loss": 46.0117,
|
48785 |
+
"step": 6968
|
48786 |
+
},
|
48787 |
+
{
|
48788 |
+
"epoch": 0.9514642637722711,
|
48789 |
+
"grad_norm": 0.10870201885700226,
|
48790 |
+
"learning_rate": 5.946730058711935e-07,
|
48791 |
+
"loss": 46.0018,
|
48792 |
+
"step": 6969
|
48793 |
+
},
|
48794 |
+
{
|
48795 |
+
"epoch": 0.9516007918629258,
|
48796 |
+
"grad_norm": 0.08802196383476257,
|
48797 |
+
"learning_rate": 5.91334064264143e-07,
|
48798 |
+
"loss": 46.0028,
|
48799 |
+
"step": 6970
|
48800 |
+
},
|
48801 |
+
{
|
48802 |
+
"epoch": 0.9517373199535805,
|
48803 |
+
"grad_norm": 0.10287310928106308,
|
48804 |
+
"learning_rate": 5.880044669704099e-07,
|
48805 |
+
"loss": 46.0053,
|
48806 |
+
"step": 6971
|
48807 |
+
},
|
48808 |
+
{
|
48809 |
+
"epoch": 0.9518738480442351,
|
48810 |
+
"grad_norm": 0.1097191721200943,
|
48811 |
+
"learning_rate": 5.846842146196852e-07,
|
48812 |
+
"loss": 46.0043,
|
48813 |
+
"step": 6972
|
48814 |
+
},
|
48815 |
+
{
|
48816 |
+
"epoch": 0.9520103761348897,
|
48817 |
+
"grad_norm": 0.055113162845373154,
|
48818 |
+
"learning_rate": 5.813733078399164e-07,
|
48819 |
+
"loss": 46.0025,
|
48820 |
+
"step": 6973
|
48821 |
+
},
|
48822 |
+
{
|
48823 |
+
"epoch": 0.9521469042255444,
|
48824 |
+
"grad_norm": 0.1519494652748108,
|
48825 |
+
"learning_rate": 5.780717472572638e-07,
|
48826 |
+
"loss": 46.0048,
|
48827 |
+
"step": 6974
|
48828 |
+
},
|
48829 |
+
{
|
48830 |
+
"epoch": 0.952283432316199,
|
48831 |
+
"grad_norm": 0.12530197203159332,
|
48832 |
+
"learning_rate": 5.747795334961336e-07,
|
48833 |
+
"loss": 46.0099,
|
48834 |
+
"step": 6975
|
48835 |
+
},
|
48836 |
+
{
|
48837 |
+
"epoch": 0.9524199604068537,
|
48838 |
+
"grad_norm": 0.11176592856645584,
|
48839 |
+
"learning_rate": 5.714966671791556e-07,
|
48840 |
+
"loss": 46.0073,
|
48841 |
+
"step": 6976
|
48842 |
+
},
|
48843 |
+
{
|
48844 |
+
"epoch": 0.9525564884975084,
|
48845 |
+
"grad_norm": 0.15323077142238617,
|
48846 |
+
"learning_rate": 5.682231489271938e-07,
|
48847 |
+
"loss": 46.0071,
|
48848 |
+
"step": 6977
|
48849 |
+
},
|
48850 |
+
{
|
48851 |
+
"epoch": 0.952693016588163,
|
48852 |
+
"grad_norm": 0.12638357281684875,
|
48853 |
+
"learning_rate": 5.649589793593591e-07,
|
48854 |
+
"loss": 46.0029,
|
48855 |
+
"step": 6978
|
48856 |
+
},
|
48857 |
+
{
|
48858 |
+
"epoch": 0.9528295446788176,
|
48859 |
+
"grad_norm": 0.20770449936389923,
|
48860 |
+
"learning_rate": 5.617041590929628e-07,
|
48861 |
+
"loss": 46.0074,
|
48862 |
+
"step": 6979
|
48863 |
+
},
|
48864 |
+
{
|
48865 |
+
"epoch": 0.9529660727694723,
|
48866 |
+
"grad_norm": 0.050461236387491226,
|
48867 |
+
"learning_rate": 5.584586887435739e-07,
|
48868 |
+
"loss": 46.0042,
|
48869 |
+
"step": 6980
|
48870 |
+
},
|
48871 |
+
{
|
48872 |
+
"epoch": 0.953102600860127,
|
48873 |
+
"grad_norm": 0.10567734390497208,
|
48874 |
+
"learning_rate": 5.552225689249846e-07,
|
48875 |
+
"loss": 46.0048,
|
48876 |
+
"step": 6981
|
48877 |
+
},
|
48878 |
+
{
|
48879 |
+
"epoch": 0.9532391289507817,
|
48880 |
+
"grad_norm": 0.1779739260673523,
|
48881 |
+
"learning_rate": 5.519958002492221e-07,
|
48882 |
+
"loss": 46.0051,
|
48883 |
+
"step": 6982
|
48884 |
+
},
|
48885 |
+
{
|
48886 |
+
"epoch": 0.9533756570414362,
|
48887 |
+
"grad_norm": 0.1124066412448883,
|
48888 |
+
"learning_rate": 5.487783833265425e-07,
|
48889 |
+
"loss": 46.0013,
|
48890 |
+
"step": 6983
|
48891 |
+
},
|
48892 |
+
{
|
48893 |
+
"epoch": 0.9535121851320909,
|
48894 |
+
"grad_norm": 0.11008557677268982,
|
48895 |
+
"learning_rate": 5.45570318765426e-07,
|
48896 |
+
"loss": 46.0048,
|
48897 |
+
"step": 6984
|
48898 |
+
},
|
48899 |
+
{
|
48900 |
+
"epoch": 0.9536487132227456,
|
48901 |
+
"grad_norm": 0.055017564445734024,
|
48902 |
+
"learning_rate": 5.423716071725981e-07,
|
48903 |
+
"loss": 46.0073,
|
48904 |
+
"step": 6985
|
48905 |
+
},
|
48906 |
+
{
|
48907 |
+
"epoch": 0.9537852413134003,
|
48908 |
+
"grad_norm": 0.09516242891550064,
|
48909 |
+
"learning_rate": 5.391822491530085e-07,
|
48910 |
+
"loss": 46.0016,
|
48911 |
+
"step": 6986
|
48912 |
+
},
|
48913 |
+
{
|
48914 |
+
"epoch": 0.9539217694040549,
|
48915 |
+
"grad_norm": 0.07182832807302475,
|
48916 |
+
"learning_rate": 5.360022453098357e-07,
|
48917 |
+
"loss": 46.0091,
|
48918 |
+
"step": 6987
|
48919 |
+
},
|
48920 |
+
{
|
48921 |
+
"epoch": 0.9540582974947095,
|
48922 |
+
"grad_norm": 0.04664941132068634,
|
48923 |
+
"learning_rate": 5.328315962444874e-07,
|
48924 |
+
"loss": 46.0027,
|
48925 |
+
"step": 6988
|
48926 |
+
},
|
48927 |
+
{
|
48928 |
+
"epoch": 0.9541948255853642,
|
48929 |
+
"grad_norm": 0.1028062254190445,
|
48930 |
+
"learning_rate": 5.296703025566175e-07,
|
48931 |
+
"loss": 46.0034,
|
48932 |
+
"step": 6989
|
48933 |
+
},
|
48934 |
+
{
|
48935 |
+
"epoch": 0.9543313536760188,
|
48936 |
+
"grad_norm": 0.12866486608982086,
|
48937 |
+
"learning_rate": 5.265183648440919e-07,
|
48938 |
+
"loss": 46.0031,
|
48939 |
+
"step": 6990
|
48940 |
+
},
|
48941 |
+
{
|
48942 |
+
"epoch": 0.9544678817666735,
|
48943 |
+
"grad_norm": 0.15095901489257812,
|
48944 |
+
"learning_rate": 5.233757837030118e-07,
|
48945 |
+
"loss": 46.0056,
|
48946 |
+
"step": 6991
|
48947 |
+
},
|
48948 |
+
{
|
48949 |
+
"epoch": 0.9546044098573282,
|
48950 |
+
"grad_norm": 0.045242372900247574,
|
48951 |
+
"learning_rate": 5.202425597277183e-07,
|
48952 |
+
"loss": 46.005,
|
48953 |
+
"step": 6992
|
48954 |
+
},
|
48955 |
+
{
|
48956 |
+
"epoch": 0.9547409379479828,
|
48957 |
+
"grad_norm": 0.14112210273742676,
|
48958 |
+
"learning_rate": 5.171186935107708e-07,
|
48959 |
+
"loss": 46.002,
|
48960 |
+
"step": 6993
|
48961 |
+
},
|
48962 |
+
{
|
48963 |
+
"epoch": 0.9548774660386374,
|
48964 |
+
"grad_norm": 0.046833448112010956,
|
48965 |
+
"learning_rate": 5.14004185642969e-07,
|
48966 |
+
"loss": 46.0047,
|
48967 |
+
"step": 6994
|
48968 |
+
},
|
48969 |
+
{
|
48970 |
+
"epoch": 0.9550139941292921,
|
48971 |
+
"grad_norm": 0.061061080545186996,
|
48972 |
+
"learning_rate": 5.108990367133304e-07,
|
48973 |
+
"loss": 46.0023,
|
48974 |
+
"step": 6995
|
48975 |
+
},
|
48976 |
+
{
|
48977 |
+
"epoch": 0.9551505222199468,
|
48978 |
+
"grad_norm": 0.3781220614910126,
|
48979 |
+
"learning_rate": 5.078032473091188e-07,
|
48980 |
+
"loss": 46.0036,
|
48981 |
+
"step": 6996
|
48982 |
+
},
|
48983 |
+
{
|
48984 |
+
"epoch": 0.9552870503106015,
|
48985 |
+
"grad_norm": 0.18927641212940216,
|
48986 |
+
"learning_rate": 5.047168180158101e-07,
|
48987 |
+
"loss": 46.0046,
|
48988 |
+
"step": 6997
|
48989 |
+
},
|
48990 |
+
{
|
48991 |
+
"epoch": 0.955423578401256,
|
48992 |
+
"grad_norm": 0.0661652684211731,
|
48993 |
+
"learning_rate": 5.016397494171265e-07,
|
48994 |
+
"loss": 46.0012,
|
48995 |
+
"step": 6998
|
48996 |
+
},
|
48997 |
+
{
|
48998 |
+
"epoch": 0.9555601064919107,
|
48999 |
+
"grad_norm": 0.08351822197437286,
|
49000 |
+
"learning_rate": 4.98572042095008e-07,
|
49001 |
+
"loss": 46.0025,
|
49002 |
+
"step": 6999
|
49003 |
+
},
|
49004 |
+
{
|
49005 |
+
"epoch": 0.9556966345825654,
|
49006 |
+
"grad_norm": 0.08313079923391342,
|
49007 |
+
"learning_rate": 4.955136966296292e-07,
|
49008 |
+
"loss": 46.0,
|
49009 |
+
"step": 7000
|
49010 |
+
},
|
49011 |
+
{
|
49012 |
+
"epoch": 0.95583316267322,
|
49013 |
+
"grad_norm": 0.2776692509651184,
|
49014 |
+
"learning_rate": 4.924647135993942e-07,
|
49015 |
+
"loss": 46.0048,
|
49016 |
+
"step": 7001
|
49017 |
+
},
|
49018 |
+
{
|
49019 |
+
"epoch": 0.9559696907638746,
|
49020 |
+
"grad_norm": 0.0676804780960083,
|
49021 |
+
"learning_rate": 4.894250935809364e-07,
|
49022 |
+
"loss": 46.0024,
|
49023 |
+
"step": 7002
|
49024 |
+
},
|
49025 |
+
{
|
49026 |
+
"epoch": 0.9561062188545293,
|
49027 |
+
"grad_norm": 0.1363213211297989,
|
49028 |
+
"learning_rate": 4.863948371491122e-07,
|
49029 |
+
"loss": 46.0021,
|
49030 |
+
"step": 7003
|
49031 |
+
},
|
49032 |
+
{
|
49033 |
+
"epoch": 0.956242746945184,
|
49034 |
+
"grad_norm": 0.06328721344470978,
|
49035 |
+
"learning_rate": 4.833739448770247e-07,
|
49036 |
+
"loss": 46.0027,
|
49037 |
+
"step": 7004
|
49038 |
+
},
|
49039 |
+
{
|
49040 |
+
"epoch": 0.9563792750358386,
|
49041 |
+
"grad_norm": 0.09872937202453613,
|
49042 |
+
"learning_rate": 4.803624173359833e-07,
|
49043 |
+
"loss": 46.0002,
|
49044 |
+
"step": 7005
|
49045 |
+
},
|
49046 |
+
{
|
49047 |
+
"epoch": 0.9565158031264933,
|
49048 |
+
"grad_norm": 0.06226339936256409,
|
49049 |
+
"learning_rate": 4.77360255095538e-07,
|
49050 |
+
"loss": 46.0013,
|
49051 |
+
"step": 7006
|
49052 |
+
},
|
49053 |
+
{
|
49054 |
+
"epoch": 0.9566523312171479,
|
49055 |
+
"grad_norm": 0.1212535873055458,
|
49056 |
+
"learning_rate": 4.743674587234737e-07,
|
49057 |
+
"loss": 46.0019,
|
49058 |
+
"step": 7007
|
49059 |
+
},
|
49060 |
+
{
|
49061 |
+
"epoch": 0.9567888593078026,
|
49062 |
+
"grad_norm": 0.03763558715581894,
|
49063 |
+
"learning_rate": 4.7138402878578736e-07,
|
49064 |
+
"loss": 46.0017,
|
49065 |
+
"step": 7008
|
49066 |
+
},
|
49067 |
+
{
|
49068 |
+
"epoch": 0.9569253873984572,
|
49069 |
+
"grad_norm": 0.07306963950395584,
|
49070 |
+
"learning_rate": 4.684099658467223e-07,
|
49071 |
+
"loss": 46.0019,
|
49072 |
+
"step": 7009
|
49073 |
+
},
|
49074 |
+
{
|
49075 |
+
"epoch": 0.9570619154891119,
|
49076 |
+
"grad_norm": 0.16501565277576447,
|
49077 |
+
"learning_rate": 4.6544527046873953e-07,
|
49078 |
+
"loss": 46.0033,
|
49079 |
+
"step": 7010
|
49080 |
+
},
|
49081 |
+
{
|
49082 |
+
"epoch": 0.9571984435797666,
|
49083 |
+
"grad_norm": 0.094601109623909,
|
49084 |
+
"learning_rate": 4.624899432125296e-07,
|
49085 |
+
"loss": 46.0083,
|
49086 |
+
"step": 7011
|
49087 |
+
},
|
49088 |
+
{
|
49089 |
+
"epoch": 0.9573349716704211,
|
49090 |
+
"grad_norm": 0.03492524102330208,
|
49091 |
+
"learning_rate": 4.595439846370064e-07,
|
49092 |
+
"loss": 46.0073,
|
49093 |
+
"step": 7012
|
49094 |
+
},
|
49095 |
+
{
|
49096 |
+
"epoch": 0.9574714997610758,
|
49097 |
+
"grad_norm": 0.09824098646640778,
|
49098 |
+
"learning_rate": 4.566073952993355e-07,
|
49099 |
+
"loss": 46.0013,
|
49100 |
+
"step": 7013
|
49101 |
+
},
|
49102 |
+
{
|
49103 |
+
"epoch": 0.9576080278517305,
|
49104 |
+
"grad_norm": 0.06130605190992355,
|
49105 |
+
"learning_rate": 4.536801757548781e-07,
|
49106 |
+
"loss": 46.0104,
|
49107 |
+
"step": 7014
|
49108 |
+
},
|
49109 |
+
{
|
49110 |
+
"epoch": 0.9577445559423852,
|
49111 |
+
"grad_norm": 0.05644996836781502,
|
49112 |
+
"learning_rate": 4.5076232655724695e-07,
|
49113 |
+
"loss": 46.0124,
|
49114 |
+
"step": 7015
|
49115 |
+
},
|
49116 |
+
{
|
49117 |
+
"epoch": 0.9578810840330398,
|
49118 |
+
"grad_norm": 0.09736155718564987,
|
49119 |
+
"learning_rate": 4.4785384825826173e-07,
|
49120 |
+
"loss": 46.0055,
|
49121 |
+
"step": 7016
|
49122 |
+
},
|
49123 |
+
{
|
49124 |
+
"epoch": 0.9580176121236944,
|
49125 |
+
"grad_norm": 0.06964929401874542,
|
49126 |
+
"learning_rate": 4.449547414079991e-07,
|
49127 |
+
"loss": 46.0047,
|
49128 |
+
"step": 7017
|
49129 |
+
},
|
49130 |
+
{
|
49131 |
+
"epoch": 0.9581541402143491,
|
49132 |
+
"grad_norm": 0.08945920318365097,
|
49133 |
+
"learning_rate": 4.420650065547427e-07,
|
49134 |
+
"loss": 46.0032,
|
49135 |
+
"step": 7018
|
49136 |
+
},
|
49137 |
+
{
|
49138 |
+
"epoch": 0.9582906683050038,
|
49139 |
+
"grad_norm": 0.12674036622047424,
|
49140 |
+
"learning_rate": 4.3918464424499984e-07,
|
49141 |
+
"loss": 46.0168,
|
49142 |
+
"step": 7019
|
49143 |
+
},
|
49144 |
+
{
|
49145 |
+
"epoch": 0.9584271963956584,
|
49146 |
+
"grad_norm": 0.07263398170471191,
|
49147 |
+
"learning_rate": 4.3631365502351807e-07,
|
49148 |
+
"loss": 46.0024,
|
49149 |
+
"step": 7020
|
49150 |
+
},
|
49151 |
+
{
|
49152 |
+
"epoch": 0.9585637244863131,
|
49153 |
+
"grad_norm": 0.4187130630016327,
|
49154 |
+
"learning_rate": 4.334520394332686e-07,
|
49155 |
+
"loss": 46.0087,
|
49156 |
+
"step": 7021
|
49157 |
+
},
|
49158 |
+
{
|
49159 |
+
"epoch": 0.9587002525769677,
|
49160 |
+
"grad_norm": 0.06423017382621765,
|
49161 |
+
"learning_rate": 4.305997980154519e-07,
|
49162 |
+
"loss": 46.0113,
|
49163 |
+
"step": 7022
|
49164 |
+
},
|
49165 |
+
{
|
49166 |
+
"epoch": 0.9588367806676223,
|
49167 |
+
"grad_norm": 0.16717016696929932,
|
49168 |
+
"learning_rate": 4.277569313094809e-07,
|
49169 |
+
"loss": 46.0098,
|
49170 |
+
"step": 7023
|
49171 |
+
},
|
49172 |
+
{
|
49173 |
+
"epoch": 0.958973308758277,
|
49174 |
+
"grad_norm": 0.07602465152740479,
|
49175 |
+
"learning_rate": 4.2492343985301443e-07,
|
49176 |
+
"loss": 46.0064,
|
49177 |
+
"step": 7024
|
49178 |
+
},
|
49179 |
+
{
|
49180 |
+
"epoch": 0.9591098368489317,
|
49181 |
+
"grad_norm": 0.06755789369344711,
|
49182 |
+
"learning_rate": 4.220993241819293e-07,
|
49183 |
+
"loss": 46.0023,
|
49184 |
+
"step": 7025
|
49185 |
+
},
|
49186 |
+
{
|
49187 |
+
"epoch": 0.9592463649395864,
|
49188 |
+
"grad_norm": 0.08068063855171204,
|
49189 |
+
"learning_rate": 4.192845848303373e-07,
|
49190 |
+
"loss": 46.0024,
|
49191 |
+
"step": 7026
|
49192 |
+
},
|
49193 |
+
{
|
49194 |
+
"epoch": 0.9593828930302409,
|
49195 |
+
"grad_norm": 0.06310666352510452,
|
49196 |
+
"learning_rate": 4.164792223305569e-07,
|
49197 |
+
"loss": 46.003,
|
49198 |
+
"step": 7027
|
49199 |
+
},
|
49200 |
+
{
|
49201 |
+
"epoch": 0.9595194211208956,
|
49202 |
+
"grad_norm": 0.03446575999259949,
|
49203 |
+
"learning_rate": 4.1368323721315825e-07,
|
49204 |
+
"loss": 46.0022,
|
49205 |
+
"step": 7028
|
49206 |
+
},
|
49207 |
+
{
|
49208 |
+
"epoch": 0.9596559492115503,
|
49209 |
+
"grad_norm": 0.03199775144457817,
|
49210 |
+
"learning_rate": 4.108966300069239e-07,
|
49211 |
+
"loss": 46.0061,
|
49212 |
+
"step": 7029
|
49213 |
+
},
|
49214 |
+
{
|
49215 |
+
"epoch": 0.959792477302205,
|
49216 |
+
"grad_norm": 0.07025648653507233,
|
49217 |
+
"learning_rate": 4.0811940123886004e-07,
|
49218 |
+
"loss": 46.0121,
|
49219 |
+
"step": 7030
|
49220 |
+
},
|
49221 |
+
{
|
49222 |
+
"epoch": 0.9599290053928596,
|
49223 |
+
"grad_norm": 0.09752369672060013,
|
49224 |
+
"learning_rate": 4.0535155143420765e-07,
|
49225 |
+
"loss": 46.0066,
|
49226 |
+
"step": 7031
|
49227 |
+
},
|
49228 |
+
{
|
49229 |
+
"epoch": 0.9600655334835142,
|
49230 |
+
"grad_norm": 0.09167366474866867,
|
49231 |
+
"learning_rate": 4.025930811164369e-07,
|
49232 |
+
"loss": 46.0045,
|
49233 |
+
"step": 7032
|
49234 |
+
},
|
49235 |
+
{
|
49236 |
+
"epoch": 0.9602020615741689,
|
49237 |
+
"grad_norm": 0.04597092419862747,
|
49238 |
+
"learning_rate": 3.998439908072249e-07,
|
49239 |
+
"loss": 46.0013,
|
49240 |
+
"step": 7033
|
49241 |
+
},
|
49242 |
+
{
|
49243 |
+
"epoch": 0.9603385896648235,
|
49244 |
+
"grad_norm": 0.07277274876832962,
|
49245 |
+
"learning_rate": 3.971042810265002e-07,
|
49246 |
+
"loss": 46.0036,
|
49247 |
+
"step": 7034
|
49248 |
+
},
|
49249 |
+
{
|
49250 |
+
"epoch": 0.9604751177554782,
|
49251 |
+
"grad_norm": 0.19522015750408173,
|
49252 |
+
"learning_rate": 3.943739522923928e-07,
|
49253 |
+
"loss": 46.011,
|
49254 |
+
"step": 7035
|
49255 |
+
},
|
49256 |
+
{
|
49257 |
+
"epoch": 0.9606116458461328,
|
49258 |
+
"grad_norm": 0.1344897300004959,
|
49259 |
+
"learning_rate": 3.916530051212841e-07,
|
49260 |
+
"loss": 46.0066,
|
49261 |
+
"step": 7036
|
49262 |
+
},
|
49263 |
+
{
|
49264 |
+
"epoch": 0.9607481739367875,
|
49265 |
+
"grad_norm": 0.11266939342021942,
|
49266 |
+
"learning_rate": 3.889414400277569e-07,
|
49267 |
+
"loss": 46.0024,
|
49268 |
+
"step": 7037
|
49269 |
+
},
|
49270 |
+
{
|
49271 |
+
"epoch": 0.9608847020274421,
|
49272 |
+
"grad_norm": 0.03876335546374321,
|
49273 |
+
"learning_rate": 3.862392575246343e-07,
|
49274 |
+
"loss": 46.005,
|
49275 |
+
"step": 7038
|
49276 |
+
},
|
49277 |
+
{
|
49278 |
+
"epoch": 0.9610212301180968,
|
49279 |
+
"grad_norm": 0.04259270429611206,
|
49280 |
+
"learning_rate": 3.8354645812296306e-07,
|
49281 |
+
"loss": 46.0054,
|
49282 |
+
"step": 7039
|
49283 |
+
},
|
49284 |
+
{
|
49285 |
+
"epoch": 0.9611577582087515,
|
49286 |
+
"grad_norm": 0.07908140867948532,
|
49287 |
+
"learning_rate": 3.8086304233200807e-07,
|
49288 |
+
"loss": 46.0036,
|
49289 |
+
"step": 7040
|
49290 |
+
},
|
49291 |
+
{
|
49292 |
+
"epoch": 0.961294286299406,
|
49293 |
+
"grad_norm": 0.10258141160011292,
|
49294 |
+
"learning_rate": 3.781890106592689e-07,
|
49295 |
+
"loss": 46.0006,
|
49296 |
+
"step": 7041
|
49297 |
+
},
|
49298 |
+
{
|
49299 |
+
"epoch": 0.9614308143900607,
|
49300 |
+
"grad_norm": 0.07387635856866837,
|
49301 |
+
"learning_rate": 3.7552436361046884e-07,
|
49302 |
+
"loss": 46.0055,
|
49303 |
+
"step": 7042
|
49304 |
+
},
|
49305 |
+
{
|
49306 |
+
"epoch": 0.9615673424807154,
|
49307 |
+
"grad_norm": 0.13900095224380493,
|
49308 |
+
"learning_rate": 3.7286910168954914e-07,
|
49309 |
+
"loss": 46.0045,
|
49310 |
+
"step": 7043
|
49311 |
+
},
|
49312 |
+
{
|
49313 |
+
"epoch": 0.9617038705713701,
|
49314 |
+
"grad_norm": 0.09079175442457199,
|
49315 |
+
"learning_rate": 3.702232253986804e-07,
|
49316 |
+
"loss": 46.0049,
|
49317 |
+
"step": 7044
|
49318 |
+
},
|
49319 |
+
{
|
49320 |
+
"epoch": 0.9618403986620248,
|
49321 |
+
"grad_norm": 0.15789593756198883,
|
49322 |
+
"learning_rate": 3.6758673523826225e-07,
|
49323 |
+
"loss": 46.0071,
|
49324 |
+
"step": 7045
|
49325 |
+
},
|
49326 |
+
{
|
49327 |
+
"epoch": 0.9619769267526793,
|
49328 |
+
"grad_norm": 0.18449178338050842,
|
49329 |
+
"learning_rate": 3.649596317069126e-07,
|
49330 |
+
"loss": 46.0041,
|
49331 |
+
"step": 7046
|
49332 |
+
},
|
49333 |
+
{
|
49334 |
+
"epoch": 0.962113454843334,
|
49335 |
+
"grad_norm": 0.28473979234695435,
|
49336 |
+
"learning_rate": 3.623419153014784e-07,
|
49337 |
+
"loss": 46.0064,
|
49338 |
+
"step": 7047
|
49339 |
+
},
|
49340 |
+
{
|
49341 |
+
"epoch": 0.9622499829339887,
|
49342 |
+
"grad_norm": 0.11489143967628479,
|
49343 |
+
"learning_rate": 3.597335865170304e-07,
|
49344 |
+
"loss": 46.0089,
|
49345 |
+
"step": 7048
|
49346 |
+
},
|
49347 |
+
{
|
49348 |
+
"epoch": 0.9623865110246433,
|
49349 |
+
"grad_norm": 0.16180576384067535,
|
49350 |
+
"learning_rate": 3.5713464584686273e-07,
|
49351 |
+
"loss": 46.0006,
|
49352 |
+
"step": 7049
|
49353 |
+
},
|
49354 |
+
{
|
49355 |
+
"epoch": 0.962523039115298,
|
49356 |
+
"grad_norm": 0.2144034057855606,
|
49357 |
+
"learning_rate": 3.545450937824935e-07,
|
49358 |
+
"loss": 46.0011,
|
49359 |
+
"step": 7050
|
49360 |
+
},
|
49361 |
+
{
|
49362 |
+
"epoch": 0.9626595672059526,
|
49363 |
+
"grad_norm": 0.09911711513996124,
|
49364 |
+
"learning_rate": 3.5196493081366967e-07,
|
49365 |
+
"loss": 46.0078,
|
49366 |
+
"step": 7051
|
49367 |
+
},
|
49368 |
+
{
|
49369 |
+
"epoch": 0.9627960952966073,
|
49370 |
+
"grad_norm": 0.17295798659324646,
|
49371 |
+
"learning_rate": 3.4939415742835655e-07,
|
49372 |
+
"loss": 46.0019,
|
49373 |
+
"step": 7052
|
49374 |
+
},
|
49375 |
+
{
|
49376 |
+
"epoch": 0.9629326233872619,
|
49377 |
+
"grad_norm": 0.0951714962720871,
|
49378 |
+
"learning_rate": 3.468327741127486e-07,
|
49379 |
+
"loss": 46.0018,
|
49380 |
+
"step": 7053
|
49381 |
+
},
|
49382 |
+
{
|
49383 |
+
"epoch": 0.9630691514779166,
|
49384 |
+
"grad_norm": 0.10347151011228561,
|
49385 |
+
"learning_rate": 3.442807813512583e-07,
|
49386 |
+
"loss": 46.0035,
|
49387 |
+
"step": 7054
|
49388 |
+
},
|
49389 |
+
{
|
49390 |
+
"epoch": 0.9632056795685713,
|
49391 |
+
"grad_norm": 0.22633132338523865,
|
49392 |
+
"learning_rate": 3.417381796265273e-07,
|
49393 |
+
"loss": 46.0049,
|
49394 |
+
"step": 7055
|
49395 |
+
},
|
49396 |
+
{
|
49397 |
+
"epoch": 0.9633422076592258,
|
49398 |
+
"grad_norm": 0.047777723520994186,
|
49399 |
+
"learning_rate": 3.3920496941942657e-07,
|
49400 |
+
"loss": 46.002,
|
49401 |
+
"step": 7056
|
49402 |
+
},
|
49403 |
+
{
|
49404 |
+
"epoch": 0.9634787357498805,
|
49405 |
+
"grad_norm": 0.14216388761997223,
|
49406 |
+
"learning_rate": 3.3668115120903954e-07,
|
49407 |
+
"loss": 46.0048,
|
49408 |
+
"step": 7057
|
49409 |
+
},
|
49410 |
+
{
|
49411 |
+
"epoch": 0.9636152638405352,
|
49412 |
+
"grad_norm": 0.07215403020381927,
|
49413 |
+
"learning_rate": 3.3416672547267325e-07,
|
49414 |
+
"loss": 46.0042,
|
49415 |
+
"step": 7058
|
49416 |
+
},
|
49417 |
+
{
|
49418 |
+
"epoch": 0.9637517919311899,
|
49419 |
+
"grad_norm": 0.04733399674296379,
|
49420 |
+
"learning_rate": 3.3166169268586957e-07,
|
49421 |
+
"loss": 46.0042,
|
49422 |
+
"step": 7059
|
49423 |
+
},
|
49424 |
+
{
|
49425 |
+
"epoch": 0.9638883200218445,
|
49426 |
+
"grad_norm": 0.06736130267381668,
|
49427 |
+
"learning_rate": 3.291660533223828e-07,
|
49428 |
+
"loss": 46.0101,
|
49429 |
+
"step": 7060
|
49430 |
+
},
|
49431 |
+
{
|
49432 |
+
"epoch": 0.9640248481124991,
|
49433 |
+
"grad_norm": 0.03229368478059769,
|
49434 |
+
"learning_rate": 3.266798078542077e-07,
|
49435 |
+
"loss": 46.0017,
|
49436 |
+
"step": 7061
|
49437 |
+
},
|
49438 |
+
{
|
49439 |
+
"epoch": 0.9641613762031538,
|
49440 |
+
"grad_norm": 0.2435784935951233,
|
49441 |
+
"learning_rate": 3.242029567515348e-07,
|
49442 |
+
"loss": 46.0118,
|
49443 |
+
"step": 7062
|
49444 |
+
},
|
49445 |
+
{
|
49446 |
+
"epoch": 0.9642979042938085,
|
49447 |
+
"grad_norm": 0.08515594154596329,
|
49448 |
+
"learning_rate": 3.217355004828004e-07,
|
49449 |
+
"loss": 46.0011,
|
49450 |
+
"step": 7063
|
49451 |
+
},
|
49452 |
+
{
|
49453 |
+
"epoch": 0.9644344323844631,
|
49454 |
+
"grad_norm": 0.04602936655282974,
|
49455 |
+
"learning_rate": 3.1927743951465914e-07,
|
49456 |
+
"loss": 46.0024,
|
49457 |
+
"step": 7064
|
49458 |
+
},
|
49459 |
+
{
|
49460 |
+
"epoch": 0.9645709604751177,
|
49461 |
+
"grad_norm": 0.036774687469005585,
|
49462 |
+
"learning_rate": 3.1682877431198353e-07,
|
49463 |
+
"loss": 46.009,
|
49464 |
+
"step": 7065
|
49465 |
+
},
|
49466 |
+
{
|
49467 |
+
"epoch": 0.9647074885657724,
|
49468 |
+
"grad_norm": 0.06910528987646103,
|
49469 |
+
"learning_rate": 3.143895053378698e-07,
|
49470 |
+
"loss": 46.013,
|
49471 |
+
"step": 7066
|
49472 |
+
},
|
49473 |
+
{
|
49474 |
+
"epoch": 0.964844016656427,
|
49475 |
+
"grad_norm": 0.08251021802425385,
|
49476 |
+
"learning_rate": 3.1195963305364894e-07,
|
49477 |
+
"loss": 46.0075,
|
49478 |
+
"step": 7067
|
49479 |
+
},
|
49480 |
+
{
|
49481 |
+
"epoch": 0.9649805447470817,
|
49482 |
+
"grad_norm": 0.09395699948072433,
|
49483 |
+
"learning_rate": 3.095391579188589e-07,
|
49484 |
+
"loss": 46.0053,
|
49485 |
+
"step": 7068
|
49486 |
+
},
|
49487 |
+
{
|
49488 |
+
"epoch": 0.9651170728377364,
|
49489 |
+
"grad_norm": 0.043019089847803116,
|
49490 |
+
"learning_rate": 3.0712808039126685e-07,
|
49491 |
+
"loss": 46.0069,
|
49492 |
+
"step": 7069
|
49493 |
+
},
|
49494 |
+
{
|
49495 |
+
"epoch": 0.965253600928391,
|
49496 |
+
"grad_norm": 0.13021165132522583,
|
49497 |
+
"learning_rate": 3.0472640092686355e-07,
|
49498 |
+
"loss": 46.0039,
|
49499 |
+
"step": 7070
|
49500 |
+
},
|
49501 |
+
{
|
49502 |
+
"epoch": 0.9653901290190456,
|
49503 |
+
"grad_norm": 0.07317258417606354,
|
49504 |
+
"learning_rate": 3.0233411997986904e-07,
|
49505 |
+
"loss": 46.0053,
|
49506 |
+
"step": 7071
|
49507 |
+
},
|
49508 |
+
{
|
49509 |
+
"epoch": 0.9655266571097003,
|
49510 |
+
"grad_norm": 0.21500803530216217,
|
49511 |
+
"learning_rate": 2.9995123800270476e-07,
|
49512 |
+
"loss": 46.0071,
|
49513 |
+
"step": 7072
|
49514 |
+
},
|
49515 |
+
{
|
49516 |
+
"epoch": 0.965663185200355,
|
49517 |
+
"grad_norm": 0.03951704129576683,
|
49518 |
+
"learning_rate": 2.975777554460379e-07,
|
49519 |
+
"loss": 46.004,
|
49520 |
+
"step": 7073
|
49521 |
+
},
|
49522 |
+
{
|
49523 |
+
"epoch": 0.9657997132910097,
|
49524 |
+
"grad_norm": 0.08986201137304306,
|
49525 |
+
"learning_rate": 2.9521367275874844e-07,
|
49526 |
+
"loss": 46.0017,
|
49527 |
+
"step": 7074
|
49528 |
+
},
|
49529 |
+
{
|
49530 |
+
"epoch": 0.9659362413816642,
|
49531 |
+
"grad_norm": 0.08408564329147339,
|
49532 |
+
"learning_rate": 2.928589903879342e-07,
|
49533 |
+
"loss": 46.0027,
|
49534 |
+
"step": 7075
|
49535 |
+
},
|
49536 |
+
{
|
49537 |
+
"epoch": 0.9660727694723189,
|
49538 |
+
"grad_norm": 0.09055382758378983,
|
49539 |
+
"learning_rate": 2.9051370877892226e-07,
|
49540 |
+
"loss": 46.0016,
|
49541 |
+
"step": 7076
|
49542 |
+
},
|
49543 |
+
{
|
49544 |
+
"epoch": 0.9662092975629736,
|
49545 |
+
"grad_norm": 0.06586837023496628,
|
49546 |
+
"learning_rate": 2.8817782837526343e-07,
|
49547 |
+
"loss": 46.0031,
|
49548 |
+
"step": 7077
|
49549 |
+
},
|
49550 |
+
{
|
49551 |
+
"epoch": 0.9663458256536283,
|
49552 |
+
"grad_norm": 0.06981931626796722,
|
49553 |
+
"learning_rate": 2.858513496187154e-07,
|
49554 |
+
"loss": 46.0101,
|
49555 |
+
"step": 7078
|
49556 |
+
},
|
49557 |
+
{
|
49558 |
+
"epoch": 0.9664823537442829,
|
49559 |
+
"grad_norm": 0.11279299855232239,
|
49560 |
+
"learning_rate": 2.8353427294927627e-07,
|
49561 |
+
"loss": 46.0137,
|
49562 |
+
"step": 7079
|
49563 |
+
},
|
49564 |
+
{
|
49565 |
+
"epoch": 0.9666188818349375,
|
49566 |
+
"grad_norm": 0.1102173924446106,
|
49567 |
+
"learning_rate": 2.8122659880516213e-07,
|
49568 |
+
"loss": 46.0036,
|
49569 |
+
"step": 7080
|
49570 |
+
},
|
49571 |
+
{
|
49572 |
+
"epoch": 0.9667554099255922,
|
49573 |
+
"grad_norm": 0.10839895904064178,
|
49574 |
+
"learning_rate": 2.789283276228016e-07,
|
49575 |
+
"loss": 46.0082,
|
49576 |
+
"step": 7081
|
49577 |
+
},
|
49578 |
+
{
|
49579 |
+
"epoch": 0.9668919380162468,
|
49580 |
+
"grad_norm": 0.08555541932582855,
|
49581 |
+
"learning_rate": 2.7663945983684713e-07,
|
49582 |
+
"loss": 46.0035,
|
49583 |
+
"step": 7082
|
49584 |
+
},
|
49585 |
+
{
|
49586 |
+
"epoch": 0.9670284661069015,
|
49587 |
+
"grad_norm": 0.13209228217601776,
|
49588 |
+
"learning_rate": 2.7435999588018567e-07,
|
49589 |
+
"loss": 46.0042,
|
49590 |
+
"step": 7083
|
49591 |
+
},
|
49592 |
+
{
|
49593 |
+
"epoch": 0.9671649941975562,
|
49594 |
+
"grad_norm": 0.09820085763931274,
|
49595 |
+
"learning_rate": 2.7208993618390576e-07,
|
49596 |
+
"loss": 46.0108,
|
49597 |
+
"step": 7084
|
49598 |
+
},
|
49599 |
+
{
|
49600 |
+
"epoch": 0.9673015222882108,
|
49601 |
+
"grad_norm": 0.12806051969528198,
|
49602 |
+
"learning_rate": 2.698292811773362e-07,
|
49603 |
+
"loss": 46.0063,
|
49604 |
+
"step": 7085
|
49605 |
+
},
|
49606 |
+
{
|
49607 |
+
"epoch": 0.9674380503788654,
|
49608 |
+
"grad_norm": 0.10604225099086761,
|
49609 |
+
"learning_rate": 2.675780312880127e-07,
|
49610 |
+
"loss": 46.0055,
|
49611 |
+
"step": 7086
|
49612 |
+
},
|
49613 |
+
{
|
49614 |
+
"epoch": 0.9675745784695201,
|
49615 |
+
"grad_norm": 0.059482015669345856,
|
49616 |
+
"learning_rate": 2.653361869417059e-07,
|
49617 |
+
"loss": 46.0126,
|
49618 |
+
"step": 7087
|
49619 |
+
},
|
49620 |
+
{
|
49621 |
+
"epoch": 0.9677111065601748,
|
49622 |
+
"grad_norm": 0.11784996092319489,
|
49623 |
+
"learning_rate": 2.631037485623933e-07,
|
49624 |
+
"loss": 46.0021,
|
49625 |
+
"step": 7088
|
49626 |
+
},
|
49627 |
+
{
|
49628 |
+
"epoch": 0.9678476346508295,
|
49629 |
+
"grad_norm": 0.11169984936714172,
|
49630 |
+
"learning_rate": 2.6088071657228706e-07,
|
49631 |
+
"loss": 46.0031,
|
49632 |
+
"step": 7089
|
49633 |
+
},
|
49634 |
+
{
|
49635 |
+
"epoch": 0.967984162741484,
|
49636 |
+
"grad_norm": 0.11371095478534698,
|
49637 |
+
"learning_rate": 2.5866709139180103e-07,
|
49638 |
+
"loss": 46.0063,
|
49639 |
+
"step": 7090
|
49640 |
+
},
|
49641 |
+
{
|
49642 |
+
"epoch": 0.9681206908321387,
|
49643 |
+
"grad_norm": 0.1076866015791893,
|
49644 |
+
"learning_rate": 2.5646287343959464e-07,
|
49645 |
+
"loss": 46.0087,
|
49646 |
+
"step": 7091
|
49647 |
+
},
|
49648 |
+
{
|
49649 |
+
"epoch": 0.9682572189227934,
|
49650 |
+
"grad_norm": 0.10492290556430817,
|
49651 |
+
"learning_rate": 2.542680631325289e-07,
|
49652 |
+
"loss": 46.0015,
|
49653 |
+
"step": 7092
|
49654 |
+
},
|
49655 |
+
{
|
49656 |
+
"epoch": 0.968393747013448,
|
49657 |
+
"grad_norm": 0.3201570212841034,
|
49658 |
+
"learning_rate": 2.5208266088569966e-07,
|
49659 |
+
"loss": 46.0071,
|
49660 |
+
"step": 7093
|
49661 |
+
},
|
49662 |
+
{
|
49663 |
+
"epoch": 0.9685302751041027,
|
49664 |
+
"grad_norm": 0.07352815568447113,
|
49665 |
+
"learning_rate": 2.499066671124095e-07,
|
49666 |
+
"loss": 46.0015,
|
49667 |
+
"step": 7094
|
49668 |
+
},
|
49669 |
+
{
|
49670 |
+
"epoch": 0.9686668031947573,
|
49671 |
+
"grad_norm": 0.18468260765075684,
|
49672 |
+
"learning_rate": 2.4774008222419596e-07,
|
49673 |
+
"loss": 46.003,
|
49674 |
+
"step": 7095
|
49675 |
+
},
|
49676 |
+
{
|
49677 |
+
"epoch": 0.968803331285412,
|
49678 |
+
"grad_norm": 0.09998919814825058,
|
49679 |
+
"learning_rate": 2.455829066308035e-07,
|
49680 |
+
"loss": 46.003,
|
49681 |
+
"step": 7096
|
49682 |
+
},
|
49683 |
+
{
|
49684 |
+
"epoch": 0.9689398593760666,
|
49685 |
+
"grad_norm": 0.1068774163722992,
|
49686 |
+
"learning_rate": 2.434351407402058e-07,
|
49687 |
+
"loss": 46.0036,
|
49688 |
+
"step": 7097
|
49689 |
+
},
|
49690 |
+
{
|
49691 |
+
"epoch": 0.9690763874667213,
|
49692 |
+
"grad_norm": 0.1510547548532486,
|
49693 |
+
"learning_rate": 2.412967849586001e-07,
|
49694 |
+
"loss": 46.0024,
|
49695 |
+
"step": 7098
|
49696 |
+
},
|
49697 |
+
{
|
49698 |
+
"epoch": 0.9692129155573759,
|
49699 |
+
"grad_norm": 0.30026623606681824,
|
49700 |
+
"learning_rate": 2.391678396903907e-07,
|
49701 |
+
"loss": 46.0009,
|
49702 |
+
"step": 7099
|
49703 |
+
},
|
49704 |
+
{
|
49705 |
+
"epoch": 0.9693494436480306,
|
49706 |
+
"grad_norm": 0.213755264878273,
|
49707 |
+
"learning_rate": 2.3704830533821108e-07,
|
49708 |
+
"loss": 46.0044,
|
49709 |
+
"step": 7100
|
49710 |
+
},
|
49711 |
+
{
|
49712 |
+
"epoch": 0.9694859717386852,
|
49713 |
+
"grad_norm": 0.13603056967258453,
|
49714 |
+
"learning_rate": 2.3493818230291286e-07,
|
49715 |
+
"loss": 46.0119,
|
49716 |
+
"step": 7101
|
49717 |
+
},
|
49718 |
+
{
|
49719 |
+
"epoch": 0.9696224998293399,
|
49720 |
+
"grad_norm": 0.03541295602917671,
|
49721 |
+
"learning_rate": 2.3283747098357677e-07,
|
49722 |
+
"loss": 46.0053,
|
49723 |
+
"step": 7102
|
49724 |
+
},
|
49725 |
+
{
|
49726 |
+
"epoch": 0.9697590279199946,
|
49727 |
+
"grad_norm": 0.193925142288208,
|
49728 |
+
"learning_rate": 2.307461717774906e-07,
|
49729 |
+
"loss": 46.0049,
|
49730 |
+
"step": 7103
|
49731 |
+
},
|
49732 |
+
{
|
49733 |
+
"epoch": 0.9698955560106491,
|
49734 |
+
"grad_norm": 0.17536532878875732,
|
49735 |
+
"learning_rate": 2.2866428508016568e-07,
|
49736 |
+
"loss": 46.0038,
|
49737 |
+
"step": 7104
|
49738 |
+
},
|
49739 |
+
{
|
49740 |
+
"epoch": 0.9700320841013038,
|
49741 |
+
"grad_norm": 0.13933449983596802,
|
49742 |
+
"learning_rate": 2.2659181128533713e-07,
|
49743 |
+
"loss": 46.0022,
|
49744 |
+
"step": 7105
|
49745 |
+
},
|
49746 |
+
{
|
49747 |
+
"epoch": 0.9701686121919585,
|
49748 |
+
"grad_norm": 0.09835812449455261,
|
49749 |
+
"learning_rate": 2.245287507849525e-07,
|
49750 |
+
"loss": 46.0066,
|
49751 |
+
"step": 7106
|
49752 |
+
},
|
49753 |
+
{
|
49754 |
+
"epoch": 0.9703051402826132,
|
49755 |
+
"grad_norm": 0.0783412829041481,
|
49756 |
+
"learning_rate": 2.2247510396918859e-07,
|
49757 |
+
"loss": 46.0044,
|
49758 |
+
"step": 7107
|
49759 |
+
},
|
49760 |
+
{
|
49761 |
+
"epoch": 0.9704416683732678,
|
49762 |
+
"grad_norm": 0.13357435166835785,
|
49763 |
+
"learning_rate": 2.2043087122644023e-07,
|
49764 |
+
"loss": 46.0025,
|
49765 |
+
"step": 7108
|
49766 |
+
},
|
49767 |
+
{
|
49768 |
+
"epoch": 0.9705781964639224,
|
49769 |
+
"grad_norm": 0.12823739647865295,
|
49770 |
+
"learning_rate": 2.1839605294330933e-07,
|
49771 |
+
"loss": 46.0055,
|
49772 |
+
"step": 7109
|
49773 |
+
},
|
49774 |
+
{
|
49775 |
+
"epoch": 0.9707147245545771,
|
49776 |
+
"grad_norm": 0.0972222238779068,
|
49777 |
+
"learning_rate": 2.1637064950463247e-07,
|
49778 |
+
"loss": 46.0094,
|
49779 |
+
"step": 7110
|
49780 |
+
},
|
49781 |
+
{
|
49782 |
+
"epoch": 0.9708512526452318,
|
49783 |
+
"grad_norm": 0.08541359752416611,
|
49784 |
+
"learning_rate": 2.143546612934644e-07,
|
49785 |
+
"loss": 46.005,
|
49786 |
+
"step": 7111
|
49787 |
+
},
|
49788 |
+
{
|
49789 |
+
"epoch": 0.9709877807358864,
|
49790 |
+
"grad_norm": 0.06323409080505371,
|
49791 |
+
"learning_rate": 2.1234808869106117e-07,
|
49792 |
+
"loss": 46.0035,
|
49793 |
+
"step": 7112
|
49794 |
+
},
|
49795 |
+
{
|
49796 |
+
"epoch": 0.9711243088265411,
|
49797 |
+
"grad_norm": 0.061274804174900055,
|
49798 |
+
"learning_rate": 2.1035093207693036e-07,
|
49799 |
+
"loss": 46.009,
|
49800 |
+
"step": 7113
|
49801 |
+
},
|
49802 |
+
{
|
49803 |
+
"epoch": 0.9712608369171957,
|
49804 |
+
"grad_norm": 0.06812640279531479,
|
49805 |
+
"learning_rate": 2.083631918287643e-07,
|
49806 |
+
"loss": 46.0059,
|
49807 |
+
"step": 7114
|
49808 |
+
},
|
49809 |
+
{
|
49810 |
+
"epoch": 0.9713973650078503,
|
49811 |
+
"grad_norm": 0.05312773957848549,
|
49812 |
+
"learning_rate": 2.063848683224956e-07,
|
49813 |
+
"loss": 46.0003,
|
49814 |
+
"step": 7115
|
49815 |
+
},
|
49816 |
+
{
|
49817 |
+
"epoch": 0.971533893098505,
|
49818 |
+
"grad_norm": 0.09627732634544373,
|
49819 |
+
"learning_rate": 2.0441596193227497e-07,
|
49820 |
+
"loss": 46.0087,
|
49821 |
+
"step": 7116
|
49822 |
+
},
|
49823 |
+
{
|
49824 |
+
"epoch": 0.9716704211891597,
|
49825 |
+
"grad_norm": 0.07566139101982117,
|
49826 |
+
"learning_rate": 2.0245647303046568e-07,
|
49827 |
+
"loss": 46.0026,
|
49828 |
+
"step": 7117
|
49829 |
+
},
|
49830 |
+
{
|
49831 |
+
"epoch": 0.9718069492798144,
|
49832 |
+
"grad_norm": 0.11304798722267151,
|
49833 |
+
"learning_rate": 2.0050640198764347e-07,
|
49834 |
+
"loss": 46.0036,
|
49835 |
+
"step": 7118
|
49836 |
+
},
|
49837 |
+
{
|
49838 |
+
"epoch": 0.9719434773704689,
|
49839 |
+
"grad_norm": 0.06032518297433853,
|
49840 |
+
"learning_rate": 1.985657491726245e-07,
|
49841 |
+
"loss": 46.003,
|
49842 |
+
"step": 7119
|
49843 |
+
},
|
49844 |
+
{
|
49845 |
+
"epoch": 0.9720800054611236,
|
49846 |
+
"grad_norm": 0.0727953091263771,
|
49847 |
+
"learning_rate": 1.9663451495242068e-07,
|
49848 |
+
"loss": 46.0045,
|
49849 |
+
"step": 7120
|
49850 |
+
},
|
49851 |
+
{
|
49852 |
+
"epoch": 0.9722165335517783,
|
49853 |
+
"grad_norm": 0.06881820410490036,
|
49854 |
+
"learning_rate": 1.9471269969227878e-07,
|
49855 |
+
"loss": 46.0078,
|
49856 |
+
"step": 7121
|
49857 |
+
},
|
49858 |
+
{
|
49859 |
+
"epoch": 0.972353061642433,
|
49860 |
+
"grad_norm": 0.057409241795539856,
|
49861 |
+
"learning_rate": 1.9280030375565804e-07,
|
49862 |
+
"loss": 46.0076,
|
49863 |
+
"step": 7122
|
49864 |
+
},
|
49865 |
+
{
|
49866 |
+
"epoch": 0.9724895897330876,
|
49867 |
+
"grad_norm": 0.041979074478149414,
|
49868 |
+
"learning_rate": 1.9089732750423028e-07,
|
49869 |
+
"loss": 46.0056,
|
49870 |
+
"step": 7123
|
49871 |
+
},
|
49872 |
+
{
|
49873 |
+
"epoch": 0.9726261178237422,
|
49874 |
+
"grad_norm": 0.08132766932249069,
|
49875 |
+
"learning_rate": 1.8900377129790202e-07,
|
49876 |
+
"loss": 46.007,
|
49877 |
+
"step": 7124
|
49878 |
+
},
|
49879 |
+
{
|
49880 |
+
"epoch": 0.9727626459143969,
|
49881 |
+
"grad_norm": 0.16842536628246307,
|
49882 |
+
"learning_rate": 1.871196354947813e-07,
|
49883 |
+
"loss": 46.0082,
|
49884 |
+
"step": 7125
|
49885 |
+
},
|
49886 |
+
{
|
49887 |
+
"epoch": 0.9728991740050515,
|
49888 |
+
"grad_norm": 0.19767284393310547,
|
49889 |
+
"learning_rate": 1.8524492045119967e-07,
|
49890 |
+
"loss": 46.0047,
|
49891 |
+
"step": 7126
|
49892 |
+
},
|
49893 |
+
{
|
49894 |
+
"epoch": 0.9730357020957062,
|
49895 |
+
"grad_norm": 0.09396287798881531,
|
49896 |
+
"learning_rate": 1.8337962652171249e-07,
|
49897 |
+
"loss": 46.0017,
|
49898 |
+
"step": 7127
|
49899 |
+
},
|
49900 |
+
{
|
49901 |
+
"epoch": 0.9731722301863609,
|
49902 |
+
"grad_norm": 0.07244996726512909,
|
49903 |
+
"learning_rate": 1.8152375405909305e-07,
|
49904 |
+
"loss": 46.0015,
|
49905 |
+
"step": 7128
|
49906 |
+
},
|
49907 |
+
{
|
49908 |
+
"epoch": 0.9733087582770155,
|
49909 |
+
"grad_norm": 0.12894263863563538,
|
49910 |
+
"learning_rate": 1.7967730341432176e-07,
|
49911 |
+
"loss": 46.0043,
|
49912 |
+
"step": 7129
|
49913 |
+
},
|
49914 |
+
{
|
49915 |
+
"epoch": 0.9734452863676701,
|
49916 |
+
"grad_norm": 0.16393700242042542,
|
49917 |
+
"learning_rate": 1.778402749366137e-07,
|
49918 |
+
"loss": 46.0046,
|
49919 |
+
"step": 7130
|
49920 |
+
},
|
49921 |
+
{
|
49922 |
+
"epoch": 0.9735818144583248,
|
49923 |
+
"grad_norm": 0.0899428129196167,
|
49924 |
+
"learning_rate": 1.7601266897338542e-07,
|
49925 |
+
"loss": 46.0069,
|
49926 |
+
"step": 7131
|
49927 |
+
},
|
49928 |
+
{
|
49929 |
+
"epoch": 0.9737183425489795,
|
49930 |
+
"grad_norm": 0.11820844560861588,
|
49931 |
+
"learning_rate": 1.7419448587027708e-07,
|
49932 |
+
"loss": 46.0105,
|
49933 |
+
"step": 7132
|
49934 |
+
},
|
49935 |
+
{
|
49936 |
+
"epoch": 0.973854870639634,
|
49937 |
+
"grad_norm": 0.1485210806131363,
|
49938 |
+
"learning_rate": 1.723857259711581e-07,
|
49939 |
+
"loss": 46.0064,
|
49940 |
+
"step": 7133
|
49941 |
+
},
|
49942 |
+
{
|
49943 |
+
"epoch": 0.9739913987302887,
|
49944 |
+
"grad_norm": 0.07663711160421371,
|
49945 |
+
"learning_rate": 1.705863896181048e-07,
|
49946 |
+
"loss": 46.0067,
|
49947 |
+
"step": 7134
|
49948 |
+
},
|
49949 |
+
{
|
49950 |
+
"epoch": 0.9741279268209434,
|
49951 |
+
"grad_norm": 0.14216692745685577,
|
49952 |
+
"learning_rate": 1.6879647715140611e-07,
|
49953 |
+
"loss": 46.0041,
|
49954 |
+
"step": 7135
|
49955 |
+
},
|
49956 |
+
{
|
49957 |
+
"epoch": 0.9742644549115981,
|
49958 |
+
"grad_norm": 0.05832170695066452,
|
49959 |
+
"learning_rate": 1.6701598890958014e-07,
|
49960 |
+
"loss": 46.0078,
|
49961 |
+
"step": 7136
|
49962 |
+
},
|
49963 |
+
{
|
49964 |
+
"epoch": 0.9744009830022528,
|
49965 |
+
"grad_norm": 0.0745471715927124,
|
49966 |
+
"learning_rate": 1.6524492522935753e-07,
|
49967 |
+
"loss": 46.0003,
|
49968 |
+
"step": 7137
|
49969 |
+
},
|
49970 |
+
{
|
49971 |
+
"epoch": 0.9745375110929073,
|
49972 |
+
"grad_norm": 0.10810956358909607,
|
49973 |
+
"learning_rate": 1.63483286445687e-07,
|
49974 |
+
"loss": 46.0029,
|
49975 |
+
"step": 7138
|
49976 |
+
},
|
49977 |
+
{
|
49978 |
+
"epoch": 0.974674039183562,
|
49979 |
+
"grad_norm": 0.07453179359436035,
|
49980 |
+
"learning_rate": 1.6173107289173538e-07,
|
49981 |
+
"loss": 46.0042,
|
49982 |
+
"step": 7139
|
49983 |
+
},
|
49984 |
+
{
|
49985 |
+
"epoch": 0.9748105672742167,
|
49986 |
+
"grad_norm": 0.17998754978179932,
|
49987 |
+
"learning_rate": 1.5998828489888763e-07,
|
49988 |
+
"loss": 46.0023,
|
49989 |
+
"step": 7140
|
49990 |
+
},
|
49991 |
+
{
|
49992 |
+
"epoch": 0.9749470953648713,
|
49993 |
+
"grad_norm": 0.08225401490926743,
|
49994 |
+
"learning_rate": 1.5825492279674668e-07,
|
49995 |
+
"loss": 46.0072,
|
49996 |
+
"step": 7141
|
49997 |
+
},
|
49998 |
+
{
|
49999 |
+
"epoch": 0.975083623455526,
|
50000 |
+
"grad_norm": 0.10022734850645065,
|
50001 |
+
"learning_rate": 1.5653098691312263e-07,
|
50002 |
+
"loss": 46.0024,
|
50003 |
+
"step": 7142
|
50004 |
+
},
|
50005 |
+
{
|
50006 |
+
"epoch": 0.9752201515461806,
|
50007 |
+
"grad_norm": 0.04875979945063591,
|
50008 |
+
"learning_rate": 1.5481647757406015e-07,
|
50009 |
+
"loss": 46.0,
|
50010 |
+
"step": 7143
|
50011 |
+
},
|
50012 |
+
{
|
50013 |
+
"epoch": 0.9753566796368353,
|
50014 |
+
"grad_norm": 0.15972889959812164,
|
50015 |
+
"learning_rate": 1.5311139510380545e-07,
|
50016 |
+
"loss": 46.002,
|
50017 |
+
"step": 7144
|
50018 |
+
},
|
50019 |
+
{
|
50020 |
+
"epoch": 0.9754932077274899,
|
50021 |
+
"grad_norm": 0.0891147032380104,
|
50022 |
+
"learning_rate": 1.5141573982483392e-07,
|
50023 |
+
"loss": 46.0018,
|
50024 |
+
"step": 7145
|
50025 |
+
},
|
50026 |
+
{
|
50027 |
+
"epoch": 0.9756297358181446,
|
50028 |
+
"grad_norm": 0.1982298195362091,
|
50029 |
+
"learning_rate": 1.4972951205782793e-07,
|
50030 |
+
"loss": 46.0101,
|
50031 |
+
"step": 7146
|
50032 |
+
},
|
50033 |
+
{
|
50034 |
+
"epoch": 0.9757662639087993,
|
50035 |
+
"grad_norm": 0.22120720148086548,
|
50036 |
+
"learning_rate": 1.4805271212169902e-07,
|
50037 |
+
"loss": 46.0054,
|
50038 |
+
"step": 7147
|
50039 |
+
},
|
50040 |
+
{
|
50041 |
+
"epoch": 0.9759027919994538,
|
50042 |
+
"grad_norm": 0.09720038622617722,
|
50043 |
+
"learning_rate": 1.4638534033356577e-07,
|
50044 |
+
"loss": 46.0075,
|
50045 |
+
"step": 7148
|
50046 |
+
},
|
50047 |
+
{
|
50048 |
+
"epoch": 0.9760393200901085,
|
50049 |
+
"grad_norm": 0.25006312131881714,
|
50050 |
+
"learning_rate": 1.447273970087648e-07,
|
50051 |
+
"loss": 46.0042,
|
50052 |
+
"step": 7149
|
50053 |
+
},
|
50054 |
+
{
|
50055 |
+
"epoch": 0.9761758481807632,
|
50056 |
+
"grad_norm": 0.14676909148693085,
|
50057 |
+
"learning_rate": 1.4307888246085088e-07,
|
50058 |
+
"loss": 46.0059,
|
50059 |
+
"step": 7150
|
50060 |
+
},
|
50061 |
+
{
|
50062 |
+
"epoch": 0.9763123762714179,
|
50063 |
+
"grad_norm": 0.08185182511806488,
|
50064 |
+
"learning_rate": 1.4143979700159682e-07,
|
50065 |
+
"loss": 46.008,
|
50066 |
+
"step": 7151
|
50067 |
+
},
|
50068 |
+
{
|
50069 |
+
"epoch": 0.9764489043620725,
|
50070 |
+
"grad_norm": 0.08078208565711975,
|
50071 |
+
"learning_rate": 1.3981014094099353e-07,
|
50072 |
+
"loss": 46.0013,
|
50073 |
+
"step": 7152
|
50074 |
+
},
|
50075 |
+
{
|
50076 |
+
"epoch": 0.9765854324527271,
|
50077 |
+
"grad_norm": 0.12377557158470154,
|
50078 |
+
"learning_rate": 1.3818991458723894e-07,
|
50079 |
+
"loss": 46.0008,
|
50080 |
+
"step": 7153
|
50081 |
+
},
|
50082 |
+
{
|
50083 |
+
"epoch": 0.9767219605433818,
|
50084 |
+
"grad_norm": 0.18877796828746796,
|
50085 |
+
"learning_rate": 1.3657911824676574e-07,
|
50086 |
+
"loss": 46.0014,
|
50087 |
+
"step": 7154
|
50088 |
+
},
|
50089 |
+
{
|
50090 |
+
"epoch": 0.9768584886340365,
|
50091 |
+
"grad_norm": 0.040742017328739166,
|
50092 |
+
"learning_rate": 1.3497775222420793e-07,
|
50093 |
+
"loss": 46.0002,
|
50094 |
+
"step": 7155
|
50095 |
+
},
|
50096 |
+
{
|
50097 |
+
"epoch": 0.9769950167246911,
|
50098 |
+
"grad_norm": 0.04738624021410942,
|
50099 |
+
"learning_rate": 1.333858168224178e-07,
|
50100 |
+
"loss": 46.005,
|
50101 |
+
"step": 7156
|
50102 |
+
},
|
50103 |
+
{
|
50104 |
+
"epoch": 0.9771315448153458,
|
50105 |
+
"grad_norm": 0.05165252089500427,
|
50106 |
+
"learning_rate": 1.3180331234246556e-07,
|
50107 |
+
"loss": 46.0045,
|
50108 |
+
"step": 7157
|
50109 |
+
},
|
50110 |
+
{
|
50111 |
+
"epoch": 0.9772680729060004,
|
50112 |
+
"grad_norm": 0.09031161665916443,
|
50113 |
+
"learning_rate": 1.3023023908364518e-07,
|
50114 |
+
"loss": 46.0075,
|
50115 |
+
"step": 7158
|
50116 |
+
},
|
50117 |
+
{
|
50118 |
+
"epoch": 0.977404600996655,
|
50119 |
+
"grad_norm": 0.055213626474142075,
|
50120 |
+
"learning_rate": 1.286665973434631e-07,
|
50121 |
+
"loss": 46.0002,
|
50122 |
+
"step": 7159
|
50123 |
+
},
|
50124 |
+
{
|
50125 |
+
"epoch": 0.9775411290873097,
|
50126 |
+
"grad_norm": 0.08890896290540695,
|
50127 |
+
"learning_rate": 1.2711238741762722e-07,
|
50128 |
+
"loss": 46.004,
|
50129 |
+
"step": 7160
|
50130 |
+
},
|
50131 |
+
{
|
50132 |
+
"epoch": 0.9776776571779644,
|
50133 |
+
"grad_norm": 0.08437825739383698,
|
50134 |
+
"learning_rate": 1.2556760960008575e-07,
|
50135 |
+
"loss": 46.0016,
|
50136 |
+
"step": 7161
|
50137 |
+
},
|
50138 |
+
{
|
50139 |
+
"epoch": 0.9778141852686191,
|
50140 |
+
"grad_norm": 0.20216180384159088,
|
50141 |
+
"learning_rate": 1.240322641829883e-07,
|
50142 |
+
"loss": 46.0019,
|
50143 |
+
"step": 7162
|
50144 |
+
},
|
50145 |
+
{
|
50146 |
+
"epoch": 0.9779507133592736,
|
50147 |
+
"grad_norm": 0.1490899920463562,
|
50148 |
+
"learning_rate": 1.225063514567082e-07,
|
50149 |
+
"loss": 46.0074,
|
50150 |
+
"step": 7163
|
50151 |
+
},
|
50152 |
+
{
|
50153 |
+
"epoch": 0.9780872414499283,
|
50154 |
+
"grad_norm": 0.0793762356042862,
|
50155 |
+
"learning_rate": 1.2098987170982013e-07,
|
50156 |
+
"loss": 46.0034,
|
50157 |
+
"step": 7164
|
50158 |
+
},
|
50159 |
+
{
|
50160 |
+
"epoch": 0.978223769540583,
|
50161 |
+
"grad_norm": 0.08808130770921707,
|
50162 |
+
"learning_rate": 1.1948282522913357e-07,
|
50163 |
+
"loss": 46.0061,
|
50164 |
+
"step": 7165
|
50165 |
+
},
|
50166 |
+
{
|
50167 |
+
"epoch": 0.9783602976312377,
|
50168 |
+
"grad_norm": 0.047708965837955475,
|
50169 |
+
"learning_rate": 1.1798521229965942e-07,
|
50170 |
+
"loss": 46.0011,
|
50171 |
+
"step": 7166
|
50172 |
+
},
|
50173 |
+
{
|
50174 |
+
"epoch": 0.9784968257218922,
|
50175 |
+
"grad_norm": 0.06762000173330307,
|
50176 |
+
"learning_rate": 1.1649703320463779e-07,
|
50177 |
+
"loss": 46.0053,
|
50178 |
+
"step": 7167
|
50179 |
+
},
|
50180 |
+
{
|
50181 |
+
"epoch": 0.9786333538125469,
|
50182 |
+
"grad_norm": 0.14670976996421814,
|
50183 |
+
"learning_rate": 1.1501828822551575e-07,
|
50184 |
+
"loss": 46.0038,
|
50185 |
+
"step": 7168
|
50186 |
+
},
|
50187 |
+
{
|
50188 |
+
"epoch": 0.9787698819032016,
|
50189 |
+
"grad_norm": 0.061925239861011505,
|
50190 |
+
"learning_rate": 1.1354897764195293e-07,
|
50191 |
+
"loss": 46.0049,
|
50192 |
+
"step": 7169
|
50193 |
+
},
|
50194 |
+
{
|
50195 |
+
"epoch": 0.9789064099938563,
|
50196 |
+
"grad_norm": 0.08076687157154083,
|
50197 |
+
"learning_rate": 1.1208910173183817e-07,
|
50198 |
+
"loss": 46.0056,
|
50199 |
+
"step": 7170
|
50200 |
}
|
50201 |
],
|
50202 |
"logging_steps": 1,
|
|
|
50216 |
"attributes": {}
|
50217 |
}
|
50218 |
},
|
50219 |
+
"total_flos": 673475530850304.0,
|
50220 |
"train_batch_size": 4,
|
50221 |
"trial_name": null,
|
50222 |
"trial_params": null
|