Training in progress, epoch 12, checkpoint
Browse files- last-checkpoint/global_step2567/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2567/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step2567/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5df0dfda7753955a3b1a7c57e32045ecd4ee5a9ecaa6ab09eabac1176fb60756
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:beaab569569fa9e9974b7c2c8aab4010a914d1f9761715fb409817fb8fa33b7e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f50bd801c72f28b97ebd7ee0600339b6665560d61267fec66e2c8a1416188e94
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:249ab69ccb0ef4ce4cd80e00b5665af8a64f38847c4d26d04cf8ff39ae62537c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51163e652d04cb476fcfb9d469310033832cfc62b9b765cc2e069fcdc78053bb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d234aa42bda89791cb7d131f27e23e89dbccb1085f1c1e360ef686fcf2b43b7
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:565e00554947705715c69538ad4fdbba2135918d55fd1eac26bff0709d89b13b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c515529768ebde36752b6bb1c83e4b81332f8aa9703556918914490ed8d150d
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2567/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1112a0ce6047e224c0d4055aa91e143f1983e512467d18befeebd90215f467e
|
3 |
+
size 85570
|
last-checkpoint/global_step2567/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2184e21318ac898b809f679e6b4c7651cff724486bf10d48097ecba3a51c9066
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68574640ec1db60949f0119373598557248ab690791c8c7b4437b5d0969b86d1
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc4477293d8ea587976b0a2143cc2df3d00351c399c3e980c03aaa37f2412fce
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ddc2c09b1b33dea736afa89199ad10c27814b8da145bc5d167b22bb480bed3f
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37312ab5b415aba4ca7ed61ca1cac9f46a58dd5d77f7bc89c5bb78d05cd6d054
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b510f4c4fdab13d1a5ab5570ff44824c6b3c61e67db13ecd4057a73fa00a9fd
|
3 |
+
size 85506
|
last-checkpoint/global_step2567/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01bfecfc893fd6030f488f501386e110af2a2f886229a7e230009eab593fb8a6
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step2567
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31a59d02512e22c8fde96ae53ade5fd3efb11b708a7c65545ed6a6a202751a37
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43b5816e1bbc869b1c3d647caa31746b3c9674dc2e4dd47857a690cd4ee4639b
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9eb2db72f0e418efa4f13d7448db05b4ce751b00def470d4d8f87d4965bb17c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 12.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3577,6 +3577,291 @@
|
|
3577 |
"rewards/margins": 0.5158518552780151,
|
3578 |
"rewards/rejected": -0.3514222800731659,
|
3579 |
"step": 2370
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3580 |
}
|
3581 |
],
|
3582 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 12.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2567,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3577 |
"rewards/margins": 0.5158518552780151,
|
3578 |
"rewards/rejected": -0.3514222800731659,
|
3579 |
"step": 2370
|
3580 |
+
},
|
3581 |
+
{
|
3582 |
+
"epoch": 12.050632911392405,
|
3583 |
+
"grad_norm": 924736.9233899026,
|
3584 |
+
"learning_rate": 1.827013475399561e-07,
|
3585 |
+
"logits/chosen": 0.09384210407733917,
|
3586 |
+
"logits/rejected": 0.38976824283599854,
|
3587 |
+
"logps/chosen": -60.1981315612793,
|
3588 |
+
"logps/rejected": -569.2012329101562,
|
3589 |
+
"loss": 16551.6906,
|
3590 |
+
"rewards/accuracies": 0.987500011920929,
|
3591 |
+
"rewards/chosen": 0.1783694624900818,
|
3592 |
+
"rewards/margins": 0.5092591047286987,
|
3593 |
+
"rewards/rejected": -0.33088964223861694,
|
3594 |
+
"step": 2380
|
3595 |
+
},
|
3596 |
+
{
|
3597 |
+
"epoch": 12.10126582278481,
|
3598 |
+
"grad_norm": 453683.3343967912,
|
3599 |
+
"learning_rate": 1.811344406142275e-07,
|
3600 |
+
"logits/chosen": -0.1967567503452301,
|
3601 |
+
"logits/rejected": 0.26000285148620605,
|
3602 |
+
"logps/chosen": -51.80207443237305,
|
3603 |
+
"logps/rejected": -586.1417846679688,
|
3604 |
+
"loss": 16650.6516,
|
3605 |
+
"rewards/accuracies": 1.0,
|
3606 |
+
"rewards/chosen": 0.19160635769367218,
|
3607 |
+
"rewards/margins": 0.5359978079795837,
|
3608 |
+
"rewards/rejected": -0.34439152479171753,
|
3609 |
+
"step": 2390
|
3610 |
+
},
|
3611 |
+
{
|
3612 |
+
"epoch": 12.151898734177216,
|
3613 |
+
"grad_norm": 760637.6347084254,
|
3614 |
+
"learning_rate": 1.7956753368849888e-07,
|
3615 |
+
"logits/chosen": -2.4950621128082275,
|
3616 |
+
"logits/rejected": -1.7182337045669556,
|
3617 |
+
"logps/chosen": -54.441162109375,
|
3618 |
+
"logps/rejected": -569.5804443359375,
|
3619 |
+
"loss": 16525.3187,
|
3620 |
+
"rewards/accuracies": 0.987500011920929,
|
3621 |
+
"rewards/chosen": 0.1771778166294098,
|
3622 |
+
"rewards/margins": 0.5123227834701538,
|
3623 |
+
"rewards/rejected": -0.335144966840744,
|
3624 |
+
"step": 2400
|
3625 |
+
},
|
3626 |
+
{
|
3627 |
+
"epoch": 12.20253164556962,
|
3628 |
+
"grad_norm": 760695.8247001156,
|
3629 |
+
"learning_rate": 1.7800062676277027e-07,
|
3630 |
+
"logits/chosen": 2.4408202171325684,
|
3631 |
+
"logits/rejected": 1.941209077835083,
|
3632 |
+
"logps/chosen": -50.47087097167969,
|
3633 |
+
"logps/rejected": -550.1649169921875,
|
3634 |
+
"loss": 16281.4594,
|
3635 |
+
"rewards/accuracies": 0.987500011920929,
|
3636 |
+
"rewards/chosen": 0.1683485209941864,
|
3637 |
+
"rewards/margins": 0.5019410848617554,
|
3638 |
+
"rewards/rejected": -0.33359256386756897,
|
3639 |
+
"step": 2410
|
3640 |
+
},
|
3641 |
+
{
|
3642 |
+
"epoch": 12.253164556962025,
|
3643 |
+
"grad_norm": 501646.8806860111,
|
3644 |
+
"learning_rate": 1.7643371983704165e-07,
|
3645 |
+
"logits/chosen": -1.7683095932006836,
|
3646 |
+
"logits/rejected": -1.838817834854126,
|
3647 |
+
"logps/chosen": -53.41362762451172,
|
3648 |
+
"logps/rejected": -574.3419799804688,
|
3649 |
+
"loss": 16772.675,
|
3650 |
+
"rewards/accuracies": 1.0,
|
3651 |
+
"rewards/chosen": 0.18047122657299042,
|
3652 |
+
"rewards/margins": 0.5231555700302124,
|
3653 |
+
"rewards/rejected": -0.342684268951416,
|
3654 |
+
"step": 2420
|
3655 |
+
},
|
3656 |
+
{
|
3657 |
+
"epoch": 12.30379746835443,
|
3658 |
+
"grad_norm": 705638.6344046313,
|
3659 |
+
"learning_rate": 1.7486681291131307e-07,
|
3660 |
+
"logits/chosen": 0.6870694756507874,
|
3661 |
+
"logits/rejected": 0.9879606366157532,
|
3662 |
+
"logps/chosen": -60.645713806152344,
|
3663 |
+
"logps/rejected": -565.5677490234375,
|
3664 |
+
"loss": 16990.1125,
|
3665 |
+
"rewards/accuracies": 1.0,
|
3666 |
+
"rewards/chosen": 0.180276021361351,
|
3667 |
+
"rewards/margins": 0.5076194405555725,
|
3668 |
+
"rewards/rejected": -0.3273434340953827,
|
3669 |
+
"step": 2430
|
3670 |
+
},
|
3671 |
+
{
|
3672 |
+
"epoch": 12.354430379746836,
|
3673 |
+
"grad_norm": 583239.6869039454,
|
3674 |
+
"learning_rate": 1.7329990598558445e-07,
|
3675 |
+
"logits/chosen": -0.015002572908997536,
|
3676 |
+
"logits/rejected": 0.6669713258743286,
|
3677 |
+
"logps/chosen": -59.69384765625,
|
3678 |
+
"logps/rejected": -595.3045654296875,
|
3679 |
+
"loss": 16570.7625,
|
3680 |
+
"rewards/accuracies": 1.0,
|
3681 |
+
"rewards/chosen": 0.19047938287258148,
|
3682 |
+
"rewards/margins": 0.5352143049240112,
|
3683 |
+
"rewards/rejected": -0.34473496675491333,
|
3684 |
+
"step": 2440
|
3685 |
+
},
|
3686 |
+
{
|
3687 |
+
"epoch": 12.405063291139241,
|
3688 |
+
"grad_norm": 717458.0522613698,
|
3689 |
+
"learning_rate": 1.7173299905985584e-07,
|
3690 |
+
"logits/chosen": -1.5561044216156006,
|
3691 |
+
"logits/rejected": -1.511528730392456,
|
3692 |
+
"logps/chosen": -48.24024200439453,
|
3693 |
+
"logps/rejected": -585.71484375,
|
3694 |
+
"loss": 16296.25,
|
3695 |
+
"rewards/accuracies": 1.0,
|
3696 |
+
"rewards/chosen": 0.18336063623428345,
|
3697 |
+
"rewards/margins": 0.5371404886245728,
|
3698 |
+
"rewards/rejected": -0.3537798523902893,
|
3699 |
+
"step": 2450
|
3700 |
+
},
|
3701 |
+
{
|
3702 |
+
"epoch": 12.455696202531646,
|
3703 |
+
"grad_norm": 1561201.446100151,
|
3704 |
+
"learning_rate": 1.7016609213412722e-07,
|
3705 |
+
"logits/chosen": -0.5445646047592163,
|
3706 |
+
"logits/rejected": 0.5015290379524231,
|
3707 |
+
"logps/chosen": -57.12273025512695,
|
3708 |
+
"logps/rejected": -596.54248046875,
|
3709 |
+
"loss": 17012.2562,
|
3710 |
+
"rewards/accuracies": 0.987500011920929,
|
3711 |
+
"rewards/chosen": 0.1852089911699295,
|
3712 |
+
"rewards/margins": 0.5424550771713257,
|
3713 |
+
"rewards/rejected": -0.35724616050720215,
|
3714 |
+
"step": 2460
|
3715 |
+
},
|
3716 |
+
{
|
3717 |
+
"epoch": 12.50632911392405,
|
3718 |
+
"grad_norm": 576931.8180998629,
|
3719 |
+
"learning_rate": 1.685991852083986e-07,
|
3720 |
+
"logits/chosen": 0.7103387713432312,
|
3721 |
+
"logits/rejected": 0.5729061365127563,
|
3722 |
+
"logps/chosen": -45.429290771484375,
|
3723 |
+
"logps/rejected": -540.9015502929688,
|
3724 |
+
"loss": 17545.0859,
|
3725 |
+
"rewards/accuracies": 0.987500011920929,
|
3726 |
+
"rewards/chosen": 0.17228493094444275,
|
3727 |
+
"rewards/margins": 0.49700021743774414,
|
3728 |
+
"rewards/rejected": -0.3247153162956238,
|
3729 |
+
"step": 2470
|
3730 |
+
},
|
3731 |
+
{
|
3732 |
+
"epoch": 12.556962025316455,
|
3733 |
+
"grad_norm": 790199.4841189157,
|
3734 |
+
"learning_rate": 1.6703227828267e-07,
|
3735 |
+
"logits/chosen": 0.757542610168457,
|
3736 |
+
"logits/rejected": 1.3497235774993896,
|
3737 |
+
"logps/chosen": -60.74102020263672,
|
3738 |
+
"logps/rejected": -570.23583984375,
|
3739 |
+
"loss": 17645.0094,
|
3740 |
+
"rewards/accuracies": 1.0,
|
3741 |
+
"rewards/chosen": 0.17620857059955597,
|
3742 |
+
"rewards/margins": 0.5084448456764221,
|
3743 |
+
"rewards/rejected": -0.33223623037338257,
|
3744 |
+
"step": 2480
|
3745 |
+
},
|
3746 |
+
{
|
3747 |
+
"epoch": 12.60759493670886,
|
3748 |
+
"grad_norm": 1168730.408088866,
|
3749 |
+
"learning_rate": 1.6546537135694138e-07,
|
3750 |
+
"logits/chosen": 1.1095263957977295,
|
3751 |
+
"logits/rejected": 1.6450704336166382,
|
3752 |
+
"logps/chosen": -55.1762580871582,
|
3753 |
+
"logps/rejected": -562.0362548828125,
|
3754 |
+
"loss": 17481.3469,
|
3755 |
+
"rewards/accuracies": 0.9750000238418579,
|
3756 |
+
"rewards/chosen": 0.1728857308626175,
|
3757 |
+
"rewards/margins": 0.5043104887008667,
|
3758 |
+
"rewards/rejected": -0.3314247727394104,
|
3759 |
+
"step": 2490
|
3760 |
+
},
|
3761 |
+
{
|
3762 |
+
"epoch": 12.658227848101266,
|
3763 |
+
"grad_norm": 492108.78941813926,
|
3764 |
+
"learning_rate": 1.6389846443121277e-07,
|
3765 |
+
"logits/chosen": 0.4340684413909912,
|
3766 |
+
"logits/rejected": 0.34048348665237427,
|
3767 |
+
"logps/chosen": -56.212928771972656,
|
3768 |
+
"logps/rejected": -578.192138671875,
|
3769 |
+
"loss": 16462.5594,
|
3770 |
+
"rewards/accuracies": 0.987500011920929,
|
3771 |
+
"rewards/chosen": 0.17624449729919434,
|
3772 |
+
"rewards/margins": 0.5216260552406311,
|
3773 |
+
"rewards/rejected": -0.3453815281391144,
|
3774 |
+
"step": 2500
|
3775 |
+
},
|
3776 |
+
{
|
3777 |
+
"epoch": 12.708860759493671,
|
3778 |
+
"grad_norm": 513189.7522025148,
|
3779 |
+
"learning_rate": 1.6233155750548415e-07,
|
3780 |
+
"logits/chosen": -0.21513333916664124,
|
3781 |
+
"logits/rejected": -0.05444493144750595,
|
3782 |
+
"logps/chosen": -60.96831512451172,
|
3783 |
+
"logps/rejected": -583.4918823242188,
|
3784 |
+
"loss": 16903.7125,
|
3785 |
+
"rewards/accuracies": 0.987500011920929,
|
3786 |
+
"rewards/chosen": 0.1871432662010193,
|
3787 |
+
"rewards/margins": 0.5204809904098511,
|
3788 |
+
"rewards/rejected": -0.3333377242088318,
|
3789 |
+
"step": 2510
|
3790 |
+
},
|
3791 |
+
{
|
3792 |
+
"epoch": 12.759493670886076,
|
3793 |
+
"grad_norm": 527855.7040773877,
|
3794 |
+
"learning_rate": 1.6076465057975556e-07,
|
3795 |
+
"logits/chosen": -1.166076421737671,
|
3796 |
+
"logits/rejected": -0.5938941240310669,
|
3797 |
+
"logps/chosen": -66.41789245605469,
|
3798 |
+
"logps/rejected": -565.521728515625,
|
3799 |
+
"loss": 16873.3,
|
3800 |
+
"rewards/accuracies": 0.9624999761581421,
|
3801 |
+
"rewards/chosen": 0.18609380722045898,
|
3802 |
+
"rewards/margins": 0.5067971348762512,
|
3803 |
+
"rewards/rejected": -0.32070332765579224,
|
3804 |
+
"step": 2520
|
3805 |
+
},
|
3806 |
+
{
|
3807 |
+
"epoch": 12.810126582278482,
|
3808 |
+
"grad_norm": 454333.8693268159,
|
3809 |
+
"learning_rate": 1.5919774365402695e-07,
|
3810 |
+
"logits/chosen": -3.2188408374786377,
|
3811 |
+
"logits/rejected": -2.827929735183716,
|
3812 |
+
"logps/chosen": -64.64167785644531,
|
3813 |
+
"logps/rejected": -578.556396484375,
|
3814 |
+
"loss": 17413.3594,
|
3815 |
+
"rewards/accuracies": 1.0,
|
3816 |
+
"rewards/chosen": 0.1842392235994339,
|
3817 |
+
"rewards/margins": 0.5160521268844604,
|
3818 |
+
"rewards/rejected": -0.33181288838386536,
|
3819 |
+
"step": 2530
|
3820 |
+
},
|
3821 |
+
{
|
3822 |
+
"epoch": 12.860759493670885,
|
3823 |
+
"grad_norm": 613283.375359761,
|
3824 |
+
"learning_rate": 1.5763083672829833e-07,
|
3825 |
+
"logits/chosen": -2.0415351390838623,
|
3826 |
+
"logits/rejected": -1.1543810367584229,
|
3827 |
+
"logps/chosen": -56.55009841918945,
|
3828 |
+
"logps/rejected": -565.3232421875,
|
3829 |
+
"loss": 16952.7828,
|
3830 |
+
"rewards/accuracies": 0.987500011920929,
|
3831 |
+
"rewards/chosen": 0.17874039709568024,
|
3832 |
+
"rewards/margins": 0.5064669847488403,
|
3833 |
+
"rewards/rejected": -0.3277265429496765,
|
3834 |
+
"step": 2540
|
3835 |
+
},
|
3836 |
+
{
|
3837 |
+
"epoch": 12.91139240506329,
|
3838 |
+
"grad_norm": 973991.6151861927,
|
3839 |
+
"learning_rate": 1.5606392980256972e-07,
|
3840 |
+
"logits/chosen": -1.9052120447158813,
|
3841 |
+
"logits/rejected": -1.2125427722930908,
|
3842 |
+
"logps/chosen": -56.37163162231445,
|
3843 |
+
"logps/rejected": -575.3190307617188,
|
3844 |
+
"loss": 17272.6656,
|
3845 |
+
"rewards/accuracies": 1.0,
|
3846 |
+
"rewards/chosen": 0.18349668383598328,
|
3847 |
+
"rewards/margins": 0.5194507837295532,
|
3848 |
+
"rewards/rejected": -0.33595409989356995,
|
3849 |
+
"step": 2550
|
3850 |
+
},
|
3851 |
+
{
|
3852 |
+
"epoch": 12.962025316455696,
|
3853 |
+
"grad_norm": 1049016.1677939103,
|
3854 |
+
"learning_rate": 1.544970228768411e-07,
|
3855 |
+
"logits/chosen": -0.479561984539032,
|
3856 |
+
"logits/rejected": -0.6837025284767151,
|
3857 |
+
"logps/chosen": -56.96269989013672,
|
3858 |
+
"logps/rejected": -579.6213989257812,
|
3859 |
+
"loss": 17023.0859,
|
3860 |
+
"rewards/accuracies": 0.987500011920929,
|
3861 |
+
"rewards/chosen": 0.1867980808019638,
|
3862 |
+
"rewards/margins": 0.5234028100967407,
|
3863 |
+
"rewards/rejected": -0.3366047739982605,
|
3864 |
+
"step": 2560
|
3865 |
}
|
3866 |
],
|
3867 |
"logging_steps": 10,
|