Training in progress, epoch 30, checkpoint
Browse files- last-checkpoint/global_step5916/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5916/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5916/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b18fdebf275477c0bca9d0b7e0f80b1cb03caf57b8df661089b9827471e91fbb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2916828579c6438c0f61c89512acda82e3307305bfbfe675be41245a453dba3b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8be451455a282d6fa4afdcfcec6b588b88c66217eeb4d427737432d4441f6de1
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:513012075929a38b46c9ab66651ed948062a9cc18e2bf3a5c39c4dd12e5ea795
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51c9e1fd5ae664f552446fa3555c9ecf3335137a3a1072cb0090c5f33389fa78
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04c41fe7fbb304471dbf2512e289491172a5280c76eec719e4da8e2921cc8335
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac25d8faf9414e2fd21a0c1810e7ac093d908a70e1f1dd2c6a66a9db5ef2b745
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d72d83348654c08933eab87d5b552549c388ee3191c4615e14f78305cf8034db
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5916/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6d113b8a546176efdfdd4193aa760c8dd58cc3ac05f292d3a4a1037164b0015
|
3 |
+
size 85570
|
last-checkpoint/global_step5916/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e02adc94e97faf702adae82aeb4d9cb85bacfafaa5b0593e9e580dcd23045a9
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b618c03c6520f69d65a52cc3cb38e9052921b0e1df0791946a86c37b6516c92
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2541aba1009e69905001c19347ed7049287b08a1e30af9f66a4cd00acf843073
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e60f9ddba6fbfcef5726792e69653d27a5e9f835fbbfe0838eba8a8a7343525
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4c3ff328e9e8ce49b1b7b9e0b4dd7f86c7fb5c6a22706fc6b2deac1bb9ca734
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d945c0954cfce168d7f7376d407748d133cfbf6ce4c999f81f22272d9e6c70a
|
3 |
+
size 85506
|
last-checkpoint/global_step5916/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f768a950f1cacb0dea94a1c2017128c7d786d436c044197f5940e93bfb27250a
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5916
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d3b3ba3d73fcfe06684964aa6a0da80302158d0b45234f7473a31d0a8f86d45
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86e8e9eec87ad0431303407e43b154a156dec7fa7e56ad78330489a8ac89828
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c6c5e7528cb919bf0e448095ef87b2e21b836859cd72ece77237cb822e78f88
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3be13484e2a524a3f5e79f3e726fd41b0876252a9d0898131fc1ccf0d86f6a8
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f070bbb32dc96a08f76f5f85c2e01ae98d5e4d16dd18623bfab6b4b54e1d03c4
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f43d659c0909b37d66f4502f36b99850e3f553b6e2fb3ac13de4a060aa1cdc1
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c80a51e24107d3bf93a1e4d903f42d6626efe1c5b8bd714f393fb93f840c5ce2
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e624a5e2e5c07e8e80b5d066ec132b4a872761dcba6d77f7386705eb95f67228
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc47fa553a7e6a70b45be521b98449ef920fe0d39f6cf85ae50b9a45d6c9da85
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8a0e7009351aac3c555f7946c515a64baa2d14aa4706e4a371e0c2df02b5a90
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6ce75ecc7db03a363686e05ba8e98d2588fcd56e7f730c69ff2320b79e2de2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8587,6 +8587,306 @@
|
|
8587 |
"rewards/margins": 0.5581387281417847,
|
8588 |
"rewards/rejected": -0.35167163610458374,
|
8589 |
"step": 5710
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8590 |
}
|
8591 |
],
|
8592 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 30.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 5916,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8587 |
"rewards/margins": 0.5581387281417847,
|
8588 |
"rewards/rejected": -0.35167163610458374,
|
8589 |
"step": 5710
|
8590 |
+
},
|
8591 |
+
{
|
8592 |
+
"epoch": 29.00759493670886,
|
8593 |
+
"grad_norm": 177867.60188083298,
|
8594 |
+
"learning_rate": 1.0748981510498275e-07,
|
8595 |
+
"logits/chosen": -2.0902795791625977,
|
8596 |
+
"logits/rejected": -1.2426658868789673,
|
8597 |
+
"logps/chosen": -25.984241485595703,
|
8598 |
+
"logps/rejected": -595.5320434570312,
|
8599 |
+
"loss": 12101.3188,
|
8600 |
+
"rewards/accuracies": 1.0,
|
8601 |
+
"rewards/chosen": 0.21571488678455353,
|
8602 |
+
"rewards/margins": 0.5755189061164856,
|
8603 |
+
"rewards/rejected": -0.3598039150238037,
|
8604 |
+
"step": 5720
|
8605 |
+
},
|
8606 |
+
{
|
8607 |
+
"epoch": 29.058227848101264,
|
8608 |
+
"grad_norm": 175055.77768040166,
|
8609 |
+
"learning_rate": 1.0670636164211845e-07,
|
8610 |
+
"logits/chosen": -3.0874876976013184,
|
8611 |
+
"logits/rejected": -1.9259151220321655,
|
8612 |
+
"logps/chosen": -30.317163467407227,
|
8613 |
+
"logps/rejected": -582.4378662109375,
|
8614 |
+
"loss": 12058.957,
|
8615 |
+
"rewards/accuracies": 0.987500011920929,
|
8616 |
+
"rewards/chosen": 0.20789256691932678,
|
8617 |
+
"rewards/margins": 0.5540691018104553,
|
8618 |
+
"rewards/rejected": -0.3461765944957733,
|
8619 |
+
"step": 5730
|
8620 |
+
},
|
8621 |
+
{
|
8622 |
+
"epoch": 29.10886075949367,
|
8623 |
+
"grad_norm": 330095.71026448795,
|
8624 |
+
"learning_rate": 1.0592290817925414e-07,
|
8625 |
+
"logits/chosen": -0.40818461775779724,
|
8626 |
+
"logits/rejected": -0.17450471222400665,
|
8627 |
+
"logps/chosen": -37.967308044433594,
|
8628 |
+
"logps/rejected": -574.5567626953125,
|
8629 |
+
"loss": 12163.9234,
|
8630 |
+
"rewards/accuracies": 0.9750000238418579,
|
8631 |
+
"rewards/chosen": 0.199508398771286,
|
8632 |
+
"rewards/margins": 0.5361432433128357,
|
8633 |
+
"rewards/rejected": -0.3366348147392273,
|
8634 |
+
"step": 5740
|
8635 |
+
},
|
8636 |
+
{
|
8637 |
+
"epoch": 29.159493670886075,
|
8638 |
+
"grad_norm": 207868.2185307626,
|
8639 |
+
"learning_rate": 1.0513945471638983e-07,
|
8640 |
+
"logits/chosen": -1.1228978633880615,
|
8641 |
+
"logits/rejected": -0.8512986302375793,
|
8642 |
+
"logps/chosen": -36.19347381591797,
|
8643 |
+
"logps/rejected": -572.5546264648438,
|
8644 |
+
"loss": 12217.475,
|
8645 |
+
"rewards/accuracies": 0.9750000238418579,
|
8646 |
+
"rewards/chosen": 0.20709916949272156,
|
8647 |
+
"rewards/margins": 0.5452824234962463,
|
8648 |
+
"rewards/rejected": -0.3381832540035248,
|
8649 |
+
"step": 5750
|
8650 |
+
},
|
8651 |
+
{
|
8652 |
+
"epoch": 29.21012658227848,
|
8653 |
+
"grad_norm": 180300.955366917,
|
8654 |
+
"learning_rate": 1.0435600125352554e-07,
|
8655 |
+
"logits/chosen": -2.1935715675354004,
|
8656 |
+
"logits/rejected": -1.450584888458252,
|
8657 |
+
"logps/chosen": -41.38114547729492,
|
8658 |
+
"logps/rejected": -551.9308471679688,
|
8659 |
+
"loss": 11531.2219,
|
8660 |
+
"rewards/accuracies": 0.9125000238418579,
|
8661 |
+
"rewards/chosen": 0.19797861576080322,
|
8662 |
+
"rewards/margins": 0.5134168267250061,
|
8663 |
+
"rewards/rejected": -0.3154382109642029,
|
8664 |
+
"step": 5760
|
8665 |
+
},
|
8666 |
+
{
|
8667 |
+
"epoch": 29.260759493670886,
|
8668 |
+
"grad_norm": 230065.76491246693,
|
8669 |
+
"learning_rate": 1.0357254779066123e-07,
|
8670 |
+
"logits/chosen": -2.1162705421447754,
|
8671 |
+
"logits/rejected": -1.343379020690918,
|
8672 |
+
"logps/chosen": -26.30475425720215,
|
8673 |
+
"logps/rejected": -584.0765380859375,
|
8674 |
+
"loss": 12178.225,
|
8675 |
+
"rewards/accuracies": 1.0,
|
8676 |
+
"rewards/chosen": 0.21349939703941345,
|
8677 |
+
"rewards/margins": 0.5615987181663513,
|
8678 |
+
"rewards/rejected": -0.34809932112693787,
|
8679 |
+
"step": 5770
|
8680 |
+
},
|
8681 |
+
{
|
8682 |
+
"epoch": 29.31139240506329,
|
8683 |
+
"grad_norm": 150891.5620522627,
|
8684 |
+
"learning_rate": 1.0278909432779692e-07,
|
8685 |
+
"logits/chosen": -0.6437171101570129,
|
8686 |
+
"logits/rejected": -0.06186608225107193,
|
8687 |
+
"logps/chosen": -32.27136993408203,
|
8688 |
+
"logps/rejected": -575.0911865234375,
|
8689 |
+
"loss": 12350.1367,
|
8690 |
+
"rewards/accuracies": 0.9624999761581421,
|
8691 |
+
"rewards/chosen": 0.20223280787467957,
|
8692 |
+
"rewards/margins": 0.5462868213653564,
|
8693 |
+
"rewards/rejected": -0.3440539240837097,
|
8694 |
+
"step": 5780
|
8695 |
+
},
|
8696 |
+
{
|
8697 |
+
"epoch": 29.362025316455696,
|
8698 |
+
"grad_norm": 268215.91577526846,
|
8699 |
+
"learning_rate": 1.0200564086493262e-07,
|
8700 |
+
"logits/chosen": -2.4000306129455566,
|
8701 |
+
"logits/rejected": -1.5239673852920532,
|
8702 |
+
"logps/chosen": -44.228759765625,
|
8703 |
+
"logps/rejected": -603.037109375,
|
8704 |
+
"loss": 11602.7789,
|
8705 |
+
"rewards/accuracies": 0.987500011920929,
|
8706 |
+
"rewards/chosen": 0.21766121685504913,
|
8707 |
+
"rewards/margins": 0.5693429112434387,
|
8708 |
+
"rewards/rejected": -0.3516816794872284,
|
8709 |
+
"step": 5790
|
8710 |
+
},
|
8711 |
+
{
|
8712 |
+
"epoch": 29.4126582278481,
|
8713 |
+
"grad_norm": 153754.6030127712,
|
8714 |
+
"learning_rate": 1.0122218740206831e-07,
|
8715 |
+
"logits/chosen": 1.1010842323303223,
|
8716 |
+
"logits/rejected": 1.6098358631134033,
|
8717 |
+
"logps/chosen": -25.794830322265625,
|
8718 |
+
"logps/rejected": -580.6827392578125,
|
8719 |
+
"loss": 12135.457,
|
8720 |
+
"rewards/accuracies": 0.987500011920929,
|
8721 |
+
"rewards/chosen": 0.20671968162059784,
|
8722 |
+
"rewards/margins": 0.5531316995620728,
|
8723 |
+
"rewards/rejected": -0.3464120328426361,
|
8724 |
+
"step": 5800
|
8725 |
+
},
|
8726 |
+
{
|
8727 |
+
"epoch": 29.463291139240507,
|
8728 |
+
"grad_norm": 237857.15032498536,
|
8729 |
+
"learning_rate": 1.00438733939204e-07,
|
8730 |
+
"logits/chosen": -2.2038140296936035,
|
8731 |
+
"logits/rejected": -1.9258426427841187,
|
8732 |
+
"logps/chosen": -24.270652770996094,
|
8733 |
+
"logps/rejected": -592.76806640625,
|
8734 |
+
"loss": 12368.1,
|
8735 |
+
"rewards/accuracies": 1.0,
|
8736 |
+
"rewards/chosen": 0.20859424769878387,
|
8737 |
+
"rewards/margins": 0.5708917379379272,
|
8738 |
+
"rewards/rejected": -0.3622974455356598,
|
8739 |
+
"step": 5810
|
8740 |
+
},
|
8741 |
+
{
|
8742 |
+
"epoch": 29.513924050632912,
|
8743 |
+
"grad_norm": 229363.27347544604,
|
8744 |
+
"learning_rate": 9.96552804763397e-08,
|
8745 |
+
"logits/chosen": -1.733412742614746,
|
8746 |
+
"logits/rejected": -1.8426891565322876,
|
8747 |
+
"logps/chosen": -27.749902725219727,
|
8748 |
+
"logps/rejected": -591.9719848632812,
|
8749 |
+
"loss": 12434.6094,
|
8750 |
+
"rewards/accuracies": 1.0,
|
8751 |
+
"rewards/chosen": 0.20853643119335175,
|
8752 |
+
"rewards/margins": 0.559594452381134,
|
8753 |
+
"rewards/rejected": -0.3510579764842987,
|
8754 |
+
"step": 5820
|
8755 |
+
},
|
8756 |
+
{
|
8757 |
+
"epoch": 29.564556962025318,
|
8758 |
+
"grad_norm": 204423.82729459935,
|
8759 |
+
"learning_rate": 9.887182701347539e-08,
|
8760 |
+
"logits/chosen": -0.8372312784194946,
|
8761 |
+
"logits/rejected": -0.9436752200126648,
|
8762 |
+
"logps/chosen": -23.713529586791992,
|
8763 |
+
"logps/rejected": -551.91748046875,
|
8764 |
+
"loss": 12191.0797,
|
8765 |
+
"rewards/accuracies": 0.987500011920929,
|
8766 |
+
"rewards/chosen": 0.1975078582763672,
|
8767 |
+
"rewards/margins": 0.5233575105667114,
|
8768 |
+
"rewards/rejected": -0.32584962248802185,
|
8769 |
+
"step": 5830
|
8770 |
+
},
|
8771 |
+
{
|
8772 |
+
"epoch": 29.615189873417723,
|
8773 |
+
"grad_norm": 196500.42803475718,
|
8774 |
+
"learning_rate": 9.808837355061108e-08,
|
8775 |
+
"logits/chosen": -0.07084647566080093,
|
8776 |
+
"logits/rejected": 0.9050701856613159,
|
8777 |
+
"logps/chosen": -29.59817886352539,
|
8778 |
+
"logps/rejected": -567.6174926757812,
|
8779 |
+
"loss": 12194.2234,
|
8780 |
+
"rewards/accuracies": 0.9750000238418579,
|
8781 |
+
"rewards/chosen": 0.20393919944763184,
|
8782 |
+
"rewards/margins": 0.5439929366111755,
|
8783 |
+
"rewards/rejected": -0.3400537371635437,
|
8784 |
+
"step": 5840
|
8785 |
+
},
|
8786 |
+
{
|
8787 |
+
"epoch": 29.665822784810125,
|
8788 |
+
"grad_norm": 226455.28104673527,
|
8789 |
+
"learning_rate": 9.730492008774677e-08,
|
8790 |
+
"logits/chosen": -3.320272445678711,
|
8791 |
+
"logits/rejected": -3.3560733795166016,
|
8792 |
+
"logps/chosen": -28.402095794677734,
|
8793 |
+
"logps/rejected": -602.0023193359375,
|
8794 |
+
"loss": 12657.2406,
|
8795 |
+
"rewards/accuracies": 1.0,
|
8796 |
+
"rewards/chosen": 0.2187713086605072,
|
8797 |
+
"rewards/margins": 0.5724385976791382,
|
8798 |
+
"rewards/rejected": -0.35366731882095337,
|
8799 |
+
"step": 5850
|
8800 |
+
},
|
8801 |
+
{
|
8802 |
+
"epoch": 29.71645569620253,
|
8803 |
+
"grad_norm": 162035.60177504522,
|
8804 |
+
"learning_rate": 9.652146662488248e-08,
|
8805 |
+
"logits/chosen": -1.8201286792755127,
|
8806 |
+
"logits/rejected": -1.7938740253448486,
|
8807 |
+
"logps/chosen": -35.96394348144531,
|
8808 |
+
"logps/rejected": -611.4141845703125,
|
8809 |
+
"loss": 12011.9406,
|
8810 |
+
"rewards/accuracies": 1.0,
|
8811 |
+
"rewards/chosen": 0.21413405239582062,
|
8812 |
+
"rewards/margins": 0.5712839365005493,
|
8813 |
+
"rewards/rejected": -0.3571499288082123,
|
8814 |
+
"step": 5860
|
8815 |
+
},
|
8816 |
+
{
|
8817 |
+
"epoch": 29.767088607594935,
|
8818 |
+
"grad_norm": 162090.09030278528,
|
8819 |
+
"learning_rate": 9.573801316201817e-08,
|
8820 |
+
"logits/chosen": -0.6652274131774902,
|
8821 |
+
"logits/rejected": -0.600281834602356,
|
8822 |
+
"logps/chosen": -24.422576904296875,
|
8823 |
+
"logps/rejected": -566.0366821289062,
|
8824 |
+
"loss": 12593.6359,
|
8825 |
+
"rewards/accuracies": 0.9750000238418579,
|
8826 |
+
"rewards/chosen": 0.1961481273174286,
|
8827 |
+
"rewards/margins": 0.5393214821815491,
|
8828 |
+
"rewards/rejected": -0.3431733250617981,
|
8829 |
+
"step": 5870
|
8830 |
+
},
|
8831 |
+
{
|
8832 |
+
"epoch": 29.81772151898734,
|
8833 |
+
"grad_norm": 365229.93961962714,
|
8834 |
+
"learning_rate": 9.495455969915387e-08,
|
8835 |
+
"logits/chosen": -2.613847017288208,
|
8836 |
+
"logits/rejected": -2.108478546142578,
|
8837 |
+
"logps/chosen": -29.573253631591797,
|
8838 |
+
"logps/rejected": -577.60546875,
|
8839 |
+
"loss": 12424.4891,
|
8840 |
+
"rewards/accuracies": 1.0,
|
8841 |
+
"rewards/chosen": 0.20539173483848572,
|
8842 |
+
"rewards/margins": 0.5470829010009766,
|
8843 |
+
"rewards/rejected": -0.34169113636016846,
|
8844 |
+
"step": 5880
|
8845 |
+
},
|
8846 |
+
{
|
8847 |
+
"epoch": 29.868354430379746,
|
8848 |
+
"grad_norm": 173325.82955161307,
|
8849 |
+
"learning_rate": 9.417110623628956e-08,
|
8850 |
+
"logits/chosen": -1.4006824493408203,
|
8851 |
+
"logits/rejected": -0.5856371521949768,
|
8852 |
+
"logps/chosen": -27.345510482788086,
|
8853 |
+
"logps/rejected": -584.8424072265625,
|
8854 |
+
"loss": 12358.3133,
|
8855 |
+
"rewards/accuracies": 0.987500011920929,
|
8856 |
+
"rewards/chosen": 0.2079104632139206,
|
8857 |
+
"rewards/margins": 0.5603929758071899,
|
8858 |
+
"rewards/rejected": -0.35248249769210815,
|
8859 |
+
"step": 5890
|
8860 |
+
},
|
8861 |
+
{
|
8862 |
+
"epoch": 29.91898734177215,
|
8863 |
+
"grad_norm": 287432.0969704827,
|
8864 |
+
"learning_rate": 9.338765277342525e-08,
|
8865 |
+
"logits/chosen": -0.21508927643299103,
|
8866 |
+
"logits/rejected": -0.1394989937543869,
|
8867 |
+
"logps/chosen": -30.839313507080078,
|
8868 |
+
"logps/rejected": -594.2600708007812,
|
8869 |
+
"loss": 11980.4219,
|
8870 |
+
"rewards/accuracies": 1.0,
|
8871 |
+
"rewards/chosen": 0.21064691245555878,
|
8872 |
+
"rewards/margins": 0.5655493140220642,
|
8873 |
+
"rewards/rejected": -0.354902446269989,
|
8874 |
+
"step": 5900
|
8875 |
+
},
|
8876 |
+
{
|
8877 |
+
"epoch": 29.969620253164557,
|
8878 |
+
"grad_norm": 365207.2969153869,
|
8879 |
+
"learning_rate": 9.260419931056094e-08,
|
8880 |
+
"logits/chosen": -0.40759915113449097,
|
8881 |
+
"logits/rejected": 0.3133270740509033,
|
8882 |
+
"logps/chosen": -25.633676528930664,
|
8883 |
+
"logps/rejected": -578.2957763671875,
|
8884 |
+
"loss": 12223.2844,
|
8885 |
+
"rewards/accuracies": 0.987500011920929,
|
8886 |
+
"rewards/chosen": 0.20843228697776794,
|
8887 |
+
"rewards/margins": 0.5534237027168274,
|
8888 |
+
"rewards/rejected": -0.34499144554138184,
|
8889 |
+
"step": 5910
|
8890 |
}
|
8891 |
],
|
8892 |
"logging_steps": 10,
|