Training in progress, epoch 4, checkpoint
Browse files- last-checkpoint/global_step790/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step790/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step790/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ebab2c06b29452f6de93c8bffdef75f42f32a9d435d5c9e37f3e1ac9543ab37
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:926a4ce57de0b6b06c925063ca470c3ca68122b9c5724a355b7c3fe7910d20fc
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75a93542c0f12727b123d1d54335bf8097436511a3587dd0905ebe9d09ed2f06
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4d8c48f03e6556d2d2606e9707b476bd4bb5262212f4150977fb1e42ed1df0f
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:772b1ea3808f5fc20fdf22edd66e1ae4ae4ef34605b2ebe5e1745d5e14d55671
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d625662771aba327b77816ceed0565d20080a0c5a305c3e2248fbcebfa2bb063
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:180938c313e2814458e72fc67603d6ba0a533c8dae500fbefbc94f1e0b7b0720
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6cc16a9757eb5a6a23c61fb2323541d1567d476e1d39be2964eb4a984e0555a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step790/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06f0ca6d02959847dc60015859018bd1ea389bedd8e3efcd16cdc28802f5c321
|
3 |
+
size 85570
|
last-checkpoint/global_step790/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e302486d8f9cafaf089fddf7804b4384a3dea043c10f31dbd4d21a8b1ac3eb75
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b0e31d7289e9670875e12ade9d5b693da2855f0920ccb85863e84b8af9ca363
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02305061948a94bf55878779df7195ad8c84c7e1280ac3482e11389e587bc1a7
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e5057064fdcdd6528512c1dc18a1216d398b90bdde5bfc7e73b5e0457ba1e6d
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51a5b62db0169425bf97984191b8caec2b640adf6d7f112b0230e10b92a7e044
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0f9dbbfa290fd66199bc446429e9e6a11e5ed24bb2a70c8f85b2bc5de7ac4ff
|
3 |
+
size 85506
|
last-checkpoint/global_step790/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8543e807f4dc246ae05e1758933f3b6c81ed5b3d4e8ab40ae76ccb4d84c05b54
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step790
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ee3f893a00b883ea2140dd40c1fb5676a8e4b4c39e9f77ab126e1a38a9c2786
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:391cf44d17535a2b42d0a567444b9ca191b26d208e2891eee80e248f7f6c3747
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36d2a2034ebb05cb71c510897f2795b31164e50f17b270bc25d2be3ad9a17b22
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:060dfdb1c49102cbdc8868a6031e68787601b4ccd782f3fb9b137e20c1fd2c7a
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af01895cb66e616591f2e4baa8dcd8151530eab133c73571ccb31c74f35422ce
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:677921992b1e0cef3aee776f245975003d22f51d9bd6ed20f248ded1deb72fa9
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d69353c629541c690c5471f8ec05fdab2bfecf3d37afaa436bc45939da6db68f
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e40ba6668cc03c9162c68a933d164bf38ae2d196a9a6fec03ae615491201185
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:870968fea834e24b2e099cf3e4fe1e3fb8caf38d8f8e5b790d7d47386d4d05f5
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9e19618bee7c6ef43256fea25abe19bca88535eb1e7dc213cde8929ae4e8180
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b9e01fb8119366f950b23568c9c5eaa6d3e352534620301a9291190e4d0ef8f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -907,6 +907,306 @@
|
|
907 |
"rewards/margins": 0.053022872656583786,
|
908 |
"rewards/rejected": -0.04732084274291992,
|
909 |
"step": 590
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
910 |
}
|
911 |
],
|
912 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 790,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
907 |
"rewards/margins": 0.053022872656583786,
|
908 |
"rewards/rejected": -0.04732084274291992,
|
909 |
"step": 590
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 3.037974683544304,
|
913 |
+
"grad_norm": 1435515.2852262415,
|
914 |
+
"learning_rate": 4.61610780319649e-07,
|
915 |
+
"logits/chosen": -7.956998348236084,
|
916 |
+
"logits/rejected": -7.496169090270996,
|
917 |
+
"logps/chosen": -219.92410278320312,
|
918 |
+
"logps/rejected": -310.20123291015625,
|
919 |
+
"loss": 95986.4875,
|
920 |
+
"rewards/accuracies": 0.9624999761581421,
|
921 |
+
"rewards/chosen": 0.016220271587371826,
|
922 |
+
"rewards/margins": 0.09167212247848511,
|
923 |
+
"rewards/rejected": -0.07545184344053268,
|
924 |
+
"step": 600
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"epoch": 3.088607594936709,
|
928 |
+
"grad_norm": 1646011.901841717,
|
929 |
+
"learning_rate": 4.6004387339392035e-07,
|
930 |
+
"logits/chosen": -7.747580051422119,
|
931 |
+
"logits/rejected": -7.5227952003479,
|
932 |
+
"logps/chosen": -217.8295440673828,
|
933 |
+
"logps/rejected": -343.4312438964844,
|
934 |
+
"loss": 91538.925,
|
935 |
+
"rewards/accuracies": 0.9624999761581421,
|
936 |
+
"rewards/chosen": 0.02667585015296936,
|
937 |
+
"rewards/margins": 0.12547221779823303,
|
938 |
+
"rewards/rejected": -0.09879636764526367,
|
939 |
+
"step": 610
|
940 |
+
},
|
941 |
+
{
|
942 |
+
"epoch": 3.1392405063291138,
|
943 |
+
"grad_norm": 1631989.4144731541,
|
944 |
+
"learning_rate": 4.5847696646819176e-07,
|
945 |
+
"logits/chosen": -6.8127121925354,
|
946 |
+
"logits/rejected": -6.8090972900390625,
|
947 |
+
"logps/chosen": -209.46859741210938,
|
948 |
+
"logps/rejected": -332.0594482421875,
|
949 |
+
"loss": 92242.9,
|
950 |
+
"rewards/accuracies": 0.949999988079071,
|
951 |
+
"rewards/chosen": 0.026208167895674706,
|
952 |
+
"rewards/margins": 0.12268342822790146,
|
953 |
+
"rewards/rejected": -0.0964752584695816,
|
954 |
+
"step": 620
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"epoch": 3.189873417721519,
|
958 |
+
"grad_norm": 1627589.9925143481,
|
959 |
+
"learning_rate": 4.569100595424631e-07,
|
960 |
+
"logits/chosen": -6.631221771240234,
|
961 |
+
"logits/rejected": -6.502354621887207,
|
962 |
+
"logps/chosen": -211.57974243164062,
|
963 |
+
"logps/rejected": -333.447265625,
|
964 |
+
"loss": 89921.25,
|
965 |
+
"rewards/accuracies": 0.9624999761581421,
|
966 |
+
"rewards/chosen": 0.022689208388328552,
|
967 |
+
"rewards/margins": 0.12395058572292328,
|
968 |
+
"rewards/rejected": -0.10126137733459473,
|
969 |
+
"step": 630
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"epoch": 3.240506329113924,
|
973 |
+
"grad_norm": 1780107.5787213328,
|
974 |
+
"learning_rate": 4.5534315261673453e-07,
|
975 |
+
"logits/chosen": -7.868208885192871,
|
976 |
+
"logits/rejected": -7.755393981933594,
|
977 |
+
"logps/chosen": -209.3970184326172,
|
978 |
+
"logps/rejected": -341.9508056640625,
|
979 |
+
"loss": 89608.1875,
|
980 |
+
"rewards/accuracies": 0.9375,
|
981 |
+
"rewards/chosen": 0.027028566226363182,
|
982 |
+
"rewards/margins": 0.133165642619133,
|
983 |
+
"rewards/rejected": -0.10613708198070526,
|
984 |
+
"step": 640
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"epoch": 3.291139240506329,
|
988 |
+
"grad_norm": 1730512.4518714033,
|
989 |
+
"learning_rate": 4.5377624569100595e-07,
|
990 |
+
"logits/chosen": -7.359053134918213,
|
991 |
+
"logits/rejected": -7.324367523193359,
|
992 |
+
"logps/chosen": -193.1954803466797,
|
993 |
+
"logps/rejected": -309.5513610839844,
|
994 |
+
"loss": 93257.225,
|
995 |
+
"rewards/accuracies": 0.949999988079071,
|
996 |
+
"rewards/chosen": 0.028996175155043602,
|
997 |
+
"rewards/margins": 0.11760006099939346,
|
998 |
+
"rewards/rejected": -0.08860386908054352,
|
999 |
+
"step": 650
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"epoch": 3.3417721518987342,
|
1003 |
+
"grad_norm": 1692816.769511115,
|
1004 |
+
"learning_rate": 4.5220933876527736e-07,
|
1005 |
+
"logits/chosen": -8.043203353881836,
|
1006 |
+
"logits/rejected": -8.003018379211426,
|
1007 |
+
"logps/chosen": -211.73648071289062,
|
1008 |
+
"logps/rejected": -336.10455322265625,
|
1009 |
+
"loss": 88400.4688,
|
1010 |
+
"rewards/accuracies": 0.887499988079071,
|
1011 |
+
"rewards/chosen": 0.024640550836920738,
|
1012 |
+
"rewards/margins": 0.12655004858970642,
|
1013 |
+
"rewards/rejected": -0.10190950334072113,
|
1014 |
+
"step": 660
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 3.392405063291139,
|
1018 |
+
"grad_norm": 1906377.7496358757,
|
1019 |
+
"learning_rate": 4.506424318395487e-07,
|
1020 |
+
"logits/chosen": -7.25619649887085,
|
1021 |
+
"logits/rejected": -7.37869119644165,
|
1022 |
+
"logps/chosen": -197.8258819580078,
|
1023 |
+
"logps/rejected": -324.2138671875,
|
1024 |
+
"loss": 89983.5688,
|
1025 |
+
"rewards/accuracies": 0.949999988079071,
|
1026 |
+
"rewards/chosen": 0.026263948529958725,
|
1027 |
+
"rewards/margins": 0.12702925503253937,
|
1028 |
+
"rewards/rejected": -0.10076530277729034,
|
1029 |
+
"step": 670
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"epoch": 3.4430379746835444,
|
1033 |
+
"grad_norm": 1785643.0594316572,
|
1034 |
+
"learning_rate": 4.4907552491382013e-07,
|
1035 |
+
"logits/chosen": -6.798577785491943,
|
1036 |
+
"logits/rejected": -6.7768073081970215,
|
1037 |
+
"logps/chosen": -208.5835723876953,
|
1038 |
+
"logps/rejected": -323.3017883300781,
|
1039 |
+
"loss": 89767.5,
|
1040 |
+
"rewards/accuracies": 0.9375,
|
1041 |
+
"rewards/chosen": 0.025741413235664368,
|
1042 |
+
"rewards/margins": 0.1167701929807663,
|
1043 |
+
"rewards/rejected": -0.09102877229452133,
|
1044 |
+
"step": 680
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"epoch": 3.4936708860759493,
|
1048 |
+
"grad_norm": 2393957.296937455,
|
1049 |
+
"learning_rate": 4.475086179880915e-07,
|
1050 |
+
"logits/chosen": -6.352355480194092,
|
1051 |
+
"logits/rejected": -6.526197910308838,
|
1052 |
+
"logps/chosen": -187.56597900390625,
|
1053 |
+
"logps/rejected": -306.5972595214844,
|
1054 |
+
"loss": 89036.6875,
|
1055 |
+
"rewards/accuracies": 0.925000011920929,
|
1056 |
+
"rewards/chosen": 0.024061182513833046,
|
1057 |
+
"rewards/margins": 0.11990946531295776,
|
1058 |
+
"rewards/rejected": -0.09584827721118927,
|
1059 |
+
"step": 690
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"epoch": 3.5443037974683547,
|
1063 |
+
"grad_norm": 1811486.2204670438,
|
1064 |
+
"learning_rate": 4.459417110623629e-07,
|
1065 |
+
"logits/chosen": -5.7466630935668945,
|
1066 |
+
"logits/rejected": -5.797163486480713,
|
1067 |
+
"logps/chosen": -212.6585235595703,
|
1068 |
+
"logps/rejected": -364.36199951171875,
|
1069 |
+
"loss": 88031.3,
|
1070 |
+
"rewards/accuracies": 0.9750000238418579,
|
1071 |
+
"rewards/chosen": 0.027677077800035477,
|
1072 |
+
"rewards/margins": 0.14764061570167542,
|
1073 |
+
"rewards/rejected": -0.11996352672576904,
|
1074 |
+
"step": 700
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"epoch": 3.5949367088607596,
|
1078 |
+
"grad_norm": 1724684.5755440604,
|
1079 |
+
"learning_rate": 4.4437480413663426e-07,
|
1080 |
+
"logits/chosen": -5.412962436676025,
|
1081 |
+
"logits/rejected": -5.541121959686279,
|
1082 |
+
"logps/chosen": -202.39065551757812,
|
1083 |
+
"logps/rejected": -333.0758056640625,
|
1084 |
+
"loss": 86956.675,
|
1085 |
+
"rewards/accuracies": 0.9750000238418579,
|
1086 |
+
"rewards/chosen": 0.0247800350189209,
|
1087 |
+
"rewards/margins": 0.12825721502304077,
|
1088 |
+
"rewards/rejected": -0.10347716510295868,
|
1089 |
+
"step": 710
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"epoch": 3.6455696202531644,
|
1093 |
+
"grad_norm": 1933271.7611355048,
|
1094 |
+
"learning_rate": 4.4280789721090567e-07,
|
1095 |
+
"logits/chosen": -5.053005218505859,
|
1096 |
+
"logits/rejected": -4.886711597442627,
|
1097 |
+
"logps/chosen": -199.10885620117188,
|
1098 |
+
"logps/rejected": -317.7257385253906,
|
1099 |
+
"loss": 86655.0125,
|
1100 |
+
"rewards/accuracies": 0.9125000238418579,
|
1101 |
+
"rewards/chosen": 0.02152046002447605,
|
1102 |
+
"rewards/margins": 0.11774978786706924,
|
1103 |
+
"rewards/rejected": -0.09622932970523834,
|
1104 |
+
"step": 720
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"epoch": 3.6962025316455698,
|
1108 |
+
"grad_norm": 2267463.489494214,
|
1109 |
+
"learning_rate": 4.4124099028517703e-07,
|
1110 |
+
"logits/chosen": -6.616279602050781,
|
1111 |
+
"logits/rejected": -6.9615797996521,
|
1112 |
+
"logps/chosen": -200.58961486816406,
|
1113 |
+
"logps/rejected": -351.6376953125,
|
1114 |
+
"loss": 86181.3938,
|
1115 |
+
"rewards/accuracies": 0.9750000238418579,
|
1116 |
+
"rewards/chosen": 0.032253801822662354,
|
1117 |
+
"rewards/margins": 0.14937567710876465,
|
1118 |
+
"rewards/rejected": -0.1171218603849411,
|
1119 |
+
"step": 730
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"epoch": 3.7468354430379747,
|
1123 |
+
"grad_norm": 1734288.0953653858,
|
1124 |
+
"learning_rate": 4.3967408335944844e-07,
|
1125 |
+
"logits/chosen": -5.873335361480713,
|
1126 |
+
"logits/rejected": -5.689335823059082,
|
1127 |
+
"logps/chosen": -217.43637084960938,
|
1128 |
+
"logps/rejected": -350.2752990722656,
|
1129 |
+
"loss": 86780.825,
|
1130 |
+
"rewards/accuracies": 0.9624999761581421,
|
1131 |
+
"rewards/chosen": 0.031159091740846634,
|
1132 |
+
"rewards/margins": 0.13692796230316162,
|
1133 |
+
"rewards/rejected": -0.10576887428760529,
|
1134 |
+
"step": 740
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"epoch": 3.7974683544303796,
|
1138 |
+
"grad_norm": 1741715.9901586007,
|
1139 |
+
"learning_rate": 4.381071764337198e-07,
|
1140 |
+
"logits/chosen": -7.123785972595215,
|
1141 |
+
"logits/rejected": -7.188807487487793,
|
1142 |
+
"logps/chosen": -207.00045776367188,
|
1143 |
+
"logps/rejected": -336.5976867675781,
|
1144 |
+
"loss": 86139.5625,
|
1145 |
+
"rewards/accuracies": 0.9375,
|
1146 |
+
"rewards/chosen": 0.03052128478884697,
|
1147 |
+
"rewards/margins": 0.13043463230133057,
|
1148 |
+
"rewards/rejected": -0.0999133437871933,
|
1149 |
+
"step": 750
|
1150 |
+
},
|
1151 |
+
{
|
1152 |
+
"epoch": 3.848101265822785,
|
1153 |
+
"grad_norm": 1879351.8394690978,
|
1154 |
+
"learning_rate": 4.365402695079912e-07,
|
1155 |
+
"logits/chosen": -7.820990085601807,
|
1156 |
+
"logits/rejected": -7.7128729820251465,
|
1157 |
+
"logps/chosen": -213.57388305664062,
|
1158 |
+
"logps/rejected": -362.5634460449219,
|
1159 |
+
"loss": 87478.3625,
|
1160 |
+
"rewards/accuracies": 0.987500011920929,
|
1161 |
+
"rewards/chosen": 0.03660900145769119,
|
1162 |
+
"rewards/margins": 0.1480773240327835,
|
1163 |
+
"rewards/rejected": -0.11146833002567291,
|
1164 |
+
"step": 760
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 3.8987341772151898,
|
1168 |
+
"grad_norm": 1968713.4204386624,
|
1169 |
+
"learning_rate": 4.349733625822626e-07,
|
1170 |
+
"logits/chosen": -7.314540863037109,
|
1171 |
+
"logits/rejected": -7.363668918609619,
|
1172 |
+
"logps/chosen": -213.6930694580078,
|
1173 |
+
"logps/rejected": -367.44073486328125,
|
1174 |
+
"loss": 86825.5813,
|
1175 |
+
"rewards/accuracies": 0.949999988079071,
|
1176 |
+
"rewards/chosen": 0.026752913370728493,
|
1177 |
+
"rewards/margins": 0.15061405301094055,
|
1178 |
+
"rewards/rejected": -0.1238611489534378,
|
1179 |
+
"step": 770
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 3.9493670886075947,
|
1183 |
+
"grad_norm": 2163439.406665409,
|
1184 |
+
"learning_rate": 4.33406455656534e-07,
|
1185 |
+
"logits/chosen": -7.67099666595459,
|
1186 |
+
"logits/rejected": -7.536408424377441,
|
1187 |
+
"logps/chosen": -213.9747772216797,
|
1188 |
+
"logps/rejected": -344.7560119628906,
|
1189 |
+
"loss": 86913.0375,
|
1190 |
+
"rewards/accuracies": 0.925000011920929,
|
1191 |
+
"rewards/chosen": 0.029844319447875023,
|
1192 |
+
"rewards/margins": 0.12930825352668762,
|
1193 |
+
"rewards/rejected": -0.09946390986442566,
|
1194 |
+
"step": 780
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"epoch": 4.0,
|
1198 |
+
"grad_norm": 1866234.1823014135,
|
1199 |
+
"learning_rate": 4.3183954873080535e-07,
|
1200 |
+
"logits/chosen": -7.922532081604004,
|
1201 |
+
"logits/rejected": -7.692726135253906,
|
1202 |
+
"logps/chosen": -211.41653442382812,
|
1203 |
+
"logps/rejected": -349.7116394042969,
|
1204 |
+
"loss": 86592.8938,
|
1205 |
+
"rewards/accuracies": 0.9624999761581421,
|
1206 |
+
"rewards/chosen": 0.027728911489248276,
|
1207 |
+
"rewards/margins": 0.1435452550649643,
|
1208 |
+
"rewards/rejected": -0.11581633985042572,
|
1209 |
+
"step": 790
|
1210 |
}
|
1211 |
],
|
1212 |
"logging_steps": 10,
|