cat-searcher committed on
Commit
dec42a0
·
verified ·
1 Parent(s): d150634

Training in progress, epoch 4, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step790/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step790/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step790/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step790/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step790/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step790/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step790/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step790/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step790/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step790/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step790/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step790/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step790/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step790/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step790/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step790/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step790/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ebab2c06b29452f6de93c8bffdef75f42f32a9d435d5c9e37f3e1ac9543ab37
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926a4ce57de0b6b06c925063ca470c3ca68122b9c5724a355b7c3fe7910d20fc
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a93542c0f12727b123d1d54335bf8097436511a3587dd0905ebe9d09ed2f06
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4d8c48f03e6556d2d2606e9707b476bd4bb5262212f4150977fb1e42ed1df0f
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772b1ea3808f5fc20fdf22edd66e1ae4ae4ef34605b2ebe5e1745d5e14d55671
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d625662771aba327b77816ceed0565d20080a0c5a305c3e2248fbcebfa2bb063
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180938c313e2814458e72fc67603d6ba0a533c8dae500fbefbc94f1e0b7b0720
3
+ size 2506176112
last-checkpoint/global_step790/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6cc16a9757eb5a6a23c61fb2323541d1567d476e1d39be2964eb4a984e0555a
3
+ size 2506176112
last-checkpoint/global_step790/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06f0ca6d02959847dc60015859018bd1ea389bedd8e3efcd16cdc28802f5c321
3
+ size 85570
last-checkpoint/global_step790/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e302486d8f9cafaf089fddf7804b4384a3dea043c10f31dbd4d21a8b1ac3eb75
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0e31d7289e9670875e12ade9d5b693da2855f0920ccb85863e84b8af9ca363
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02305061948a94bf55878779df7195ad8c84c7e1280ac3482e11389e587bc1a7
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e5057064fdcdd6528512c1dc18a1216d398b90bdde5bfc7e73b5e0457ba1e6d
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a5b62db0169425bf97984191b8caec2b640adf6d7f112b0230e10b92a7e044
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f9dbbfa290fd66199bc446429e9e6a11e5ed24bb2a70c8f85b2bc5de7ac4ff
3
+ size 85506
last-checkpoint/global_step790/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8543e807f4dc246ae05e1758933f3b6c81ed5b3d4e8ab40ae76ccb4d84c05b54
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step592
 
1
+ global_step790
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3134047f2f0f5b6606ba7f39ebac7011215d2546becd91f868f44e1a3a2a36f
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee3f893a00b883ea2140dd40c1fb5676a8e4b4c39e9f77ab126e1a38a9c2786
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3d94a5b51729f4e9144b68f58645cf9e4208ff217517d5c899baafc7b82b472
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391cf44d17535a2b42d0a567444b9ca191b26d208e2891eee80e248f7f6c3747
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08282b46825aa78d10fe10e3fea89555c5b5a691b261a3ddfd58fcb58370edff
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36d2a2034ebb05cb71c510897f2795b31164e50f17b270bc25d2be3ad9a17b22
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbab71d98a3a9a92df82a6bba463947327c3a1bcf35cd9f4f46114641fc42dd9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060dfdb1c49102cbdc8868a6031e68787601b4ccd782f3fb9b137e20c1fd2c7a
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caac82d57d878d30219a4f9ec289a97ff90c53afc160b968f251b3fd3454b8d8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af01895cb66e616591f2e4baa8dcd8151530eab133c73571ccb31c74f35422ce
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19762d2d370222b01817da11bbaa6665d542293373186d66f754e7246bb861ed
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677921992b1e0cef3aee776f245975003d22f51d9bd6ed20f248ded1deb72fa9
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00c7508b346a7d3c5c23392845f1d013331114ade778794b76e919cb3ed5d33e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d69353c629541c690c5471f8ec05fdab2bfecf3d37afaa436bc45939da6db68f
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b89de7d14dd20a191f56b74c816ef8b7fe5c171e31efbeadbf321c4539ed68c3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e40ba6668cc03c9162c68a933d164bf38ae2d196a9a6fec03ae615491201185
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c71152053553e6e22d670fbc4fd7550bf8a046b54cad7b71869787986a6a42c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:870968fea834e24b2e099cf3e4fe1e3fb8caf38d8f8e5b790d7d47386d4d05f5
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b67db12a26a26ffe03d9afc84a43857eb2e5b2fec2dd189653b415f74208190
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9e19618bee7c6ef43256fea25abe19bca88535eb1e7dc213cde8929ae4e8180
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e8d6cb171d4563e280d4ca4761e24d8366d2865f6f1dda2bd2597b587d30c2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9e01fb8119366f950b23568c9c5eaa6d3e352534620301a9291190e4d0ef8f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9974683544303797,
5
  "eval_steps": 100,
6
- "global_step": 592,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -907,6 +907,306 @@
907
  "rewards/margins": 0.053022872656583786,
908
  "rewards/rejected": -0.04732084274291992,
909
  "step": 590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  }
911
  ],
912
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 100,
6
+ "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
907
  "rewards/margins": 0.053022872656583786,
908
  "rewards/rejected": -0.04732084274291992,
909
  "step": 590
910
+ },
911
+ {
912
+ "epoch": 3.037974683544304,
913
+ "grad_norm": 1435515.2852262415,
914
+ "learning_rate": 4.61610780319649e-07,
915
+ "logits/chosen": -7.956998348236084,
916
+ "logits/rejected": -7.496169090270996,
917
+ "logps/chosen": -219.92410278320312,
918
+ "logps/rejected": -310.20123291015625,
919
+ "loss": 95986.4875,
920
+ "rewards/accuracies": 0.9624999761581421,
921
+ "rewards/chosen": 0.016220271587371826,
922
+ "rewards/margins": 0.09167212247848511,
923
+ "rewards/rejected": -0.07545184344053268,
924
+ "step": 600
925
+ },
926
+ {
927
+ "epoch": 3.088607594936709,
928
+ "grad_norm": 1646011.901841717,
929
+ "learning_rate": 4.6004387339392035e-07,
930
+ "logits/chosen": -7.747580051422119,
931
+ "logits/rejected": -7.5227952003479,
932
+ "logps/chosen": -217.8295440673828,
933
+ "logps/rejected": -343.4312438964844,
934
+ "loss": 91538.925,
935
+ "rewards/accuracies": 0.9624999761581421,
936
+ "rewards/chosen": 0.02667585015296936,
937
+ "rewards/margins": 0.12547221779823303,
938
+ "rewards/rejected": -0.09879636764526367,
939
+ "step": 610
940
+ },
941
+ {
942
+ "epoch": 3.1392405063291138,
943
+ "grad_norm": 1631989.4144731541,
944
+ "learning_rate": 4.5847696646819176e-07,
945
+ "logits/chosen": -6.8127121925354,
946
+ "logits/rejected": -6.8090972900390625,
947
+ "logps/chosen": -209.46859741210938,
948
+ "logps/rejected": -332.0594482421875,
949
+ "loss": 92242.9,
950
+ "rewards/accuracies": 0.949999988079071,
951
+ "rewards/chosen": 0.026208167895674706,
952
+ "rewards/margins": 0.12268342822790146,
953
+ "rewards/rejected": -0.0964752584695816,
954
+ "step": 620
955
+ },
956
+ {
957
+ "epoch": 3.189873417721519,
958
+ "grad_norm": 1627589.9925143481,
959
+ "learning_rate": 4.569100595424631e-07,
960
+ "logits/chosen": -6.631221771240234,
961
+ "logits/rejected": -6.502354621887207,
962
+ "logps/chosen": -211.57974243164062,
963
+ "logps/rejected": -333.447265625,
964
+ "loss": 89921.25,
965
+ "rewards/accuracies": 0.9624999761581421,
966
+ "rewards/chosen": 0.022689208388328552,
967
+ "rewards/margins": 0.12395058572292328,
968
+ "rewards/rejected": -0.10126137733459473,
969
+ "step": 630
970
+ },
971
+ {
972
+ "epoch": 3.240506329113924,
973
+ "grad_norm": 1780107.5787213328,
974
+ "learning_rate": 4.5534315261673453e-07,
975
+ "logits/chosen": -7.868208885192871,
976
+ "logits/rejected": -7.755393981933594,
977
+ "logps/chosen": -209.3970184326172,
978
+ "logps/rejected": -341.9508056640625,
979
+ "loss": 89608.1875,
980
+ "rewards/accuracies": 0.9375,
981
+ "rewards/chosen": 0.027028566226363182,
982
+ "rewards/margins": 0.133165642619133,
983
+ "rewards/rejected": -0.10613708198070526,
984
+ "step": 640
985
+ },
986
+ {
987
+ "epoch": 3.291139240506329,
988
+ "grad_norm": 1730512.4518714033,
989
+ "learning_rate": 4.5377624569100595e-07,
990
+ "logits/chosen": -7.359053134918213,
991
+ "logits/rejected": -7.324367523193359,
992
+ "logps/chosen": -193.1954803466797,
993
+ "logps/rejected": -309.5513610839844,
994
+ "loss": 93257.225,
995
+ "rewards/accuracies": 0.949999988079071,
996
+ "rewards/chosen": 0.028996175155043602,
997
+ "rewards/margins": 0.11760006099939346,
998
+ "rewards/rejected": -0.08860386908054352,
999
+ "step": 650
1000
+ },
1001
+ {
1002
+ "epoch": 3.3417721518987342,
1003
+ "grad_norm": 1692816.769511115,
1004
+ "learning_rate": 4.5220933876527736e-07,
1005
+ "logits/chosen": -8.043203353881836,
1006
+ "logits/rejected": -8.003018379211426,
1007
+ "logps/chosen": -211.73648071289062,
1008
+ "logps/rejected": -336.10455322265625,
1009
+ "loss": 88400.4688,
1010
+ "rewards/accuracies": 0.887499988079071,
1011
+ "rewards/chosen": 0.024640550836920738,
1012
+ "rewards/margins": 0.12655004858970642,
1013
+ "rewards/rejected": -0.10190950334072113,
1014
+ "step": 660
1015
+ },
1016
+ {
1017
+ "epoch": 3.392405063291139,
1018
+ "grad_norm": 1906377.7496358757,
1019
+ "learning_rate": 4.506424318395487e-07,
1020
+ "logits/chosen": -7.25619649887085,
1021
+ "logits/rejected": -7.37869119644165,
1022
+ "logps/chosen": -197.8258819580078,
1023
+ "logps/rejected": -324.2138671875,
1024
+ "loss": 89983.5688,
1025
+ "rewards/accuracies": 0.949999988079071,
1026
+ "rewards/chosen": 0.026263948529958725,
1027
+ "rewards/margins": 0.12702925503253937,
1028
+ "rewards/rejected": -0.10076530277729034,
1029
+ "step": 670
1030
+ },
1031
+ {
1032
+ "epoch": 3.4430379746835444,
1033
+ "grad_norm": 1785643.0594316572,
1034
+ "learning_rate": 4.4907552491382013e-07,
1035
+ "logits/chosen": -6.798577785491943,
1036
+ "logits/rejected": -6.7768073081970215,
1037
+ "logps/chosen": -208.5835723876953,
1038
+ "logps/rejected": -323.3017883300781,
1039
+ "loss": 89767.5,
1040
+ "rewards/accuracies": 0.9375,
1041
+ "rewards/chosen": 0.025741413235664368,
1042
+ "rewards/margins": 0.1167701929807663,
1043
+ "rewards/rejected": -0.09102877229452133,
1044
+ "step": 680
1045
+ },
1046
+ {
1047
+ "epoch": 3.4936708860759493,
1048
+ "grad_norm": 2393957.296937455,
1049
+ "learning_rate": 4.475086179880915e-07,
1050
+ "logits/chosen": -6.352355480194092,
1051
+ "logits/rejected": -6.526197910308838,
1052
+ "logps/chosen": -187.56597900390625,
1053
+ "logps/rejected": -306.5972595214844,
1054
+ "loss": 89036.6875,
1055
+ "rewards/accuracies": 0.925000011920929,
1056
+ "rewards/chosen": 0.024061182513833046,
1057
+ "rewards/margins": 0.11990946531295776,
1058
+ "rewards/rejected": -0.09584827721118927,
1059
+ "step": 690
1060
+ },
1061
+ {
1062
+ "epoch": 3.5443037974683547,
1063
+ "grad_norm": 1811486.2204670438,
1064
+ "learning_rate": 4.459417110623629e-07,
1065
+ "logits/chosen": -5.7466630935668945,
1066
+ "logits/rejected": -5.797163486480713,
1067
+ "logps/chosen": -212.6585235595703,
1068
+ "logps/rejected": -364.36199951171875,
1069
+ "loss": 88031.3,
1070
+ "rewards/accuracies": 0.9750000238418579,
1071
+ "rewards/chosen": 0.027677077800035477,
1072
+ "rewards/margins": 0.14764061570167542,
1073
+ "rewards/rejected": -0.11996352672576904,
1074
+ "step": 700
1075
+ },
1076
+ {
1077
+ "epoch": 3.5949367088607596,
1078
+ "grad_norm": 1724684.5755440604,
1079
+ "learning_rate": 4.4437480413663426e-07,
1080
+ "logits/chosen": -5.412962436676025,
1081
+ "logits/rejected": -5.541121959686279,
1082
+ "logps/chosen": -202.39065551757812,
1083
+ "logps/rejected": -333.0758056640625,
1084
+ "loss": 86956.675,
1085
+ "rewards/accuracies": 0.9750000238418579,
1086
+ "rewards/chosen": 0.0247800350189209,
1087
+ "rewards/margins": 0.12825721502304077,
1088
+ "rewards/rejected": -0.10347716510295868,
1089
+ "step": 710
1090
+ },
1091
+ {
1092
+ "epoch": 3.6455696202531644,
1093
+ "grad_norm": 1933271.7611355048,
1094
+ "learning_rate": 4.4280789721090567e-07,
1095
+ "logits/chosen": -5.053005218505859,
1096
+ "logits/rejected": -4.886711597442627,
1097
+ "logps/chosen": -199.10885620117188,
1098
+ "logps/rejected": -317.7257385253906,
1099
+ "loss": 86655.0125,
1100
+ "rewards/accuracies": 0.9125000238418579,
1101
+ "rewards/chosen": 0.02152046002447605,
1102
+ "rewards/margins": 0.11774978786706924,
1103
+ "rewards/rejected": -0.09622932970523834,
1104
+ "step": 720
1105
+ },
1106
+ {
1107
+ "epoch": 3.6962025316455698,
1108
+ "grad_norm": 2267463.489494214,
1109
+ "learning_rate": 4.4124099028517703e-07,
1110
+ "logits/chosen": -6.616279602050781,
1111
+ "logits/rejected": -6.9615797996521,
1112
+ "logps/chosen": -200.58961486816406,
1113
+ "logps/rejected": -351.6376953125,
1114
+ "loss": 86181.3938,
1115
+ "rewards/accuracies": 0.9750000238418579,
1116
+ "rewards/chosen": 0.032253801822662354,
1117
+ "rewards/margins": 0.14937567710876465,
1118
+ "rewards/rejected": -0.1171218603849411,
1119
+ "step": 730
1120
+ },
1121
+ {
1122
+ "epoch": 3.7468354430379747,
1123
+ "grad_norm": 1734288.0953653858,
1124
+ "learning_rate": 4.3967408335944844e-07,
1125
+ "logits/chosen": -5.873335361480713,
1126
+ "logits/rejected": -5.689335823059082,
1127
+ "logps/chosen": -217.43637084960938,
1128
+ "logps/rejected": -350.2752990722656,
1129
+ "loss": 86780.825,
1130
+ "rewards/accuracies": 0.9624999761581421,
1131
+ "rewards/chosen": 0.031159091740846634,
1132
+ "rewards/margins": 0.13692796230316162,
1133
+ "rewards/rejected": -0.10576887428760529,
1134
+ "step": 740
1135
+ },
1136
+ {
1137
+ "epoch": 3.7974683544303796,
1138
+ "grad_norm": 1741715.9901586007,
1139
+ "learning_rate": 4.381071764337198e-07,
1140
+ "logits/chosen": -7.123785972595215,
1141
+ "logits/rejected": -7.188807487487793,
1142
+ "logps/chosen": -207.00045776367188,
1143
+ "logps/rejected": -336.5976867675781,
1144
+ "loss": 86139.5625,
1145
+ "rewards/accuracies": 0.9375,
1146
+ "rewards/chosen": 0.03052128478884697,
1147
+ "rewards/margins": 0.13043463230133057,
1148
+ "rewards/rejected": -0.0999133437871933,
1149
+ "step": 750
1150
+ },
1151
+ {
1152
+ "epoch": 3.848101265822785,
1153
+ "grad_norm": 1879351.8394690978,
1154
+ "learning_rate": 4.365402695079912e-07,
1155
+ "logits/chosen": -7.820990085601807,
1156
+ "logits/rejected": -7.7128729820251465,
1157
+ "logps/chosen": -213.57388305664062,
1158
+ "logps/rejected": -362.5634460449219,
1159
+ "loss": 87478.3625,
1160
+ "rewards/accuracies": 0.987500011920929,
1161
+ "rewards/chosen": 0.03660900145769119,
1162
+ "rewards/margins": 0.1480773240327835,
1163
+ "rewards/rejected": -0.11146833002567291,
1164
+ "step": 760
1165
+ },
1166
+ {
1167
+ "epoch": 3.8987341772151898,
1168
+ "grad_norm": 1968713.4204386624,
1169
+ "learning_rate": 4.349733625822626e-07,
1170
+ "logits/chosen": -7.314540863037109,
1171
+ "logits/rejected": -7.363668918609619,
1172
+ "logps/chosen": -213.6930694580078,
1173
+ "logps/rejected": -367.44073486328125,
1174
+ "loss": 86825.5813,
1175
+ "rewards/accuracies": 0.949999988079071,
1176
+ "rewards/chosen": 0.026752913370728493,
1177
+ "rewards/margins": 0.15061405301094055,
1178
+ "rewards/rejected": -0.1238611489534378,
1179
+ "step": 770
1180
+ },
1181
+ {
1182
+ "epoch": 3.9493670886075947,
1183
+ "grad_norm": 2163439.406665409,
1184
+ "learning_rate": 4.33406455656534e-07,
1185
+ "logits/chosen": -7.67099666595459,
1186
+ "logits/rejected": -7.536408424377441,
1187
+ "logps/chosen": -213.9747772216797,
1188
+ "logps/rejected": -344.7560119628906,
1189
+ "loss": 86913.0375,
1190
+ "rewards/accuracies": 0.925000011920929,
1191
+ "rewards/chosen": 0.029844319447875023,
1192
+ "rewards/margins": 0.12930825352668762,
1193
+ "rewards/rejected": -0.09946390986442566,
1194
+ "step": 780
1195
+ },
1196
+ {
1197
+ "epoch": 4.0,
1198
+ "grad_norm": 1866234.1823014135,
1199
+ "learning_rate": 4.3183954873080535e-07,
1200
+ "logits/chosen": -7.922532081604004,
1201
+ "logits/rejected": -7.692726135253906,
1202
+ "logps/chosen": -211.41653442382812,
1203
+ "logps/rejected": -349.7116394042969,
1204
+ "loss": 86592.8938,
1205
+ "rewards/accuracies": 0.9624999761581421,
1206
+ "rewards/chosen": 0.027728911489248276,
1207
+ "rewards/margins": 0.1435452550649643,
1208
+ "rewards/rejected": -0.11581633985042572,
1209
+ "step": 790
1210
  }
1211
  ],
1212
  "logging_steps": 10,