cat-searcher commited on
Commit
8e2644b
·
verified ·
1 Parent(s): e5d4e75

Training in progress, epoch 12, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step2567/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step2567/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2567/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2567/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2567/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2567/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2567/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2567/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2567/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step2567/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2567/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2567/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2567/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2567/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2567/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2567/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step2567/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df0dfda7753955a3b1a7c57e32045ecd4ee5a9ecaa6ab09eabac1176fb60756
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beaab569569fa9e9974b7c2c8aab4010a914d1f9761715fb409817fb8fa33b7e
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50bd801c72f28b97ebd7ee0600339b6665560d61267fec66e2c8a1416188e94
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:249ab69ccb0ef4ce4cd80e00b5665af8a64f38847c4d26d04cf8ff39ae62537c
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51163e652d04cb476fcfb9d469310033832cfc62b9b765cc2e069fcdc78053bb
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d234aa42bda89791cb7d131f27e23e89dbccb1085f1c1e360ef686fcf2b43b7
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565e00554947705715c69538ad4fdbba2135918d55fd1eac26bff0709d89b13b
3
+ size 2506176112
last-checkpoint/global_step2567/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c515529768ebde36752b6bb1c83e4b81332f8aa9703556918914490ed8d150d
3
+ size 2506176112
last-checkpoint/global_step2567/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1112a0ce6047e224c0d4055aa91e143f1983e512467d18befeebd90215f467e
3
+ size 85570
last-checkpoint/global_step2567/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2184e21318ac898b809f679e6b4c7651cff724486bf10d48097ecba3a51c9066
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68574640ec1db60949f0119373598557248ab690791c8c7b4437b5d0969b86d1
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4477293d8ea587976b0a2143cc2df3d00351c399c3e980c03aaa37f2412fce
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ddc2c09b1b33dea736afa89199ad10c27814b8da145bc5d167b22bb480bed3f
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37312ab5b415aba4ca7ed61ca1cac9f46a58dd5d77f7bc89c5bb78d05cd6d054
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b510f4c4fdab13d1a5ab5570ff44824c6b3c61e67db13ecd4057a73fa00a9fd
3
+ size 85506
last-checkpoint/global_step2567/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01bfecfc893fd6030f488f501386e110af2a2f886229a7e230009eab593fb8a6
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2370
 
1
+ global_step2567
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb90761b8d1100caed65f46e62bc543938eea85fd3c409acef5287472762cc31
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31a59d02512e22c8fde96ae53ade5fd3efb11b708a7c65545ed6a6a202751a37
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:933b765e67fd27d0106fb7378964b3b6e3a143eaa550740f03ab1d3a10ff3bea
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b5816e1bbc869b1c3d647caa31746b3c9674dc2e4dd47857a690cd4ee4639b
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80293d1d3039c03cadd9a7663af0dceb761b51cb1e901c839618d66f90e7f384
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9eb2db72f0e418efa4f13d7448db05b4ce751b00def470d4d8f87d4965bb17c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
  "eval_steps": 100,
6
- "global_step": 2370,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3577,6 +3577,291 @@
3577
  "rewards/margins": 0.5158518552780151,
3578
  "rewards/rejected": -0.3514222800731659,
3579
  "step": 2370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3580
  }
3581
  ],
3582
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 2567,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3577
  "rewards/margins": 0.5158518552780151,
3578
  "rewards/rejected": -0.3514222800731659,
3579
  "step": 2370
3580
+ },
3581
+ {
3582
+ "epoch": 12.050632911392405,
3583
+ "grad_norm": 924736.9233899026,
3584
+ "learning_rate": 1.827013475399561e-07,
3585
+ "logits/chosen": 0.09384210407733917,
3586
+ "logits/rejected": 0.38976824283599854,
3587
+ "logps/chosen": -60.1981315612793,
3588
+ "logps/rejected": -569.2012329101562,
3589
+ "loss": 16551.6906,
3590
+ "rewards/accuracies": 0.987500011920929,
3591
+ "rewards/chosen": 0.1783694624900818,
3592
+ "rewards/margins": 0.5092591047286987,
3593
+ "rewards/rejected": -0.33088964223861694,
3594
+ "step": 2380
3595
+ },
3596
+ {
3597
+ "epoch": 12.10126582278481,
3598
+ "grad_norm": 453683.3343967912,
3599
+ "learning_rate": 1.811344406142275e-07,
3600
+ "logits/chosen": -0.1967567503452301,
3601
+ "logits/rejected": 0.26000285148620605,
3602
+ "logps/chosen": -51.80207443237305,
3603
+ "logps/rejected": -586.1417846679688,
3604
+ "loss": 16650.6516,
3605
+ "rewards/accuracies": 1.0,
3606
+ "rewards/chosen": 0.19160635769367218,
3607
+ "rewards/margins": 0.5359978079795837,
3608
+ "rewards/rejected": -0.34439152479171753,
3609
+ "step": 2390
3610
+ },
3611
+ {
3612
+ "epoch": 12.151898734177216,
3613
+ "grad_norm": 760637.6347084254,
3614
+ "learning_rate": 1.7956753368849888e-07,
3615
+ "logits/chosen": -2.4950621128082275,
3616
+ "logits/rejected": -1.7182337045669556,
3617
+ "logps/chosen": -54.441162109375,
3618
+ "logps/rejected": -569.5804443359375,
3619
+ "loss": 16525.3187,
3620
+ "rewards/accuracies": 0.987500011920929,
3621
+ "rewards/chosen": 0.1771778166294098,
3622
+ "rewards/margins": 0.5123227834701538,
3623
+ "rewards/rejected": -0.335144966840744,
3624
+ "step": 2400
3625
+ },
3626
+ {
3627
+ "epoch": 12.20253164556962,
3628
+ "grad_norm": 760695.8247001156,
3629
+ "learning_rate": 1.7800062676277027e-07,
3630
+ "logits/chosen": 2.4408202171325684,
3631
+ "logits/rejected": 1.941209077835083,
3632
+ "logps/chosen": -50.47087097167969,
3633
+ "logps/rejected": -550.1649169921875,
3634
+ "loss": 16281.4594,
3635
+ "rewards/accuracies": 0.987500011920929,
3636
+ "rewards/chosen": 0.1683485209941864,
3637
+ "rewards/margins": 0.5019410848617554,
3638
+ "rewards/rejected": -0.33359256386756897,
3639
+ "step": 2410
3640
+ },
3641
+ {
3642
+ "epoch": 12.253164556962025,
3643
+ "grad_norm": 501646.8806860111,
3644
+ "learning_rate": 1.7643371983704165e-07,
3645
+ "logits/chosen": -1.7683095932006836,
3646
+ "logits/rejected": -1.838817834854126,
3647
+ "logps/chosen": -53.41362762451172,
3648
+ "logps/rejected": -574.3419799804688,
3649
+ "loss": 16772.675,
3650
+ "rewards/accuracies": 1.0,
3651
+ "rewards/chosen": 0.18047122657299042,
3652
+ "rewards/margins": 0.5231555700302124,
3653
+ "rewards/rejected": -0.342684268951416,
3654
+ "step": 2420
3655
+ },
3656
+ {
3657
+ "epoch": 12.30379746835443,
3658
+ "grad_norm": 705638.6344046313,
3659
+ "learning_rate": 1.7486681291131307e-07,
3660
+ "logits/chosen": 0.6870694756507874,
3661
+ "logits/rejected": 0.9879606366157532,
3662
+ "logps/chosen": -60.645713806152344,
3663
+ "logps/rejected": -565.5677490234375,
3664
+ "loss": 16990.1125,
3665
+ "rewards/accuracies": 1.0,
3666
+ "rewards/chosen": 0.180276021361351,
3667
+ "rewards/margins": 0.5076194405555725,
3668
+ "rewards/rejected": -0.3273434340953827,
3669
+ "step": 2430
3670
+ },
3671
+ {
3672
+ "epoch": 12.354430379746836,
3673
+ "grad_norm": 583239.6869039454,
3674
+ "learning_rate": 1.7329990598558445e-07,
3675
+ "logits/chosen": -0.015002572908997536,
3676
+ "logits/rejected": 0.6669713258743286,
3677
+ "logps/chosen": -59.69384765625,
3678
+ "logps/rejected": -595.3045654296875,
3679
+ "loss": 16570.7625,
3680
+ "rewards/accuracies": 1.0,
3681
+ "rewards/chosen": 0.19047938287258148,
3682
+ "rewards/margins": 0.5352143049240112,
3683
+ "rewards/rejected": -0.34473496675491333,
3684
+ "step": 2440
3685
+ },
3686
+ {
3687
+ "epoch": 12.405063291139241,
3688
+ "grad_norm": 717458.0522613698,
3689
+ "learning_rate": 1.7173299905985584e-07,
3690
+ "logits/chosen": -1.5561044216156006,
3691
+ "logits/rejected": -1.511528730392456,
3692
+ "logps/chosen": -48.24024200439453,
3693
+ "logps/rejected": -585.71484375,
3694
+ "loss": 16296.25,
3695
+ "rewards/accuracies": 1.0,
3696
+ "rewards/chosen": 0.18336063623428345,
3697
+ "rewards/margins": 0.5371404886245728,
3698
+ "rewards/rejected": -0.3537798523902893,
3699
+ "step": 2450
3700
+ },
3701
+ {
3702
+ "epoch": 12.455696202531646,
3703
+ "grad_norm": 1561201.446100151,
3704
+ "learning_rate": 1.7016609213412722e-07,
3705
+ "logits/chosen": -0.5445646047592163,
3706
+ "logits/rejected": 0.5015290379524231,
3707
+ "logps/chosen": -57.12273025512695,
3708
+ "logps/rejected": -596.54248046875,
3709
+ "loss": 17012.2562,
3710
+ "rewards/accuracies": 0.987500011920929,
3711
+ "rewards/chosen": 0.1852089911699295,
3712
+ "rewards/margins": 0.5424550771713257,
3713
+ "rewards/rejected": -0.35724616050720215,
3714
+ "step": 2460
3715
+ },
3716
+ {
3717
+ "epoch": 12.50632911392405,
3718
+ "grad_norm": 576931.8180998629,
3719
+ "learning_rate": 1.685991852083986e-07,
3720
+ "logits/chosen": 0.7103387713432312,
3721
+ "logits/rejected": 0.5729061365127563,
3722
+ "logps/chosen": -45.429290771484375,
3723
+ "logps/rejected": -540.9015502929688,
3724
+ "loss": 17545.0859,
3725
+ "rewards/accuracies": 0.987500011920929,
3726
+ "rewards/chosen": 0.17228493094444275,
3727
+ "rewards/margins": 0.49700021743774414,
3728
+ "rewards/rejected": -0.3247153162956238,
3729
+ "step": 2470
3730
+ },
3731
+ {
3732
+ "epoch": 12.556962025316455,
3733
+ "grad_norm": 790199.4841189157,
3734
+ "learning_rate": 1.6703227828267e-07,
3735
+ "logits/chosen": 0.757542610168457,
3736
+ "logits/rejected": 1.3497235774993896,
3737
+ "logps/chosen": -60.74102020263672,
3738
+ "logps/rejected": -570.23583984375,
3739
+ "loss": 17645.0094,
3740
+ "rewards/accuracies": 1.0,
3741
+ "rewards/chosen": 0.17620857059955597,
3742
+ "rewards/margins": 0.5084448456764221,
3743
+ "rewards/rejected": -0.33223623037338257,
3744
+ "step": 2480
3745
+ },
3746
+ {
3747
+ "epoch": 12.60759493670886,
3748
+ "grad_norm": 1168730.408088866,
3749
+ "learning_rate": 1.6546537135694138e-07,
3750
+ "logits/chosen": 1.1095263957977295,
3751
+ "logits/rejected": 1.6450704336166382,
3752
+ "logps/chosen": -55.1762580871582,
3753
+ "logps/rejected": -562.0362548828125,
3754
+ "loss": 17481.3469,
3755
+ "rewards/accuracies": 0.9750000238418579,
3756
+ "rewards/chosen": 0.1728857308626175,
3757
+ "rewards/margins": 0.5043104887008667,
3758
+ "rewards/rejected": -0.3314247727394104,
3759
+ "step": 2490
3760
+ },
3761
+ {
3762
+ "epoch": 12.658227848101266,
3763
+ "grad_norm": 492108.78941813926,
3764
+ "learning_rate": 1.6389846443121277e-07,
3765
+ "logits/chosen": 0.4340684413909912,
3766
+ "logits/rejected": 0.34048348665237427,
3767
+ "logps/chosen": -56.212928771972656,
3768
+ "logps/rejected": -578.192138671875,
3769
+ "loss": 16462.5594,
3770
+ "rewards/accuracies": 0.987500011920929,
3771
+ "rewards/chosen": 0.17624449729919434,
3772
+ "rewards/margins": 0.5216260552406311,
3773
+ "rewards/rejected": -0.3453815281391144,
3774
+ "step": 2500
3775
+ },
3776
+ {
3777
+ "epoch": 12.708860759493671,
3778
+ "grad_norm": 513189.7522025148,
3779
+ "learning_rate": 1.6233155750548415e-07,
3780
+ "logits/chosen": -0.21513333916664124,
3781
+ "logits/rejected": -0.05444493144750595,
3782
+ "logps/chosen": -60.96831512451172,
3783
+ "logps/rejected": -583.4918823242188,
3784
+ "loss": 16903.7125,
3785
+ "rewards/accuracies": 0.987500011920929,
3786
+ "rewards/chosen": 0.1871432662010193,
3787
+ "rewards/margins": 0.5204809904098511,
3788
+ "rewards/rejected": -0.3333377242088318,
3789
+ "step": 2510
3790
+ },
3791
+ {
3792
+ "epoch": 12.759493670886076,
3793
+ "grad_norm": 527855.7040773877,
3794
+ "learning_rate": 1.6076465057975556e-07,
3795
+ "logits/chosen": -1.166076421737671,
3796
+ "logits/rejected": -0.5938941240310669,
3797
+ "logps/chosen": -66.41789245605469,
3798
+ "logps/rejected": -565.521728515625,
3799
+ "loss": 16873.3,
3800
+ "rewards/accuracies": 0.9624999761581421,
3801
+ "rewards/chosen": 0.18609380722045898,
3802
+ "rewards/margins": 0.5067971348762512,
3803
+ "rewards/rejected": -0.32070332765579224,
3804
+ "step": 2520
3805
+ },
3806
+ {
3807
+ "epoch": 12.810126582278482,
3808
+ "grad_norm": 454333.8693268159,
3809
+ "learning_rate": 1.5919774365402695e-07,
3810
+ "logits/chosen": -3.2188408374786377,
3811
+ "logits/rejected": -2.827929735183716,
3812
+ "logps/chosen": -64.64167785644531,
3813
+ "logps/rejected": -578.556396484375,
3814
+ "loss": 17413.3594,
3815
+ "rewards/accuracies": 1.0,
3816
+ "rewards/chosen": 0.1842392235994339,
3817
+ "rewards/margins": 0.5160521268844604,
3818
+ "rewards/rejected": -0.33181288838386536,
3819
+ "step": 2530
3820
+ },
3821
+ {
3822
+ "epoch": 12.860759493670885,
3823
+ "grad_norm": 613283.375359761,
3824
+ "learning_rate": 1.5763083672829833e-07,
3825
+ "logits/chosen": -2.0415351390838623,
3826
+ "logits/rejected": -1.1543810367584229,
3827
+ "logps/chosen": -56.55009841918945,
3828
+ "logps/rejected": -565.3232421875,
3829
+ "loss": 16952.7828,
3830
+ "rewards/accuracies": 0.987500011920929,
3831
+ "rewards/chosen": 0.17874039709568024,
3832
+ "rewards/margins": 0.5064669847488403,
3833
+ "rewards/rejected": -0.3277265429496765,
3834
+ "step": 2540
3835
+ },
3836
+ {
3837
+ "epoch": 12.91139240506329,
3838
+ "grad_norm": 973991.6151861927,
3839
+ "learning_rate": 1.5606392980256972e-07,
3840
+ "logits/chosen": -1.9052120447158813,
3841
+ "logits/rejected": -1.2125427722930908,
3842
+ "logps/chosen": -56.37163162231445,
3843
+ "logps/rejected": -575.3190307617188,
3844
+ "loss": 17272.6656,
3845
+ "rewards/accuracies": 1.0,
3846
+ "rewards/chosen": 0.18349668383598328,
3847
+ "rewards/margins": 0.5194507837295532,
3848
+ "rewards/rejected": -0.33595409989356995,
3849
+ "step": 2550
3850
+ },
3851
+ {
3852
+ "epoch": 12.962025316455696,
3853
+ "grad_norm": 1049016.1677939103,
3854
+ "learning_rate": 1.544970228768411e-07,
3855
+ "logits/chosen": -0.479561984539032,
3856
+ "logits/rejected": -0.6837025284767151,
3857
+ "logps/chosen": -56.96269989013672,
3858
+ "logps/rejected": -579.6213989257812,
3859
+ "loss": 17023.0859,
3860
+ "rewards/accuracies": 0.987500011920929,
3861
+ "rewards/chosen": 0.1867980808019638,
3862
+ "rewards/margins": 0.5234028100967407,
3863
+ "rewards/rejected": -0.3366047739982605,
3864
+ "step": 2560
3865
  }
3866
  ],
3867
  "logging_steps": 10,