cat-searcher committed on
Commit c4d97fc · verified · 1 Parent(s): b9c54d3

Training in progress, epoch 30, checkpoint

Files changed (29)
  1. last-checkpoint/global_step5916/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step5916/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step5916/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step5916/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step5916/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step5916/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step5916/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step5916/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step5916/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step5916/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step5916/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step5916/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step5916/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step5916/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step5916/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step5916/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
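
The per-rank `*_optim_states.pt` and `*_model_states.pt` files look like DeepSpeed ZeRO shards for 8 data-parallel ranks (the `bf16_zero_pp_rank_*`/`zero_pp_rank_*` names and the `latest` tag file suggest so). As a minimal sketch, assuming a local clone of this repo and the `deepspeed` package installed, the shards under `global_step5916` can usually be consolidated into a single fp32 state dict with DeepSpeed's `zero_to_fp32` utilities; paths below are hypothetical:

```python
# Minimal sketch: consolidate the 8 per-rank ZeRO shards into one fp32 state
# dict on CPU. Assumes the checkpoint was written by DeepSpeed ZeRO and that
# the repo is cloned locally under ./last-checkpoint (hypothetical path).
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint",        # folder holding `latest` and global_step5916/
    tag="global_step5916",    # omit to use whatever `latest` points at
)
print(f"{len(state_dict)} consolidated parameter tensors")
```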
last-checkpoint/global_step5916/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b18fdebf275477c0bca9d0b7e0f80b1cb03caf57b8df661089b9827471e91fbb
+ size 2506176112
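
Each of these added entries is a Git LFS pointer rather than the binary itself: three `key value` lines giving the spec version, the object's sha256, and its size in bytes. A minimal sketch for reading one locally (the path is hypothetical):

```python
# Minimal sketch: parse a Git LFS pointer file like the ones shown in this diff.
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    """Return {'version': ..., 'oid': ..., 'size': ...} for an LFS pointer file."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    fields["size"] = int(fields["size"])            # byte count of the real object
    fields["oid"] = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    return fields

# e.g. read_lfs_pointer("last-checkpoint/global_step5916/"
#                       "bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt")
# -> {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'b18f...', 'size': 2506176112}
```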
last-checkpoint/global_step5916/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2916828579c6438c0f61c89512acda82e3307305bfbfe675be41245a453dba3b
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8be451455a282d6fa4afdcfcec6b588b88c66217eeb4d427737432d4441f6de1
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:513012075929a38b46c9ab66651ed948062a9cc18e2bf3a5c39c4dd12e5ea795
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51c9e1fd5ae664f552446fa3555c9ecf3335137a3a1072cb0090c5f33389fa78
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04c41fe7fbb304471dbf2512e289491172a5280c76eec719e4da8e2921cc8335
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac25d8faf9414e2fd21a0c1810e7ac093d908a70e1f1dd2c6a66a9db5ef2b745
+ size 2506176112
last-checkpoint/global_step5916/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d72d83348654c08933eab87d5b552549c388ee3191c4615e14f78305cf8034db
+ size 2506176112
last-checkpoint/global_step5916/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6d113b8a546176efdfdd4193aa760c8dd58cc3ac05f292d3a4a1037164b0015
+ size 85570
last-checkpoint/global_step5916/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e02adc94e97faf702adae82aeb4d9cb85bacfafaa5b0593e9e580dcd23045a9
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b618c03c6520f69d65a52cc3cb38e9052921b0e1df0791946a86c37b6516c92
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2541aba1009e69905001c19347ed7049287b08a1e30af9f66a4cd00acf843073
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4e60f9ddba6fbfcef5726792e69653d27a5e9f835fbbfe0838eba8a8a7343525
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4c3ff328e9e8ce49b1b7b9e0b4dd7f86c7fb5c6a22706fc6b2deac1bb9ca734
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d945c0954cfce168d7f7376d407748d133cfbf6ce4c999f81f22272d9e6c70a
+ size 85506
last-checkpoint/global_step5916/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f768a950f1cacb0dea94a1c2017128c7d786d436c044197f5940e93bfb27250a
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step5718
+ global_step5916
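
`latest` is a one-line tag file: DeepSpeed reads it to decide which `global_step*` directory holds the current shards, so this commit flips it from `global_step5718` to `global_step5916`. A minimal sketch of resolving it locally (hypothetical clone path):

```python
# Minimal sketch: `latest` names the active DeepSpeed tag; the shard directory
# of the same name sits next to it. Assumes a local clone at ./last-checkpoint.
from pathlib import Path

ckpt_root = Path("last-checkpoint")
tag = (ckpt_root / "latest").read_text().strip()   # "global_step5916" after this commit
shard_dir = ckpt_root / tag
print(tag, sorted(p.name for p in shard_dir.glob("*_model_states.pt")))
```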
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cfa3379a77e192ab0af64fe78334db958cf6214addb3b4fbbc67569fb9f2e836
+ oid sha256:1d3b3ba3d73fcfe06684964aa6a0da80302158d0b45234f7473a31d0a8f86d45
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c217d7738abb0675dccca5614343dba054b625a983adb5b66bb33a2cf128b5c4
+ oid sha256:d86e8e9eec87ad0431303407e43b154a156dec7fa7e56ad78330489a8ac89828
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a205f64c86d241517037857e791fc7cdcfd1b2d8a26ca46ff4e6430fc9491c64
+ oid sha256:5c6c5e7528cb919bf0e448095ef87b2e21b836859cd72ece77237cb822e78f88
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:384e04a3f342b13aa2aff82b03d67994ff48a7b6e7d90ad53291b0ccf1124755
+ oid sha256:c3be13484e2a524a3f5e79f3e726fd41b0876252a9d0898131fc1ccf0d86f6a8
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c75af15b2dde4bfa82f45cc3b48588a123bca20dd4b565a0312d5c1198d8bca9
+ oid sha256:f070bbb32dc96a08f76f5f85c2e01ae98d5e4d16dd18623bfab6b4b54e1d03c4
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8162e1d617d2045c999376967bb33455e9055c2882d00e9e6c3e5639106c7cf2
+ oid sha256:5f43d659c0909b37d66f4502f36b99850e3f553b6e2fb3ac13de4a060aa1cdc1
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:37df018beaaf1bcb0a0451dcfd03f257c34b245315c9d5243bad309614abb972
+ oid sha256:c80a51e24107d3bf93a1e4d903f42d6626efe1c5b8bd714f393fb93f840c5ce2
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc45f95eb705f82fae244f5f5bb1d1d060492c8b55aba2aa58162687e28952bc
+ oid sha256:e624a5e2e5c07e8e80b5d066ec132b4a872761dcba6d77f7386705eb95f67228
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d8ed548560db42b0406ce39f0d4bce5fac64c200cc3ab9248f1dd703dde9dfef
+ oid sha256:dc47fa553a7e6a70b45be521b98449ef920fe0d39f6cf85ae50b9a45d6c9da85
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:865e7aa49ec74e2e8e5ffbc2b62c9edb6308476119fe3e77f2fe29961dd5deaf
+ oid sha256:f8a0e7009351aac3c555f7946c515a64baa2d14aa4706e4a371e0c2df02b5a90
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3921aadd9f9af170d702817ed4b4c15515108ff8717773c58f7ba3567a43d1be
+ oid sha256:db6ce75ecc7db03a363686e05ba8e98d2588fcd56e7f730c69ff2320b79e2de2
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 28.99746835443038,
+ "epoch": 30.0,
  "eval_steps": 100,
- "global_step": 5718,
+ "global_step": 5916,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -8587,6 +8587,306 @@
  "rewards/margins": 0.5581387281417847,
  "rewards/rejected": -0.35167163610458374,
  "step": 5710
+ },
+ {
+ "epoch": 29.00759493670886,
+ "grad_norm": 177867.60188083298,
+ "learning_rate": 1.0748981510498275e-07,
+ "logits/chosen": -2.0902795791625977,
+ "logits/rejected": -1.2426658868789673,
+ "logps/chosen": -25.984241485595703,
+ "logps/rejected": -595.5320434570312,
+ "loss": 12101.3188,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.21571488678455353,
+ "rewards/margins": 0.5755189061164856,
+ "rewards/rejected": -0.3598039150238037,
+ "step": 5720
+ },
+ {
+ "epoch": 29.058227848101264,
+ "grad_norm": 175055.77768040166,
+ "learning_rate": 1.0670636164211845e-07,
+ "logits/chosen": -3.0874876976013184,
+ "logits/rejected": -1.9259151220321655,
+ "logps/chosen": -30.317163467407227,
+ "logps/rejected": -582.4378662109375,
+ "loss": 12058.957,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20789256691932678,
+ "rewards/margins": 0.5540691018104553,
+ "rewards/rejected": -0.3461765944957733,
+ "step": 5730
+ },
+ {
+ "epoch": 29.10886075949367,
+ "grad_norm": 330095.71026448795,
+ "learning_rate": 1.0592290817925414e-07,
+ "logits/chosen": -0.40818461775779724,
+ "logits/rejected": -0.17450471222400665,
+ "logps/chosen": -37.967308044433594,
+ "logps/rejected": -574.5567626953125,
+ "loss": 12163.9234,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.199508398771286,
+ "rewards/margins": 0.5361432433128357,
+ "rewards/rejected": -0.3366348147392273,
+ "step": 5740
+ },
+ {
+ "epoch": 29.159493670886075,
+ "grad_norm": 207868.2185307626,
+ "learning_rate": 1.0513945471638983e-07,
+ "logits/chosen": -1.1228978633880615,
+ "logits/rejected": -0.8512986302375793,
+ "logps/chosen": -36.19347381591797,
+ "logps/rejected": -572.5546264648438,
+ "loss": 12217.475,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.20709916949272156,
+ "rewards/margins": 0.5452824234962463,
+ "rewards/rejected": -0.3381832540035248,
+ "step": 5750
+ },
+ {
+ "epoch": 29.21012658227848,
+ "grad_norm": 180300.955366917,
+ "learning_rate": 1.0435600125352554e-07,
+ "logits/chosen": -2.1935715675354004,
+ "logits/rejected": -1.450584888458252,
+ "logps/chosen": -41.38114547729492,
+ "logps/rejected": -551.9308471679688,
+ "loss": 11531.2219,
+ "rewards/accuracies": 0.9125000238418579,
+ "rewards/chosen": 0.19797861576080322,
+ "rewards/margins": 0.5134168267250061,
+ "rewards/rejected": -0.3154382109642029,
+ "step": 5760
+ },
+ {
+ "epoch": 29.260759493670886,
+ "grad_norm": 230065.76491246693,
+ "learning_rate": 1.0357254779066123e-07,
+ "logits/chosen": -2.1162705421447754,
+ "logits/rejected": -1.343379020690918,
+ "logps/chosen": -26.30475425720215,
+ "logps/rejected": -584.0765380859375,
+ "loss": 12178.225,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.21349939703941345,
+ "rewards/margins": 0.5615987181663513,
+ "rewards/rejected": -0.34809932112693787,
+ "step": 5770
+ },
+ {
+ "epoch": 29.31139240506329,
+ "grad_norm": 150891.5620522627,
+ "learning_rate": 1.0278909432779692e-07,
+ "logits/chosen": -0.6437171101570129,
+ "logits/rejected": -0.06186608225107193,
+ "logps/chosen": -32.27136993408203,
+ "logps/rejected": -575.0911865234375,
+ "loss": 12350.1367,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/chosen": 0.20223280787467957,
+ "rewards/margins": 0.5462868213653564,
+ "rewards/rejected": -0.3440539240837097,
+ "step": 5780
+ },
+ {
+ "epoch": 29.362025316455696,
+ "grad_norm": 268215.91577526846,
+ "learning_rate": 1.0200564086493262e-07,
+ "logits/chosen": -2.4000306129455566,
+ "logits/rejected": -1.5239673852920532,
+ "logps/chosen": -44.228759765625,
+ "logps/rejected": -603.037109375,
+ "loss": 11602.7789,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.21766121685504913,
+ "rewards/margins": 0.5693429112434387,
+ "rewards/rejected": -0.3516816794872284,
+ "step": 5790
+ },
+ {
+ "epoch": 29.4126582278481,
+ "grad_norm": 153754.6030127712,
+ "learning_rate": 1.0122218740206831e-07,
+ "logits/chosen": 1.1010842323303223,
+ "logits/rejected": 1.6098358631134033,
+ "logps/chosen": -25.794830322265625,
+ "logps/rejected": -580.6827392578125,
+ "loss": 12135.457,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20671968162059784,
+ "rewards/margins": 0.5531316995620728,
+ "rewards/rejected": -0.3464120328426361,
+ "step": 5800
+ },
+ {
+ "epoch": 29.463291139240507,
+ "grad_norm": 237857.15032498536,
+ "learning_rate": 1.00438733939204e-07,
+ "logits/chosen": -2.2038140296936035,
+ "logits/rejected": -1.9258426427841187,
+ "logps/chosen": -24.270652770996094,
+ "logps/rejected": -592.76806640625,
+ "loss": 12368.1,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20859424769878387,
+ "rewards/margins": 0.5708917379379272,
+ "rewards/rejected": -0.3622974455356598,
+ "step": 5810
+ },
+ {
+ "epoch": 29.513924050632912,
+ "grad_norm": 229363.27347544604,
+ "learning_rate": 9.96552804763397e-08,
+ "logits/chosen": -1.733412742614746,
+ "logits/rejected": -1.8426891565322876,
+ "logps/chosen": -27.749902725219727,
+ "logps/rejected": -591.9719848632812,
+ "loss": 12434.6094,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20853643119335175,
+ "rewards/margins": 0.559594452381134,
+ "rewards/rejected": -0.3510579764842987,
+ "step": 5820
+ },
+ {
+ "epoch": 29.564556962025318,
+ "grad_norm": 204423.82729459935,
+ "learning_rate": 9.887182701347539e-08,
+ "logits/chosen": -0.8372312784194946,
+ "logits/rejected": -0.9436752200126648,
+ "logps/chosen": -23.713529586791992,
+ "logps/rejected": -551.91748046875,
+ "loss": 12191.0797,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.1975078582763672,
+ "rewards/margins": 0.5233575105667114,
+ "rewards/rejected": -0.32584962248802185,
+ "step": 5830
+ },
+ {
+ "epoch": 29.615189873417723,
+ "grad_norm": 196500.42803475718,
+ "learning_rate": 9.808837355061108e-08,
+ "logits/chosen": -0.07084647566080093,
+ "logits/rejected": 0.9050701856613159,
+ "logps/chosen": -29.59817886352539,
+ "logps/rejected": -567.6174926757812,
+ "loss": 12194.2234,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.20393919944763184,
+ "rewards/margins": 0.5439929366111755,
+ "rewards/rejected": -0.3400537371635437,
+ "step": 5840
+ },
+ {
+ "epoch": 29.665822784810125,
+ "grad_norm": 226455.28104673527,
+ "learning_rate": 9.730492008774677e-08,
+ "logits/chosen": -3.320272445678711,
+ "logits/rejected": -3.3560733795166016,
+ "logps/chosen": -28.402095794677734,
+ "logps/rejected": -602.0023193359375,
+ "loss": 12657.2406,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.2187713086605072,
+ "rewards/margins": 0.5724385976791382,
+ "rewards/rejected": -0.35366731882095337,
+ "step": 5850
+ },
+ {
+ "epoch": 29.71645569620253,
+ "grad_norm": 162035.60177504522,
+ "learning_rate": 9.652146662488248e-08,
+ "logits/chosen": -1.8201286792755127,
+ "logits/rejected": -1.7938740253448486,
+ "logps/chosen": -35.96394348144531,
+ "logps/rejected": -611.4141845703125,
+ "loss": 12011.9406,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.21413405239582062,
+ "rewards/margins": 0.5712839365005493,
+ "rewards/rejected": -0.3571499288082123,
+ "step": 5860
+ },
+ {
+ "epoch": 29.767088607594935,
+ "grad_norm": 162090.09030278528,
+ "learning_rate": 9.573801316201817e-08,
+ "logits/chosen": -0.6652274131774902,
+ "logits/rejected": -0.600281834602356,
+ "logps/chosen": -24.422576904296875,
+ "logps/rejected": -566.0366821289062,
+ "loss": 12593.6359,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.1961481273174286,
+ "rewards/margins": 0.5393214821815491,
+ "rewards/rejected": -0.3431733250617981,
+ "step": 5870
+ },
+ {
+ "epoch": 29.81772151898734,
+ "grad_norm": 365229.93961962714,
+ "learning_rate": 9.495455969915387e-08,
+ "logits/chosen": -2.613847017288208,
+ "logits/rejected": -2.108478546142578,
+ "logps/chosen": -29.573253631591797,
+ "logps/rejected": -577.60546875,
+ "loss": 12424.4891,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20539173483848572,
+ "rewards/margins": 0.5470829010009766,
+ "rewards/rejected": -0.34169113636016846,
+ "step": 5880
+ },
+ {
+ "epoch": 29.868354430379746,
+ "grad_norm": 173325.82955161307,
+ "learning_rate": 9.417110623628956e-08,
+ "logits/chosen": -1.4006824493408203,
+ "logits/rejected": -0.5856371521949768,
+ "logps/chosen": -27.345510482788086,
+ "logps/rejected": -584.8424072265625,
+ "loss": 12358.3133,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.2079104632139206,
+ "rewards/margins": 0.5603929758071899,
+ "rewards/rejected": -0.35248249769210815,
+ "step": 5890
+ },
+ {
+ "epoch": 29.91898734177215,
+ "grad_norm": 287432.0969704827,
+ "learning_rate": 9.338765277342525e-08,
+ "logits/chosen": -0.21508927643299103,
+ "logits/rejected": -0.1394989937543869,
+ "logps/chosen": -30.839313507080078,
+ "logps/rejected": -594.2600708007812,
+ "loss": 11980.4219,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.21064691245555878,
+ "rewards/margins": 0.5655493140220642,
+ "rewards/rejected": -0.354902446269989,
+ "step": 5900
+ },
+ {
+ "epoch": 29.969620253164557,
+ "grad_norm": 365207.2969153869,
+ "learning_rate": 9.260419931056094e-08,
+ "logits/chosen": -0.40759915113449097,
+ "logits/rejected": 0.3133270740509033,
+ "logps/chosen": -25.633676528930664,
+ "logps/rejected": -578.2957763671875,
+ "loss": 12223.2844,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20843228697776794,
+ "rewards/margins": 0.5534237027168274,
+ "rewards/rejected": -0.34499144554138184,
+ "step": 5910
  }
  ],
  "logging_steps": 10,