cat-searcher committed
Commit 41fc73e · verified · 1 parent: eea3bd2

Training in progress, epoch 6, checkpoint

Files changed (29)
  1. last-checkpoint/global_step1382/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step1382/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step1382/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step1382/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step1382/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step1382/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step1382/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step1382/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step1382/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step1382/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step1382/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step1382/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step1382/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step1382/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step1382/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step1382/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
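The global_step1382/ files added in this commit are DeepSpeed ZeRO per-rank shards (bf16 optimizer states plus model states for 8 ranks). If a single consolidated FP32 state dict is wanted instead of the shards, DeepSpeed's zero_to_fp32 helper can rebuild it. The snippet below is a minimal sketch, assuming the checkpoint has been cloned locally; the local path is illustrative and not part of this commit.

```python
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Directory that contains global_step1382/ and the `latest` tag file
# (illustrative local path; adjust to wherever the repo was cloned).
checkpoint_dir = "./last-checkpoint"

# Reads the per-rank ZeRO shards and reconstructs one FP32 state dict on CPU.
# `tag` defaults to the contents of the `latest` file, here "global_step1382".
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag="global_step1382")

torch.save(state_dict, "pytorch_model_fp32.bin")
print(f"consolidated {len(state_dict)} tensors")
```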
last-checkpoint/global_step1382/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5edc639329efc382f1596e9be6ac92eec76c1fdd8e4a69123e4acea724783a10
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da7df7280f43cce9ec67ef31ffff618527964de214ff45ae23ac9e1d1168893b
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e17290a4120dc42280154868406961335a33157e6f3114ff90c09acb8b24acb
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d26faf01a63c156ebbe630b0524846f2815bbf2cc5f46f73747cc22527df907
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bec63be9931390bc6d71ce2735ac3dd8e7f802a28e26956bea12eb605fd9505e
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:441c120cbd3652396561ac5f4d4ccc64200145087f637c5e239099ba974a949d
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ca143448b9530345df4b969d2299b4bf1da9619c327d7c642d3a438222c4bd2
+ size 2506176112
last-checkpoint/global_step1382/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fa39a4703bdb28ffd77786fec2b404f228fb9e9bb77ad247f053ae92863ca74
+ size 2506176112
last-checkpoint/global_step1382/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7eef319adec752ebcb7e5756859278b3f0ff682fe8235144859d0abf9869be7
+ size 85570
last-checkpoint/global_step1382/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8af85528c85925f43ae266b7b8588a10cc6efa2c1dccabc0ecd20bfb236d97a9
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b67f5b7768a185948bdb34ca7228765fcca69bd7cc4d4d9baa4469d6c52e078
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:218725051515ca7bd445cecc5c6757cf4ae94075236a0cc2c155cc02815190f7
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12e6a54fd2770a9cabb78c97f1391e3bbed91e1829a79395e3f41d605ced7922
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be913c9be8c2851c3625e30850b9ee412f151791e096510473ef072bcd54d79e
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4277b50ed5eab841afd7db113fdbcd6660bb2d15561600ca14392448022caaf
+ size 85506
last-checkpoint/global_step1382/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:834f8c3943871f1e3af96c72bda8aa89d8d9edd3952ae3b8718b93d7a0cf4b02
+ size 85506
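Each file above is stored through Git LFS, so what the commit actually adds is a three-line pointer: the spec version, the SHA-256 of the real blob, and its size in bytes. Below is a minimal sketch of parsing such a pointer and checking a locally fetched blob against it; both file paths are illustrative, and the pointer text is simply what appears in the diff above.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file into its oid and size fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints are not loaded into memory at once."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Illustrative paths: the pointer text and the blob fetched e.g. via `git lfs pull`.
pointer = parse_lfs_pointer(Path("optim_states.pt.lfs-pointer").read_text())
blob = Path("bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt")

assert blob.stat().st_size == pointer["size"]
assert sha256_of(blob) == pointer["oid"]
```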
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1185
+ global_step1382
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dbec331a03b63bf09d63c111d004f44d4e1b4622bd66a68a51c8a93312a7aaed
+ oid sha256:f02726a44bd4780d17e35e236963b8fce24dc953bf71f40b4952f4922816c337
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f62a449b2e0be7027fb456871163c3388d55fd9f5230ef65a2e8ae1bd77075ac
+ oid sha256:10b91f43ac308f747e0c6169d0cc727dbf1d843ea3d107cf67c9329f99912a02
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a0628a9017696045a3a29e9eaffc71e9262d855716e773c0c3be760a1fe85bc8
+ oid sha256:a7d7e02ffb4d440dce7ab4ce0b5617578ec9ce3672acee7434ed6f1153f1ae0c
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:df342004a4d8e3626bf2a9f689fde7c8bfd6d995e14931f5496eda1f456cb6f2
+ oid sha256:c5b40ca759e432b2688f021b81291d74a40f56a205e9842119f7e772275eebd3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f02096eb4e8850b91490e80e4a042e2e60f71bd2abc6a269d62c271649cb77d2
+ oid sha256:cdaef955ddd36d6bc1c40584113dd6205483e2aa85b02439b8b27e82e02a8359
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:326c778d3d0e7e3d5665fa0a9ecd92986609c430da08b41611d6c05dc19815a8
+ oid sha256:10b14ae5db356e6512538751d6b386c190754e307cc99cd652d5c6dd891e1f82
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d978dcb0c34e022ee6750e9d86814b8c82e4965d7e07662f35f06eeac12938f3
+ oid sha256:f26e28be26826eeeed244b77185c67b443ac185175f8d4bf5ba94caa8b271bc5
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:01e83399aed1d9d173c3e07b2efa8530c956b62b2b68394c2ed0d43bd8bba9d1
+ oid sha256:847cedc1d6ca26f299a132c2ade9754887374acb9d98f26594a85d4c7742d474
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:606ab3ca92e3d20c327c69fdcce7f7e39bec2f2c3538b036088b255f917e3ba4
+ oid sha256:bcd043d1690ae0ff6991b03322799a0b28f021427b15fd9f1e5ed8b9905d9307
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1276a987dd22c9093fec58921ba19f340a28f18bff635cc01324e09a3c37ac3a
+ oid sha256:772190f7e6667c865d25fc72da7bdd1b5d39f46fe03bb5c2d754aee1ad3c99c7
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:251c2a48bb3a46f7c0365ebd02f9e250fbea04549ecdfec993cf3e0a3155f3a0
+ oid sha256:0dd882b6f08b8cde72a038748b9c995e480ab99405e1f7e6c7a03592bdad6355
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 6.0,
+ "epoch": 6.99746835443038,
  "eval_steps": 100,
- "global_step": 1185,
+ "global_step": 1382,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1792,6 +1792,306 @@
  "rewards/margins": 0.30018630623817444,
  "rewards/rejected": -0.2124231606721878,
  "step": 1180
+ },
+ {
+ "epoch": 6.025316455696203,
+ "grad_norm": 1287103.6124582873,
+ "learning_rate": 3.691632717016609e-07,
+ "logits/chosen": -6.58931827545166,
+ "logits/rejected": -6.494097709655762,
+ "logps/chosen": -136.68003845214844,
+ "logps/rejected": -493.61822509765625,
+ "loss": 41587.3125,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.10335598886013031,
+ "rewards/margins": 0.36172229051589966,
+ "rewards/rejected": -0.25836625695228577,
+ "step": 1190
+ },
+ {
+ "epoch": 6.075949367088608,
+ "grad_norm": 1654691.3160849167,
+ "learning_rate": 3.675963647759323e-07,
+ "logits/chosen": -5.342609882354736,
+ "logits/rejected": -5.393660545349121,
+ "logps/chosen": -116.93675231933594,
+ "logps/rejected": -476.22833251953125,
+ "loss": 38118.9437,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.10985767841339111,
+ "rewards/margins": 0.3632175922393799,
+ "rewards/rejected": -0.25335997343063354,
+ "step": 1200
+ },
+ {
+ "epoch": 6.1265822784810124,
+ "grad_norm": 1390108.9081190277,
+ "learning_rate": 3.6602945785020365e-07,
+ "logits/chosen": -5.185478687286377,
+ "logits/rejected": -4.843894958496094,
+ "logps/chosen": -128.81143188476562,
+ "logps/rejected": -519.8304443359375,
+ "loss": 36511.2875,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.1154375821352005,
+ "rewards/margins": 0.3926604092121124,
+ "rewards/rejected": -0.27722278237342834,
+ "step": 1210
+ },
+ {
+ "epoch": 6.177215189873418,
+ "grad_norm": 1502780.5568957475,
+ "learning_rate": 3.644625509244751e-07,
+ "logits/chosen": -4.163270473480225,
+ "logits/rejected": -3.8083653450012207,
+ "logps/chosen": -120.57966613769531,
+ "logps/rejected": -497.63226318359375,
+ "loss": 37966.2937,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.11406160891056061,
+ "rewards/margins": 0.37608999013900757,
+ "rewards/rejected": -0.2620283365249634,
+ "step": 1220
+ },
+ {
+ "epoch": 6.227848101265823,
+ "grad_norm": 1846607.9980803088,
+ "learning_rate": 3.6289564399874647e-07,
+ "logits/chosen": -4.317009925842285,
+ "logits/rejected": -4.062619209289551,
+ "logps/chosen": -112.0468521118164,
+ "logps/rejected": -490.73974609375,
+ "loss": 36750.4688,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.11853437125682831,
+ "rewards/margins": 0.37694281339645386,
+ "rewards/rejected": -0.25840842723846436,
+ "step": 1230
+ },
+ {
+ "epoch": 6.2784810126582276,
+ "grad_norm": 1432477.9223833755,
+ "learning_rate": 3.613287370730179e-07,
+ "logits/chosen": -4.580340385437012,
+ "logits/rejected": -4.493284225463867,
+ "logps/chosen": -123.97422790527344,
+ "logps/rejected": -509.47076416015625,
+ "loss": 37540.4875,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.11854572594165802,
+ "rewards/margins": 0.38835546374320984,
+ "rewards/rejected": -0.2698097229003906,
+ "step": 1240
+ },
+ {
+ "epoch": 6.329113924050633,
+ "grad_norm": 1551602.6793086384,
+ "learning_rate": 3.5976183014728924e-07,
+ "logits/chosen": -3.541313886642456,
+ "logits/rejected": -3.6754157543182373,
+ "logps/chosen": -120.3751220703125,
+ "logps/rejected": -483.46221923828125,
+ "loss": 35927.6062,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.10841184854507446,
+ "rewards/margins": 0.3652178645133972,
+ "rewards/rejected": -0.25680604577064514,
+ "step": 1250
+ },
+ {
+ "epoch": 6.379746835443038,
+ "grad_norm": 1628016.050343189,
+ "learning_rate": 3.5819492322156066e-07,
+ "logits/chosen": -3.570946216583252,
+ "logits/rejected": -3.6950716972351074,
+ "logps/chosen": -134.7080535888672,
+ "logps/rejected": -500.80108642578125,
+ "loss": 36467.1375,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.1038375124335289,
+ "rewards/margins": 0.36301389336586,
+ "rewards/rejected": -0.2591763734817505,
+ "step": 1260
+ },
+ {
+ "epoch": 6.430379746835443,
+ "grad_norm": 1416336.114974791,
+ "learning_rate": 3.56628016295832e-07,
+ "logits/chosen": -2.9958808422088623,
+ "logits/rejected": -3.158600330352783,
+ "logps/chosen": -120.319580078125,
+ "logps/rejected": -493.46075439453125,
+ "loss": 35704.05,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.11720545589923859,
+ "rewards/margins": 0.3729427754878998,
+ "rewards/rejected": -0.2557373046875,
+ "step": 1270
+ },
+ {
+ "epoch": 6.481012658227848,
+ "grad_norm": 1429276.465119334,
+ "learning_rate": 3.5506110937010343e-07,
+ "logits/chosen": -5.23915958404541,
+ "logits/rejected": -5.513189792633057,
+ "logps/chosen": -106.6229476928711,
+ "logps/rejected": -512.9346923828125,
+ "loss": 37476.4688,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.1187194362282753,
+ "rewards/margins": 0.4039131700992584,
+ "rewards/rejected": -0.2851937413215637,
+ "step": 1280
+ },
+ {
+ "epoch": 6.531645569620253,
+ "grad_norm": 1838991.6289765981,
+ "learning_rate": 3.534942024443748e-07,
+ "logits/chosen": -3.1320407390594482,
+ "logits/rejected": -3.531493663787842,
+ "logps/chosen": -114.69315338134766,
+ "logps/rejected": -521.70458984375,
+ "loss": 37236.3688,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.12156815826892853,
+ "rewards/margins": 0.39552414417266846,
+ "rewards/rejected": -0.2739560008049011,
+ "step": 1290
+ },
+ {
+ "epoch": 6.582278481012658,
+ "grad_norm": 1965294.5428377022,
+ "learning_rate": 3.519272955186462e-07,
+ "logits/chosen": -3.1404528617858887,
+ "logits/rejected": -3.159364938735962,
+ "logps/chosen": -108.1359634399414,
+ "logps/rejected": -441.573486328125,
+ "loss": 35760.8688,
+ "rewards/accuracies": 0.949999988079071,
+ "rewards/chosen": 0.10433737188577652,
+ "rewards/margins": 0.3334364593029022,
+ "rewards/rejected": -0.2290991097688675,
+ "step": 1300
+ },
+ {
+ "epoch": 6.632911392405063,
+ "grad_norm": 1744782.725381992,
+ "learning_rate": 3.5036038859291756e-07,
+ "logits/chosen": -5.149240970611572,
+ "logits/rejected": -4.872938632965088,
+ "logps/chosen": -110.17635345458984,
+ "logps/rejected": -462.6591796875,
+ "loss": 38854.3313,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.10797703266143799,
+ "rewards/margins": 0.35402077436447144,
+ "rewards/rejected": -0.24604372680187225,
+ "step": 1310
+ },
+ {
+ "epoch": 6.6835443037974684,
+ "grad_norm": 1449584.094036676,
+ "learning_rate": 3.4879348166718897e-07,
+ "logits/chosen": -5.302030086517334,
+ "logits/rejected": -5.005532264709473,
+ "logps/chosen": -114.39412689208984,
+ "logps/rejected": -497.2879943847656,
+ "loss": 37031.9281,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.11388063430786133,
+ "rewards/margins": 0.38410684466362,
+ "rewards/rejected": -0.27022621035575867,
+ "step": 1320
+ },
+ {
+ "epoch": 6.734177215189874,
+ "grad_norm": 1655726.3529691189,
+ "learning_rate": 3.4722657474146033e-07,
+ "logits/chosen": -5.846579074859619,
+ "logits/rejected": -5.164810657501221,
+ "logps/chosen": -122.16035461425781,
+ "logps/rejected": -490.97503662109375,
+ "loss": 35881.3438,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.11242518573999405,
+ "rewards/margins": 0.3698340058326721,
+ "rewards/rejected": -0.2574087679386139,
+ "step": 1330
+ },
+ {
+ "epoch": 6.784810126582278,
+ "grad_norm": 1473850.8586688952,
+ "learning_rate": 3.4565966781573174e-07,
+ "logits/chosen": -6.604684352874756,
+ "logits/rejected": -6.540472984313965,
+ "logps/chosen": -141.56655883789062,
+ "logps/rejected": -504.536865234375,
+ "loss": 35791.1937,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.11088699102401733,
+ "rewards/margins": 0.36103492975234985,
+ "rewards/rejected": -0.2501479685306549,
+ "step": 1340
+ },
+ {
+ "epoch": 6.8354430379746836,
+ "grad_norm": 1716575.4855753484,
+ "learning_rate": 3.440927608900031e-07,
+ "logits/chosen": -5.3845696449279785,
+ "logits/rejected": -5.094508647918701,
+ "logps/chosen": -126.5009536743164,
+ "logps/rejected": -501.36407470703125,
+ "loss": 36855.7281,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.12023582309484482,
+ "rewards/margins": 0.3794700503349304,
+ "rewards/rejected": -0.2592342793941498,
+ "step": 1350
+ },
+ {
+ "epoch": 6.886075949367089,
+ "grad_norm": 1860603.9086510486,
+ "learning_rate": 3.425258539642745e-07,
+ "logits/chosen": -5.825100898742676,
+ "logits/rejected": -5.165715217590332,
+ "logps/chosen": -123.0651626586914,
+ "logps/rejected": -519.5916748046875,
+ "loss": 37158.7969,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.11998645961284637,
+ "rewards/margins": 0.40252119302749634,
+ "rewards/rejected": -0.28253474831581116,
+ "step": 1360
+ },
+ {
+ "epoch": 6.936708860759493,
+ "grad_norm": 1781429.39957367,
+ "learning_rate": 3.4095894703854587e-07,
+ "logits/chosen": -5.593798637390137,
+ "logits/rejected": -5.400781631469727,
+ "logps/chosen": -122.57585144042969,
+ "logps/rejected": -500.21844482421875,
+ "loss": 36281.8938,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.11947381496429443,
+ "rewards/margins": 0.377518892288208,
+ "rewards/rejected": -0.25804510712623596,
+ "step": 1370
+ },
+ {
+ "epoch": 6.987341772151899,
+ "grad_norm": 1883344.192547866,
+ "learning_rate": 3.393920401128173e-07,
+ "logits/chosen": -5.272061347961426,
+ "logits/rejected": -5.000374794006348,
+ "logps/chosen": -109.66764831542969,
+ "logps/rejected": -471.388916015625,
+ "loss": 37081.4062,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/chosen": 0.11030924320220947,
+ "rewards/margins": 0.36379513144493103,
+ "rewards/rejected": -0.25348588824272156,
+ "step": 1380
  }
  ],
  "logging_steps": 10,