Training in progress, epoch 6, checkpoint
Browse files- last-checkpoint/global_step1382/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1382/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step1382/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5edc639329efc382f1596e9be6ac92eec76c1fdd8e4a69123e4acea724783a10
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da7df7280f43cce9ec67ef31ffff618527964de214ff45ae23ac9e1d1168893b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e17290a4120dc42280154868406961335a33157e6f3114ff90c09acb8b24acb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d26faf01a63c156ebbe630b0524846f2815bbf2cc5f46f73747cc22527df907
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bec63be9931390bc6d71ce2735ac3dd8e7f802a28e26956bea12eb605fd9505e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:441c120cbd3652396561ac5f4d4ccc64200145087f637c5e239099ba974a949d
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ca143448b9530345df4b969d2299b4bf1da9619c327d7c642d3a438222c4bd2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fa39a4703bdb28ffd77786fec2b404f228fb9e9bb77ad247f053ae92863ca74
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1382/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7eef319adec752ebcb7e5756859278b3f0ff682fe8235144859d0abf9869be7
|
3 |
+
size 85570
|
last-checkpoint/global_step1382/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8af85528c85925f43ae266b7b8588a10cc6efa2c1dccabc0ecd20bfb236d97a9
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b67f5b7768a185948bdb34ca7228765fcca69bd7cc4d4d9baa4469d6c52e078
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:218725051515ca7bd445cecc5c6757cf4ae94075236a0cc2c155cc02815190f7
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12e6a54fd2770a9cabb78c97f1391e3bbed91e1829a79395e3f41d605ced7922
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be913c9be8c2851c3625e30850b9ee412f151791e096510473ef072bcd54d79e
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4277b50ed5eab841afd7db113fdbcd6660bb2d15561600ca14392448022caaf
|
3 |
+
size 85506
|
last-checkpoint/global_step1382/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:834f8c3943871f1e3af96c72bda8aa89d8d9edd3952ae3b8718b93d7a0cf4b02
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1382
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f02726a44bd4780d17e35e236963b8fce24dc953bf71f40b4952f4922816c337
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10b91f43ac308f747e0c6169d0cc727dbf1d843ea3d107cf67c9329f99912a02
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7d7e02ffb4d440dce7ab4ce0b5617578ec9ce3672acee7434ed6f1153f1ae0c
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5b40ca759e432b2688f021b81291d74a40f56a205e9842119f7e772275eebd3
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdaef955ddd36d6bc1c40584113dd6205483e2aa85b02439b8b27e82e02a8359
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10b14ae5db356e6512538751d6b386c190754e307cc99cd652d5c6dd891e1f82
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f26e28be26826eeeed244b77185c67b443ac185175f8d4bf5ba94caa8b271bc5
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:847cedc1d6ca26f299a132c2ade9754887374acb9d98f26594a85d4c7742d474
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcd043d1690ae0ff6991b03322799a0b28f021427b15fd9f1e5ed8b9905d9307
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:772190f7e6667c865d25fc72da7bdd1b5d39f46fe03bb5c2d754aee1ad3c99c7
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0dd882b6f08b8cde72a038748b9c995e480ab99405e1f7e6c7a03592bdad6355
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1792,6 +1792,306 @@
|
|
1792 |
"rewards/margins": 0.30018630623817444,
|
1793 |
"rewards/rejected": -0.2124231606721878,
|
1794 |
"step": 1180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1795 |
}
|
1796 |
],
|
1797 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1382,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1792 |
"rewards/margins": 0.30018630623817444,
|
1793 |
"rewards/rejected": -0.2124231606721878,
|
1794 |
"step": 1180
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 6.025316455696203,
|
1798 |
+
"grad_norm": 1287103.6124582873,
|
1799 |
+
"learning_rate": 3.691632717016609e-07,
|
1800 |
+
"logits/chosen": -6.58931827545166,
|
1801 |
+
"logits/rejected": -6.494097709655762,
|
1802 |
+
"logps/chosen": -136.68003845214844,
|
1803 |
+
"logps/rejected": -493.61822509765625,
|
1804 |
+
"loss": 41587.3125,
|
1805 |
+
"rewards/accuracies": 0.987500011920929,
|
1806 |
+
"rewards/chosen": 0.10335598886013031,
|
1807 |
+
"rewards/margins": 0.36172229051589966,
|
1808 |
+
"rewards/rejected": -0.25836625695228577,
|
1809 |
+
"step": 1190
|
1810 |
+
},
|
1811 |
+
{
|
1812 |
+
"epoch": 6.075949367088608,
|
1813 |
+
"grad_norm": 1654691.3160849167,
|
1814 |
+
"learning_rate": 3.675963647759323e-07,
|
1815 |
+
"logits/chosen": -5.342609882354736,
|
1816 |
+
"logits/rejected": -5.393660545349121,
|
1817 |
+
"logps/chosen": -116.93675231933594,
|
1818 |
+
"logps/rejected": -476.22833251953125,
|
1819 |
+
"loss": 38118.9437,
|
1820 |
+
"rewards/accuracies": 0.9750000238418579,
|
1821 |
+
"rewards/chosen": 0.10985767841339111,
|
1822 |
+
"rewards/margins": 0.3632175922393799,
|
1823 |
+
"rewards/rejected": -0.25335997343063354,
|
1824 |
+
"step": 1200
|
1825 |
+
},
|
1826 |
+
{
|
1827 |
+
"epoch": 6.1265822784810124,
|
1828 |
+
"grad_norm": 1390108.9081190277,
|
1829 |
+
"learning_rate": 3.6602945785020365e-07,
|
1830 |
+
"logits/chosen": -5.185478687286377,
|
1831 |
+
"logits/rejected": -4.843894958496094,
|
1832 |
+
"logps/chosen": -128.81143188476562,
|
1833 |
+
"logps/rejected": -519.8304443359375,
|
1834 |
+
"loss": 36511.2875,
|
1835 |
+
"rewards/accuracies": 0.9750000238418579,
|
1836 |
+
"rewards/chosen": 0.1154375821352005,
|
1837 |
+
"rewards/margins": 0.3926604092121124,
|
1838 |
+
"rewards/rejected": -0.27722278237342834,
|
1839 |
+
"step": 1210
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 6.177215189873418,
|
1843 |
+
"grad_norm": 1502780.5568957475,
|
1844 |
+
"learning_rate": 3.644625509244751e-07,
|
1845 |
+
"logits/chosen": -4.163270473480225,
|
1846 |
+
"logits/rejected": -3.8083653450012207,
|
1847 |
+
"logps/chosen": -120.57966613769531,
|
1848 |
+
"logps/rejected": -497.63226318359375,
|
1849 |
+
"loss": 37966.2937,
|
1850 |
+
"rewards/accuracies": 1.0,
|
1851 |
+
"rewards/chosen": 0.11406160891056061,
|
1852 |
+
"rewards/margins": 0.37608999013900757,
|
1853 |
+
"rewards/rejected": -0.2620283365249634,
|
1854 |
+
"step": 1220
|
1855 |
+
},
|
1856 |
+
{
|
1857 |
+
"epoch": 6.227848101265823,
|
1858 |
+
"grad_norm": 1846607.9980803088,
|
1859 |
+
"learning_rate": 3.6289564399874647e-07,
|
1860 |
+
"logits/chosen": -4.317009925842285,
|
1861 |
+
"logits/rejected": -4.062619209289551,
|
1862 |
+
"logps/chosen": -112.0468521118164,
|
1863 |
+
"logps/rejected": -490.73974609375,
|
1864 |
+
"loss": 36750.4688,
|
1865 |
+
"rewards/accuracies": 0.987500011920929,
|
1866 |
+
"rewards/chosen": 0.11853437125682831,
|
1867 |
+
"rewards/margins": 0.37694281339645386,
|
1868 |
+
"rewards/rejected": -0.25840842723846436,
|
1869 |
+
"step": 1230
|
1870 |
+
},
|
1871 |
+
{
|
1872 |
+
"epoch": 6.2784810126582276,
|
1873 |
+
"grad_norm": 1432477.9223833755,
|
1874 |
+
"learning_rate": 3.613287370730179e-07,
|
1875 |
+
"logits/chosen": -4.580340385437012,
|
1876 |
+
"logits/rejected": -4.493284225463867,
|
1877 |
+
"logps/chosen": -123.97422790527344,
|
1878 |
+
"logps/rejected": -509.47076416015625,
|
1879 |
+
"loss": 37540.4875,
|
1880 |
+
"rewards/accuracies": 0.987500011920929,
|
1881 |
+
"rewards/chosen": 0.11854572594165802,
|
1882 |
+
"rewards/margins": 0.38835546374320984,
|
1883 |
+
"rewards/rejected": -0.2698097229003906,
|
1884 |
+
"step": 1240
|
1885 |
+
},
|
1886 |
+
{
|
1887 |
+
"epoch": 6.329113924050633,
|
1888 |
+
"grad_norm": 1551602.6793086384,
|
1889 |
+
"learning_rate": 3.5976183014728924e-07,
|
1890 |
+
"logits/chosen": -3.541313886642456,
|
1891 |
+
"logits/rejected": -3.6754157543182373,
|
1892 |
+
"logps/chosen": -120.3751220703125,
|
1893 |
+
"logps/rejected": -483.46221923828125,
|
1894 |
+
"loss": 35927.6062,
|
1895 |
+
"rewards/accuracies": 0.987500011920929,
|
1896 |
+
"rewards/chosen": 0.10841184854507446,
|
1897 |
+
"rewards/margins": 0.3652178645133972,
|
1898 |
+
"rewards/rejected": -0.25680604577064514,
|
1899 |
+
"step": 1250
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 6.379746835443038,
|
1903 |
+
"grad_norm": 1628016.050343189,
|
1904 |
+
"learning_rate": 3.5819492322156066e-07,
|
1905 |
+
"logits/chosen": -3.570946216583252,
|
1906 |
+
"logits/rejected": -3.6950716972351074,
|
1907 |
+
"logps/chosen": -134.7080535888672,
|
1908 |
+
"logps/rejected": -500.80108642578125,
|
1909 |
+
"loss": 36467.1375,
|
1910 |
+
"rewards/accuracies": 0.987500011920929,
|
1911 |
+
"rewards/chosen": 0.1038375124335289,
|
1912 |
+
"rewards/margins": 0.36301389336586,
|
1913 |
+
"rewards/rejected": -0.2591763734817505,
|
1914 |
+
"step": 1260
|
1915 |
+
},
|
1916 |
+
{
|
1917 |
+
"epoch": 6.430379746835443,
|
1918 |
+
"grad_norm": 1416336.114974791,
|
1919 |
+
"learning_rate": 3.56628016295832e-07,
|
1920 |
+
"logits/chosen": -2.9958808422088623,
|
1921 |
+
"logits/rejected": -3.158600330352783,
|
1922 |
+
"logps/chosen": -120.319580078125,
|
1923 |
+
"logps/rejected": -493.46075439453125,
|
1924 |
+
"loss": 35704.05,
|
1925 |
+
"rewards/accuracies": 0.987500011920929,
|
1926 |
+
"rewards/chosen": 0.11720545589923859,
|
1927 |
+
"rewards/margins": 0.3729427754878998,
|
1928 |
+
"rewards/rejected": -0.2557373046875,
|
1929 |
+
"step": 1270
|
1930 |
+
},
|
1931 |
+
{
|
1932 |
+
"epoch": 6.481012658227848,
|
1933 |
+
"grad_norm": 1429276.465119334,
|
1934 |
+
"learning_rate": 3.5506110937010343e-07,
|
1935 |
+
"logits/chosen": -5.23915958404541,
|
1936 |
+
"logits/rejected": -5.513189792633057,
|
1937 |
+
"logps/chosen": -106.6229476928711,
|
1938 |
+
"logps/rejected": -512.9346923828125,
|
1939 |
+
"loss": 37476.4688,
|
1940 |
+
"rewards/accuracies": 1.0,
|
1941 |
+
"rewards/chosen": 0.1187194362282753,
|
1942 |
+
"rewards/margins": 0.4039131700992584,
|
1943 |
+
"rewards/rejected": -0.2851937413215637,
|
1944 |
+
"step": 1280
|
1945 |
+
},
|
1946 |
+
{
|
1947 |
+
"epoch": 6.531645569620253,
|
1948 |
+
"grad_norm": 1838991.6289765981,
|
1949 |
+
"learning_rate": 3.534942024443748e-07,
|
1950 |
+
"logits/chosen": -3.1320407390594482,
|
1951 |
+
"logits/rejected": -3.531493663787842,
|
1952 |
+
"logps/chosen": -114.69315338134766,
|
1953 |
+
"logps/rejected": -521.70458984375,
|
1954 |
+
"loss": 37236.3688,
|
1955 |
+
"rewards/accuracies": 1.0,
|
1956 |
+
"rewards/chosen": 0.12156815826892853,
|
1957 |
+
"rewards/margins": 0.39552414417266846,
|
1958 |
+
"rewards/rejected": -0.2739560008049011,
|
1959 |
+
"step": 1290
|
1960 |
+
},
|
1961 |
+
{
|
1962 |
+
"epoch": 6.582278481012658,
|
1963 |
+
"grad_norm": 1965294.5428377022,
|
1964 |
+
"learning_rate": 3.519272955186462e-07,
|
1965 |
+
"logits/chosen": -3.1404528617858887,
|
1966 |
+
"logits/rejected": -3.159364938735962,
|
1967 |
+
"logps/chosen": -108.1359634399414,
|
1968 |
+
"logps/rejected": -441.573486328125,
|
1969 |
+
"loss": 35760.8688,
|
1970 |
+
"rewards/accuracies": 0.949999988079071,
|
1971 |
+
"rewards/chosen": 0.10433737188577652,
|
1972 |
+
"rewards/margins": 0.3334364593029022,
|
1973 |
+
"rewards/rejected": -0.2290991097688675,
|
1974 |
+
"step": 1300
|
1975 |
+
},
|
1976 |
+
{
|
1977 |
+
"epoch": 6.632911392405063,
|
1978 |
+
"grad_norm": 1744782.725381992,
|
1979 |
+
"learning_rate": 3.5036038859291756e-07,
|
1980 |
+
"logits/chosen": -5.149240970611572,
|
1981 |
+
"logits/rejected": -4.872938632965088,
|
1982 |
+
"logps/chosen": -110.17635345458984,
|
1983 |
+
"logps/rejected": -462.6591796875,
|
1984 |
+
"loss": 38854.3313,
|
1985 |
+
"rewards/accuracies": 0.9750000238418579,
|
1986 |
+
"rewards/chosen": 0.10797703266143799,
|
1987 |
+
"rewards/margins": 0.35402077436447144,
|
1988 |
+
"rewards/rejected": -0.24604372680187225,
|
1989 |
+
"step": 1310
|
1990 |
+
},
|
1991 |
+
{
|
1992 |
+
"epoch": 6.6835443037974684,
|
1993 |
+
"grad_norm": 1449584.094036676,
|
1994 |
+
"learning_rate": 3.4879348166718897e-07,
|
1995 |
+
"logits/chosen": -5.302030086517334,
|
1996 |
+
"logits/rejected": -5.005532264709473,
|
1997 |
+
"logps/chosen": -114.39412689208984,
|
1998 |
+
"logps/rejected": -497.2879943847656,
|
1999 |
+
"loss": 37031.9281,
|
2000 |
+
"rewards/accuracies": 0.9750000238418579,
|
2001 |
+
"rewards/chosen": 0.11388063430786133,
|
2002 |
+
"rewards/margins": 0.38410684466362,
|
2003 |
+
"rewards/rejected": -0.27022621035575867,
|
2004 |
+
"step": 1320
|
2005 |
+
},
|
2006 |
+
{
|
2007 |
+
"epoch": 6.734177215189874,
|
2008 |
+
"grad_norm": 1655726.3529691189,
|
2009 |
+
"learning_rate": 3.4722657474146033e-07,
|
2010 |
+
"logits/chosen": -5.846579074859619,
|
2011 |
+
"logits/rejected": -5.164810657501221,
|
2012 |
+
"logps/chosen": -122.16035461425781,
|
2013 |
+
"logps/rejected": -490.97503662109375,
|
2014 |
+
"loss": 35881.3438,
|
2015 |
+
"rewards/accuracies": 1.0,
|
2016 |
+
"rewards/chosen": 0.11242518573999405,
|
2017 |
+
"rewards/margins": 0.3698340058326721,
|
2018 |
+
"rewards/rejected": -0.2574087679386139,
|
2019 |
+
"step": 1330
|
2020 |
+
},
|
2021 |
+
{
|
2022 |
+
"epoch": 6.784810126582278,
|
2023 |
+
"grad_norm": 1473850.8586688952,
|
2024 |
+
"learning_rate": 3.4565966781573174e-07,
|
2025 |
+
"logits/chosen": -6.604684352874756,
|
2026 |
+
"logits/rejected": -6.540472984313965,
|
2027 |
+
"logps/chosen": -141.56655883789062,
|
2028 |
+
"logps/rejected": -504.536865234375,
|
2029 |
+
"loss": 35791.1937,
|
2030 |
+
"rewards/accuracies": 0.9750000238418579,
|
2031 |
+
"rewards/chosen": 0.11088699102401733,
|
2032 |
+
"rewards/margins": 0.36103492975234985,
|
2033 |
+
"rewards/rejected": -0.2501479685306549,
|
2034 |
+
"step": 1340
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 6.8354430379746836,
|
2038 |
+
"grad_norm": 1716575.4855753484,
|
2039 |
+
"learning_rate": 3.440927608900031e-07,
|
2040 |
+
"logits/chosen": -5.3845696449279785,
|
2041 |
+
"logits/rejected": -5.094508647918701,
|
2042 |
+
"logps/chosen": -126.5009536743164,
|
2043 |
+
"logps/rejected": -501.36407470703125,
|
2044 |
+
"loss": 36855.7281,
|
2045 |
+
"rewards/accuracies": 0.9750000238418579,
|
2046 |
+
"rewards/chosen": 0.12023582309484482,
|
2047 |
+
"rewards/margins": 0.3794700503349304,
|
2048 |
+
"rewards/rejected": -0.2592342793941498,
|
2049 |
+
"step": 1350
|
2050 |
+
},
|
2051 |
+
{
|
2052 |
+
"epoch": 6.886075949367089,
|
2053 |
+
"grad_norm": 1860603.9086510486,
|
2054 |
+
"learning_rate": 3.425258539642745e-07,
|
2055 |
+
"logits/chosen": -5.825100898742676,
|
2056 |
+
"logits/rejected": -5.165715217590332,
|
2057 |
+
"logps/chosen": -123.0651626586914,
|
2058 |
+
"logps/rejected": -519.5916748046875,
|
2059 |
+
"loss": 37158.7969,
|
2060 |
+
"rewards/accuracies": 1.0,
|
2061 |
+
"rewards/chosen": 0.11998645961284637,
|
2062 |
+
"rewards/margins": 0.40252119302749634,
|
2063 |
+
"rewards/rejected": -0.28253474831581116,
|
2064 |
+
"step": 1360
|
2065 |
+
},
|
2066 |
+
{
|
2067 |
+
"epoch": 6.936708860759493,
|
2068 |
+
"grad_norm": 1781429.39957367,
|
2069 |
+
"learning_rate": 3.4095894703854587e-07,
|
2070 |
+
"logits/chosen": -5.593798637390137,
|
2071 |
+
"logits/rejected": -5.400781631469727,
|
2072 |
+
"logps/chosen": -122.57585144042969,
|
2073 |
+
"logps/rejected": -500.21844482421875,
|
2074 |
+
"loss": 36281.8938,
|
2075 |
+
"rewards/accuracies": 1.0,
|
2076 |
+
"rewards/chosen": 0.11947381496429443,
|
2077 |
+
"rewards/margins": 0.377518892288208,
|
2078 |
+
"rewards/rejected": -0.25804510712623596,
|
2079 |
+
"step": 1370
|
2080 |
+
},
|
2081 |
+
{
|
2082 |
+
"epoch": 6.987341772151899,
|
2083 |
+
"grad_norm": 1883344.192547866,
|
2084 |
+
"learning_rate": 3.393920401128173e-07,
|
2085 |
+
"logits/chosen": -5.272061347961426,
|
2086 |
+
"logits/rejected": -5.000374794006348,
|
2087 |
+
"logps/chosen": -109.66764831542969,
|
2088 |
+
"logps/rejected": -471.388916015625,
|
2089 |
+
"loss": 37081.4062,
|
2090 |
+
"rewards/accuracies": 0.9624999761581421,
|
2091 |
+
"rewards/chosen": 0.11030924320220947,
|
2092 |
+
"rewards/margins": 0.36379513144493103,
|
2093 |
+
"rewards/rejected": -0.25348588824272156,
|
2094 |
+
"step": 1380
|
2095 |
}
|
2096 |
],
|
2097 |
"logging_steps": 10,
|