Training in progress, epoch 8, checkpoint
Browse files- last-checkpoint/global_step1580/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1580/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step1580/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35b8d916c014ce248bb98e73a3806d83db0060df242c4851d6304d9ac624587a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffec229c0d2dd05f50e617b7f5dbb7e07ac92f8a1f9a7f705ff3d70f90464e39
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03483348e6552301cb13fc8304e7de58faa7fa806b7ccc77448967ab7c9c140b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:756c27dc2358f438b45bd626b25026fae77fc421f5018eb282c8f023f0dcc46e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd329bfac8f118af8025cdaeaf69703e913ea32a6f301d6af1dfccdb73171352
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:484b5403a8824ef95137ff82bb37f4e4a7236ad81b461ca02535d897bb210d64
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ffa5eb8f4626caf4beb17f20b54f219b9c6a63e53e69c89ba3b916fa5409f27
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23a8e73036cb47e552f1bb09c2098f42541b69bf120c99ec30d00ddf8d53789a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1580/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68743fb792877033bfb3aedb8d7d498df0e80df4fa97f3b49df7392f14046293
|
3 |
+
size 85570
|
last-checkpoint/global_step1580/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:562ab1107c2e6e61e559e052443b7b8804c2b7284cebe971d0bd6bbf89c38695
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d09fd2b95b4260467ebd2b91d2758ff7dac5a6b4d5bc17d41d9e4f4034421dba
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d82f1ee7ab57e1ddcc9ef0f135ee190da1b92285b3914a186314ade3393eaab3
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d9076aab39103ad4399e01117b7df76cf8bf23a6f4c5764716d87171d24267e
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9908edca5b40c6ba0f8ba54a93a80c05c032c21df25828608487d14975abeced
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eb8caed2a09d6d17007ab0248eb74db5df6263c83f0a0cad02eecc5f8ceb78c
|
3 |
+
size 85506
|
last-checkpoint/global_step1580/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6a30d04f9a8013c674a78929c0d2dbe739edd246e8526478f31634ef534f97f
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1580
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d4c8b92632023613bca2cbaab82aff2bfc0f1b7c62aab671b9cfd3d8f06b448
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12c27030d235bb5b6be6f9fb6111ad0d2904a9ae1dbba3911f671ef6abf0b238
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ee195ebde9bf012f945f068f133e7fe22fef5450c496607e3ef11cc2034a186
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf0fe1a3315d60b197207c5cb249d0ce4f9ce6d7585e696276d9ffbcb5379893
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01c5bd6eae04542162b3e94245555bd81312524066bc01d0ebbfc4fd8554240e
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45b74942c68b00d657cfce186b0eeb4aa8f52efa04b114803b605fee8de45972
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd66dd2ba958fc9929441817d8154abbd929c0aa9cd66ff3171965bdaaf5d78
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89eeedefdd62514d0130acc330a5c08e9774c95d38c60997905cfd65fc54b710
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f43ced939100082608f57561a10e1888e69210c80675068db530c5815889910e
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d8d6ee244d99525e7004ae3f02d44ae63082d81fbbab7306f641ac6aeeb736f
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4222d0b9fadaea1c2825a6be6146f638b45462a966591dbc095e76b291c3b43f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2092,6 +2092,306 @@
|
|
2092 |
"rewards/margins": 0.36379513144493103,
|
2093 |
"rewards/rejected": -0.25348588824272156,
|
2094 |
"step": 1380
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2095 |
}
|
2096 |
],
|
2097 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1580,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2092 |
"rewards/margins": 0.36379513144493103,
|
2093 |
"rewards/rejected": -0.25348588824272156,
|
2094 |
"step": 1380
|
2095 |
+
},
|
2096 |
+
{
|
2097 |
+
"epoch": 7.037974683544304,
|
2098 |
+
"grad_norm": 1158283.9951295503,
|
2099 |
+
"learning_rate": 3.3782513318708864e-07,
|
2100 |
+
"logits/chosen": -4.4635396003723145,
|
2101 |
+
"logits/rejected": -4.055373668670654,
|
2102 |
+
"logps/chosen": -126.25242614746094,
|
2103 |
+
"logps/rejected": -513.0021362304688,
|
2104 |
+
"loss": 32182.2562,
|
2105 |
+
"rewards/accuracies": 0.9750000238418579,
|
2106 |
+
"rewards/chosen": 0.12364669889211655,
|
2107 |
+
"rewards/margins": 0.39015716314315796,
|
2108 |
+
"rewards/rejected": -0.2665104568004608,
|
2109 |
+
"step": 1390
|
2110 |
+
},
|
2111 |
+
{
|
2112 |
+
"epoch": 7.0886075949367084,
|
2113 |
+
"grad_norm": 1635336.0000705447,
|
2114 |
+
"learning_rate": 3.3625822626136005e-07,
|
2115 |
+
"logits/chosen": -3.2711379528045654,
|
2116 |
+
"logits/rejected": -2.849708080291748,
|
2117 |
+
"logps/chosen": -120.3502426147461,
|
2118 |
+
"logps/rejected": -554.61669921875,
|
2119 |
+
"loss": 28154.0125,
|
2120 |
+
"rewards/accuracies": 1.0,
|
2121 |
+
"rewards/chosen": 0.1400633156299591,
|
2122 |
+
"rewards/margins": 0.4437219500541687,
|
2123 |
+
"rewards/rejected": -0.3036586344242096,
|
2124 |
+
"step": 1400
|
2125 |
+
},
|
2126 |
+
{
|
2127 |
+
"epoch": 7.139240506329114,
|
2128 |
+
"grad_norm": 1478880.6175367055,
|
2129 |
+
"learning_rate": 3.346913193356314e-07,
|
2130 |
+
"logits/chosen": -1.498684048652649,
|
2131 |
+
"logits/rejected": -1.5719478130340576,
|
2132 |
+
"logps/chosen": -97.41731262207031,
|
2133 |
+
"logps/rejected": -528.29833984375,
|
2134 |
+
"loss": 30443.8531,
|
2135 |
+
"rewards/accuracies": 1.0,
|
2136 |
+
"rewards/chosen": 0.13250485062599182,
|
2137 |
+
"rewards/margins": 0.4276755452156067,
|
2138 |
+
"rewards/rejected": -0.29517072439193726,
|
2139 |
+
"step": 1410
|
2140 |
+
},
|
2141 |
+
{
|
2142 |
+
"epoch": 7.189873417721519,
|
2143 |
+
"grad_norm": 1190966.9261622827,
|
2144 |
+
"learning_rate": 3.331244124099029e-07,
|
2145 |
+
"logits/chosen": -3.576815366744995,
|
2146 |
+
"logits/rejected": -3.1508662700653076,
|
2147 |
+
"logps/chosen": -92.4610595703125,
|
2148 |
+
"logps/rejected": -499.2225646972656,
|
2149 |
+
"loss": 30200.7656,
|
2150 |
+
"rewards/accuracies": 0.987500011920929,
|
2151 |
+
"rewards/chosen": 0.1342589408159256,
|
2152 |
+
"rewards/margins": 0.40714582800865173,
|
2153 |
+
"rewards/rejected": -0.2728869318962097,
|
2154 |
+
"step": 1420
|
2155 |
+
},
|
2156 |
+
{
|
2157 |
+
"epoch": 7.2405063291139244,
|
2158 |
+
"grad_norm": 1654460.4321586012,
|
2159 |
+
"learning_rate": 3.3155750548417424e-07,
|
2160 |
+
"logits/chosen": -3.6517982482910156,
|
2161 |
+
"logits/rejected": -2.912386894226074,
|
2162 |
+
"logps/chosen": -113.77073669433594,
|
2163 |
+
"logps/rejected": -548.2919921875,
|
2164 |
+
"loss": 29291.1719,
|
2165 |
+
"rewards/accuracies": 0.9750000238418579,
|
2166 |
+
"rewards/chosen": 0.13462531566619873,
|
2167 |
+
"rewards/margins": 0.435891717672348,
|
2168 |
+
"rewards/rejected": -0.3012663722038269,
|
2169 |
+
"step": 1430
|
2170 |
+
},
|
2171 |
+
{
|
2172 |
+
"epoch": 7.291139240506329,
|
2173 |
+
"grad_norm": 1547048.8074025025,
|
2174 |
+
"learning_rate": 3.2999059855844565e-07,
|
2175 |
+
"logits/chosen": -4.762998580932617,
|
2176 |
+
"logits/rejected": -4.417517185211182,
|
2177 |
+
"logps/chosen": -103.59019470214844,
|
2178 |
+
"logps/rejected": -516.0870361328125,
|
2179 |
+
"loss": 30597.95,
|
2180 |
+
"rewards/accuracies": 0.987500011920929,
|
2181 |
+
"rewards/chosen": 0.1253672093153,
|
2182 |
+
"rewards/margins": 0.4090943932533264,
|
2183 |
+
"rewards/rejected": -0.28372713923454285,
|
2184 |
+
"step": 1440
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 7.341772151898734,
|
2188 |
+
"grad_norm": 1083334.846955902,
|
2189 |
+
"learning_rate": 3.28423691632717e-07,
|
2190 |
+
"logits/chosen": -4.341902732849121,
|
2191 |
+
"logits/rejected": -3.4809889793395996,
|
2192 |
+
"logps/chosen": -105.1113052368164,
|
2193 |
+
"logps/rejected": -537.7858276367188,
|
2194 |
+
"loss": 28933.9125,
|
2195 |
+
"rewards/accuracies": 1.0,
|
2196 |
+
"rewards/chosen": 0.139817476272583,
|
2197 |
+
"rewards/margins": 0.4371423125267029,
|
2198 |
+
"rewards/rejected": -0.2973248362541199,
|
2199 |
+
"step": 1450
|
2200 |
+
},
|
2201 |
+
{
|
2202 |
+
"epoch": 7.3924050632911396,
|
2203 |
+
"grad_norm": 1583721.4157786674,
|
2204 |
+
"learning_rate": 3.268567847069884e-07,
|
2205 |
+
"logits/chosen": -5.8856353759765625,
|
2206 |
+
"logits/rejected": -5.3746867179870605,
|
2207 |
+
"logps/chosen": -94.76522827148438,
|
2208 |
+
"logps/rejected": -525.3110961914062,
|
2209 |
+
"loss": 29575.7844,
|
2210 |
+
"rewards/accuracies": 0.9750000238418579,
|
2211 |
+
"rewards/chosen": 0.13582661747932434,
|
2212 |
+
"rewards/margins": 0.4354213774204254,
|
2213 |
+
"rewards/rejected": -0.29959478974342346,
|
2214 |
+
"step": 1460
|
2215 |
+
},
|
2216 |
+
{
|
2217 |
+
"epoch": 7.443037974683544,
|
2218 |
+
"grad_norm": 1391896.6733071958,
|
2219 |
+
"learning_rate": 3.252898777812598e-07,
|
2220 |
+
"logits/chosen": -3.2749342918395996,
|
2221 |
+
"logits/rejected": -3.6061177253723145,
|
2222 |
+
"logps/chosen": -99.21089172363281,
|
2223 |
+
"logps/rejected": -534.4422607421875,
|
2224 |
+
"loss": 29207.5719,
|
2225 |
+
"rewards/accuracies": 1.0,
|
2226 |
+
"rewards/chosen": 0.1312985122203827,
|
2227 |
+
"rewards/margins": 0.433136522769928,
|
2228 |
+
"rewards/rejected": -0.3018379807472229,
|
2229 |
+
"step": 1470
|
2230 |
+
},
|
2231 |
+
{
|
2232 |
+
"epoch": 7.493670886075949,
|
2233 |
+
"grad_norm": 1294960.5242478126,
|
2234 |
+
"learning_rate": 3.237229708555312e-07,
|
2235 |
+
"logits/chosen": -2.985567808151245,
|
2236 |
+
"logits/rejected": -1.8726612329483032,
|
2237 |
+
"logps/chosen": -112.32755279541016,
|
2238 |
+
"logps/rejected": -509.37286376953125,
|
2239 |
+
"loss": 29187.1594,
|
2240 |
+
"rewards/accuracies": 0.9750000238418579,
|
2241 |
+
"rewards/chosen": 0.1289207637310028,
|
2242 |
+
"rewards/margins": 0.4079267978668213,
|
2243 |
+
"rewards/rejected": -0.27900606393814087,
|
2244 |
+
"step": 1480
|
2245 |
+
},
|
2246 |
+
{
|
2247 |
+
"epoch": 7.544303797468355,
|
2248 |
+
"grad_norm": 1193173.6877739348,
|
2249 |
+
"learning_rate": 3.2215606392980255e-07,
|
2250 |
+
"logits/chosen": -2.0656161308288574,
|
2251 |
+
"logits/rejected": -2.3443799018859863,
|
2252 |
+
"logps/chosen": -97.64754486083984,
|
2253 |
+
"logps/rejected": -511.40576171875,
|
2254 |
+
"loss": 29322.4313,
|
2255 |
+
"rewards/accuracies": 0.987500011920929,
|
2256 |
+
"rewards/chosen": 0.13589712977409363,
|
2257 |
+
"rewards/margins": 0.413860946893692,
|
2258 |
+
"rewards/rejected": -0.2779638171195984,
|
2259 |
+
"step": 1490
|
2260 |
+
},
|
2261 |
+
{
|
2262 |
+
"epoch": 7.594936708860759,
|
2263 |
+
"grad_norm": 1279108.0637389964,
|
2264 |
+
"learning_rate": 3.2058915700407396e-07,
|
2265 |
+
"logits/chosen": -3.5005557537078857,
|
2266 |
+
"logits/rejected": -3.4204413890838623,
|
2267 |
+
"logps/chosen": -107.39742279052734,
|
2268 |
+
"logps/rejected": -530.2638549804688,
|
2269 |
+
"loss": 27542.3625,
|
2270 |
+
"rewards/accuracies": 0.987500011920929,
|
2271 |
+
"rewards/chosen": 0.13995657861232758,
|
2272 |
+
"rewards/margins": 0.42647701501846313,
|
2273 |
+
"rewards/rejected": -0.28652042150497437,
|
2274 |
+
"step": 1500
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 7.6455696202531644,
|
2278 |
+
"grad_norm": 2707102.044355496,
|
2279 |
+
"learning_rate": 3.190222500783453e-07,
|
2280 |
+
"logits/chosen": -4.715664863586426,
|
2281 |
+
"logits/rejected": -4.245431900024414,
|
2282 |
+
"logps/chosen": -101.01532745361328,
|
2283 |
+
"logps/rejected": -561.7377319335938,
|
2284 |
+
"loss": 29571.3625,
|
2285 |
+
"rewards/accuracies": 1.0,
|
2286 |
+
"rewards/chosen": 0.14493677020072937,
|
2287 |
+
"rewards/margins": 0.4646069407463074,
|
2288 |
+
"rewards/rejected": -0.3196701109409332,
|
2289 |
+
"step": 1510
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 7.69620253164557,
|
2293 |
+
"grad_norm": 1346703.2802720347,
|
2294 |
+
"learning_rate": 3.1745534315261674e-07,
|
2295 |
+
"logits/chosen": -2.4094414710998535,
|
2296 |
+
"logits/rejected": -2.316082715988159,
|
2297 |
+
"logps/chosen": -90.64556121826172,
|
2298 |
+
"logps/rejected": -524.6895751953125,
|
2299 |
+
"loss": 29962.2875,
|
2300 |
+
"rewards/accuracies": 1.0,
|
2301 |
+
"rewards/chosen": 0.1430484652519226,
|
2302 |
+
"rewards/margins": 0.4339544177055359,
|
2303 |
+
"rewards/rejected": -0.2909059524536133,
|
2304 |
+
"step": 1520
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 7.746835443037975,
|
2308 |
+
"grad_norm": 1570681.8076612286,
|
2309 |
+
"learning_rate": 3.158884362268881e-07,
|
2310 |
+
"logits/chosen": -1.977839708328247,
|
2311 |
+
"logits/rejected": -1.748456597328186,
|
2312 |
+
"logps/chosen": -95.17073822021484,
|
2313 |
+
"logps/rejected": -536.3465576171875,
|
2314 |
+
"loss": 29005.075,
|
2315 |
+
"rewards/accuracies": 0.987500011920929,
|
2316 |
+
"rewards/chosen": 0.13247540593147278,
|
2317 |
+
"rewards/margins": 0.44195109605789185,
|
2318 |
+
"rewards/rejected": -0.3094756603240967,
|
2319 |
+
"step": 1530
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 7.7974683544303796,
|
2323 |
+
"grad_norm": 1321655.562082779,
|
2324 |
+
"learning_rate": 3.143215293011595e-07,
|
2325 |
+
"logits/chosen": -5.75424861907959,
|
2326 |
+
"logits/rejected": -5.283251762390137,
|
2327 |
+
"logps/chosen": -109.5367202758789,
|
2328 |
+
"logps/rejected": -538.626220703125,
|
2329 |
+
"loss": 29057.1688,
|
2330 |
+
"rewards/accuracies": 0.9750000238418579,
|
2331 |
+
"rewards/chosen": 0.14621947705745697,
|
2332 |
+
"rewards/margins": 0.43537068367004395,
|
2333 |
+
"rewards/rejected": -0.2891511619091034,
|
2334 |
+
"step": 1540
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 7.848101265822785,
|
2338 |
+
"grad_norm": 1360253.1191038797,
|
2339 |
+
"learning_rate": 3.1275462237543087e-07,
|
2340 |
+
"logits/chosen": -3.4590229988098145,
|
2341 |
+
"logits/rejected": -3.5962212085723877,
|
2342 |
+
"logps/chosen": -114.27938079833984,
|
2343 |
+
"logps/rejected": -566.5555419921875,
|
2344 |
+
"loss": 29716.3094,
|
2345 |
+
"rewards/accuracies": 1.0,
|
2346 |
+
"rewards/chosen": 0.1347774863243103,
|
2347 |
+
"rewards/margins": 0.44886675477027893,
|
2348 |
+
"rewards/rejected": -0.314089298248291,
|
2349 |
+
"step": 1550
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 7.89873417721519,
|
2353 |
+
"grad_norm": 1269167.0621019504,
|
2354 |
+
"learning_rate": 3.111877154497023e-07,
|
2355 |
+
"logits/chosen": -1.0884647369384766,
|
2356 |
+
"logits/rejected": -0.7194244265556335,
|
2357 |
+
"logps/chosen": -89.07111358642578,
|
2358 |
+
"logps/rejected": -494.15789794921875,
|
2359 |
+
"loss": 29335.9875,
|
2360 |
+
"rewards/accuracies": 0.9750000238418579,
|
2361 |
+
"rewards/chosen": 0.1284293383359909,
|
2362 |
+
"rewards/margins": 0.4071559011936188,
|
2363 |
+
"rewards/rejected": -0.2787265181541443,
|
2364 |
+
"step": 1560
|
2365 |
+
},
|
2366 |
+
{
|
2367 |
+
"epoch": 7.949367088607595,
|
2368 |
+
"grad_norm": 1453875.4579149496,
|
2369 |
+
"learning_rate": 3.0962080852397364e-07,
|
2370 |
+
"logits/chosen": -2.750883102416992,
|
2371 |
+
"logits/rejected": -3.123683452606201,
|
2372 |
+
"logps/chosen": -98.0600357055664,
|
2373 |
+
"logps/rejected": -508.206298828125,
|
2374 |
+
"loss": 29392.4875,
|
2375 |
+
"rewards/accuracies": 0.9624999761581421,
|
2376 |
+
"rewards/chosen": 0.13056252896785736,
|
2377 |
+
"rewards/margins": 0.4083867073059082,
|
2378 |
+
"rewards/rejected": -0.2778242230415344,
|
2379 |
+
"step": 1570
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 8.0,
|
2383 |
+
"grad_norm": 1764041.9454831716,
|
2384 |
+
"learning_rate": 3.0805390159824505e-07,
|
2385 |
+
"logits/chosen": -3.7020182609558105,
|
2386 |
+
"logits/rejected": -2.8675622940063477,
|
2387 |
+
"logps/chosen": -112.20640563964844,
|
2388 |
+
"logps/rejected": -527.1363525390625,
|
2389 |
+
"loss": 30214.225,
|
2390 |
+
"rewards/accuracies": 1.0,
|
2391 |
+
"rewards/chosen": 0.1370132714509964,
|
2392 |
+
"rewards/margins": 0.42148295044898987,
|
2393 |
+
"rewards/rejected": -0.2844696640968323,
|
2394 |
+
"step": 1580
|
2395 |
}
|
2396 |
],
|
2397 |
"logging_steps": 10,
|