Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/global_step395/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step395/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step395/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:086c52b28660ca782f6ee675cc801159bd0026030f049bda56af9f9324fac11a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dbdabe9b8be25d459a371ec014f993acff4868b046e50ba4daca55e126096cb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca1c520ae86ae3b5a4db616c312bb6717f6951825750c2ed8cda24633db64023
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9395c5e3a847278aba424706d7d370416acba646f8ebcd45eaf4ca591ca22f6
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d13964d138f5fd50dfcc8161b79ecb300ccd9416b4028fd2a4534186ce1009ac
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ae90343b471783dc9fe197f8da232df3bddc6ebdfa93965ead020d8dc6cbd08
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16e48c3bcf2e872f10422d3cf57963915450d25d29a7204d0c7ae86254ba000e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a307c885ef2ba3e560866bf94c14405881b4e572d92f5b10e2fa9ac1e170aa8a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step395/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75bb878a0ed7adc2cface3f9bc8e631a0aab4f0e45334e96e3fe13d39bcff9de
|
3 |
+
size 85570
|
last-checkpoint/global_step395/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47dbee4c51823b9b6cd890f3af7501acf75345058e84edd142d58a0b3781d53e
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e64fcc2246057ef1b8fdd6d60377768654cb2153f5b61e9078abbc625ee500f6
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af5c06052957a77edb29cfb1b5a7c8d5f7db0d7612c989a726e0ba2ae2150d79
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1eb333edde2ea77272b49d6674c33db7d9e9531b3547fa98a6d6b0bea7f89f2c
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9eb7f7e9313028c684e3a54685d085b32caf69a34848d7376578882c5865ed48
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1dd1e18b37316704dc77c6de9d1e5716674a83e6f94c71c71b5c9e646cf4d8b
|
3 |
+
size 85506
|
last-checkpoint/global_step395/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25726a31f653af8737c40bd8a739caed00f3b94b1460a5a80e1fdf39cabfe2db
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step395
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:731d149712afa98318e30f48083df4b1e2a718b770eb7e709dd7592a1503e28f
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ccbe171405888a5d7edb0b1d77178ea2e7f377564c0c2a9d150a60457fc4e88
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad8a35afd8967cbb748405387e44426e43ad127028e826eddc9b67d2ca873c85
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f338ce80d7c441076bfc8c53b84067a0181f5a14e80c13d5acb8150b659f4d73
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9fbc9fa428939be10b46779f0eb5cd833e0da426b1cbdee77b3a55b6952235b
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac55dba0b79d5fa4699d239da2f966d52040d576d31234ac8d4632e6956481bc
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2d0c015100768ffa23faf3b6c2d54ea89eb045603e30e55cd211e06ff34972
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c60a1b40608e34bc801c8231f97b81c53b5290dfaed1b9cd0ccbeca29574a991
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ad6a142a403eb9aafc4a3a9a856bca648fe31fd22d796867baca31fb13656aa
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38bc23a138cc800b22881742c0f3f9a71731a9a7111c6058a0077e6274d21773
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4d84901e6619ac64838b9206ce48d7b495f836174ff70c8bf3fe0cbb24ca9e1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -307,6 +307,306 @@
|
|
307 |
"rewards/margins": 0.00019036220328416675,
|
308 |
"rewards/rejected": -0.0012195140589028597,
|
309 |
"step": 190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
}
|
311 |
],
|
312 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 395,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
307 |
"rewards/margins": 0.00019036220328416675,
|
308 |
"rewards/rejected": -0.0012195140589028597,
|
309 |
"step": 190
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 1.0126582278481013,
|
313 |
+
"grad_norm": 497332.98430491646,
|
314 |
+
"learning_rate": 2.8169014084507043e-07,
|
315 |
+
"logits/chosen": -16.127140045166016,
|
316 |
+
"logits/rejected": -15.988116264343262,
|
317 |
+
"logps/chosen": -225.9070587158203,
|
318 |
+
"logps/rejected": -227.90145874023438,
|
319 |
+
"loss": 126358.875,
|
320 |
+
"rewards/accuracies": 0.6000000238418579,
|
321 |
+
"rewards/chosen": -0.0012379485415294766,
|
322 |
+
"rewards/margins": 0.0006675361073575914,
|
323 |
+
"rewards/rejected": -0.001905484707094729,
|
324 |
+
"step": 200
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 1.0632911392405062,
|
328 |
+
"grad_norm": 935063.760892245,
|
329 |
+
"learning_rate": 2.957746478873239e-07,
|
330 |
+
"logits/chosen": -16.701793670654297,
|
331 |
+
"logits/rejected": -16.669902801513672,
|
332 |
+
"logps/chosen": -230.3677520751953,
|
333 |
+
"logps/rejected": -229.03921508789062,
|
334 |
+
"loss": 124250.775,
|
335 |
+
"rewards/accuracies": 0.737500011920929,
|
336 |
+
"rewards/chosen": -0.0006614397279918194,
|
337 |
+
"rewards/margins": 0.003238010685890913,
|
338 |
+
"rewards/rejected": -0.003899450646713376,
|
339 |
+
"step": 210
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 1.1139240506329113,
|
343 |
+
"grad_norm": 517399.2020129059,
|
344 |
+
"learning_rate": 3.098591549295774e-07,
|
345 |
+
"logits/chosen": -16.413972854614258,
|
346 |
+
"logits/rejected": -16.371458053588867,
|
347 |
+
"logps/chosen": -247.8984832763672,
|
348 |
+
"logps/rejected": -249.5322723388672,
|
349 |
+
"loss": 124993.7375,
|
350 |
+
"rewards/accuracies": 0.762499988079071,
|
351 |
+
"rewards/chosen": -0.0012706981506198645,
|
352 |
+
"rewards/margins": 0.003060612827539444,
|
353 |
+
"rewards/rejected": -0.004331310745328665,
|
354 |
+
"step": 220
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 1.1645569620253164,
|
358 |
+
"grad_norm": 499036.7717944408,
|
359 |
+
"learning_rate": 3.23943661971831e-07,
|
360 |
+
"logits/chosen": -15.908624649047852,
|
361 |
+
"logits/rejected": -15.847338676452637,
|
362 |
+
"logps/chosen": -236.7013397216797,
|
363 |
+
"logps/rejected": -239.3136749267578,
|
364 |
+
"loss": 122842.5,
|
365 |
+
"rewards/accuracies": 0.800000011920929,
|
366 |
+
"rewards/chosen": -0.0006655483739450574,
|
367 |
+
"rewards/margins": 0.0032406128011643887,
|
368 |
+
"rewards/rejected": -0.0039061610586941242,
|
369 |
+
"step": 230
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 1.2151898734177216,
|
373 |
+
"grad_norm": 540681.7856619481,
|
374 |
+
"learning_rate": 3.380281690140845e-07,
|
375 |
+
"logits/chosen": -16.052249908447266,
|
376 |
+
"logits/rejected": -15.99653148651123,
|
377 |
+
"logps/chosen": -229.74832153320312,
|
378 |
+
"logps/rejected": -230.9803009033203,
|
379 |
+
"loss": 124587.3625,
|
380 |
+
"rewards/accuracies": 0.7749999761581421,
|
381 |
+
"rewards/chosen": -0.0007962372037582099,
|
382 |
+
"rewards/margins": 0.0025483998470008373,
|
383 |
+
"rewards/rejected": -0.003344637108966708,
|
384 |
+
"step": 240
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 1.2658227848101267,
|
388 |
+
"grad_norm": 1023950.8355601664,
|
389 |
+
"learning_rate": 3.52112676056338e-07,
|
390 |
+
"logits/chosen": -15.299288749694824,
|
391 |
+
"logits/rejected": -15.215815544128418,
|
392 |
+
"logps/chosen": -231.2301788330078,
|
393 |
+
"logps/rejected": -232.03359985351562,
|
394 |
+
"loss": 121822.4,
|
395 |
+
"rewards/accuracies": 0.7250000238418579,
|
396 |
+
"rewards/chosen": -7.512583579227794e-06,
|
397 |
+
"rewards/margins": 0.003883513854816556,
|
398 |
+
"rewards/rejected": -0.0038910270668566227,
|
399 |
+
"step": 250
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 1.3164556962025316,
|
403 |
+
"grad_norm": 620253.8184950812,
|
404 |
+
"learning_rate": 3.6619718309859155e-07,
|
405 |
+
"logits/chosen": -16.167770385742188,
|
406 |
+
"logits/rejected": -15.915590286254883,
|
407 |
+
"logps/chosen": -238.9904327392578,
|
408 |
+
"logps/rejected": -239.73953247070312,
|
409 |
+
"loss": 123388.8625,
|
410 |
+
"rewards/accuracies": 0.7875000238418579,
|
411 |
+
"rewards/chosen": -0.00017356239550281316,
|
412 |
+
"rewards/margins": 0.0050824107602238655,
|
413 |
+
"rewards/rejected": -0.005255972500890493,
|
414 |
+
"step": 260
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 1.3670886075949367,
|
418 |
+
"grad_norm": 575104.3218096169,
|
419 |
+
"learning_rate": 3.8028169014084507e-07,
|
420 |
+
"logits/chosen": -15.480558395385742,
|
421 |
+
"logits/rejected": -15.386639595031738,
|
422 |
+
"logps/chosen": -241.60879516601562,
|
423 |
+
"logps/rejected": -250.003173828125,
|
424 |
+
"loss": 123555.7,
|
425 |
+
"rewards/accuracies": 0.737500011920929,
|
426 |
+
"rewards/chosen": -0.001139859901741147,
|
427 |
+
"rewards/margins": 0.005077657289803028,
|
428 |
+
"rewards/rejected": -0.0062175169587135315,
|
429 |
+
"step": 270
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 1.4177215189873418,
|
433 |
+
"grad_norm": 601224.4433091934,
|
434 |
+
"learning_rate": 3.9436619718309853e-07,
|
435 |
+
"logits/chosen": -15.266016960144043,
|
436 |
+
"logits/rejected": -15.313554763793945,
|
437 |
+
"logps/chosen": -230.73397827148438,
|
438 |
+
"logps/rejected": -237.3317108154297,
|
439 |
+
"loss": 125556.675,
|
440 |
+
"rewards/accuracies": 0.762499988079071,
|
441 |
+
"rewards/chosen": 0.0007209269679151475,
|
442 |
+
"rewards/margins": 0.00534270191565156,
|
443 |
+
"rewards/rejected": -0.004621774889528751,
|
444 |
+
"step": 280
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.4683544303797469,
|
448 |
+
"grad_norm": 751936.3077706753,
|
449 |
+
"learning_rate": 4.084507042253521e-07,
|
450 |
+
"logits/chosen": -14.600263595581055,
|
451 |
+
"logits/rejected": -14.538311958312988,
|
452 |
+
"logps/chosen": -224.1177520751953,
|
453 |
+
"logps/rejected": -226.97879028320312,
|
454 |
+
"loss": 123584.675,
|
455 |
+
"rewards/accuracies": 0.7875000238418579,
|
456 |
+
"rewards/chosen": 0.0011863496620208025,
|
457 |
+
"rewards/margins": 0.007649322040379047,
|
458 |
+
"rewards/rejected": -0.006462973542511463,
|
459 |
+
"step": 290
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 1.518987341772152,
|
463 |
+
"grad_norm": 575660.5828565176,
|
464 |
+
"learning_rate": 4.225352112676056e-07,
|
465 |
+
"logits/chosen": -14.935551643371582,
|
466 |
+
"logits/rejected": -15.062429428100586,
|
467 |
+
"logps/chosen": -235.7123565673828,
|
468 |
+
"logps/rejected": -245.36181640625,
|
469 |
+
"loss": 122562.1375,
|
470 |
+
"rewards/accuracies": 0.75,
|
471 |
+
"rewards/chosen": 0.0014863747637718916,
|
472 |
+
"rewards/margins": 0.0057060932740569115,
|
473 |
+
"rewards/rejected": -0.0042197187431156635,
|
474 |
+
"step": 300
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"epoch": 1.5696202531645569,
|
478 |
+
"grad_norm": 619514.1083852616,
|
479 |
+
"learning_rate": 4.366197183098591e-07,
|
480 |
+
"logits/chosen": -14.678690910339355,
|
481 |
+
"logits/rejected": -14.617218017578125,
|
482 |
+
"logps/chosen": -229.6386260986328,
|
483 |
+
"logps/rejected": -234.1474151611328,
|
484 |
+
"loss": 123630.225,
|
485 |
+
"rewards/accuracies": 0.699999988079071,
|
486 |
+
"rewards/chosen": 0.0006864996394142509,
|
487 |
+
"rewards/margins": 0.004933560267090797,
|
488 |
+
"rewards/rejected": -0.004247060976922512,
|
489 |
+
"step": 310
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.620253164556962,
|
493 |
+
"grad_norm": 738538.1512211321,
|
494 |
+
"learning_rate": 4.5070422535211266e-07,
|
495 |
+
"logits/chosen": -14.131611824035645,
|
496 |
+
"logits/rejected": -14.156657218933105,
|
497 |
+
"logps/chosen": -241.20156860351562,
|
498 |
+
"logps/rejected": -248.2321319580078,
|
499 |
+
"loss": 124158.6,
|
500 |
+
"rewards/accuracies": 0.699999988079071,
|
501 |
+
"rewards/chosen": -0.0009155808947980404,
|
502 |
+
"rewards/margins": 0.006913213524967432,
|
503 |
+
"rewards/rejected": -0.007828795351088047,
|
504 |
+
"step": 320
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 1.6708860759493671,
|
508 |
+
"grad_norm": 688317.7143989427,
|
509 |
+
"learning_rate": 4.647887323943662e-07,
|
510 |
+
"logits/chosen": -13.791796684265137,
|
511 |
+
"logits/rejected": -13.970884323120117,
|
512 |
+
"logps/chosen": -228.53079223632812,
|
513 |
+
"logps/rejected": -235.5008087158203,
|
514 |
+
"loss": 123378.175,
|
515 |
+
"rewards/accuracies": 0.7250000238418579,
|
516 |
+
"rewards/chosen": 0.0017698236042633653,
|
517 |
+
"rewards/margins": 0.006004182621836662,
|
518 |
+
"rewards/rejected": -0.004234359599649906,
|
519 |
+
"step": 330
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 1.721518987341772,
|
523 |
+
"grad_norm": 693314.5034252935,
|
524 |
+
"learning_rate": 4.788732394366196e-07,
|
525 |
+
"logits/chosen": -13.555567741394043,
|
526 |
+
"logits/rejected": -13.32630729675293,
|
527 |
+
"logps/chosen": -227.0249481201172,
|
528 |
+
"logps/rejected": -232.2772216796875,
|
529 |
+
"loss": 122521.475,
|
530 |
+
"rewards/accuracies": 0.7875000238418579,
|
531 |
+
"rewards/chosen": 0.001143553527072072,
|
532 |
+
"rewards/margins": 0.009070896543562412,
|
533 |
+
"rewards/rejected": -0.00792734231799841,
|
534 |
+
"step": 340
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 1.7721518987341773,
|
538 |
+
"grad_norm": 758709.6120906892,
|
539 |
+
"learning_rate": 4.929577464788733e-07,
|
540 |
+
"logits/chosen": -13.520563125610352,
|
541 |
+
"logits/rejected": -13.633130073547363,
|
542 |
+
"logps/chosen": -234.7182159423828,
|
543 |
+
"logps/rejected": -248.12890625,
|
544 |
+
"loss": 121557.575,
|
545 |
+
"rewards/accuracies": 0.675000011920929,
|
546 |
+
"rewards/chosen": -0.00047356385039165616,
|
547 |
+
"rewards/margins": 0.00813873577862978,
|
548 |
+
"rewards/rejected": -0.008612299337983131,
|
549 |
+
"step": 350
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"epoch": 1.8227848101265822,
|
553 |
+
"grad_norm": 689974.393201542,
|
554 |
+
"learning_rate": 4.992165465371357e-07,
|
555 |
+
"logits/chosen": -12.841153144836426,
|
556 |
+
"logits/rejected": -12.86094856262207,
|
557 |
+
"logps/chosen": -232.314697265625,
|
558 |
+
"logps/rejected": -232.64297485351562,
|
559 |
+
"loss": 121436.65,
|
560 |
+
"rewards/accuracies": 0.7124999761581421,
|
561 |
+
"rewards/chosen": 0.0036194869317114353,
|
562 |
+
"rewards/margins": 0.009506477043032646,
|
563 |
+
"rewards/rejected": -0.005886988714337349,
|
564 |
+
"step": 360
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"epoch": 1.8734177215189873,
|
568 |
+
"grad_norm": 883375.543329047,
|
569 |
+
"learning_rate": 4.976496396114071e-07,
|
570 |
+
"logits/chosen": -12.77904224395752,
|
571 |
+
"logits/rejected": -12.76900577545166,
|
572 |
+
"logps/chosen": -239.8730010986328,
|
573 |
+
"logps/rejected": -251.4569549560547,
|
574 |
+
"loss": 122456.925,
|
575 |
+
"rewards/accuracies": 0.75,
|
576 |
+
"rewards/chosen": -0.0006393647054210305,
|
577 |
+
"rewards/margins": 0.008665768429636955,
|
578 |
+
"rewards/rejected": -0.009305133484303951,
|
579 |
+
"step": 370
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 1.9240506329113924,
|
583 |
+
"grad_norm": 797554.0864386982,
|
584 |
+
"learning_rate": 4.960827326856785e-07,
|
585 |
+
"logits/chosen": -13.028135299682617,
|
586 |
+
"logits/rejected": -13.148831367492676,
|
587 |
+
"logps/chosen": -237.040771484375,
|
588 |
+
"logps/rejected": -244.45181274414062,
|
589 |
+
"loss": 124907.725,
|
590 |
+
"rewards/accuracies": 0.699999988079071,
|
591 |
+
"rewards/chosen": 0.0025544934906065464,
|
592 |
+
"rewards/margins": 0.008132859133183956,
|
593 |
+
"rewards/rejected": -0.005578366108238697,
|
594 |
+
"step": 380
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 1.9746835443037973,
|
598 |
+
"grad_norm": 793120.1180084129,
|
599 |
+
"learning_rate": 4.945158257599498e-07,
|
600 |
+
"logits/chosen": -12.312803268432617,
|
601 |
+
"logits/rejected": -12.135167121887207,
|
602 |
+
"logps/chosen": -235.60360717773438,
|
603 |
+
"logps/rejected": -242.9219207763672,
|
604 |
+
"loss": 121583.8,
|
605 |
+
"rewards/accuracies": 0.75,
|
606 |
+
"rewards/chosen": 0.003660207614302635,
|
607 |
+
"rewards/margins": 0.011001082137227058,
|
608 |
+
"rewards/rejected": -0.007340874522924423,
|
609 |
+
"step": 390
|
610 |
}
|
611 |
],
|
612 |
"logging_steps": 10,
|