cat-searcher commited on
Commit
9165fc3
·
verified ·
1 Parent(s): 731b321

Training in progress, epoch 2, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step395/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step395/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step395/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step395/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step395/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step395/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step395/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step395/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step395/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step395/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step395/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step395/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step395/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step395/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step395/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step395/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step395/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086c52b28660ca782f6ee675cc801159bd0026030f049bda56af9f9324fac11a
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbdabe9b8be25d459a371ec014f993acff4868b046e50ba4daca55e126096cb
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca1c520ae86ae3b5a4db616c312bb6717f6951825750c2ed8cda24633db64023
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9395c5e3a847278aba424706d7d370416acba646f8ebcd45eaf4ca591ca22f6
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13964d138f5fd50dfcc8161b79ecb300ccd9416b4028fd2a4534186ce1009ac
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae90343b471783dc9fe197f8da232df3bddc6ebdfa93965ead020d8dc6cbd08
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16e48c3bcf2e872f10422d3cf57963915450d25d29a7204d0c7ae86254ba000e
3
+ size 2506176112
last-checkpoint/global_step395/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a307c885ef2ba3e560866bf94c14405881b4e572d92f5b10e2fa9ac1e170aa8a
3
+ size 2506176112
last-checkpoint/global_step395/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75bb878a0ed7adc2cface3f9bc8e631a0aab4f0e45334e96e3fe13d39bcff9de
3
+ size 85570
last-checkpoint/global_step395/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47dbee4c51823b9b6cd890f3af7501acf75345058e84edd142d58a0b3781d53e
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e64fcc2246057ef1b8fdd6d60377768654cb2153f5b61e9078abbc625ee500f6
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af5c06052957a77edb29cfb1b5a7c8d5f7db0d7612c989a726e0ba2ae2150d79
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb333edde2ea77272b49d6674c33db7d9e9531b3547fa98a6d6b0bea7f89f2c
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb7f7e9313028c684e3a54685d085b32caf69a34848d7376578882c5865ed48
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1dd1e18b37316704dc77c6de9d1e5716674a83e6f94c71c71b5c9e646cf4d8b
3
+ size 85506
last-checkpoint/global_step395/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25726a31f653af8737c40bd8a739caed00f3b94b1460a5a80e1fdf39cabfe2db
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step197
 
1
+ global_step395
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27d7c0dfbbc6e28570f86b38b11d2817a5b8c5c2bc889b8312668d033b216d0a
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731d149712afa98318e30f48083df4b1e2a718b770eb7e709dd7592a1503e28f
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78e0d61af8a24b88b19ca4a8e4ee2f71049da12e01f51c0fb9e87eef0586b015
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ccbe171405888a5d7edb0b1d77178ea2e7f377564c0c2a9d150a60457fc4e88
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:575119a228f98110923ffa2dedcb50e3317251b26054355d015e0b2240d566f2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad8a35afd8967cbb748405387e44426e43ad127028e826eddc9b67d2ca873c85
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0728b56dab7abb5ef8a0d4bae3519c5767c97467bdd886d26bf19cc8599d0312
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f338ce80d7c441076bfc8c53b84067a0181f5a14e80c13d5acb8150b659f4d73
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4e481d4ef1546694da7337f6bb6c658b866dcb79b85deeb477da0d27ebe851e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9fbc9fa428939be10b46779f0eb5cd833e0da426b1cbdee77b3a55b6952235b
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:353c60be37ea56fc992fca446598ceca5d1fd002aa3bd6dbb9ad740e6f47ebb3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac55dba0b79d5fa4699d239da2f966d52040d576d31234ac8d4632e6956481bc
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9107fe964ba7205e354084b85210e5a5ea1c98cfd4d38adb9cd3926945dcae4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2d0c015100768ffa23faf3b6c2d54ea89eb045603e30e55cd211e06ff34972
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69d1bb1abee38b92e53f3f23549b642ce0f1edcdccf7b6129847ac61636e96d5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c60a1b40608e34bc801c8231f97b81c53b5290dfaed1b9cd0ccbeca29574a991
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afd5516048e20f36959601574e29e40106085a7d3cdc7bf425ce5e84633490e6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad6a142a403eb9aafc4a3a9a856bca648fe31fd22d796867baca31fb13656aa
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e2c46927fc06939b4c976a01e4b95dec1f8b98ceaea86d31a5d756fc30ff006
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bc23a138cc800b22881742c0f3f9a71731a9a7111c6058a0077e6274d21773
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc279b723e739fa2285d5a9080ed83f238eeeaa9774b6d10de050af2cc8ff7be
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d84901e6619ac64838b9206ce48d7b495f836174ff70c8bf3fe0cbb24ca9e1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9974683544303797,
5
  "eval_steps": 100,
6
- "global_step": 197,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,6 +307,306 @@
307
  "rewards/margins": 0.00019036220328416675,
308
  "rewards/rejected": -0.0012195140589028597,
309
  "step": 190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 100,
6
+ "global_step": 395,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "rewards/margins": 0.00019036220328416675,
308
  "rewards/rejected": -0.0012195140589028597,
309
  "step": 190
310
+ },
311
+ {
312
+ "epoch": 1.0126582278481013,
313
+ "grad_norm": 497332.98430491646,
314
+ "learning_rate": 2.8169014084507043e-07,
315
+ "logits/chosen": -16.127140045166016,
316
+ "logits/rejected": -15.988116264343262,
317
+ "logps/chosen": -225.9070587158203,
318
+ "logps/rejected": -227.90145874023438,
319
+ "loss": 126358.875,
320
+ "rewards/accuracies": 0.6000000238418579,
321
+ "rewards/chosen": -0.0012379485415294766,
322
+ "rewards/margins": 0.0006675361073575914,
323
+ "rewards/rejected": -0.001905484707094729,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 1.0632911392405062,
328
+ "grad_norm": 935063.760892245,
329
+ "learning_rate": 2.957746478873239e-07,
330
+ "logits/chosen": -16.701793670654297,
331
+ "logits/rejected": -16.669902801513672,
332
+ "logps/chosen": -230.3677520751953,
333
+ "logps/rejected": -229.03921508789062,
334
+ "loss": 124250.775,
335
+ "rewards/accuracies": 0.737500011920929,
336
+ "rewards/chosen": -0.0006614397279918194,
337
+ "rewards/margins": 0.003238010685890913,
338
+ "rewards/rejected": -0.003899450646713376,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 1.1139240506329113,
343
+ "grad_norm": 517399.2020129059,
344
+ "learning_rate": 3.098591549295774e-07,
345
+ "logits/chosen": -16.413972854614258,
346
+ "logits/rejected": -16.371458053588867,
347
+ "logps/chosen": -247.8984832763672,
348
+ "logps/rejected": -249.5322723388672,
349
+ "loss": 124993.7375,
350
+ "rewards/accuracies": 0.762499988079071,
351
+ "rewards/chosen": -0.0012706981506198645,
352
+ "rewards/margins": 0.003060612827539444,
353
+ "rewards/rejected": -0.004331310745328665,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 1.1645569620253164,
358
+ "grad_norm": 499036.7717944408,
359
+ "learning_rate": 3.23943661971831e-07,
360
+ "logits/chosen": -15.908624649047852,
361
+ "logits/rejected": -15.847338676452637,
362
+ "logps/chosen": -236.7013397216797,
363
+ "logps/rejected": -239.3136749267578,
364
+ "loss": 122842.5,
365
+ "rewards/accuracies": 0.800000011920929,
366
+ "rewards/chosen": -0.0006655483739450574,
367
+ "rewards/margins": 0.0032406128011643887,
368
+ "rewards/rejected": -0.0039061610586941242,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 1.2151898734177216,
373
+ "grad_norm": 540681.7856619481,
374
+ "learning_rate": 3.380281690140845e-07,
375
+ "logits/chosen": -16.052249908447266,
376
+ "logits/rejected": -15.99653148651123,
377
+ "logps/chosen": -229.74832153320312,
378
+ "logps/rejected": -230.9803009033203,
379
+ "loss": 124587.3625,
380
+ "rewards/accuracies": 0.7749999761581421,
381
+ "rewards/chosen": -0.0007962372037582099,
382
+ "rewards/margins": 0.0025483998470008373,
383
+ "rewards/rejected": -0.003344637108966708,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 1.2658227848101267,
388
+ "grad_norm": 1023950.8355601664,
389
+ "learning_rate": 3.52112676056338e-07,
390
+ "logits/chosen": -15.299288749694824,
391
+ "logits/rejected": -15.215815544128418,
392
+ "logps/chosen": -231.2301788330078,
393
+ "logps/rejected": -232.03359985351562,
394
+ "loss": 121822.4,
395
+ "rewards/accuracies": 0.7250000238418579,
396
+ "rewards/chosen": -7.512583579227794e-06,
397
+ "rewards/margins": 0.003883513854816556,
398
+ "rewards/rejected": -0.0038910270668566227,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 1.3164556962025316,
403
+ "grad_norm": 620253.8184950812,
404
+ "learning_rate": 3.6619718309859155e-07,
405
+ "logits/chosen": -16.167770385742188,
406
+ "logits/rejected": -15.915590286254883,
407
+ "logps/chosen": -238.9904327392578,
408
+ "logps/rejected": -239.73953247070312,
409
+ "loss": 123388.8625,
410
+ "rewards/accuracies": 0.7875000238418579,
411
+ "rewards/chosen": -0.00017356239550281316,
412
+ "rewards/margins": 0.0050824107602238655,
413
+ "rewards/rejected": -0.005255972500890493,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 1.3670886075949367,
418
+ "grad_norm": 575104.3218096169,
419
+ "learning_rate": 3.8028169014084507e-07,
420
+ "logits/chosen": -15.480558395385742,
421
+ "logits/rejected": -15.386639595031738,
422
+ "logps/chosen": -241.60879516601562,
423
+ "logps/rejected": -250.003173828125,
424
+ "loss": 123555.7,
425
+ "rewards/accuracies": 0.737500011920929,
426
+ "rewards/chosen": -0.001139859901741147,
427
+ "rewards/margins": 0.005077657289803028,
428
+ "rewards/rejected": -0.0062175169587135315,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 1.4177215189873418,
433
+ "grad_norm": 601224.4433091934,
434
+ "learning_rate": 3.9436619718309853e-07,
435
+ "logits/chosen": -15.266016960144043,
436
+ "logits/rejected": -15.313554763793945,
437
+ "logps/chosen": -230.73397827148438,
438
+ "logps/rejected": -237.3317108154297,
439
+ "loss": 125556.675,
440
+ "rewards/accuracies": 0.762499988079071,
441
+ "rewards/chosen": 0.0007209269679151475,
442
+ "rewards/margins": 0.00534270191565156,
443
+ "rewards/rejected": -0.004621774889528751,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 1.4683544303797469,
448
+ "grad_norm": 751936.3077706753,
449
+ "learning_rate": 4.084507042253521e-07,
450
+ "logits/chosen": -14.600263595581055,
451
+ "logits/rejected": -14.538311958312988,
452
+ "logps/chosen": -224.1177520751953,
453
+ "logps/rejected": -226.97879028320312,
454
+ "loss": 123584.675,
455
+ "rewards/accuracies": 0.7875000238418579,
456
+ "rewards/chosen": 0.0011863496620208025,
457
+ "rewards/margins": 0.007649322040379047,
458
+ "rewards/rejected": -0.006462973542511463,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 1.518987341772152,
463
+ "grad_norm": 575660.5828565176,
464
+ "learning_rate": 4.225352112676056e-07,
465
+ "logits/chosen": -14.935551643371582,
466
+ "logits/rejected": -15.062429428100586,
467
+ "logps/chosen": -235.7123565673828,
468
+ "logps/rejected": -245.36181640625,
469
+ "loss": 122562.1375,
470
+ "rewards/accuracies": 0.75,
471
+ "rewards/chosen": 0.0014863747637718916,
472
+ "rewards/margins": 0.0057060932740569115,
473
+ "rewards/rejected": -0.0042197187431156635,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 1.5696202531645569,
478
+ "grad_norm": 619514.1083852616,
479
+ "learning_rate": 4.366197183098591e-07,
480
+ "logits/chosen": -14.678690910339355,
481
+ "logits/rejected": -14.617218017578125,
482
+ "logps/chosen": -229.6386260986328,
483
+ "logps/rejected": -234.1474151611328,
484
+ "loss": 123630.225,
485
+ "rewards/accuracies": 0.699999988079071,
486
+ "rewards/chosen": 0.0006864996394142509,
487
+ "rewards/margins": 0.004933560267090797,
488
+ "rewards/rejected": -0.004247060976922512,
489
+ "step": 310
490
+ },
491
+ {
492
+ "epoch": 1.620253164556962,
493
+ "grad_norm": 738538.1512211321,
494
+ "learning_rate": 4.5070422535211266e-07,
495
+ "logits/chosen": -14.131611824035645,
496
+ "logits/rejected": -14.156657218933105,
497
+ "logps/chosen": -241.20156860351562,
498
+ "logps/rejected": -248.2321319580078,
499
+ "loss": 124158.6,
500
+ "rewards/accuracies": 0.699999988079071,
501
+ "rewards/chosen": -0.0009155808947980404,
502
+ "rewards/margins": 0.006913213524967432,
503
+ "rewards/rejected": -0.007828795351088047,
504
+ "step": 320
505
+ },
506
+ {
507
+ "epoch": 1.6708860759493671,
508
+ "grad_norm": 688317.7143989427,
509
+ "learning_rate": 4.647887323943662e-07,
510
+ "logits/chosen": -13.791796684265137,
511
+ "logits/rejected": -13.970884323120117,
512
+ "logps/chosen": -228.53079223632812,
513
+ "logps/rejected": -235.5008087158203,
514
+ "loss": 123378.175,
515
+ "rewards/accuracies": 0.7250000238418579,
516
+ "rewards/chosen": 0.0017698236042633653,
517
+ "rewards/margins": 0.006004182621836662,
518
+ "rewards/rejected": -0.004234359599649906,
519
+ "step": 330
520
+ },
521
+ {
522
+ "epoch": 1.721518987341772,
523
+ "grad_norm": 693314.5034252935,
524
+ "learning_rate": 4.788732394366196e-07,
525
+ "logits/chosen": -13.555567741394043,
526
+ "logits/rejected": -13.32630729675293,
527
+ "logps/chosen": -227.0249481201172,
528
+ "logps/rejected": -232.2772216796875,
529
+ "loss": 122521.475,
530
+ "rewards/accuracies": 0.7875000238418579,
531
+ "rewards/chosen": 0.001143553527072072,
532
+ "rewards/margins": 0.009070896543562412,
533
+ "rewards/rejected": -0.00792734231799841,
534
+ "step": 340
535
+ },
536
+ {
537
+ "epoch": 1.7721518987341773,
538
+ "grad_norm": 758709.6120906892,
539
+ "learning_rate": 4.929577464788733e-07,
540
+ "logits/chosen": -13.520563125610352,
541
+ "logits/rejected": -13.633130073547363,
542
+ "logps/chosen": -234.7182159423828,
543
+ "logps/rejected": -248.12890625,
544
+ "loss": 121557.575,
545
+ "rewards/accuracies": 0.675000011920929,
546
+ "rewards/chosen": -0.00047356385039165616,
547
+ "rewards/margins": 0.00813873577862978,
548
+ "rewards/rejected": -0.008612299337983131,
549
+ "step": 350
550
+ },
551
+ {
552
+ "epoch": 1.8227848101265822,
553
+ "grad_norm": 689974.393201542,
554
+ "learning_rate": 4.992165465371357e-07,
555
+ "logits/chosen": -12.841153144836426,
556
+ "logits/rejected": -12.86094856262207,
557
+ "logps/chosen": -232.314697265625,
558
+ "logps/rejected": -232.64297485351562,
559
+ "loss": 121436.65,
560
+ "rewards/accuracies": 0.7124999761581421,
561
+ "rewards/chosen": 0.0036194869317114353,
562
+ "rewards/margins": 0.009506477043032646,
563
+ "rewards/rejected": -0.005886988714337349,
564
+ "step": 360
565
+ },
566
+ {
567
+ "epoch": 1.8734177215189873,
568
+ "grad_norm": 883375.543329047,
569
+ "learning_rate": 4.976496396114071e-07,
570
+ "logits/chosen": -12.77904224395752,
571
+ "logits/rejected": -12.76900577545166,
572
+ "logps/chosen": -239.8730010986328,
573
+ "logps/rejected": -251.4569549560547,
574
+ "loss": 122456.925,
575
+ "rewards/accuracies": 0.75,
576
+ "rewards/chosen": -0.0006393647054210305,
577
+ "rewards/margins": 0.008665768429636955,
578
+ "rewards/rejected": -0.009305133484303951,
579
+ "step": 370
580
+ },
581
+ {
582
+ "epoch": 1.9240506329113924,
583
+ "grad_norm": 797554.0864386982,
584
+ "learning_rate": 4.960827326856785e-07,
585
+ "logits/chosen": -13.028135299682617,
586
+ "logits/rejected": -13.148831367492676,
587
+ "logps/chosen": -237.040771484375,
588
+ "logps/rejected": -244.45181274414062,
589
+ "loss": 124907.725,
590
+ "rewards/accuracies": 0.699999988079071,
591
+ "rewards/chosen": 0.0025544934906065464,
592
+ "rewards/margins": 0.008132859133183956,
593
+ "rewards/rejected": -0.005578366108238697,
594
+ "step": 380
595
+ },
596
+ {
597
+ "epoch": 1.9746835443037973,
598
+ "grad_norm": 793120.1180084129,
599
+ "learning_rate": 4.945158257599498e-07,
600
+ "logits/chosen": -12.312803268432617,
601
+ "logits/rejected": -12.135167121887207,
602
+ "logps/chosen": -235.60360717773438,
603
+ "logps/rejected": -242.9219207763672,
604
+ "loss": 121583.8,
605
+ "rewards/accuracies": 0.75,
606
+ "rewards/chosen": 0.003660207614302635,
607
+ "rewards/margins": 0.011001082137227058,
608
+ "rewards/rejected": -0.007340874522924423,
609
+ "step": 390
610
  }
611
  ],
612
  "logging_steps": 10,