|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 500, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 819697.7987526867, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -2.897020101547241, |
|
"logits/rejected": -2.8810553550720215, |
|
"logps/chosen": -281.18853759765625, |
|
"logps/rejected": -241.4916534423828, |
|
"loss": 62511.5062, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -281.18853759765625, |
|
"rewards/margins": -39.69694519042969, |
|
"rewards/rejected": -241.4916534423828, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 856447.0339256247, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.8515119552612305, |
|
"logits/rejected": -2.852177381515503, |
|
"logps/chosen": -227.5166778564453, |
|
"logps/rejected": -218.9936065673828, |
|
"loss": 62508.0563, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -227.5166778564453, |
|
"rewards/margins": -8.523069381713867, |
|
"rewards/rejected": -218.9936065673828, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 608077.0241737472, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -2.8871281147003174, |
|
"logits/rejected": -2.8566455841064453, |
|
"logps/chosen": -296.6144104003906, |
|
"logps/rejected": -248.87496948242188, |
|
"loss": 62494.775, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -296.6144104003906, |
|
"rewards/margins": -47.739437103271484, |
|
"rewards/rejected": -248.87496948242188, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 547713.4134125254, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.8649909496307373, |
|
"logits/rejected": -2.900007963180542, |
|
"logps/chosen": -300.6615905761719, |
|
"logps/rejected": -290.6969909667969, |
|
"loss": 62498.0375, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -300.6615905761719, |
|
"rewards/margins": -9.964593887329102, |
|
"rewards/rejected": -290.6969909667969, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 550202.226793022, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -2.861807346343994, |
|
"logits/rejected": -2.8286397457122803, |
|
"logps/chosen": -297.4012756347656, |
|
"logps/rejected": -225.73532104492188, |
|
"loss": 62479.4313, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -297.4012756347656, |
|
"rewards/margins": -71.66590881347656, |
|
"rewards/rejected": -225.73532104492188, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 575687.3818314937, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.8637468814849854, |
|
"logits/rejected": -2.855187177658081, |
|
"logps/chosen": -261.7722473144531, |
|
"logps/rejected": -266.75311279296875, |
|
"loss": 62467.7375, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -261.7722473144531, |
|
"rewards/margins": 4.980858325958252, |
|
"rewards/rejected": -266.75311279296875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 601042.0970547737, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -2.882888078689575, |
|
"logits/rejected": -2.8436450958251953, |
|
"logps/chosen": -322.3620300292969, |
|
"logps/rejected": -236.65188598632812, |
|
"loss": 62398.2562, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -322.3620300292969, |
|
"rewards/margins": -85.71016693115234, |
|
"rewards/rejected": -236.65188598632812, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 1270156.296221847, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.926880121231079, |
|
"logits/rejected": -2.873258590698242, |
|
"logps/chosen": -266.81585693359375, |
|
"logps/rejected": -222.47512817382812, |
|
"loss": 62382.9187, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -266.81585693359375, |
|
"rewards/margins": -44.34074401855469, |
|
"rewards/rejected": -222.47512817382812, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 562197.8356415116, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -2.934823989868164, |
|
"logits/rejected": -2.8437087535858154, |
|
"logps/chosen": -337.57647705078125, |
|
"logps/rejected": -253.1848602294922, |
|
"loss": 62295.2562, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -337.57647705078125, |
|
"rewards/margins": -84.39164733886719, |
|
"rewards/rejected": -253.1848602294922, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 579259.1669227169, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.8226637840270996, |
|
"logits/rejected": -2.8579444885253906, |
|
"logps/chosen": -235.44284057617188, |
|
"logps/rejected": -253.05126953125, |
|
"loss": 62140.85, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -235.44284057617188, |
|
"rewards/margins": 17.60841941833496, |
|
"rewards/rejected": -253.05126953125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 599221.2375408602, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -2.9071204662323, |
|
"logits/rejected": -2.86643385887146, |
|
"logps/chosen": -295.3536376953125, |
|
"logps/rejected": -295.96044921875, |
|
"loss": 62103.8438, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -295.3536376953125, |
|
"rewards/margins": 0.6068130731582642, |
|
"rewards/rejected": -295.96044921875, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 587217.7209315128, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.825546979904175, |
|
"logits/rejected": -2.853196620941162, |
|
"logps/chosen": -280.54376220703125, |
|
"logps/rejected": -290.41162109375, |
|
"loss": 61848.075, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -280.54376220703125, |
|
"rewards/margins": 9.867898941040039, |
|
"rewards/rejected": -290.41162109375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 692483.3715259883, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -2.8896777629852295, |
|
"logits/rejected": -2.869809150695801, |
|
"logps/chosen": -279.5859375, |
|
"logps/rejected": -267.8680725097656, |
|
"loss": 61784.125, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -279.5859375, |
|
"rewards/margins": -11.717863082885742, |
|
"rewards/rejected": -267.8680725097656, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 643621.4786223344, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.86277174949646, |
|
"logits/rejected": -2.849290132522583, |
|
"logps/chosen": -270.3601989746094, |
|
"logps/rejected": -299.9423828125, |
|
"loss": 61415.6375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -270.3601989746094, |
|
"rewards/margins": 29.582199096679688, |
|
"rewards/rejected": -299.9423828125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 797625.2394802963, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -2.875126600265503, |
|
"logits/rejected": -2.8260860443115234, |
|
"logps/chosen": -278.2060241699219, |
|
"logps/rejected": -263.739990234375, |
|
"loss": 61252.4812, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -278.2060241699219, |
|
"rewards/margins": -14.466039657592773, |
|
"rewards/rejected": -263.739990234375, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 570934.2395758026, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.8297553062438965, |
|
"logits/rejected": -2.818152904510498, |
|
"logps/chosen": -244.71047973632812, |
|
"logps/rejected": -216.3663330078125, |
|
"loss": 61182.05, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -244.71047973632812, |
|
"rewards/margins": -28.344135284423828, |
|
"rewards/rejected": -216.3663330078125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 647636.324219079, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -2.8677287101745605, |
|
"logits/rejected": -2.8416037559509277, |
|
"logps/chosen": -280.59759521484375, |
|
"logps/rejected": -278.1571044921875, |
|
"loss": 60841.875, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -280.59759521484375, |
|
"rewards/margins": -2.440479278564453, |
|
"rewards/rejected": -278.1571044921875, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 693686.6913297386, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.8715648651123047, |
|
"logits/rejected": -2.886065721511841, |
|
"logps/chosen": -303.4865417480469, |
|
"logps/rejected": -300.1495361328125, |
|
"loss": 60200.25, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -303.4865417480469, |
|
"rewards/margins": -3.336996555328369, |
|
"rewards/rejected": -300.1495361328125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 695048.3682737482, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -2.8399910926818848, |
|
"logits/rejected": -2.8273520469665527, |
|
"logps/chosen": -285.8985900878906, |
|
"logps/rejected": -278.19525146484375, |
|
"loss": 59913.85, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -285.8985900878906, |
|
"rewards/margins": -7.703277587890625, |
|
"rewards/rejected": -278.19525146484375, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 926619.6100320778, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -2.839812994003296, |
|
"logits/rejected": -2.814923048019409, |
|
"logps/chosen": -280.0777587890625, |
|
"logps/rejected": -326.1065979003906, |
|
"loss": 58985.2125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -280.0777587890625, |
|
"rewards/margins": 46.02882385253906, |
|
"rewards/rejected": -326.1065979003906, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 730881.0367768478, |
|
"learning_rate": 4.998645842314724e-07, |
|
"logits/chosen": -2.8014039993286133, |
|
"logits/rejected": -2.7791314125061035, |
|
"logps/chosen": -325.879638671875, |
|
"logps/rejected": -323.22125244140625, |
|
"loss": 59519.525, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -325.879638671875, |
|
"rewards/margins": -2.658414363861084, |
|
"rewards/rejected": -323.22125244140625, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 787482.5379143337, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -2.877906322479248, |
|
"logits/rejected": -2.860431671142578, |
|
"logps/chosen": -331.8926086425781, |
|
"logps/rejected": -336.84979248046875, |
|
"loss": 59833.9437, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -331.8926086425781, |
|
"rewards/margins": 4.957190036773682, |
|
"rewards/rejected": -336.84979248046875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 758644.6025801541, |
|
"learning_rate": 4.993966686770933e-07, |
|
"logits/chosen": -2.8740134239196777, |
|
"logits/rejected": -2.849520683288574, |
|
"logps/chosen": -286.97998046875, |
|
"logps/rejected": -302.22589111328125, |
|
"loss": 59542.8562, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -286.97998046875, |
|
"rewards/margins": 15.245903015136719, |
|
"rewards/rejected": -302.22589111328125, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 839068.6603800668, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -2.8500800132751465, |
|
"logits/rejected": -2.813788414001465, |
|
"logps/chosen": -252.0970458984375, |
|
"logps/rejected": -269.5028381347656, |
|
"loss": 58766.425, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -252.0970458984375, |
|
"rewards/margins": 17.40580177307129, |
|
"rewards/rejected": -269.5028381347656, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 790620.3365762861, |
|
"learning_rate": 4.985952071837474e-07, |
|
"logits/chosen": -2.8092734813690186, |
|
"logits/rejected": -2.8068203926086426, |
|
"logps/chosen": -272.0372619628906, |
|
"logps/rejected": -282.043701171875, |
|
"loss": 57950.4375, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -272.0372619628906, |
|
"rewards/margins": 10.00644588470459, |
|
"rewards/rejected": -282.043701171875, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 800005.8930982946, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": -2.9066848754882812, |
|
"logits/rejected": -2.889483690261841, |
|
"logps/chosen": -358.52880859375, |
|
"logps/rejected": -351.5975341796875, |
|
"loss": 57769.1687, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -358.52880859375, |
|
"rewards/margins": -6.931341648101807, |
|
"rewards/rejected": -351.5975341796875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 991860.3958541746, |
|
"learning_rate": 4.974612716351446e-07, |
|
"logits/chosen": -2.8132946491241455, |
|
"logits/rejected": -2.807452917098999, |
|
"logps/chosen": -269.17333984375, |
|
"logps/rejected": -304.22784423828125, |
|
"loss": 57210.9125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -269.17333984375, |
|
"rewards/margins": 35.054466247558594, |
|
"rewards/rejected": -304.22784423828125, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 1217484.7693174647, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -2.881108045578003, |
|
"logits/rejected": -2.877159357070923, |
|
"logps/chosen": -324.2433166503906, |
|
"logps/rejected": -289.4080505371094, |
|
"loss": 58436.2625, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -324.2433166503906, |
|
"rewards/margins": -34.83523941040039, |
|
"rewards/rejected": -289.4080505371094, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 1144094.5245424435, |
|
"learning_rate": 4.95996378569574e-07, |
|
"logits/chosen": -2.861013889312744, |
|
"logits/rejected": -2.8163299560546875, |
|
"logps/chosen": -310.35223388671875, |
|
"logps/rejected": -315.37078857421875, |
|
"loss": 56525.3125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -310.35223388671875, |
|
"rewards/margins": 5.018545627593994, |
|
"rewards/rejected": -315.37078857421875, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 906591.8638946635, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": -2.8345422744750977, |
|
"logits/rejected": -2.8686890602111816, |
|
"logps/chosen": -276.36981201171875, |
|
"logps/rejected": -285.62548828125, |
|
"loss": 58509.9375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -276.36981201171875, |
|
"rewards/margins": 9.255735397338867, |
|
"rewards/rejected": -285.62548828125, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 907999.1062550667, |
|
"learning_rate": 4.942024871516694e-07, |
|
"logits/chosen": -2.8697471618652344, |
|
"logits/rejected": -2.8267807960510254, |
|
"logps/chosen": -320.91058349609375, |
|
"logps/rejected": -321.4515075683594, |
|
"loss": 58345.9, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -320.91058349609375, |
|
"rewards/margins": 0.5409385561943054, |
|
"rewards/rejected": -321.4515075683594, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 885328.1305549938, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": -2.7532379627227783, |
|
"logits/rejected": -2.7566537857055664, |
|
"logps/chosen": -237.9208526611328, |
|
"logps/rejected": -254.9251251220703, |
|
"loss": 58183.8625, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -237.9208526611328, |
|
"rewards/margins": 17.00423812866211, |
|
"rewards/rejected": -254.9251251220703, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 1593598.7457023177, |
|
"learning_rate": 4.920819965521997e-07, |
|
"logits/chosen": -2.6699514389038086, |
|
"logits/rejected": -2.670328378677368, |
|
"logps/chosen": -305.18328857421875, |
|
"logps/rejected": -284.074951171875, |
|
"loss": 57758.7562, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -305.18328857421875, |
|
"rewards/margins": -21.108369827270508, |
|
"rewards/rejected": -284.074951171875, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 993568.4034911739, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": -2.7054855823516846, |
|
"logits/rejected": -2.7096757888793945, |
|
"logps/chosen": -286.2120666503906, |
|
"logps/rejected": -321.2934265136719, |
|
"loss": 57056.9187, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -286.2120666503906, |
|
"rewards/margins": 35.081356048583984, |
|
"rewards/rejected": -321.2934265136719, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 934004.2559217811, |
|
"learning_rate": 4.896377427393911e-07, |
|
"logits/chosen": -2.7484357357025146, |
|
"logits/rejected": -2.7158854007720947, |
|
"logps/chosen": -286.253662109375, |
|
"logps/rejected": -315.47406005859375, |
|
"loss": 57739.1625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -286.253662109375, |
|
"rewards/margins": 29.220422744750977, |
|
"rewards/rejected": -315.47406005859375, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 854532.9754199074, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": -2.6975908279418945, |
|
"logits/rejected": -2.697767972946167, |
|
"logps/chosen": -299.58221435546875, |
|
"logps/rejected": -306.10247802734375, |
|
"loss": 56578.5375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -299.58221435546875, |
|
"rewards/margins": 6.520210266113281, |
|
"rewards/rejected": -306.10247802734375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 1018591.7582230872, |
|
"learning_rate": 4.868729946860708e-07, |
|
"logits/chosen": -2.697580575942993, |
|
"logits/rejected": -2.6543309688568115, |
|
"logps/chosen": -300.19854736328125, |
|
"logps/rejected": -279.4755859375, |
|
"loss": 56696.2875, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -300.19854736328125, |
|
"rewards/margins": -20.722976684570312, |
|
"rewards/rejected": -279.4755859375, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 1865987.6965253549, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": -2.715686321258545, |
|
"logits/rejected": -2.6946115493774414, |
|
"logps/chosen": -261.4237060546875, |
|
"logps/rejected": -260.59552001953125, |
|
"loss": 55295.3625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -261.4237060546875, |
|
"rewards/margins": -0.8281745910644531, |
|
"rewards/rejected": -260.59552001953125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 2075031.8789570439, |
|
"learning_rate": 4.837914499977052e-07, |
|
"logits/chosen": -2.7049078941345215, |
|
"logits/rejected": -2.649726152420044, |
|
"logps/chosen": -348.7242126464844, |
|
"logps/rejected": -302.77056884765625, |
|
"loss": 56870.6875, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -348.7242126464844, |
|
"rewards/margins": -45.95365524291992, |
|
"rewards/rejected": -302.77056884765625, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 1044606.2904041886, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": -2.662055253982544, |
|
"logits/rejected": -2.6654608249664307, |
|
"logps/chosen": -240.9337921142578, |
|
"logps/rejected": -280.35516357421875, |
|
"loss": 57408.1, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -240.9337921142578, |
|
"rewards/margins": 39.421363830566406, |
|
"rewards/rejected": -280.35516357421875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 1613413.1304386982, |
|
"learning_rate": 4.80397229967181e-07, |
|
"logits/chosen": -2.5958218574523926, |
|
"logits/rejected": -2.5995872020721436, |
|
"logps/chosen": -260.1720275878906, |
|
"logps/rejected": -268.8197326660156, |
|
"loss": 57515.7125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -260.1720275878906, |
|
"rewards/margins": 8.647693634033203, |
|
"rewards/rejected": -268.8197326660156, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 965375.2956772823, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": -2.722567081451416, |
|
"logits/rejected": -2.6706037521362305, |
|
"logps/chosen": -301.97955322265625, |
|
"logps/rejected": -308.42462158203125, |
|
"loss": 56186.2937, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -301.97955322265625, |
|
"rewards/margins": 6.445120334625244, |
|
"rewards/rejected": -308.42462158203125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 1682586.8851408535, |
|
"learning_rate": 4.7669487406294076e-07, |
|
"logits/chosen": -2.691540479660034, |
|
"logits/rejected": -2.6860575675964355, |
|
"logps/chosen": -292.8274230957031, |
|
"logps/rejected": -352.30621337890625, |
|
"loss": 57221.1375, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -292.8274230957031, |
|
"rewards/margins": 59.47880172729492, |
|
"rewards/rejected": -352.30621337890625, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 1043252.7613651449, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": -2.7390644550323486, |
|
"logits/rejected": -2.6686208248138428, |
|
"logps/chosen": -251.1779327392578, |
|
"logps/rejected": -252.0894317626953, |
|
"loss": 56568.1813, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -251.1779327392578, |
|
"rewards/margins": 0.9114850163459778, |
|
"rewards/rejected": -252.0894317626953, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 1024702.6869019131, |
|
"learning_rate": 4.7268933385784627e-07, |
|
"logits/chosen": -2.682610273361206, |
|
"logits/rejected": -2.640778064727783, |
|
"logps/chosen": -247.3615264892578, |
|
"logps/rejected": -284.06402587890625, |
|
"loss": 56326.825, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -247.3615264892578, |
|
"rewards/margins": 36.7025146484375, |
|
"rewards/rejected": -284.06402587890625, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 977761.8133840163, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": -2.6460351943969727, |
|
"logits/rejected": -2.5948281288146973, |
|
"logps/chosen": -320.40252685546875, |
|
"logps/rejected": -341.423583984375, |
|
"loss": 56747.225, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -320.40252685546875, |
|
"rewards/margins": 21.021081924438477, |
|
"rewards/rejected": -341.423583984375, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 987590.8827444692, |
|
"learning_rate": 4.68385966406889e-07, |
|
"logits/chosen": -2.592116117477417, |
|
"logits/rejected": -2.5488688945770264, |
|
"logps/chosen": -270.15057373046875, |
|
"logps/rejected": -273.60870361328125, |
|
"loss": 57541.425, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -270.15057373046875, |
|
"rewards/margins": 3.458080768585205, |
|
"rewards/rejected": -273.60870361328125, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 1115088.0063134031, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": -2.5365209579467773, |
|
"logits/rejected": -2.5030362606048584, |
|
"logps/chosen": -303.14288330078125, |
|
"logps/rejected": -285.1949462890625, |
|
"loss": 57592.9, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -303.14288330078125, |
|
"rewards/margins": -17.947914123535156, |
|
"rewards/rejected": -285.1949462890625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 872007.2172912332, |
|
"learning_rate": 4.6379052708260356e-07, |
|
"logits/chosen": -2.571394443511963, |
|
"logits/rejected": -2.5047571659088135, |
|
"logps/chosen": -271.99029541015625, |
|
"logps/rejected": -268.821533203125, |
|
"loss": 57330.4187, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -271.99029541015625, |
|
"rewards/margins": -3.168781280517578, |
|
"rewards/rejected": -268.821533203125, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 1283172.0120638541, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": -2.591219663619995, |
|
"logits/rejected": -2.5536255836486816, |
|
"logps/chosen": -340.99761962890625, |
|
"logps/rejected": -338.05084228515625, |
|
"loss": 58544.5125, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -340.99761962890625, |
|
"rewards/margins": -2.9467933177948, |
|
"rewards/rejected": -338.05084228515625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 887220.7931197283, |
|
"learning_rate": 4.589091618777674e-07, |
|
"logits/chosen": -2.452988862991333, |
|
"logits/rejected": -2.426440715789795, |
|
"logps/chosen": -310.2080993652344, |
|
"logps/rejected": -326.74005126953125, |
|
"loss": 59796.9938, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -310.2080993652344, |
|
"rewards/margins": 16.531951904296875, |
|
"rewards/rejected": -326.74005126953125, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 1079677.4885566523, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": -2.6359188556671143, |
|
"logits/rejected": -2.5355026721954346, |
|
"logps/chosen": -310.75189208984375, |
|
"logps/rejected": -308.8578796386719, |
|
"loss": 59678.8, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -310.75189208984375, |
|
"rewards/margins": -1.893977403640747, |
|
"rewards/rejected": -308.8578796386719, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 1084565.0284754713, |
|
"learning_rate": 4.5374839918567996e-07, |
|
"logits/chosen": -2.6321051120758057, |
|
"logits/rejected": -2.567678928375244, |
|
"logps/chosen": -327.0538635253906, |
|
"logps/rejected": -316.69342041015625, |
|
"loss": 58093.8688, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -327.0538635253906, |
|
"rewards/margins": -10.360448837280273, |
|
"rewards/rejected": -316.69342041015625, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 1295931.023547164, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": -2.627354383468628, |
|
"logits/rejected": -2.5420610904693604, |
|
"logps/chosen": -284.817138671875, |
|
"logps/rejected": -295.0710144042969, |
|
"loss": 56263.8875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -284.817138671875, |
|
"rewards/margins": 10.253904342651367, |
|
"rewards/rejected": -295.0710144042969, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 1013086.5803710954, |
|
"learning_rate": 4.483151410690151e-07, |
|
"logits/chosen": -2.6444249153137207, |
|
"logits/rejected": -2.5427169799804688, |
|
"logps/chosen": -279.9425354003906, |
|
"logps/rejected": -270.55450439453125, |
|
"loss": 54940.2875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -279.9425354003906, |
|
"rewards/margins": -9.388038635253906, |
|
"rewards/rejected": -270.55450439453125, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 1576188.4710046574, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": -2.6243691444396973, |
|
"logits/rejected": -2.5685534477233887, |
|
"logps/chosen": -296.85418701171875, |
|
"logps/rejected": -310.7580261230469, |
|
"loss": 56116.5938, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -296.85418701171875, |
|
"rewards/margins": 13.903894424438477, |
|
"rewards/rejected": -310.7580261230469, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 1319520.840838825, |
|
"learning_rate": 4.4261665402892476e-07, |
|
"logits/chosen": -2.5911037921905518, |
|
"logits/rejected": -2.5209097862243652, |
|
"logps/chosen": -265.95025634765625, |
|
"logps/rejected": -334.61431884765625, |
|
"loss": 57866.4625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -265.95025634765625, |
|
"rewards/margins": 68.66404724121094, |
|
"rewards/rejected": -334.61431884765625, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 1164732.143957571, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": -2.6242473125457764, |
|
"logits/rejected": -2.5436782836914062, |
|
"logps/chosen": -349.99383544921875, |
|
"logps/rejected": -329.6797180175781, |
|
"loss": 56799.7375, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -349.99383544921875, |
|
"rewards/margins": -20.31418800354004, |
|
"rewards/rejected": -329.6797180175781, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 1036095.2706155936, |
|
"learning_rate": 4.3666055928673697e-07, |
|
"logits/chosen": -2.6259796619415283, |
|
"logits/rejected": -2.596653938293457, |
|
"logps/chosen": -294.8160400390625, |
|
"logps/rejected": -268.30645751953125, |
|
"loss": 55223.3125, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -294.8160400390625, |
|
"rewards/margins": -26.509592056274414, |
|
"rewards/rejected": -268.30645751953125, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 1421793.3450062282, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": -2.536402702331543, |
|
"logits/rejected": -2.470693588256836, |
|
"logps/chosen": -266.8374328613281, |
|
"logps/rejected": -269.9141845703125, |
|
"loss": 54316.75, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -266.8374328613281, |
|
"rewards/margins": 3.0767579078674316, |
|
"rewards/rejected": -269.9141845703125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 1145803.694963496, |
|
"learning_rate": 4.304548225912481e-07, |
|
"logits/chosen": -2.4925479888916016, |
|
"logits/rejected": -2.4637606143951416, |
|
"logps/chosen": -268.6978454589844, |
|
"logps/rejected": -288.32489013671875, |
|
"loss": 56123.5, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -268.6978454589844, |
|
"rewards/margins": 19.627042770385742, |
|
"rewards/rejected": -288.32489013671875, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 1320708.0317829524, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": -2.427326202392578, |
|
"logits/rejected": -2.375277519226074, |
|
"logps/chosen": -273.1225280761719, |
|
"logps/rejected": -294.8364562988281, |
|
"loss": 55285.2125, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -273.1225280761719, |
|
"rewards/margins": 21.713897705078125, |
|
"rewards/rejected": -294.8364562988281, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 1175173.27697323, |
|
"learning_rate": 4.2400774356524003e-07, |
|
"logits/chosen": -2.463435649871826, |
|
"logits/rejected": -2.390852689743042, |
|
"logps/chosen": -291.9449768066406, |
|
"logps/rejected": -351.9012756347656, |
|
"loss": 55227.475, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -291.9449768066406, |
|
"rewards/margins": 59.956260681152344, |
|
"rewards/rejected": -351.9012756347656, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 1736713.8372362903, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": -2.424726724624634, |
|
"logits/rejected": -2.4184367656707764, |
|
"logps/chosen": -293.0435485839844, |
|
"logps/rejected": -315.5990905761719, |
|
"loss": 56523.2438, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -293.0435485839844, |
|
"rewards/margins": 22.555578231811523, |
|
"rewards/rejected": -315.5990905761719, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 1392044.8934369895, |
|
"learning_rate": 4.1732794460547037e-07, |
|
"logits/chosen": -2.4518871307373047, |
|
"logits/rejected": -2.444579601287842, |
|
"logps/chosen": -241.4635009765625, |
|
"logps/rejected": -265.34478759765625, |
|
"loss": 57858.325, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -241.4635009765625, |
|
"rewards/margins": 23.881275177001953, |
|
"rewards/rejected": -265.34478759765625, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 1151359.8150083232, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": -2.4895317554473877, |
|
"logits/rejected": -2.476973056793213, |
|
"logps/chosen": -236.6543426513672, |
|
"logps/rejected": -301.2790832519531, |
|
"loss": 54808.6438, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -236.6543426513672, |
|
"rewards/margins": 64.62477111816406, |
|
"rewards/rejected": -301.2790832519531, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 1148068.4271174779, |
|
"learning_rate": 4.104243593509806e-07, |
|
"logits/chosen": -2.511590003967285, |
|
"logits/rejected": -2.449333906173706, |
|
"logps/chosen": -255.1795196533203, |
|
"logps/rejected": -306.39111328125, |
|
"loss": 56303.15, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -255.1795196533203, |
|
"rewards/margins": 51.211570739746094, |
|
"rewards/rejected": -306.39111328125, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 1398198.9347442659, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -2.4384443759918213, |
|
"logits/rejected": -2.4097814559936523, |
|
"logps/chosen": -313.1650085449219, |
|
"logps/rejected": -348.493896484375, |
|
"loss": 54666.4, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -313.1650085449219, |
|
"rewards/margins": 35.3288688659668, |
|
"rewards/rejected": -348.493896484375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 1048517.8304177759, |
|
"learning_rate": 4.0330622073514606e-07, |
|
"logits/chosen": -2.456749439239502, |
|
"logits/rejected": -2.353886127471924, |
|
"logps/chosen": -325.97222900390625, |
|
"logps/rejected": -289.00445556640625, |
|
"loss": 55775.8562, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -325.97222900390625, |
|
"rewards/margins": -36.96786117553711, |
|
"rewards/rejected": -289.00445556640625, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 1751549.502482873, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": -2.325648784637451, |
|
"logits/rejected": -2.3088955879211426, |
|
"logps/chosen": -249.3928680419922, |
|
"logps/rejected": -300.55267333984375, |
|
"loss": 56549.7063, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -249.3928680419922, |
|
"rewards/margins": 51.1598014831543, |
|
"rewards/rejected": -300.55267333984375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 1743710.2168055333, |
|
"learning_rate": 3.9598304863744615e-07, |
|
"logits/chosen": -2.3647897243499756, |
|
"logits/rejected": -2.302427053451538, |
|
"logps/chosen": -264.53399658203125, |
|
"logps/rejected": -291.4407958984375, |
|
"loss": 55886.3688, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -264.53399658203125, |
|
"rewards/margins": 26.906795501708984, |
|
"rewards/rejected": -291.4407958984375, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 1076304.712482483, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": -2.3268961906433105, |
|
"logits/rejected": -2.2726428508758545, |
|
"logps/chosen": -233.88784790039062, |
|
"logps/rejected": -254.55062866210938, |
|
"loss": 55491.6813, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -233.88784790039062, |
|
"rewards/margins": 20.662763595581055, |
|
"rewards/rejected": -254.55062866210938, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 1089826.502625074, |
|
"learning_rate": 3.8846463715146867e-07, |
|
"logits/chosen": -2.4054033756256104, |
|
"logits/rejected": -2.35465669631958, |
|
"logps/chosen": -293.21893310546875, |
|
"logps/rejected": -311.3880920410156, |
|
"loss": 56063.525, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -293.21893310546875, |
|
"rewards/margins": 18.16920280456543, |
|
"rewards/rejected": -311.3880920410156, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 1476308.11101748, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": -2.3780322074890137, |
|
"logits/rejected": -2.329284191131592, |
|
"logps/chosen": -254.2071990966797, |
|
"logps/rejected": -289.7637634277344, |
|
"loss": 55320.5563, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -254.2071990966797, |
|
"rewards/margins": 35.55649948120117, |
|
"rewards/rejected": -289.7637634277344, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 1105169.4119545654, |
|
"learning_rate": 3.8076104148617817e-07, |
|
"logits/chosen": -2.3992652893066406, |
|
"logits/rejected": -2.3519163131713867, |
|
"logps/chosen": -297.7577209472656, |
|
"logps/rejected": -303.87060546875, |
|
"loss": 55865.7375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -297.7577209472656, |
|
"rewards/margins": 6.11287260055542, |
|
"rewards/rejected": -303.87060546875, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 1165531.8808372426, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -2.4510560035705566, |
|
"logits/rejected": -2.369868278503418, |
|
"logps/chosen": -297.7958984375, |
|
"logps/rejected": -273.23046875, |
|
"loss": 57969.975, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -297.7958984375, |
|
"rewards/margins": -24.565448760986328, |
|
"rewards/rejected": -273.23046875, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 1756290.8872507422, |
|
"learning_rate": 3.728825645179653e-07, |
|
"logits/chosen": -2.4245288372039795, |
|
"logits/rejected": -2.3175346851348877, |
|
"logps/chosen": -359.3501892089844, |
|
"logps/rejected": -339.73492431640625, |
|
"loss": 57982.1, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -359.3501892089844, |
|
"rewards/margins": -19.61526107788086, |
|
"rewards/rejected": -339.73492431640625, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 1538328.85318582, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -2.190237045288086, |
|
"logits/rejected": -2.2050204277038574, |
|
"logps/chosen": -305.574951171875, |
|
"logps/rejected": -310.0028991699219, |
|
"loss": 56215.6438, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -305.574951171875, |
|
"rewards/margins": 4.427947044372559, |
|
"rewards/rejected": -310.0028991699219, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 1032609.5663318251, |
|
"learning_rate": 3.6483974301146263e-07, |
|
"logits/chosen": -2.409813165664673, |
|
"logits/rejected": -2.279897451400757, |
|
"logps/chosen": -289.708984375, |
|
"logps/rejected": -292.1055908203125, |
|
"loss": 55959.5, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -289.708984375, |
|
"rewards/margins": 2.3966078758239746, |
|
"rewards/rejected": -292.1055908203125, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 1349205.656483844, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": -2.303772211074829, |
|
"logits/rejected": -2.219710111618042, |
|
"logps/chosen": -319.6715087890625, |
|
"logps/rejected": -316.0106506347656, |
|
"loss": 57163.6375, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -319.6715087890625, |
|
"rewards/margins": -3.66082501411438, |
|
"rewards/rejected": -316.0106506347656, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 979251.717327062, |
|
"learning_rate": 3.566433335275558e-07, |
|
"logits/chosen": -2.2218708992004395, |
|
"logits/rejected": -2.146432876586914, |
|
"logps/chosen": -270.113037109375, |
|
"logps/rejected": -288.05926513671875, |
|
"loss": 54550.1687, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -270.113037109375, |
|
"rewards/margins": 17.94621467590332, |
|
"rewards/rejected": -288.05926513671875, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 1332038.5850987951, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -2.1922194957733154, |
|
"logits/rejected": -2.0968267917633057, |
|
"logps/chosen": -301.511962890625, |
|
"logps/rejected": -319.2638244628906, |
|
"loss": 55946.6562, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -301.511962890625, |
|
"rewards/margins": 17.751834869384766, |
|
"rewards/rejected": -319.2638244628906, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 1088873.3097436083, |
|
"learning_rate": 3.4830429803743705e-07, |
|
"logits/chosen": -2.3102076053619385, |
|
"logits/rejected": -2.264838695526123, |
|
"logps/chosen": -313.7403869628906, |
|
"logps/rejected": -312.85125732421875, |
|
"loss": 55392.65, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -313.7403869628906, |
|
"rewards/margins": -0.8891464471817017, |
|
"rewards/rejected": -312.85125732421875, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 1374961.051492005, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -2.271077871322632, |
|
"logits/rejected": -2.221766948699951, |
|
"logps/chosen": -293.64752197265625, |
|
"logps/rejected": -334.50390625, |
|
"loss": 56162.7438, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -293.64752197265625, |
|
"rewards/margins": 40.85638427734375, |
|
"rewards/rejected": -334.50390625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 1029506.0927493338, |
|
"learning_rate": 3.3983378926194015e-07, |
|
"logits/chosen": -2.24725079536438, |
|
"logits/rejected": -2.1463942527770996, |
|
"logps/chosen": -292.072021484375, |
|
"logps/rejected": -306.45660400390625, |
|
"loss": 55289.5437, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -292.072021484375, |
|
"rewards/margins": 14.384634017944336, |
|
"rewards/rejected": -306.45660400390625, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 1002942.6665806974, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": -2.2519736289978027, |
|
"logits/rejected": -2.1914682388305664, |
|
"logps/chosen": -308.74169921875, |
|
"logps/rejected": -312.30438232421875, |
|
"loss": 55870.9875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -308.74169921875, |
|
"rewards/margins": 3.5627059936523438, |
|
"rewards/rejected": -312.30438232421875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 1018589.2167974291, |
|
"learning_rate": 3.3124313575576487e-07, |
|
"logits/chosen": -2.17337703704834, |
|
"logits/rejected": -2.1850523948669434, |
|
"logps/chosen": -284.9986267089844, |
|
"logps/rejected": -300.66607666015625, |
|
"loss": 54878.6375, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -284.9986267089844, |
|
"rewards/margins": 15.66742992401123, |
|
"rewards/rejected": -300.66607666015625, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 1076240.3043484294, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": -2.107131242752075, |
|
"logits/rejected": -2.0504283905029297, |
|
"logps/chosen": -245.75363159179688, |
|
"logps/rejected": -265.7157897949219, |
|
"loss": 55359.2375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -245.75363159179688, |
|
"rewards/margins": 19.962154388427734, |
|
"rewards/rejected": -265.7157897949219, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 1259375.5689547102, |
|
"learning_rate": 3.2254382675653905e-07, |
|
"logits/chosen": -2.274196147918701, |
|
"logits/rejected": -2.182969331741333, |
|
"logps/chosen": -341.2582092285156, |
|
"logps/rejected": -347.05010986328125, |
|
"loss": 55359.3875, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -341.2582092285156, |
|
"rewards/margins": 5.791925430297852, |
|
"rewards/rejected": -347.05010986328125, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 1927449.2632160257, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": -2.264638662338257, |
|
"logits/rejected": -2.235848903656006, |
|
"logps/chosen": -247.626220703125, |
|
"logps/rejected": -299.621826171875, |
|
"loss": 55009.4375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -247.626220703125, |
|
"rewards/margins": 51.99560546875, |
|
"rewards/rejected": -299.621826171875, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 2337791.6005255897, |
|
"learning_rate": 3.1374749681898216e-07, |
|
"logits/chosen": -2.189664125442505, |
|
"logits/rejected": -2.1661365032196045, |
|
"logps/chosen": -283.037841796875, |
|
"logps/rejected": -331.63189697265625, |
|
"loss": 56368.575, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -283.037841796875, |
|
"rewards/margins": 48.59403991699219, |
|
"rewards/rejected": -331.63189697265625, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 1581637.4423084452, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": -2.2392799854278564, |
|
"logits/rejected": -2.1875014305114746, |
|
"logps/chosen": -283.40338134765625, |
|
"logps/rejected": -318.47039794921875, |
|
"loss": 55090.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -283.40338134765625, |
|
"rewards/margins": 35.06700897216797, |
|
"rewards/rejected": -318.47039794921875, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 1456316.7528858548, |
|
"learning_rate": 3.048659102547186e-07, |
|
"logits/chosen": -2.3513636589050293, |
|
"logits/rejected": -2.2428977489471436, |
|
"logps/chosen": -318.89703369140625, |
|
"logps/rejected": -347.49859619140625, |
|
"loss": 56281.025, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -318.89703369140625, |
|
"rewards/margins": 28.60154151916504, |
|
"rewards/rejected": -347.49859619140625, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 1026249.5450756603, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": -2.180349826812744, |
|
"logits/rejected": -2.0016205310821533, |
|
"logps/chosen": -281.388916015625, |
|
"logps/rejected": -276.56427001953125, |
|
"loss": 53576.8, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -281.388916015625, |
|
"rewards/margins": -4.824639320373535, |
|
"rewards/rejected": -276.56427001953125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 1429705.0746835866, |
|
"learning_rate": 2.959109453985547e-07, |
|
"logits/chosen": -2.2324633598327637, |
|
"logits/rejected": -2.0949769020080566, |
|
"logps/chosen": -299.1005554199219, |
|
"logps/rejected": -289.7796325683594, |
|
"loss": 55444.925, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -299.1005554199219, |
|
"rewards/margins": -9.320911407470703, |
|
"rewards/rejected": -289.7796325683594, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 1213056.8914210084, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": -2.1725077629089355, |
|
"logits/rejected": -2.1495959758758545, |
|
"logps/chosen": -259.3185729980469, |
|
"logps/rejected": -283.7967529296875, |
|
"loss": 54958.5125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -259.3185729980469, |
|
"rewards/margins": 24.478168487548828, |
|
"rewards/rejected": -283.7967529296875, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 1461214.8419503546, |
|
"learning_rate": 2.868945787222582e-07, |
|
"logits/chosen": -2.1361522674560547, |
|
"logits/rejected": -2.180379867553711, |
|
"logps/chosen": -234.53329467773438, |
|
"logps/rejected": -269.418701171875, |
|
"loss": 55915.4812, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -234.53329467773438, |
|
"rewards/margins": 34.885379791259766, |
|
"rewards/rejected": -269.418701171875, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 1374060.3676287297, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": -2.101999044418335, |
|
"logits/rejected": -2.050888776779175, |
|
"logps/chosen": -254.61770629882812, |
|
"logps/rejected": -283.10235595703125, |
|
"loss": 55689.6875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -254.61770629882812, |
|
"rewards/margins": 28.48464012145996, |
|
"rewards/rejected": -283.10235595703125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 1234397.4379234589, |
|
"learning_rate": 2.7782886881708866e-07, |
|
"logits/chosen": -2.2712063789367676, |
|
"logits/rejected": -2.099457263946533, |
|
"logps/chosen": -310.07879638671875, |
|
"logps/rejected": -374.5906677246094, |
|
"loss": 54732.425, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -310.07879638671875, |
|
"rewards/margins": 64.51188659667969, |
|
"rewards/rejected": -374.5906677246094, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 2228603.377630035, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": -2.017988920211792, |
|
"logits/rejected": -2.0341272354125977, |
|
"logps/chosen": -222.3467559814453, |
|
"logps/rejected": -274.4383239746094, |
|
"loss": 57469.6625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -222.3467559814453, |
|
"rewards/margins": 52.0915641784668, |
|
"rewards/rejected": -274.4383239746094, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 1599169.5252724146, |
|
"learning_rate": 2.6872594026650096e-07, |
|
"logits/chosen": -2.240408420562744, |
|
"logits/rejected": -2.2190628051757812, |
|
"logps/chosen": -270.1956481933594, |
|
"logps/rejected": -335.818359375, |
|
"loss": 53495.1125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -270.1956481933594, |
|
"rewards/margins": 65.6227035522461, |
|
"rewards/rejected": -335.818359375, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 1490279.794358093, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": -2.219712734222412, |
|
"logits/rejected": -2.139911651611328, |
|
"logps/chosen": -288.52215576171875, |
|
"logps/rejected": -317.8120422363281, |
|
"loss": 54654.5563, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -288.52215576171875, |
|
"rewards/margins": 29.289892196655273, |
|
"rewards/rejected": -317.8120422363281, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 935331.0867922652, |
|
"learning_rate": 2.595979674305891e-07, |
|
"logits/chosen": -2.084282398223877, |
|
"logits/rejected": -2.0336263179779053, |
|
"logps/chosen": -237.1022186279297, |
|
"logps/rejected": -258.7450256347656, |
|
"loss": 54242.45, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -237.1022186279297, |
|
"rewards/margins": 21.642807006835938, |
|
"rewards/rejected": -258.7450256347656, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 856340.2472942632, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": -2.1015374660491943, |
|
"logits/rejected": -2.0551133155822754, |
|
"logps/chosen": -279.68505859375, |
|
"logps/rejected": -288.3494567871094, |
|
"loss": 55627.8063, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -279.68505859375, |
|
"rewards/margins": 8.664429664611816, |
|
"rewards/rejected": -288.3494567871094, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 990531.7402526786, |
|
"learning_rate": 2.5045715816395916e-07, |
|
"logits/chosen": -2.2954821586608887, |
|
"logits/rejected": -2.194169521331787, |
|
"logps/chosen": -299.71234130859375, |
|
"logps/rejected": -318.68243408203125, |
|
"loss": 55352.35, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -299.71234130859375, |
|
"rewards/margins": 18.970050811767578, |
|
"rewards/rejected": -318.68243408203125, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 1450335.2512408984, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": -2.2317874431610107, |
|
"logits/rejected": -2.169450521469116, |
|
"logps/chosen": -276.4530334472656, |
|
"logps/rejected": -311.96551513671875, |
|
"loss": 54057.4375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -276.4530334472656, |
|
"rewards/margins": 35.512451171875, |
|
"rewards/rejected": -311.96551513671875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 1671087.2529512038, |
|
"learning_rate": 2.413157374888054e-07, |
|
"logits/chosen": -2.2822182178497314, |
|
"logits/rejected": -2.2092044353485107, |
|
"logps/chosen": -297.13531494140625, |
|
"logps/rejected": -293.6783142089844, |
|
"loss": 56565.0062, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -297.13531494140625, |
|
"rewards/margins": -3.457014560699463, |
|
"rewards/rejected": -293.6783142089844, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 1228860.5418419128, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": -2.173696279525757, |
|
"logits/rejected": -2.099151849746704, |
|
"logps/chosen": -266.07257080078125, |
|
"logps/rejected": -315.889892578125, |
|
"loss": 56023.175, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -266.07257080078125, |
|
"rewards/margins": 49.81734085083008, |
|
"rewards/rejected": -315.889892578125, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 1271355.6361364825, |
|
"learning_rate": 2.321859312450267e-07, |
|
"logits/chosen": -2.364675760269165, |
|
"logits/rejected": -2.297121286392212, |
|
"logps/chosen": -312.4518737792969, |
|
"logps/rejected": -373.5928039550781, |
|
"loss": 54175.6875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -312.4518737792969, |
|
"rewards/margins": 61.14093017578125, |
|
"rewards/rejected": -373.5928039550781, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 1398597.9993464884, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": -2.2173264026641846, |
|
"logits/rejected": -2.1705925464630127, |
|
"logps/chosen": -307.5565490722656, |
|
"logps/rejected": -310.6933898925781, |
|
"loss": 54367.9, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -307.5565490722656, |
|
"rewards/margins": 3.136824131011963, |
|
"rewards/rejected": -310.6933898925781, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 1551718.3427722957, |
|
"learning_rate": 2.230799497392495e-07, |
|
"logits/chosen": -2.2841944694519043, |
|
"logits/rejected": -2.225440502166748, |
|
"logps/chosen": -272.79681396484375, |
|
"logps/rejected": -291.16204833984375, |
|
"loss": 56317.2063, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -272.79681396484375, |
|
"rewards/margins": 18.365182876586914, |
|
"rewards/rejected": -291.16204833984375, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 1420409.765508531, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": -2.300354480743408, |
|
"logits/rejected": -2.2717068195343018, |
|
"logps/chosen": -313.4623107910156, |
|
"logps/rejected": -360.75189208984375, |
|
"loss": 55098.0625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -313.4623107910156, |
|
"rewards/margins": 47.28960418701172, |
|
"rewards/rejected": -360.75189208984375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 1392120.5854896335, |
|
"learning_rate": 2.14009971414625e-07, |
|
"logits/chosen": -2.2033753395080566, |
|
"logits/rejected": -2.1571030616760254, |
|
"logps/chosen": -282.2511291503906, |
|
"logps/rejected": -287.3548889160156, |
|
"loss": 54579.0062, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -282.2511291503906, |
|
"rewards/margins": 5.103717803955078, |
|
"rewards/rejected": -287.3548889160156, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 1488981.6454046704, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": -2.218703031539917, |
|
"logits/rejected": -2.244843006134033, |
|
"logps/chosen": -232.9685821533203, |
|
"logps/rejected": -295.5643615722656, |
|
"loss": 56100.95, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -232.9685821533203, |
|
"rewards/margins": 62.59580612182617, |
|
"rewards/rejected": -295.5643615722656, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 1135626.4886801469, |
|
"learning_rate": 2.0498812656324064e-07, |
|
"logits/chosen": -2.142216205596924, |
|
"logits/rejected": -2.1622607707977295, |
|
"logps/chosen": -289.1842041015625, |
|
"logps/rejected": -325.3665466308594, |
|
"loss": 54899.825, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -289.1842041015625, |
|
"rewards/margins": 36.182373046875, |
|
"rewards/rejected": -325.3665466308594, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 1329372.6719742662, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": -2.207730770111084, |
|
"logits/rejected": -2.0855050086975098, |
|
"logps/chosen": -299.4252014160156, |
|
"logps/rejected": -332.8768615722656, |
|
"loss": 55401.0125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -299.4252014160156, |
|
"rewards/margins": 33.4516487121582, |
|
"rewards/rejected": -332.8768615722656, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 1187559.6151840126, |
|
"learning_rate": 1.960264811029297e-07, |
|
"logits/chosen": -2.22457218170166, |
|
"logits/rejected": -2.148383617401123, |
|
"logps/chosen": -281.52923583984375, |
|
"logps/rejected": -282.35784912109375, |
|
"loss": 56603.25, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -281.52923583984375, |
|
"rewards/margins": 0.828582763671875, |
|
"rewards/rejected": -282.35784912109375, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 1671629.5147047387, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": -2.209186315536499, |
|
"logits/rejected": -2.1159491539001465, |
|
"logps/chosen": -251.740966796875, |
|
"logps/rejected": -269.68011474609375, |
|
"loss": 55453.7562, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -251.740966796875, |
|
"rewards/margins": 17.939146041870117, |
|
"rewards/rejected": -269.68011474609375, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 1173783.4292100056, |
|
"learning_rate": 1.8713702044017577e-07, |
|
"logits/chosen": -2.1656856536865234, |
|
"logits/rejected": -2.1623783111572266, |
|
"logps/chosen": -301.41497802734375, |
|
"logps/rejected": -317.84295654296875, |
|
"loss": 54113.325, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -301.41497802734375, |
|
"rewards/margins": 16.427982330322266, |
|
"rewards/rejected": -317.84295654296875, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 1387972.3011875993, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": -2.1883492469787598, |
|
"logits/rejected": -2.1378281116485596, |
|
"logps/chosen": -257.42822265625, |
|
"logps/rejected": -320.2197265625, |
|
"loss": 55090.8625, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -257.42822265625, |
|
"rewards/margins": 62.7915153503418, |
|
"rewards/rejected": -320.2197265625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 1488821.1810637303, |
|
"learning_rate": 1.783316334406939e-07, |
|
"logits/chosen": -2.185284376144409, |
|
"logits/rejected": -2.0930609703063965, |
|
"logps/chosen": -322.49005126953125, |
|
"logps/rejected": -319.7123718261719, |
|
"loss": 54071.0125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -322.49005126953125, |
|
"rewards/margins": -2.777683973312378, |
|
"rewards/rejected": -319.7123718261719, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 1496837.8340915893, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": -2.2097795009613037, |
|
"logits/rejected": -2.0639331340789795, |
|
"logps/chosen": -308.24530029296875, |
|
"logps/rejected": -331.1815490722656, |
|
"loss": 54010.9875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -308.24530029296875, |
|
"rewards/margins": 22.936208724975586, |
|
"rewards/rejected": -331.1815490722656, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 1535540.9500706908, |
|
"learning_rate": 1.6962209652912625e-07, |
|
"logits/chosen": -2.1692049503326416, |
|
"logits/rejected": -2.077504873275757, |
|
"logps/chosen": -255.7120361328125, |
|
"logps/rejected": -309.77008056640625, |
|
"loss": 54530.4875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -255.7120361328125, |
|
"rewards/margins": 54.058021545410156, |
|
"rewards/rejected": -309.77008056640625, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 1397345.2747377793, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": -2.1868765354156494, |
|
"logits/rejected": -2.1478359699249268, |
|
"logps/chosen": -292.8278503417969, |
|
"logps/rejected": -312.1719055175781, |
|
"loss": 54489.7375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -292.8278503417969, |
|
"rewards/margins": 19.344045639038086, |
|
"rewards/rejected": -312.1719055175781, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 1977409.2998021427, |
|
"learning_rate": 1.610200579391182e-07, |
|
"logits/chosen": -2.1679329872131348, |
|
"logits/rejected": -2.1316826343536377, |
|
"logps/chosen": -283.0874938964844, |
|
"logps/rejected": -364.5801696777344, |
|
"loss": 55410.75, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -283.0874938964844, |
|
"rewards/margins": 81.49267578125, |
|
"rewards/rejected": -364.5801696777344, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 1362818.5877687463, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": -2.0093648433685303, |
|
"logits/rejected": -1.9298946857452393, |
|
"logps/chosen": -282.3995056152344, |
|
"logps/rejected": -278.3210754394531, |
|
"loss": 54493.85, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -282.3995056152344, |
|
"rewards/margins": -4.078440189361572, |
|
"rewards/rejected": -278.3210754394531, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 956804.5377818815, |
|
"learning_rate": 1.5253702213483842e-07, |
|
"logits/chosen": -2.1643216609954834, |
|
"logits/rejected": -2.119776964187622, |
|
"logps/chosen": -271.3257751464844, |
|
"logps/rejected": -303.90423583984375, |
|
"loss": 54765.8125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -271.3257751464844, |
|
"rewards/margins": 32.57844924926758, |
|
"rewards/rejected": -303.90423583984375, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 1933509.9856251064, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": -2.093644857406616, |
|
"logits/rejected": -2.10066556930542, |
|
"logps/chosen": -269.1563415527344, |
|
"logps/rejected": -319.5292663574219, |
|
"loss": 54325.475, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -269.1563415527344, |
|
"rewards/margins": 50.37293243408203, |
|
"rewards/rejected": -319.5292663574219, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 1218847.4753339728, |
|
"learning_rate": 1.4418433442477703e-07, |
|
"logits/chosen": -2.216813087463379, |
|
"logits/rejected": -2.1345386505126953, |
|
"logps/chosen": -338.1468505859375, |
|
"logps/rejected": -352.8824768066406, |
|
"loss": 53920.6188, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -338.1468505859375, |
|
"rewards/margins": 14.735623359680176, |
|
"rewards/rejected": -352.8824768066406, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 1418811.7836556053, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": -2.139899492263794, |
|
"logits/rejected": -2.0506820678710938, |
|
"logps/chosen": -257.8123779296875, |
|
"logps/rejected": -283.587890625, |
|
"loss": 55958.4187, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -257.8123779296875, |
|
"rewards/margins": 25.775487899780273, |
|
"rewards/rejected": -283.587890625, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 1109912.054173663, |
|
"learning_rate": 1.3597316578840216e-07, |
|
"logits/chosen": -2.0801479816436768, |
|
"logits/rejected": -2.0766029357910156, |
|
"logps/chosen": -256.91619873046875, |
|
"logps/rejected": -276.5906677246094, |
|
"loss": 54215.7375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -256.91619873046875, |
|
"rewards/margins": 19.674455642700195, |
|
"rewards/rejected": -276.5906677246094, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 1210210.3382933068, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": -2.245999574661255, |
|
"logits/rejected": -2.1108059883117676, |
|
"logps/chosen": -321.09796142578125, |
|
"logps/rejected": -322.8074645996094, |
|
"loss": 55360.3, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -321.09796142578125, |
|
"rewards/margins": 1.7095245122909546, |
|
"rewards/rejected": -322.8074645996094, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 1098340.112919491, |
|
"learning_rate": 1.279144979359641e-07, |
|
"logits/chosen": -2.1789064407348633, |
|
"logits/rejected": -2.157804489135742, |
|
"logps/chosen": -267.2674865722656, |
|
"logps/rejected": -305.3499450683594, |
|
"loss": 55846.8812, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -267.2674865722656, |
|
"rewards/margins": 38.08247756958008, |
|
"rewards/rejected": -305.3499450683594, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 1447767.6648965469, |
|
"learning_rate": 1.2394572821496948e-07, |
|
"logits/chosen": -2.2281277179718018, |
|
"logits/rejected": -2.21685791015625, |
|
"logps/chosen": -273.71417236328125, |
|
"logps/rejected": -327.2356262207031, |
|
"loss": 54601.5563, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -273.71417236328125, |
|
"rewards/margins": 53.521484375, |
|
"rewards/rejected": -327.2356262207031, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 1277662.338967538, |
|
"learning_rate": 1.2001910862143174e-07, |
|
"logits/chosen": -2.2508022785186768, |
|
"logits/rejected": -2.217378854751587, |
|
"logps/chosen": -325.85906982421875, |
|
"logps/rejected": -380.45074462890625, |
|
"loss": 55330.475, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -325.85906982421875, |
|
"rewards/margins": 54.59168243408203, |
|
"rewards/rejected": -380.45074462890625, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 1785233.744803184, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": -2.2163052558898926, |
|
"logits/rejected": -2.1031951904296875, |
|
"logps/chosen": -284.403076171875, |
|
"logps/rejected": -271.61138916015625, |
|
"loss": 54460.6625, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -284.403076171875, |
|
"rewards/margins": -12.791729927062988, |
|
"rewards/rejected": -271.61138916015625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 1084153.5773127347, |
|
"learning_rate": 1.122975572282018e-07, |
|
"logits/chosen": -2.19317364692688, |
|
"logits/rejected": -2.1025004386901855, |
|
"logps/chosen": -290.7996520996094, |
|
"logps/rejected": -270.1470947265625, |
|
"loss": 54599.6188, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -290.7996520996094, |
|
"rewards/margins": -20.65255355834961, |
|
"rewards/rejected": -270.1470947265625, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 1089542.9473462715, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": -2.144193172454834, |
|
"logits/rejected": -2.0945630073547363, |
|
"logps/chosen": -264.43109130859375, |
|
"logps/rejected": -340.2378845214844, |
|
"loss": 54947.6625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -264.43109130859375, |
|
"rewards/margins": 75.80680084228516, |
|
"rewards/rejected": -340.2378845214844, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 1178567.4912604708, |
|
"learning_rate": 1.0476017064685941e-07, |
|
"logits/chosen": -2.2328460216522217, |
|
"logits/rejected": -2.1399552822113037, |
|
"logps/chosen": -284.4504089355469, |
|
"logps/rejected": -293.85321044921875, |
|
"loss": 55292.35, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -284.4504089355469, |
|
"rewards/margins": 9.402796745300293, |
|
"rewards/rejected": -293.85321044921875, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 1333559.7423557746, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": -2.011481761932373, |
|
"logits/rejected": -1.935136079788208, |
|
"logps/chosen": -258.0648193359375, |
|
"logps/rejected": -269.0512390136719, |
|
"loss": 56453.9, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -258.0648193359375, |
|
"rewards/margins": 10.98639965057373, |
|
"rewards/rejected": -269.0512390136719, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 1081251.2157163108, |
|
"learning_rate": 9.741702946387748e-08, |
|
"logits/chosen": -2.1545426845550537, |
|
"logits/rejected": -2.0765717029571533, |
|
"logps/chosen": -247.3363494873047, |
|
"logps/rejected": -301.45672607421875, |
|
"loss": 54404.8, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -247.3363494873047, |
|
"rewards/margins": 54.120391845703125, |
|
"rewards/rejected": -301.45672607421875, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 1104364.5468847684, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": -1.989871621131897, |
|
"logits/rejected": -1.9418586492538452, |
|
"logps/chosen": -244.57852172851562, |
|
"logps/rejected": -289.2986755371094, |
|
"loss": 54110.525, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -244.57852172851562, |
|
"rewards/margins": 44.72013854980469, |
|
"rewards/rejected": -289.2986755371094, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 1104089.7558876271, |
|
"learning_rate": 9.027795447972545e-08, |
|
"logits/chosen": -2.2300283908843994, |
|
"logits/rejected": -2.1951324939727783, |
|
"logps/chosen": -286.88922119140625, |
|
"logps/rejected": -345.98822021484375, |
|
"loss": 52983.1375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -286.88922119140625, |
|
"rewards/margins": 59.0989990234375, |
|
"rewards/rejected": -345.98822021484375, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 1208966.7828290404, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": -2.19745135307312, |
|
"logits/rejected": -2.0950427055358887, |
|
"logps/chosen": -259.8890686035156, |
|
"logps/rejected": -310.04876708984375, |
|
"loss": 55099.525, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -259.8890686035156, |
|
"rewards/margins": 50.15970993041992, |
|
"rewards/rejected": -310.04876708984375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 1272614.4979089308, |
|
"learning_rate": 8.335249357441945e-08, |
|
"logits/chosen": -2.041647434234619, |
|
"logits/rejected": -2.0392508506774902, |
|
"logps/chosen": -260.08172607421875, |
|
"logps/rejected": -329.94854736328125, |
|
"loss": 54837.7125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -260.08172607421875, |
|
"rewards/margins": 69.86690521240234, |
|
"rewards/rejected": -329.94854736328125, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 1162448.1540473108, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": -2.1625466346740723, |
|
"logits/rejected": -2.0773284435272217, |
|
"logps/chosen": -268.3184814453125, |
|
"logps/rejected": -292.38433837890625, |
|
"loss": 55808.2125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -268.3184814453125, |
|
"rewards/margins": 24.065847396850586, |
|
"rewards/rejected": -292.38433837890625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 1243184.3713818155, |
|
"learning_rate": 7.664990893807885e-08, |
|
"logits/chosen": -2.1861138343811035, |
|
"logits/rejected": -2.1057441234588623, |
|
"logps/chosen": -248.58114624023438, |
|
"logps/rejected": -316.4317626953125, |
|
"loss": 54297.5375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -248.58114624023438, |
|
"rewards/margins": 67.85064697265625, |
|
"rewards/rejected": -316.4317626953125, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 1338047.2392976265, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": -2.1806750297546387, |
|
"logits/rejected": -2.1461918354034424, |
|
"logps/chosen": -285.19451904296875, |
|
"logps/rejected": -319.1790466308594, |
|
"loss": 55123.75, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -285.19451904296875, |
|
"rewards/margins": 33.984554290771484, |
|
"rewards/rejected": -319.1790466308594, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 1984510.6026826864, |
|
"learning_rate": 7.01791646835681e-08, |
|
"logits/chosen": -2.2138607501983643, |
|
"logits/rejected": -2.1573081016540527, |
|
"logps/chosen": -270.462890625, |
|
"logps/rejected": -285.9214172363281, |
|
"loss": 54790.0, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -270.462890625, |
|
"rewards/margins": 15.458574295043945, |
|
"rewards/rejected": -285.9214172363281, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 1378850.8751623577, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": -2.117934465408325, |
|
"logits/rejected": -2.0909981727600098, |
|
"logps/chosen": -254.19442749023438, |
|
"logps/rejected": -307.30255126953125, |
|
"loss": 54093.9875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -254.19442749023438, |
|
"rewards/margins": 53.108154296875, |
|
"rewards/rejected": -307.30255126953125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 1370111.0134525597, |
|
"learning_rate": 6.394891485779022e-08, |
|
"logits/chosen": -2.266648292541504, |
|
"logits/rejected": -2.2330288887023926, |
|
"logps/chosen": -290.75335693359375, |
|
"logps/rejected": -312.68597412109375, |
|
"loss": 54021.125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -290.75335693359375, |
|
"rewards/margins": 21.932575225830078, |
|
"rewards/rejected": -312.68597412109375, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 1572823.5723971077, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": -2.1503944396972656, |
|
"logits/rejected": -2.113105297088623, |
|
"logps/chosen": -281.13037109375, |
|
"logps/rejected": -321.85693359375, |
|
"loss": 54900.25, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -281.13037109375, |
|
"rewards/margins": 40.7265510559082, |
|
"rewards/rejected": -321.85693359375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 1225741.5170516171, |
|
"learning_rate": 5.7967491867665975e-08, |
|
"logits/chosen": -2.0941481590270996, |
|
"logits/rejected": -2.064021348953247, |
|
"logps/chosen": -251.21670532226562, |
|
"logps/rejected": -310.03631591796875, |
|
"loss": 54873.5938, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -251.21670532226562, |
|
"rewards/margins": 58.8195915222168, |
|
"rewards/rejected": -310.03631591796875, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 1642379.1878661881, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": -2.1802749633789062, |
|
"logits/rejected": -2.097052812576294, |
|
"logps/chosen": -280.42254638671875, |
|
"logps/rejected": -292.81768798828125, |
|
"loss": 54552.0125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -280.42254638671875, |
|
"rewards/margins": 12.39512825012207, |
|
"rewards/rejected": -292.81768798828125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 1328549.6940408363, |
|
"learning_rate": 5.2242895336278734e-08, |
|
"logits/chosen": -2.2298295497894287, |
|
"logits/rejected": -2.1420650482177734, |
|
"logps/chosen": -275.43951416015625, |
|
"logps/rejected": -293.7701110839844, |
|
"loss": 54556.4625, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -275.43951416015625, |
|
"rewards/margins": 18.330612182617188, |
|
"rewards/rejected": -293.7701110839844, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 1403447.9375936964, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": -2.1510796546936035, |
|
"logits/rejected": -2.070650339126587, |
|
"logps/chosen": -281.1759338378906, |
|
"logps/rejected": -301.5060119628906, |
|
"loss": 53222.4187, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -281.1759338378906, |
|
"rewards/margins": 20.330089569091797, |
|
"rewards/rejected": -301.5060119628906, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 1248429.4711292263, |
|
"learning_rate": 4.678278140408667e-08, |
|
"logits/chosen": -2.2055792808532715, |
|
"logits/rejected": -2.0527145862579346, |
|
"logps/chosen": -284.21142578125, |
|
"logps/rejected": -297.88018798828125, |
|
"loss": 51932.0875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -284.21142578125, |
|
"rewards/margins": 13.668767929077148, |
|
"rewards/rejected": -297.88018798828125, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 1250406.1121283756, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": -2.110973596572876, |
|
"logits/rejected": -1.9595458507537842, |
|
"logps/chosen": -252.9059295654297, |
|
"logps/rejected": -279.2762451171875, |
|
"loss": 55843.9812, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -252.9059295654297, |
|
"rewards/margins": 26.370315551757812, |
|
"rewards/rejected": -279.2762451171875, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 1200316.971470453, |
|
"learning_rate": 4.159445248951457e-08, |
|
"logits/chosen": -2.0804190635681152, |
|
"logits/rejected": -2.0888171195983887, |
|
"logps/chosen": -227.65390014648438, |
|
"logps/rejected": -293.1388244628906, |
|
"loss": 54166.2125, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -227.65390014648438, |
|
"rewards/margins": 65.48490905761719, |
|
"rewards/rejected": -293.1388244628906, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 1453921.71532858, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": -2.275310754776001, |
|
"logits/rejected": -2.17592191696167, |
|
"logps/chosen": -284.0006103515625, |
|
"logps/rejected": -263.94525146484375, |
|
"loss": 55792.875, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -284.0006103515625, |
|
"rewards/margins": -20.05536460876465, |
|
"rewards/rejected": -263.94525146484375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 1115550.7841994467, |
|
"learning_rate": 3.6684847522615664e-08, |
|
"logits/chosen": -2.1132473945617676, |
|
"logits/rejected": -2.0296568870544434, |
|
"logps/chosen": -242.7162628173828, |
|
"logps/rejected": -279.27545166015625, |
|
"loss": 55248.8063, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -242.7162628173828, |
|
"rewards/margins": 36.55915069580078, |
|
"rewards/rejected": -279.27545166015625, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 1588544.8496029316, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": -2.1658711433410645, |
|
"logits/rejected": -2.0674452781677246, |
|
"logps/chosen": -296.5272216796875, |
|
"logps/rejected": -326.5904541015625, |
|
"loss": 55337.175, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -296.5272216796875, |
|
"rewards/margins": 30.063217163085938, |
|
"rewards/rejected": -326.5904541015625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 1954751.458337351, |
|
"learning_rate": 3.206053266486808e-08, |
|
"logits/chosen": -2.254883289337158, |
|
"logits/rejected": -2.1984355449676514, |
|
"logps/chosen": -274.1257629394531, |
|
"logps/rejected": -293.55303955078125, |
|
"loss": 54866.6188, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -274.1257629394531, |
|
"rewards/margins": 19.427263259887695, |
|
"rewards/rejected": -293.55303955078125, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 1259920.9995805293, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": -2.12813663482666, |
|
"logits/rejected": -2.065500259399414, |
|
"logps/chosen": -282.5124206542969, |
|
"logps/rejected": -329.7523498535156, |
|
"loss": 54957.6875, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -282.5124206542969, |
|
"rewards/margins": 47.23994064331055, |
|
"rewards/rejected": -329.7523498535156, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 2932710.1060309387, |
|
"learning_rate": 2.772769252751575e-08, |
|
"logits/chosen": -2.2625370025634766, |
|
"logits/rejected": -2.1728615760803223, |
|
"logps/chosen": -326.66375732421875, |
|
"logps/rejected": -282.999755859375, |
|
"loss": 55274.6625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -326.66375732421875, |
|
"rewards/margins": -43.66400909423828, |
|
"rewards/rejected": -282.999755859375, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 1506078.4494627095, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": -2.1724421977996826, |
|
"logits/rejected": -2.121241569519043, |
|
"logps/chosen": -305.38079833984375, |
|
"logps/rejected": -287.86627197265625, |
|
"loss": 53377.5625, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -305.38079833984375, |
|
"rewards/margins": -17.514530181884766, |
|
"rewards/rejected": -287.86627197265625, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 1304314.0364927459, |
|
"learning_rate": 2.3692121900199174e-08, |
|
"logits/chosen": -2.153219699859619, |
|
"logits/rejected": -2.0992071628570557, |
|
"logps/chosen": -261.697998046875, |
|
"logps/rejected": -283.06072998046875, |
|
"loss": 54374.4625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -261.697998046875, |
|
"rewards/margins": 21.36276626586914, |
|
"rewards/rejected": -283.06072998046875, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 1648439.4660647989, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": -2.146265983581543, |
|
"logits/rejected": -2.111722946166992, |
|
"logps/chosen": -265.7535705566406, |
|
"logps/rejected": -268.1636962890625, |
|
"loss": 55597.7875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -265.7535705566406, |
|
"rewards/margins": 2.4101357460021973, |
|
"rewards/rejected": -268.1636962890625, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 1734398.4767520986, |
|
"learning_rate": 1.995921800093761e-08, |
|
"logits/chosen": -2.073884963989258, |
|
"logits/rejected": -1.9895031452178955, |
|
"logps/chosen": -282.88983154296875, |
|
"logps/rejected": -306.662353515625, |
|
"loss": 53997.5125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -282.88983154296875, |
|
"rewards/margins": 23.772525787353516, |
|
"rewards/rejected": -306.662353515625, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 1454626.9788120938, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": -2.1386914253234863, |
|
"logits/rejected": -2.0203399658203125, |
|
"logps/chosen": -289.72161865234375, |
|
"logps/rejected": -275.7218017578125, |
|
"loss": 55009.9875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -289.72161865234375, |
|
"rewards/margins": -13.99982738494873, |
|
"rewards/rejected": -275.7218017578125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 1089368.648801681, |
|
"learning_rate": 1.6533973257828765e-08, |
|
"logits/chosen": -2.091768980026245, |
|
"logits/rejected": -2.0091001987457275, |
|
"logps/chosen": -287.00640869140625, |
|
"logps/rejected": -331.1282958984375, |
|
"loss": 54365.375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -287.00640869140625, |
|
"rewards/margins": 44.12189483642578, |
|
"rewards/rejected": -331.1282958984375, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 1510934.5115232496, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": -2.1866893768310547, |
|
"logits/rejected": -2.085561513900757, |
|
"logps/chosen": -258.2256774902344, |
|
"logps/rejected": -292.9275817871094, |
|
"loss": 54320.6625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -258.2256774902344, |
|
"rewards/margins": 34.7019157409668, |
|
"rewards/rejected": -292.9275817871094, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 1724641.859318044, |
|
"learning_rate": 1.342096863211828e-08, |
|
"logits/chosen": -2.1254117488861084, |
|
"logits/rejected": -2.0715444087982178, |
|
"logps/chosen": -281.90814208984375, |
|
"logps/rejected": -320.0205078125, |
|
"loss": 56361.75, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -281.90814208984375, |
|
"rewards/margins": 38.11237335205078, |
|
"rewards/rejected": -320.0205078125, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 1063487.5975205353, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": -1.9770715236663818, |
|
"logits/rejected": -2.01908540725708, |
|
"logps/chosen": -227.65396118164062, |
|
"logps/rejected": -317.01251220703125, |
|
"loss": 55278.3875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -227.65396118164062, |
|
"rewards/margins": 89.35859680175781, |
|
"rewards/rejected": -317.01251220703125, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 1565693.148460451, |
|
"learning_rate": 1.062436749157053e-08, |
|
"logits/chosen": -2.1096649169921875, |
|
"logits/rejected": -2.111191749572754, |
|
"logps/chosen": -293.599609375, |
|
"logps/rejected": -321.7491760253906, |
|
"loss": 54704.9375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -293.599609375, |
|
"rewards/margins": 28.14957046508789, |
|
"rewards/rejected": -321.7491760253906, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 1036263.9285741834, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": -2.1749892234802246, |
|
"logits/rejected": -2.0691840648651123, |
|
"logps/chosen": -331.82086181640625, |
|
"logps/rejected": -299.9912414550781, |
|
"loss": 53077.875, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -331.82086181640625, |
|
"rewards/margins": -31.82961082458496, |
|
"rewards/rejected": -299.9912414550781, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 1469306.753540594, |
|
"learning_rate": 8.147910042332922e-09, |
|
"logits/chosen": -2.1455626487731934, |
|
"logits/rejected": -2.0270955562591553, |
|
"logps/chosen": -334.5442810058594, |
|
"logps/rejected": -350.59002685546875, |
|
"loss": 55319.25, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -334.5442810058594, |
|
"rewards/margins": 16.04566764831543, |
|
"rewards/rejected": -350.59002685546875, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 1665409.940510744, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": -2.038339614868164, |
|
"logits/rejected": -1.9863135814666748, |
|
"logps/chosen": -280.74298095703125, |
|
"logps/rejected": -290.1654052734375, |
|
"loss": 54026.875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -280.74298095703125, |
|
"rewards/margins": 9.422399520874023, |
|
"rewards/rejected": -290.1654052734375, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 1264428.2705349482, |
|
"learning_rate": 5.994908326741876e-09, |
|
"logits/chosen": -2.1871466636657715, |
|
"logits/rejected": -2.144632339477539, |
|
"logps/chosen": -302.3477478027344, |
|
"logps/rejected": -335.5939636230469, |
|
"loss": 54326.7562, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -302.3477478027344, |
|
"rewards/margins": 33.246219635009766, |
|
"rewards/rejected": -335.5939636230469, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 1732479.872330989, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": -2.223008632659912, |
|
"logits/rejected": -2.123403787612915, |
|
"logps/chosen": -267.91107177734375, |
|
"logps/rejected": -292.2001953125, |
|
"loss": 53162.075, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -267.91107177734375, |
|
"rewards/margins": 24.28915023803711, |
|
"rewards/rejected": -292.2001953125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 1668574.1463273366, |
|
"learning_rate": 4.168241793759658e-09, |
|
"logits/chosen": -2.1200461387634277, |
|
"logits/rejected": -2.0498270988464355, |
|
"logps/chosen": -266.21112060546875, |
|
"logps/rejected": -335.3847351074219, |
|
"loss": 52995.9688, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -266.21112060546875, |
|
"rewards/margins": 69.17359924316406, |
|
"rewards/rejected": -335.3847351074219, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 1455951.9893622866, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": -2.221703052520752, |
|
"logits/rejected": -2.0837242603302, |
|
"logps/chosen": -328.39630126953125, |
|
"logps/rejected": -332.1032409667969, |
|
"loss": 55753.5, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -328.39630126953125, |
|
"rewards/margins": 3.7069344520568848, |
|
"rewards/rejected": -332.1032409667969, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 1397349.994078792, |
|
"learning_rate": 2.6703534479667887e-09, |
|
"logits/chosen": -2.1655023097991943, |
|
"logits/rejected": -2.0703787803649902, |
|
"logps/chosen": -253.6987762451172, |
|
"logps/rejected": -273.0363464355469, |
|
"loss": 53243.575, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -253.6987762451172, |
|
"rewards/margins": 19.337589263916016, |
|
"rewards/rejected": -273.0363464355469, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 1342408.6426420235, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": -2.0957493782043457, |
|
"logits/rejected": -2.029906988143921, |
|
"logps/chosen": -270.45806884765625, |
|
"logps/rejected": -297.3926086425781, |
|
"loss": 54182.1375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -270.45806884765625, |
|
"rewards/margins": 26.934490203857422, |
|
"rewards/rejected": -297.3926086425781, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 1458294.4502452172, |
|
"learning_rate": 1.5032465822596153e-09, |
|
"logits/chosen": -2.1939797401428223, |
|
"logits/rejected": -2.1166329383850098, |
|
"logps/chosen": -300.76947021484375, |
|
"logps/rejected": -320.9613952636719, |
|
"loss": 54235.6937, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -300.76947021484375, |
|
"rewards/margins": 20.191925048828125, |
|
"rewards/rejected": -320.9613952636719, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 2290841.929562142, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": -2.114621639251709, |
|
"logits/rejected": -2.098475217819214, |
|
"logps/chosen": -277.58563232421875, |
|
"logps/rejected": -333.00006103515625, |
|
"loss": 53597.825, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -277.58563232421875, |
|
"rewards/margins": 55.41447830200195, |
|
"rewards/rejected": -333.00006103515625, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 2365466.4829686345, |
|
"learning_rate": 6.684820986240513e-10, |
|
"logits/chosen": -2.1461949348449707, |
|
"logits/rejected": -2.1061387062072754, |
|
"logps/chosen": -285.5892333984375, |
|
"logps/rejected": -329.62567138671875, |
|
"loss": 55886.8125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -285.5892333984375, |
|
"rewards/margins": 44.03642272949219, |
|
"rewards/rejected": -329.62567138671875, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 1714580.7073031003, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": -2.290830135345459, |
|
"logits/rejected": -2.2668721675872803, |
|
"logps/chosen": -314.2235107421875, |
|
"logps/rejected": -362.34698486328125, |
|
"loss": 54598.4375, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -314.2235107421875, |
|
"rewards/margins": 48.12348556518555, |
|
"rewards/rejected": -362.34698486328125, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 1433494.2103824487, |
|
"learning_rate": 1.6717642056721104e-10, |
|
"logits/chosen": -2.0160892009735107, |
|
"logits/rejected": -2.0129268169403076, |
|
"logps/chosen": -284.138916015625, |
|
"logps/rejected": -306.3015441894531, |
|
"loss": 54053.5687, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -284.138916015625, |
|
"rewards/margins": 22.162614822387695, |
|
"rewards/rejected": -306.3015441894531, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 1191159.0388659274, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": -2.057304620742798, |
|
"logits/rejected": -2.056112289428711, |
|
"logps/chosen": -266.7309875488281, |
|
"logps/rejected": -323.4661865234375, |
|
"loss": 55682.3375, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -266.7309875488281, |
|
"rewards/margins": 56.735191345214844, |
|
"rewards/rejected": -323.4661865234375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 1276570.622002559, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.1545863151550293, |
|
"logits/rejected": -2.146925449371338, |
|
"logps/chosen": -280.2084045410156, |
|
"logps/rejected": -343.4630432128906, |
|
"loss": 54058.05, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -280.2084045410156, |
|
"rewards/margins": 63.254638671875, |
|
"rewards/rejected": -343.4630432128906, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 56244.764594240834, |
|
"train_runtime": 21694.4484, |
|
"train_samples_per_second": 2.818, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|