|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 50, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002093692750588851, |
|
"grad_norm": 4.483342826138069, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -0.4866575300693512, |
|
"logits/rejected": -0.7110590934753418, |
|
"logps/chosen": -355.9316101074219, |
|
"logps/rejected": -328.53912353515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 4.128088001642986, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -0.5703912377357483, |
|
"logits/rejected": -0.648878812789917, |
|
"logps/chosen": -295.92047119140625, |
|
"logps/rejected": -294.49114990234375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4618055522441864, |
|
"rewards/chosen": 4.306111441110261e-05, |
|
"rewards/margins": -4.887706381850876e-05, |
|
"rewards/rejected": 9.193811274599284e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 5.410229562758306, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.6405607461929321, |
|
"logits/rejected": -0.716262936592102, |
|
"logps/chosen": -302.8559265136719, |
|
"logps/rejected": -268.3453674316406, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.003174368990585208, |
|
"rewards/margins": 0.0005290606059134007, |
|
"rewards/rejected": 0.0026453081518411636, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 3.6935020021345912, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.5716087222099304, |
|
"logits/rejected": -0.7064056992530823, |
|
"logps/chosen": -317.4009704589844, |
|
"logps/rejected": -286.69091796875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": 0.019368406385183334, |
|
"rewards/margins": 0.0026101372204720974, |
|
"rewards/rejected": 0.0167582668364048, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 4.40245909291114, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.6235290765762329, |
|
"logits/rejected": -0.6784546971321106, |
|
"logps/chosen": -311.2938537597656, |
|
"logps/rejected": -279.271484375, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.06372524797916412, |
|
"rewards/margins": 0.011987941339612007, |
|
"rewards/rejected": 0.051737308502197266, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 3.500658089915997, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": -0.6384499073028564, |
|
"logits/rejected": -0.7284350395202637, |
|
"logps/chosen": -280.0108337402344, |
|
"logps/rejected": -262.5020446777344, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.0958619937300682, |
|
"rewards/margins": 0.02134247124195099, |
|
"rewards/rejected": 0.07451952993869781, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -0.6590862274169922, |
|
"eval_logits/rejected": -0.7374107241630554, |
|
"eval_logps/chosen": -280.3077392578125, |
|
"eval_logps/rejected": -259.5904541015625, |
|
"eval_loss": 0.6799615621566772, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": 0.10503920167684555, |
|
"eval_rewards/margins": 0.025244150310754776, |
|
"eval_rewards/rejected": 0.07979504764080048, |
|
"eval_runtime": 364.1271, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.373, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 3.529016261160226, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -0.6414979696273804, |
|
"logits/rejected": -0.7368530631065369, |
|
"logps/chosen": -270.6864318847656, |
|
"logps/rejected": -248.44384765625, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.11528738588094711, |
|
"rewards/margins": 0.03561442345380783, |
|
"rewards/rejected": 0.07967296242713928, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 3.6469037758356073, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": -0.6982444524765015, |
|
"logits/rejected": -0.7211685180664062, |
|
"logps/chosen": -289.2667541503906, |
|
"logps/rejected": -275.0166931152344, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": 0.09976810961961746, |
|
"rewards/margins": 0.042868759483098984, |
|
"rewards/rejected": 0.05689934641122818, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 4.175678976701424, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": -0.7570616602897644, |
|
"logits/rejected": -0.7942430377006531, |
|
"logps/chosen": -315.99798583984375, |
|
"logps/rejected": -287.9906921386719, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07903473824262619, |
|
"rewards/margins": 0.07849308103322983, |
|
"rewards/rejected": 0.0005416579660959542, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 5.281508008175948, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -0.7726985216140747, |
|
"logits/rejected": -0.8362387418746948, |
|
"logps/chosen": -275.9876403808594, |
|
"logps/rejected": -277.8388671875, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.027138452976942062, |
|
"rewards/margins": 0.10656658560037613, |
|
"rewards/rejected": -0.07942812889814377, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 12.951722618692823, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": -0.7694597840309143, |
|
"logits/rejected": -0.8001770973205566, |
|
"logps/chosen": -281.1448974609375, |
|
"logps/rejected": -265.9299621582031, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.029758170247077942, |
|
"rewards/margins": 0.13903483748435974, |
|
"rewards/rejected": -0.10927668958902359, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -0.7621766924858093, |
|
"eval_logits/rejected": -0.8268935680389404, |
|
"eval_logps/chosen": -289.7268981933594, |
|
"eval_logps/rejected": -281.2423400878906, |
|
"eval_loss": 0.6362168788909912, |
|
"eval_rewards/accuracies": 0.7080000042915344, |
|
"eval_rewards/chosen": 0.010847779922187328, |
|
"eval_rewards/margins": 0.1475716382265091, |
|
"eval_rewards/rejected": -0.1367238610982895, |
|
"eval_runtime": 362.8709, |
|
"eval_samples_per_second": 5.512, |
|
"eval_steps_per_second": 1.378, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 5.692610235446284, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": -0.7423380017280579, |
|
"logits/rejected": -0.7826918363571167, |
|
"logps/chosen": -288.1560974121094, |
|
"logps/rejected": -290.35430908203125, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.01307572703808546, |
|
"rewards/margins": 0.1582353413105011, |
|
"rewards/rejected": -0.17131105065345764, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 12.622342550289487, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -0.7232716679573059, |
|
"logits/rejected": -0.8170641660690308, |
|
"logps/chosen": -318.8392333984375, |
|
"logps/rejected": -287.73712158203125, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.017950385808944702, |
|
"rewards/margins": 0.2194708287715912, |
|
"rewards/rejected": -0.2374211996793747, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 9.598057625140969, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": -0.7142191529273987, |
|
"logits/rejected": -0.7762171030044556, |
|
"logps/chosen": -296.99627685546875, |
|
"logps/rejected": -309.0848083496094, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.011398183181881905, |
|
"rewards/margins": 0.25749707221984863, |
|
"rewards/rejected": -0.24609890580177307, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 16.604631406571734, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": -0.8028281331062317, |
|
"logits/rejected": -0.8350030779838562, |
|
"logps/chosen": -330.8978271484375, |
|
"logps/rejected": -323.8652648925781, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.10223275423049927, |
|
"rewards/margins": 0.2399192601442337, |
|
"rewards/rejected": -0.34215205907821655, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 30.58016472417995, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -0.7252210378646851, |
|
"logits/rejected": -0.8026005625724792, |
|
"logps/chosen": -334.1993408203125, |
|
"logps/rejected": -327.7638244628906, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.12661166489124298, |
|
"rewards/margins": 0.3481716215610504, |
|
"rewards/rejected": -0.4747832715511322, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -0.7373998761177063, |
|
"eval_logits/rejected": -0.7867802977561951, |
|
"eval_logps/chosen": -305.2001953125, |
|
"eval_logps/rejected": -312.2310791015625, |
|
"eval_loss": 0.5975355505943298, |
|
"eval_rewards/accuracies": 0.7120000123977661, |
|
"eval_rewards/chosen": -0.14388525485992432, |
|
"eval_rewards/margins": 0.30272597074508667, |
|
"eval_rewards/rejected": -0.446611225605011, |
|
"eval_runtime": 362.2912, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 1.38, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 28.133150055352257, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": -0.7442792654037476, |
|
"logits/rejected": -0.771621823310852, |
|
"logps/chosen": -305.1579284667969, |
|
"logps/rejected": -323.0625, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.18137095868587494, |
|
"rewards/margins": 0.29733848571777344, |
|
"rewards/rejected": -0.4787093997001648, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 12.295681125044647, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": -0.7585436701774597, |
|
"logits/rejected": -0.7841253280639648, |
|
"logps/chosen": -289.6251525878906, |
|
"logps/rejected": -321.77813720703125, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11477382481098175, |
|
"rewards/margins": 0.3381142020225525, |
|
"rewards/rejected": -0.45288801193237305, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 13.157713149998406, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -0.7680947184562683, |
|
"logits/rejected": -0.8287630081176758, |
|
"logps/chosen": -319.4246520996094, |
|
"logps/rejected": -336.8138122558594, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.10018811374902725, |
|
"rewards/margins": 0.4259633421897888, |
|
"rewards/rejected": -0.5261515378952026, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 30.132051575559856, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": -0.7916001081466675, |
|
"logits/rejected": -0.7729935646057129, |
|
"logps/chosen": -299.23089599609375, |
|
"logps/rejected": -327.81121826171875, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.18419644236564636, |
|
"rewards/margins": 0.3600786626338959, |
|
"rewards/rejected": -0.5442751049995422, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 113.93503316454188, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": -0.784223198890686, |
|
"logits/rejected": -0.8361312747001648, |
|
"logps/chosen": -300.06658935546875, |
|
"logps/rejected": -320.19384765625, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.09815371781587601, |
|
"rewards/margins": 0.3431313931941986, |
|
"rewards/rejected": -0.4412851929664612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -0.7814826369285583, |
|
"eval_logits/rejected": -0.8277723789215088, |
|
"eval_logps/chosen": -303.068115234375, |
|
"eval_logps/rejected": -314.3644104003906, |
|
"eval_loss": 0.5899724364280701, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -0.12256460636854172, |
|
"eval_rewards/margins": 0.3453800678253174, |
|
"eval_rewards/rejected": -0.4679446518421173, |
|
"eval_runtime": 360.3908, |
|
"eval_samples_per_second": 5.55, |
|
"eval_steps_per_second": 1.387, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 38.792460592933615, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -0.7720664143562317, |
|
"logits/rejected": -0.8168239593505859, |
|
"logps/chosen": -311.8421630859375, |
|
"logps/rejected": -321.9998474121094, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14088550209999084, |
|
"rewards/margins": 0.4021270275115967, |
|
"rewards/rejected": -0.5430124402046204, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 20.43009138056781, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": -0.8144097328186035, |
|
"logits/rejected": -0.8275222778320312, |
|
"logps/chosen": -294.3929443359375, |
|
"logps/rejected": -337.9758605957031, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.13375496864318848, |
|
"rewards/margins": 0.38527020812034607, |
|
"rewards/rejected": -0.5190251469612122, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 16.46191110737671, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": -0.8534911870956421, |
|
"logits/rejected": -0.8597370386123657, |
|
"logps/chosen": -299.59765625, |
|
"logps/rejected": -345.2113342285156, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22797565162181854, |
|
"rewards/margins": 0.38332921266555786, |
|
"rewards/rejected": -0.6113048791885376, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 32.75704989898829, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -0.8098493814468384, |
|
"logits/rejected": -0.835224986076355, |
|
"logps/chosen": -318.60662841796875, |
|
"logps/rejected": -338.097900390625, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10866376012563705, |
|
"rewards/margins": 0.47497329115867615, |
|
"rewards/rejected": -0.5836370587348938, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 18.984052465543908, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": -0.8844022750854492, |
|
"logits/rejected": -0.8972233533859253, |
|
"logps/chosen": -321.65606689453125, |
|
"logps/rejected": -363.1982116699219, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.20314355194568634, |
|
"rewards/margins": 0.5058295130729675, |
|
"rewards/rejected": -0.7089730501174927, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -0.8324649930000305, |
|
"eval_logits/rejected": -0.8732168674468994, |
|
"eval_logps/chosen": -316.3727111816406, |
|
"eval_logps/rejected": -336.83245849609375, |
|
"eval_loss": 0.5731549263000488, |
|
"eval_rewards/accuracies": 0.7300000190734863, |
|
"eval_rewards/chosen": -0.2556101679801941, |
|
"eval_rewards/margins": 0.43701496720314026, |
|
"eval_rewards/rejected": -0.692625105381012, |
|
"eval_runtime": 359.7767, |
|
"eval_samples_per_second": 5.559, |
|
"eval_steps_per_second": 1.39, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 32.87677559683423, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": -0.8810909390449524, |
|
"logits/rejected": -0.8625443577766418, |
|
"logps/chosen": -315.59381103515625, |
|
"logps/rejected": -357.4444885253906, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2839288115501404, |
|
"rewards/margins": 0.45044684410095215, |
|
"rewards/rejected": -0.7343756556510925, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 23.00700816288736, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -0.8361509442329407, |
|
"logits/rejected": -0.8887462615966797, |
|
"logps/chosen": -311.9130554199219, |
|
"logps/rejected": -320.8434753417969, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22881212830543518, |
|
"rewards/margins": 0.47015079855918884, |
|
"rewards/rejected": -0.6989628672599792, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 26.64793214141023, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": -0.8444174528121948, |
|
"logits/rejected": -0.8600177764892578, |
|
"logps/chosen": -322.73712158203125, |
|
"logps/rejected": -354.7949523925781, |
|
"loss": 0.5765, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2344287633895874, |
|
"rewards/margins": 0.4120521545410156, |
|
"rewards/rejected": -0.646480917930603, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 32.58348069746148, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": -0.8282186388969421, |
|
"logits/rejected": -0.8633724451065063, |
|
"logps/chosen": -324.4151611328125, |
|
"logps/rejected": -345.1200256347656, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.22642645239830017, |
|
"rewards/margins": 0.5329864025115967, |
|
"rewards/rejected": -0.7594128847122192, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 19.561696320722955, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -0.8326929211616516, |
|
"logits/rejected": -0.8673263788223267, |
|
"logps/chosen": -331.79302978515625, |
|
"logps/rejected": -372.30010986328125, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.315957635641098, |
|
"rewards/margins": 0.5605665445327759, |
|
"rewards/rejected": -0.8765242695808411, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -0.8084123730659485, |
|
"eval_logits/rejected": -0.8502717018127441, |
|
"eval_logps/chosen": -322.279541015625, |
|
"eval_logps/rejected": -346.937255859375, |
|
"eval_loss": 0.5730212330818176, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -0.31467828154563904, |
|
"eval_rewards/margins": 0.4789942800998688, |
|
"eval_rewards/rejected": -0.7936726808547974, |
|
"eval_runtime": 357.3312, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 1.399, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 15.64244928067169, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": -0.8278627395629883, |
|
"logits/rejected": -0.8856652975082397, |
|
"logps/chosen": -348.7338562011719, |
|
"logps/rejected": -350.25567626953125, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.24682530760765076, |
|
"rewards/margins": 0.49646225571632385, |
|
"rewards/rejected": -0.7432876229286194, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 43.67182696049494, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": -0.8275814056396484, |
|
"logits/rejected": -0.8235956430435181, |
|
"logps/chosen": -296.8677978515625, |
|
"logps/rejected": -324.29022216796875, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18848630785942078, |
|
"rewards/margins": 0.5224109292030334, |
|
"rewards/rejected": -0.7108971476554871, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 27.237411423219683, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -0.8245242834091187, |
|
"logits/rejected": -0.831591784954071, |
|
"logps/chosen": -323.1595764160156, |
|
"logps/rejected": -358.5306701660156, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2390444278717041, |
|
"rewards/margins": 0.5187050700187683, |
|
"rewards/rejected": -0.7577494382858276, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 49.39077553232361, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": -0.8345288038253784, |
|
"logits/rejected": -0.8311136960983276, |
|
"logps/chosen": -320.93011474609375, |
|
"logps/rejected": -367.7865295410156, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.17862586677074432, |
|
"rewards/margins": 0.508815586566925, |
|
"rewards/rejected": -0.6874415278434753, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 16.75988502350672, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": -0.8540140390396118, |
|
"logits/rejected": -0.856400191783905, |
|
"logps/chosen": -304.7636413574219, |
|
"logps/rejected": -342.74896240234375, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.13501903414726257, |
|
"rewards/margins": 0.5515834093093872, |
|
"rewards/rejected": -0.6866023540496826, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -0.8289144039154053, |
|
"eval_logits/rejected": -0.8694319128990173, |
|
"eval_logps/chosen": -311.679443359375, |
|
"eval_logps/rejected": -336.6546936035156, |
|
"eval_loss": 0.5626152157783508, |
|
"eval_rewards/accuracies": 0.7319999933242798, |
|
"eval_rewards/chosen": -0.20867733657360077, |
|
"eval_rewards/margins": 0.482170045375824, |
|
"eval_rewards/rejected": -0.6908472776412964, |
|
"eval_runtime": 355.7788, |
|
"eval_samples_per_second": 5.621, |
|
"eval_steps_per_second": 1.405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 21.657298235918294, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -0.8230468034744263, |
|
"logits/rejected": -0.8405194282531738, |
|
"logps/chosen": -334.99298095703125, |
|
"logps/rejected": -355.60504150390625, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.2502953112125397, |
|
"rewards/margins": 0.46560558676719666, |
|
"rewards/rejected": -0.7159008979797363, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 74.8260958025737, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": -0.821232795715332, |
|
"logits/rejected": -0.8672993779182434, |
|
"logps/chosen": -309.51519775390625, |
|
"logps/rejected": -335.122802734375, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1752547323703766, |
|
"rewards/margins": 0.5394274592399597, |
|
"rewards/rejected": -0.7146821618080139, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 16.46276766957732, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": -0.8121312260627747, |
|
"logits/rejected": -0.8373250961303711, |
|
"logps/chosen": -325.12091064453125, |
|
"logps/rejected": -360.0669250488281, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.20427687466144562, |
|
"rewards/margins": 0.5377046465873718, |
|
"rewards/rejected": -0.7419815063476562, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 38.336055145825206, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -0.8095279932022095, |
|
"logits/rejected": -0.8635553121566772, |
|
"logps/chosen": -326.71759033203125, |
|
"logps/rejected": -357.30517578125, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1362684667110443, |
|
"rewards/margins": 0.5115006566047668, |
|
"rewards/rejected": -0.6477690935134888, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 24.60952754885755, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": -0.846905529499054, |
|
"logits/rejected": -0.8851727247238159, |
|
"logps/chosen": -319.1526184082031, |
|
"logps/rejected": -359.0050354003906, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.18566596508026123, |
|
"rewards/margins": 0.5182815194129944, |
|
"rewards/rejected": -0.7039474844932556, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -0.8328730463981628, |
|
"eval_logits/rejected": -0.8715097308158875, |
|
"eval_logps/chosen": -312.7687072753906, |
|
"eval_logps/rejected": -338.257568359375, |
|
"eval_loss": 0.560410737991333, |
|
"eval_rewards/accuracies": 0.7300000190734863, |
|
"eval_rewards/chosen": -0.21957026422023773, |
|
"eval_rewards/margins": 0.48730605840682983, |
|
"eval_rewards/rejected": -0.7068763375282288, |
|
"eval_runtime": 354.669, |
|
"eval_samples_per_second": 5.639, |
|
"eval_steps_per_second": 1.41, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 11.20497542941971, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": -0.9012954831123352, |
|
"logits/rejected": -0.906468391418457, |
|
"logps/chosen": -334.8082275390625, |
|
"logps/rejected": -345.9796447753906, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22493624687194824, |
|
"rewards/margins": 0.5446537137031555, |
|
"rewards/rejected": -0.7695900201797485, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 26.680659771781972, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -0.8376303911209106, |
|
"logits/rejected": -0.8529024124145508, |
|
"logps/chosen": -308.93817138671875, |
|
"logps/rejected": -372.81298828125, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.2364557683467865, |
|
"rewards/margins": 0.5860723257064819, |
|
"rewards/rejected": -0.8225281834602356, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 37.70065982337463, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": -0.8422588109970093, |
|
"logits/rejected": -0.8835725784301758, |
|
"logps/chosen": -331.5950622558594, |
|
"logps/rejected": -352.51531982421875, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.23024725914001465, |
|
"rewards/margins": 0.5459326505661011, |
|
"rewards/rejected": -0.7761799097061157, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 28.46050909044336, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": -0.8466767072677612, |
|
"logits/rejected": -0.8686957359313965, |
|
"logps/chosen": -322.8841552734375, |
|
"logps/rejected": -369.8939208984375, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1927202194929123, |
|
"rewards/margins": 0.6201958060264587, |
|
"rewards/rejected": -0.8129159808158875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 37.07373569586965, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -0.8417257070541382, |
|
"logits/rejected": -0.8639878034591675, |
|
"logps/chosen": -304.902099609375, |
|
"logps/rejected": -353.9253845214844, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.24326789379119873, |
|
"rewards/margins": 0.5259731411933899, |
|
"rewards/rejected": -0.7692410349845886, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -0.8397230505943298, |
|
"eval_logits/rejected": -0.8778651356697083, |
|
"eval_logps/chosen": -316.7488098144531, |
|
"eval_logps/rejected": -344.3741149902344, |
|
"eval_loss": 0.5599729418754578, |
|
"eval_rewards/accuracies": 0.7279999852180481, |
|
"eval_rewards/chosen": -0.25937125086784363, |
|
"eval_rewards/margins": 0.5086703896522522, |
|
"eval_rewards/rejected": -0.7680416703224182, |
|
"eval_runtime": 355.4201, |
|
"eval_samples_per_second": 5.627, |
|
"eval_steps_per_second": 1.407, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 32.1943094596091, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": -0.864120364189148, |
|
"logits/rejected": -0.8782867193222046, |
|
"logps/chosen": -319.0982971191406, |
|
"logps/rejected": -347.4610900878906, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.27945682406425476, |
|
"rewards/margins": 0.5023162364959717, |
|
"rewards/rejected": -0.7817729711532593, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 22.405117438704156, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": -0.7893309593200684, |
|
"logits/rejected": -0.8491501808166504, |
|
"logps/chosen": -327.20037841796875, |
|
"logps/rejected": -342.6055603027344, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.24120506644248962, |
|
"rewards/margins": 0.6007115244865417, |
|
"rewards/rejected": -0.8419166803359985, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5919944611485399, |
|
"train_runtime": 28847.4829, |
|
"train_samples_per_second": 2.119, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |