NoManDeRY's picture
Upload folder using huggingface_hub
c77696c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998691442030882,
"eval_steps": 50,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002093692750588851,
"grad_norm": 4.483342826138069,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.4866575300693512,
"logits/rejected": -0.7110590934753418,
"logps/chosen": -355.9316101074219,
"logps/rejected": -328.53912353515625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02093692750588851,
"grad_norm": 4.128088001642986,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.5703912377357483,
"logits/rejected": -0.648878812789917,
"logps/chosen": -295.92047119140625,
"logps/rejected": -294.49114990234375,
"loss": 0.6932,
"rewards/accuracies": 0.4618055522441864,
"rewards/chosen": 4.306111441110261e-05,
"rewards/margins": -4.887706381850876e-05,
"rewards/rejected": 9.193811274599284e-05,
"step": 10
},
{
"epoch": 0.04187385501177702,
"grad_norm": 5.410229562758306,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.6405607461929321,
"logits/rejected": -0.716262936592102,
"logps/chosen": -302.8559265136719,
"logps/rejected": -268.3453674316406,
"loss": 0.6928,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.003174368990585208,
"rewards/margins": 0.0005290606059134007,
"rewards/rejected": 0.0026453081518411636,
"step": 20
},
{
"epoch": 0.06281078251766553,
"grad_norm": 3.6935020021345912,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.5716087222099304,
"logits/rejected": -0.7064056992530823,
"logps/chosen": -317.4009704589844,
"logps/rejected": -286.69091796875,
"loss": 0.6912,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": 0.019368406385183334,
"rewards/margins": 0.0026101372204720974,
"rewards/rejected": 0.0167582668364048,
"step": 30
},
{
"epoch": 0.08374771002355404,
"grad_norm": 4.40245909291114,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6235290765762329,
"logits/rejected": -0.6784546971321106,
"logps/chosen": -311.2938537597656,
"logps/rejected": -279.271484375,
"loss": 0.687,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.06372524797916412,
"rewards/margins": 0.011987941339612007,
"rewards/rejected": 0.051737308502197266,
"step": 40
},
{
"epoch": 0.10468463752944256,
"grad_norm": 3.500658089915997,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6384499073028564,
"logits/rejected": -0.7284350395202637,
"logps/chosen": -280.0108337402344,
"logps/rejected": -262.5020446777344,
"loss": 0.6819,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.0958619937300682,
"rewards/margins": 0.02134247124195099,
"rewards/rejected": 0.07451952993869781,
"step": 50
},
{
"epoch": 0.10468463752944256,
"eval_logits/chosen": -0.6590862274169922,
"eval_logits/rejected": -0.7374107241630554,
"eval_logps/chosen": -280.3077392578125,
"eval_logps/rejected": -259.5904541015625,
"eval_loss": 0.6799615621566772,
"eval_rewards/accuracies": 0.6399999856948853,
"eval_rewards/chosen": 0.10503920167684555,
"eval_rewards/margins": 0.025244150310754776,
"eval_rewards/rejected": 0.07979504764080048,
"eval_runtime": 364.1271,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.373,
"step": 50
},
{
"epoch": 0.12562156503533106,
"grad_norm": 3.529016261160226,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.6414979696273804,
"logits/rejected": -0.7368530631065369,
"logps/chosen": -270.6864318847656,
"logps/rejected": -248.44384765625,
"loss": 0.6759,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.11528738588094711,
"rewards/margins": 0.03561442345380783,
"rewards/rejected": 0.07967296242713928,
"step": 60
},
{
"epoch": 0.14655849254121958,
"grad_norm": 3.6469037758356073,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.6982444524765015,
"logits/rejected": -0.7211685180664062,
"logps/chosen": -289.2667541503906,
"logps/rejected": -275.0166931152344,
"loss": 0.6671,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": 0.09976810961961746,
"rewards/margins": 0.042868759483098984,
"rewards/rejected": 0.05689934641122818,
"step": 70
},
{
"epoch": 0.16749542004710807,
"grad_norm": 4.175678976701424,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.7570616602897644,
"logits/rejected": -0.7942430377006531,
"logps/chosen": -315.99798583984375,
"logps/rejected": -287.9906921386719,
"loss": 0.6607,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.07903473824262619,
"rewards/margins": 0.07849308103322983,
"rewards/rejected": 0.0005416579660959542,
"step": 80
},
{
"epoch": 0.1884323475529966,
"grad_norm": 5.281508008175948,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.7726985216140747,
"logits/rejected": -0.8362387418746948,
"logps/chosen": -275.9876403808594,
"logps/rejected": -277.8388671875,
"loss": 0.646,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.027138452976942062,
"rewards/margins": 0.10656658560037613,
"rewards/rejected": -0.07942812889814377,
"step": 90
},
{
"epoch": 0.2093692750588851,
"grad_norm": 12.951722618692823,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.7694597840309143,
"logits/rejected": -0.8001770973205566,
"logps/chosen": -281.1448974609375,
"logps/rejected": -265.9299621582031,
"loss": 0.6361,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.029758170247077942,
"rewards/margins": 0.13903483748435974,
"rewards/rejected": -0.10927668958902359,
"step": 100
},
{
"epoch": 0.2093692750588851,
"eval_logits/chosen": -0.7621766924858093,
"eval_logits/rejected": -0.8268935680389404,
"eval_logps/chosen": -289.7268981933594,
"eval_logps/rejected": -281.2423400878906,
"eval_loss": 0.6362168788909912,
"eval_rewards/accuracies": 0.7080000042915344,
"eval_rewards/chosen": 0.010847779922187328,
"eval_rewards/margins": 0.1475716382265091,
"eval_rewards/rejected": -0.1367238610982895,
"eval_runtime": 362.8709,
"eval_samples_per_second": 5.512,
"eval_steps_per_second": 1.378,
"step": 100
},
{
"epoch": 0.23030620256477363,
"grad_norm": 5.692610235446284,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.7423380017280579,
"logits/rejected": -0.7826918363571167,
"logps/chosen": -288.1560974121094,
"logps/rejected": -290.35430908203125,
"loss": 0.6345,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.01307572703808546,
"rewards/margins": 0.1582353413105011,
"rewards/rejected": -0.17131105065345764,
"step": 110
},
{
"epoch": 0.2512431300706621,
"grad_norm": 12.622342550289487,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.7232716679573059,
"logits/rejected": -0.8170641660690308,
"logps/chosen": -318.8392333984375,
"logps/rejected": -287.73712158203125,
"loss": 0.616,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.017950385808944702,
"rewards/margins": 0.2194708287715912,
"rewards/rejected": -0.2374211996793747,
"step": 120
},
{
"epoch": 0.2721800575765506,
"grad_norm": 9.598057625140969,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.7142191529273987,
"logits/rejected": -0.7762171030044556,
"logps/chosen": -296.99627685546875,
"logps/rejected": -309.0848083496094,
"loss": 0.6031,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.011398183181881905,
"rewards/margins": 0.25749707221984863,
"rewards/rejected": -0.24609890580177307,
"step": 130
},
{
"epoch": 0.29311698508243916,
"grad_norm": 16.604631406571734,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.8028281331062317,
"logits/rejected": -0.8350030779838562,
"logps/chosen": -330.8978271484375,
"logps/rejected": -323.8652648925781,
"loss": 0.6027,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.10223275423049927,
"rewards/margins": 0.2399192601442337,
"rewards/rejected": -0.34215205907821655,
"step": 140
},
{
"epoch": 0.31405391258832765,
"grad_norm": 30.58016472417995,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.7252210378646851,
"logits/rejected": -0.8026005625724792,
"logps/chosen": -334.1993408203125,
"logps/rejected": -327.7638244628906,
"loss": 0.5998,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.12661166489124298,
"rewards/margins": 0.3481716215610504,
"rewards/rejected": -0.4747832715511322,
"step": 150
},
{
"epoch": 0.31405391258832765,
"eval_logits/chosen": -0.7373998761177063,
"eval_logits/rejected": -0.7867802977561951,
"eval_logps/chosen": -305.2001953125,
"eval_logps/rejected": -312.2310791015625,
"eval_loss": 0.5975355505943298,
"eval_rewards/accuracies": 0.7120000123977661,
"eval_rewards/chosen": -0.14388525485992432,
"eval_rewards/margins": 0.30272597074508667,
"eval_rewards/rejected": -0.446611225605011,
"eval_runtime": 362.2912,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 1.38,
"step": 150
},
{
"epoch": 0.33499084009421615,
"grad_norm": 28.133150055352257,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.7442792654037476,
"logits/rejected": -0.771621823310852,
"logps/chosen": -305.1579284667969,
"logps/rejected": -323.0625,
"loss": 0.5914,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.18137095868587494,
"rewards/margins": 0.29733848571777344,
"rewards/rejected": -0.4787093997001648,
"step": 160
},
{
"epoch": 0.3559277676001047,
"grad_norm": 12.295681125044647,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.7585436701774597,
"logits/rejected": -0.7841253280639648,
"logps/chosen": -289.6251525878906,
"logps/rejected": -321.77813720703125,
"loss": 0.5953,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.11477382481098175,
"rewards/margins": 0.3381142020225525,
"rewards/rejected": -0.45288801193237305,
"step": 170
},
{
"epoch": 0.3768646951059932,
"grad_norm": 13.157713149998406,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.7680947184562683,
"logits/rejected": -0.8287630081176758,
"logps/chosen": -319.4246520996094,
"logps/rejected": -336.8138122558594,
"loss": 0.5777,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.10018811374902725,
"rewards/margins": 0.4259633421897888,
"rewards/rejected": -0.5261515378952026,
"step": 180
},
{
"epoch": 0.39780162261188173,
"grad_norm": 30.132051575559856,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.7916001081466675,
"logits/rejected": -0.7729935646057129,
"logps/chosen": -299.23089599609375,
"logps/rejected": -327.81121826171875,
"loss": 0.5818,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.18419644236564636,
"rewards/margins": 0.3600786626338959,
"rewards/rejected": -0.5442751049995422,
"step": 190
},
{
"epoch": 0.4187385501177702,
"grad_norm": 113.93503316454188,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.784223198890686,
"logits/rejected": -0.8361312747001648,
"logps/chosen": -300.06658935546875,
"logps/rejected": -320.19384765625,
"loss": 0.5873,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.09815371781587601,
"rewards/margins": 0.3431313931941986,
"rewards/rejected": -0.4412851929664612,
"step": 200
},
{
"epoch": 0.4187385501177702,
"eval_logits/chosen": -0.7814826369285583,
"eval_logits/rejected": -0.8277723789215088,
"eval_logps/chosen": -303.068115234375,
"eval_logps/rejected": -314.3644104003906,
"eval_loss": 0.5899724364280701,
"eval_rewards/accuracies": 0.7160000205039978,
"eval_rewards/chosen": -0.12256460636854172,
"eval_rewards/margins": 0.3453800678253174,
"eval_rewards/rejected": -0.4679446518421173,
"eval_runtime": 360.3908,
"eval_samples_per_second": 5.55,
"eval_steps_per_second": 1.387,
"step": 200
},
{
"epoch": 0.4396754776236587,
"grad_norm": 38.792460592933615,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.7720664143562317,
"logits/rejected": -0.8168239593505859,
"logps/chosen": -311.8421630859375,
"logps/rejected": -321.9998474121094,
"loss": 0.5882,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.14088550209999084,
"rewards/margins": 0.4021270275115967,
"rewards/rejected": -0.5430124402046204,
"step": 210
},
{
"epoch": 0.46061240512954726,
"grad_norm": 20.43009138056781,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.8144097328186035,
"logits/rejected": -0.8275222778320312,
"logps/chosen": -294.3929443359375,
"logps/rejected": -337.9758605957031,
"loss": 0.5823,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.13375496864318848,
"rewards/margins": 0.38527020812034607,
"rewards/rejected": -0.5190251469612122,
"step": 220
},
{
"epoch": 0.48154933263543576,
"grad_norm": 16.46191110737671,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.8534911870956421,
"logits/rejected": -0.8597370386123657,
"logps/chosen": -299.59765625,
"logps/rejected": -345.2113342285156,
"loss": 0.5853,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.22797565162181854,
"rewards/margins": 0.38332921266555786,
"rewards/rejected": -0.6113048791885376,
"step": 230
},
{
"epoch": 0.5024862601413242,
"grad_norm": 32.75704989898829,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.8098493814468384,
"logits/rejected": -0.835224986076355,
"logps/chosen": -318.60662841796875,
"logps/rejected": -338.097900390625,
"loss": 0.5545,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.10866376012563705,
"rewards/margins": 0.47497329115867615,
"rewards/rejected": -0.5836370587348938,
"step": 240
},
{
"epoch": 0.5234231876472127,
"grad_norm": 18.984052465543908,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.8844022750854492,
"logits/rejected": -0.8972233533859253,
"logps/chosen": -321.65606689453125,
"logps/rejected": -363.1982116699219,
"loss": 0.5692,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.20314355194568634,
"rewards/margins": 0.5058295130729675,
"rewards/rejected": -0.7089730501174927,
"step": 250
},
{
"epoch": 0.5234231876472127,
"eval_logits/chosen": -0.8324649930000305,
"eval_logits/rejected": -0.8732168674468994,
"eval_logps/chosen": -316.3727111816406,
"eval_logps/rejected": -336.83245849609375,
"eval_loss": 0.5731549263000488,
"eval_rewards/accuracies": 0.7300000190734863,
"eval_rewards/chosen": -0.2556101679801941,
"eval_rewards/margins": 0.43701496720314026,
"eval_rewards/rejected": -0.692625105381012,
"eval_runtime": 359.7767,
"eval_samples_per_second": 5.559,
"eval_steps_per_second": 1.39,
"step": 250
},
{
"epoch": 0.5443601151531012,
"grad_norm": 32.87677559683423,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.8810909390449524,
"logits/rejected": -0.8625443577766418,
"logps/chosen": -315.59381103515625,
"logps/rejected": -357.4444885253906,
"loss": 0.5713,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.2839288115501404,
"rewards/margins": 0.45044684410095215,
"rewards/rejected": -0.7343756556510925,
"step": 260
},
{
"epoch": 0.5652970426589898,
"grad_norm": 23.00700816288736,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.8361509442329407,
"logits/rejected": -0.8887462615966797,
"logps/chosen": -311.9130554199219,
"logps/rejected": -320.8434753417969,
"loss": 0.5679,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.22881212830543518,
"rewards/margins": 0.47015079855918884,
"rewards/rejected": -0.6989628672599792,
"step": 270
},
{
"epoch": 0.5862339701648783,
"grad_norm": 26.64793214141023,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.8444174528121948,
"logits/rejected": -0.8600177764892578,
"logps/chosen": -322.73712158203125,
"logps/rejected": -354.7949523925781,
"loss": 0.5765,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.2344287633895874,
"rewards/margins": 0.4120521545410156,
"rewards/rejected": -0.646480917930603,
"step": 280
},
{
"epoch": 0.6071708976707668,
"grad_norm": 32.58348069746148,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.8282186388969421,
"logits/rejected": -0.8633724451065063,
"logps/chosen": -324.4151611328125,
"logps/rejected": -345.1200256347656,
"loss": 0.5692,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.22642645239830017,
"rewards/margins": 0.5329864025115967,
"rewards/rejected": -0.7594128847122192,
"step": 290
},
{
"epoch": 0.6281078251766553,
"grad_norm": 19.561696320722955,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8326929211616516,
"logits/rejected": -0.8673263788223267,
"logps/chosen": -331.79302978515625,
"logps/rejected": -372.30010986328125,
"loss": 0.5668,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.315957635641098,
"rewards/margins": 0.5605665445327759,
"rewards/rejected": -0.8765242695808411,
"step": 300
},
{
"epoch": 0.6281078251766553,
"eval_logits/chosen": -0.8084123730659485,
"eval_logits/rejected": -0.8502717018127441,
"eval_logps/chosen": -322.279541015625,
"eval_logps/rejected": -346.937255859375,
"eval_loss": 0.5730212330818176,
"eval_rewards/accuracies": 0.7160000205039978,
"eval_rewards/chosen": -0.31467828154563904,
"eval_rewards/margins": 0.4789942800998688,
"eval_rewards/rejected": -0.7936726808547974,
"eval_runtime": 357.3312,
"eval_samples_per_second": 5.597,
"eval_steps_per_second": 1.399,
"step": 300
},
{
"epoch": 0.6490447526825438,
"grad_norm": 15.64244928067169,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.8278627395629883,
"logits/rejected": -0.8856652975082397,
"logps/chosen": -348.7338562011719,
"logps/rejected": -350.25567626953125,
"loss": 0.5536,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.24682530760765076,
"rewards/margins": 0.49646225571632385,
"rewards/rejected": -0.7432876229286194,
"step": 310
},
{
"epoch": 0.6699816801884323,
"grad_norm": 43.67182696049494,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.8275814056396484,
"logits/rejected": -0.8235956430435181,
"logps/chosen": -296.8677978515625,
"logps/rejected": -324.29022216796875,
"loss": 0.5479,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.18848630785942078,
"rewards/margins": 0.5224109292030334,
"rewards/rejected": -0.7108971476554871,
"step": 320
},
{
"epoch": 0.6909186076943209,
"grad_norm": 27.237411423219683,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8245242834091187,
"logits/rejected": -0.831591784954071,
"logps/chosen": -323.1595764160156,
"logps/rejected": -358.5306701660156,
"loss": 0.5633,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.2390444278717041,
"rewards/margins": 0.5187050700187683,
"rewards/rejected": -0.7577494382858276,
"step": 330
},
{
"epoch": 0.7118555352002094,
"grad_norm": 49.39077553232361,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.8345288038253784,
"logits/rejected": -0.8311136960983276,
"logps/chosen": -320.93011474609375,
"logps/rejected": -367.7865295410156,
"loss": 0.5605,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.17862586677074432,
"rewards/margins": 0.508815586566925,
"rewards/rejected": -0.6874415278434753,
"step": 340
},
{
"epoch": 0.7327924627060979,
"grad_norm": 16.75988502350672,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.8540140390396118,
"logits/rejected": -0.856400191783905,
"logps/chosen": -304.7636413574219,
"logps/rejected": -342.74896240234375,
"loss": 0.5415,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.13501903414726257,
"rewards/margins": 0.5515834093093872,
"rewards/rejected": -0.6866023540496826,
"step": 350
},
{
"epoch": 0.7327924627060979,
"eval_logits/chosen": -0.8289144039154053,
"eval_logits/rejected": -0.8694319128990173,
"eval_logps/chosen": -311.679443359375,
"eval_logps/rejected": -336.6546936035156,
"eval_loss": 0.5626152157783508,
"eval_rewards/accuracies": 0.7319999933242798,
"eval_rewards/chosen": -0.20867733657360077,
"eval_rewards/margins": 0.482170045375824,
"eval_rewards/rejected": -0.6908472776412964,
"eval_runtime": 355.7788,
"eval_samples_per_second": 5.621,
"eval_steps_per_second": 1.405,
"step": 350
},
{
"epoch": 0.7537293902119864,
"grad_norm": 21.657298235918294,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.8230468034744263,
"logits/rejected": -0.8405194282531738,
"logps/chosen": -334.99298095703125,
"logps/rejected": -355.60504150390625,
"loss": 0.5708,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.2502953112125397,
"rewards/margins": 0.46560558676719666,
"rewards/rejected": -0.7159008979797363,
"step": 360
},
{
"epoch": 0.7746663177178749,
"grad_norm": 74.8260958025737,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.821232795715332,
"logits/rejected": -0.8672993779182434,
"logps/chosen": -309.51519775390625,
"logps/rejected": -335.122802734375,
"loss": 0.551,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1752547323703766,
"rewards/margins": 0.5394274592399597,
"rewards/rejected": -0.7146821618080139,
"step": 370
},
{
"epoch": 0.7956032452237635,
"grad_norm": 16.46276766957732,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.8121312260627747,
"logits/rejected": -0.8373250961303711,
"logps/chosen": -325.12091064453125,
"logps/rejected": -360.0669250488281,
"loss": 0.5565,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.20427687466144562,
"rewards/margins": 0.5377046465873718,
"rewards/rejected": -0.7419815063476562,
"step": 380
},
{
"epoch": 0.816540172729652,
"grad_norm": 38.336055145825206,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8095279932022095,
"logits/rejected": -0.8635553121566772,
"logps/chosen": -326.71759033203125,
"logps/rejected": -357.30517578125,
"loss": 0.5454,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.1362684667110443,
"rewards/margins": 0.5115006566047668,
"rewards/rejected": -0.6477690935134888,
"step": 390
},
{
"epoch": 0.8374771002355405,
"grad_norm": 24.60952754885755,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.846905529499054,
"logits/rejected": -0.8851727247238159,
"logps/chosen": -319.1526184082031,
"logps/rejected": -359.0050354003906,
"loss": 0.5595,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.18566596508026123,
"rewards/margins": 0.5182815194129944,
"rewards/rejected": -0.7039474844932556,
"step": 400
},
{
"epoch": 0.8374771002355405,
"eval_logits/chosen": -0.8328730463981628,
"eval_logits/rejected": -0.8715097308158875,
"eval_logps/chosen": -312.7687072753906,
"eval_logps/rejected": -338.257568359375,
"eval_loss": 0.560410737991333,
"eval_rewards/accuracies": 0.7300000190734863,
"eval_rewards/chosen": -0.21957026422023773,
"eval_rewards/margins": 0.48730605840682983,
"eval_rewards/rejected": -0.7068763375282288,
"eval_runtime": 354.669,
"eval_samples_per_second": 5.639,
"eval_steps_per_second": 1.41,
"step": 400
},
{
"epoch": 0.8584140277414289,
"grad_norm": 11.20497542941971,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.9012954831123352,
"logits/rejected": -0.906468391418457,
"logps/chosen": -334.8082275390625,
"logps/rejected": -345.9796447753906,
"loss": 0.5532,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.22493624687194824,
"rewards/margins": 0.5446537137031555,
"rewards/rejected": -0.7695900201797485,
"step": 410
},
{
"epoch": 0.8793509552473174,
"grad_norm": 26.680659771781972,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.8376303911209106,
"logits/rejected": -0.8529024124145508,
"logps/chosen": -308.93817138671875,
"logps/rejected": -372.81298828125,
"loss": 0.5547,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.2364557683467865,
"rewards/margins": 0.5860723257064819,
"rewards/rejected": -0.8225281834602356,
"step": 420
},
{
"epoch": 0.9002878827532059,
"grad_norm": 37.70065982337463,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.8422588109970093,
"logits/rejected": -0.8835725784301758,
"logps/chosen": -331.5950622558594,
"logps/rejected": -352.51531982421875,
"loss": 0.5355,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.23024725914001465,
"rewards/margins": 0.5459326505661011,
"rewards/rejected": -0.7761799097061157,
"step": 430
},
{
"epoch": 0.9212248102590945,
"grad_norm": 28.46050909044336,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.8466767072677612,
"logits/rejected": -0.8686957359313965,
"logps/chosen": -322.8841552734375,
"logps/rejected": -369.8939208984375,
"loss": 0.5512,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.1927202194929123,
"rewards/margins": 0.6201958060264587,
"rewards/rejected": -0.8129159808158875,
"step": 440
},
{
"epoch": 0.942161737764983,
"grad_norm": 37.07373569586965,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.8417257070541382,
"logits/rejected": -0.8639878034591675,
"logps/chosen": -304.902099609375,
"logps/rejected": -353.9253845214844,
"loss": 0.5552,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.24326789379119873,
"rewards/margins": 0.5259731411933899,
"rewards/rejected": -0.7692410349845886,
"step": 450
},
{
"epoch": 0.942161737764983,
"eval_logits/chosen": -0.8397230505943298,
"eval_logits/rejected": -0.8778651356697083,
"eval_logps/chosen": -316.7488098144531,
"eval_logps/rejected": -344.3741149902344,
"eval_loss": 0.5599729418754578,
"eval_rewards/accuracies": 0.7279999852180481,
"eval_rewards/chosen": -0.25937125086784363,
"eval_rewards/margins": 0.5086703896522522,
"eval_rewards/rejected": -0.7680416703224182,
"eval_runtime": 355.4201,
"eval_samples_per_second": 5.627,
"eval_steps_per_second": 1.407,
"step": 450
},
{
"epoch": 0.9630986652708715,
"grad_norm": 32.1943094596091,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.864120364189148,
"logits/rejected": -0.8782867193222046,
"logps/chosen": -319.0982971191406,
"logps/rejected": -347.4610900878906,
"loss": 0.5558,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.27945682406425476,
"rewards/margins": 0.5023162364959717,
"rewards/rejected": -0.7817729711532593,
"step": 460
},
{
"epoch": 0.98403559277676,
"grad_norm": 22.405117438704156,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.7893309593200684,
"logits/rejected": -0.8491501808166504,
"logps/chosen": -327.20037841796875,
"logps/rejected": -342.6055603027344,
"loss": 0.5308,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.24120506644248962,
"rewards/margins": 0.6007115244865417,
"rewards/rejected": -0.8419166803359985,
"step": 470
},
{
"epoch": 0.998691442030882,
"step": 477,
"total_flos": 0.0,
"train_loss": 0.5919944611485399,
"train_runtime": 28847.4829,
"train_samples_per_second": 2.119,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}