|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998638529611981, |
|
"eval_steps": 500, |
|
"global_step": 3672, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002722940776038121, |
|
"grad_norm": 2.365647699760686, |
|
"learning_rate": 5.434782608695653e-07, |
|
"logits/chosen": -0.8060563206672668, |
|
"logits/rejected": -0.5449127554893494, |
|
"logps/chosen": -397.73370361328125, |
|
"logps/rejected": -461.0686340332031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002722940776038121, |
|
"grad_norm": 2.117870186084684, |
|
"learning_rate": 5.4347826086956525e-06, |
|
"logits/chosen": -1.0161805152893066, |
|
"logits/rejected": -0.9247782230377197, |
|
"logps/chosen": -339.845947265625, |
|
"logps/rejected": -335.201416015625, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.0008954557124525309, |
|
"rewards/margins": 0.0016563390381634235, |
|
"rewards/rejected": -0.0007608832092955709, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005445881552076242, |
|
"grad_norm": 2.3050788819778347, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"logits/chosen": -1.1553447246551514, |
|
"logits/rejected": -1.1530015468597412, |
|
"logps/chosen": -319.328369140625, |
|
"logps/rejected": -281.5868225097656, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.025853103026747704, |
|
"rewards/margins": 0.01720840111374855, |
|
"rewards/rejected": 0.008644700981676579, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.008168822328114363, |
|
"grad_norm": 2.018186008036389, |
|
"learning_rate": 1.630434782608696e-05, |
|
"logits/chosen": -1.2003402709960938, |
|
"logits/rejected": -1.0625836849212646, |
|
"logps/chosen": -297.01800537109375, |
|
"logps/rejected": -293.5678405761719, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.15041035413742065, |
|
"rewards/margins": 0.10567835718393326, |
|
"rewards/rejected": 0.0447319932281971, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010891763104152484, |
|
"grad_norm": 2.0974366425813176, |
|
"learning_rate": 2.173913043478261e-05, |
|
"logits/chosen": -1.2458221912384033, |
|
"logits/rejected": -1.1523743867874146, |
|
"logps/chosen": -333.25506591796875, |
|
"logps/rejected": -345.1227111816406, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.38798853754997253, |
|
"rewards/margins": 0.30759841203689575, |
|
"rewards/rejected": 0.08039011061191559, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.013614703880190605, |
|
"grad_norm": 1.8585066163676949, |
|
"learning_rate": 2.7173913043478262e-05, |
|
"logits/chosen": -1.1938469409942627, |
|
"logits/rejected": -1.2074568271636963, |
|
"logps/chosen": -302.45819091796875, |
|
"logps/rejected": -314.1945495605469, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5669056177139282, |
|
"rewards/margins": 0.5880029201507568, |
|
"rewards/rejected": -0.02109731361269951, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016337644656228726, |
|
"grad_norm": 2.4589848970734356, |
|
"learning_rate": 3.260869565217392e-05, |
|
"logits/chosen": -1.1839563846588135, |
|
"logits/rejected": -1.250597357749939, |
|
"logps/chosen": -286.90283203125, |
|
"logps/rejected": -327.09271240234375, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.571540892124176, |
|
"rewards/margins": 0.813601016998291, |
|
"rewards/rejected": -0.2420600950717926, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01906058543226685, |
|
"grad_norm": 2.367267226063014, |
|
"learning_rate": 3.804347826086957e-05, |
|
"logits/chosen": -1.2047820091247559, |
|
"logits/rejected": -1.2526741027832031, |
|
"logps/chosen": -298.99395751953125, |
|
"logps/rejected": -348.8773498535156, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.3528999388217926, |
|
"rewards/margins": 1.0532305240631104, |
|
"rewards/rejected": -0.7003306150436401, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.021783526208304968, |
|
"grad_norm": 3.298724009406964, |
|
"learning_rate": 4.347826086956522e-05, |
|
"logits/chosen": -1.0405532121658325, |
|
"logits/rejected": -1.1189762353897095, |
|
"logps/chosen": -368.87835693359375, |
|
"logps/rejected": -510.46112060546875, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.30884578824043274, |
|
"rewards/margins": 1.878394365310669, |
|
"rewards/rejected": -2.1872401237487793, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02450646698434309, |
|
"grad_norm": 5.125711190041795, |
|
"learning_rate": 4.891304347826087e-05, |
|
"logits/chosen": -0.9169028997421265, |
|
"logits/rejected": -0.8087233304977417, |
|
"logps/chosen": -467.9712829589844, |
|
"logps/rejected": -670.4898071289062, |
|
"loss": 0.3937, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1821720600128174, |
|
"rewards/margins": 1.763218879699707, |
|
"rewards/rejected": -2.9453907012939453, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02722940776038121, |
|
"grad_norm": 2.393390101676132, |
|
"learning_rate": 5.4347826086956524e-05, |
|
"logits/chosen": -0.9179345369338989, |
|
"logits/rejected": -0.9133983850479126, |
|
"logps/chosen": -389.9552307128906, |
|
"logps/rejected": -575.6070556640625, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.910792350769043, |
|
"rewards/margins": 1.9828264713287354, |
|
"rewards/rejected": -2.8936190605163574, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.029952348536419333, |
|
"grad_norm": 2.9244828418124116, |
|
"learning_rate": 5.9782608695652175e-05, |
|
"logits/chosen": -0.8454478979110718, |
|
"logits/rejected": -0.7953473329544067, |
|
"logps/chosen": -379.1640625, |
|
"logps/rejected": -630.0404052734375, |
|
"loss": 0.278, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.684828519821167, |
|
"rewards/margins": 2.8788986206054688, |
|
"rewards/rejected": -3.5637271404266357, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03267528931245745, |
|
"grad_norm": 5.119335229124842, |
|
"learning_rate": 6.521739130434783e-05, |
|
"logits/chosen": -0.4422452449798584, |
|
"logits/rejected": -0.3685118854045868, |
|
"logps/chosen": -523.5398559570312, |
|
"logps/rejected": -851.53173828125, |
|
"loss": 0.278, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0111899375915527, |
|
"rewards/margins": 3.533088207244873, |
|
"rewards/rejected": -5.544278144836426, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"grad_norm": 6.032034269990042, |
|
"learning_rate": 7.065217391304349e-05, |
|
"logits/chosen": -0.42538633942604065, |
|
"logits/rejected": -0.3796442151069641, |
|
"logps/chosen": -537.8934326171875, |
|
"logps/rejected": -948.9290161132812, |
|
"loss": 0.318, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7698837518692017, |
|
"rewards/margins": 4.274045467376709, |
|
"rewards/rejected": -6.043929100036621, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0381211708645337, |
|
"grad_norm": 3.937864766160224, |
|
"learning_rate": 7.608695652173914e-05, |
|
"logits/chosen": -0.3227941393852234, |
|
"logits/rejected": -0.24235177040100098, |
|
"logps/chosen": -437.365234375, |
|
"logps/rejected": -727.3698120117188, |
|
"loss": 0.2836, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2195541858673096, |
|
"rewards/margins": 3.049933671951294, |
|
"rewards/rejected": -4.269488334655762, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04084411164057182, |
|
"grad_norm": 5.771526324368126, |
|
"learning_rate": 8.152173913043478e-05, |
|
"logits/chosen": -0.3635067343711853, |
|
"logits/rejected": -0.2840490937232971, |
|
"logps/chosen": -505.41339111328125, |
|
"logps/rejected": -776.4160766601562, |
|
"loss": 0.3556, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4678966999053955, |
|
"rewards/margins": 3.195099353790283, |
|
"rewards/rejected": -4.6629958152771, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.043567052416609936, |
|
"grad_norm": 4.261974292549152, |
|
"learning_rate": 8.695652173913044e-05, |
|
"logits/chosen": -0.5400364398956299, |
|
"logits/rejected": -0.5849201083183289, |
|
"logps/chosen": -511.65802001953125, |
|
"logps/rejected": -811.4276123046875, |
|
"loss": 0.3132, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.5283739566802979, |
|
"rewards/margins": 3.7420878410339355, |
|
"rewards/rejected": -5.2704620361328125, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04628999319264806, |
|
"grad_norm": 5.162764854438213, |
|
"learning_rate": 9.239130434782609e-05, |
|
"logits/chosen": -0.3992065489292145, |
|
"logits/rejected": -0.21120062470436096, |
|
"logps/chosen": -578.4542236328125, |
|
"logps/rejected": -1250.295654296875, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.6138319969177246, |
|
"rewards/margins": 6.92595911026001, |
|
"rewards/rejected": -9.539790153503418, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04901293396868618, |
|
"grad_norm": 3.546614542325096, |
|
"learning_rate": 9.782608695652174e-05, |
|
"logits/chosen": 0.12355854362249374, |
|
"logits/rejected": 0.16381272673606873, |
|
"logps/chosen": -525.3487548828125, |
|
"logps/rejected": -877.5525512695312, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.712798833847046, |
|
"rewards/margins": 3.966627597808838, |
|
"rewards/rejected": -5.679426670074463, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.051735874744724304, |
|
"grad_norm": 3.4061998514196326, |
|
"learning_rate": 0.00010326086956521738, |
|
"logits/chosen": -0.07915548235177994, |
|
"logits/rejected": 0.03230556100606918, |
|
"logps/chosen": -541.4509887695312, |
|
"logps/rejected": -907.5144653320312, |
|
"loss": 0.2763, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.3669848442077637, |
|
"rewards/margins": 3.6007239818573, |
|
"rewards/rejected": -5.967708587646484, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05445881552076242, |
|
"grad_norm": 4.318373028362168, |
|
"learning_rate": 0.00010869565217391305, |
|
"logits/chosen": -0.5367928743362427, |
|
"logits/rejected": -0.46212688088417053, |
|
"logps/chosen": -589.5100708007812, |
|
"logps/rejected": -1115.4957275390625, |
|
"loss": 0.2403, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.3453047275543213, |
|
"rewards/margins": 5.046809196472168, |
|
"rewards/rejected": -7.392115116119385, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05718175629680054, |
|
"grad_norm": 3.315434156465907, |
|
"learning_rate": 0.0001141304347826087, |
|
"logits/chosen": -0.3160248398780823, |
|
"logits/rejected": -0.2609514594078064, |
|
"logps/chosen": -664.8533325195312, |
|
"logps/rejected": -1347.0562744140625, |
|
"loss": 0.2439, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.291189193725586, |
|
"rewards/margins": 7.1285600662231445, |
|
"rewards/rejected": -10.419748306274414, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.059904697072838665, |
|
"grad_norm": 2.9900477836167583, |
|
"learning_rate": 0.00011956521739130435, |
|
"logits/chosen": 0.034565992653369904, |
|
"logits/rejected": 0.11762849986553192, |
|
"logps/chosen": -673.8556518554688, |
|
"logps/rejected": -1427.378173828125, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.362067461013794, |
|
"rewards/margins": 7.918702602386475, |
|
"rewards/rejected": -11.280771255493164, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06262763784887679, |
|
"grad_norm": 2.299272777029839, |
|
"learning_rate": 0.000125, |
|
"logits/chosen": 0.2120196521282196, |
|
"logits/rejected": 0.4011055529117584, |
|
"logps/chosen": -816.4850463867188, |
|
"logps/rejected": -1636.709716796875, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.696841239929199, |
|
"rewards/margins": 8.66151237487793, |
|
"rewards/rejected": -13.358352661132812, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0653505786249149, |
|
"grad_norm": 2.520846682761172, |
|
"learning_rate": 0.00013043478260869567, |
|
"logits/chosen": -0.6589404344558716, |
|
"logits/rejected": -0.6260676383972168, |
|
"logps/chosen": -496.455322265625, |
|
"logps/rejected": -948.7802734375, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.457515835762024, |
|
"rewards/margins": 5.1725172996521, |
|
"rewards/rejected": -6.630032539367676, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06807351940095303, |
|
"grad_norm": 3.470765195985346, |
|
"learning_rate": 0.0001358695652173913, |
|
"logits/chosen": -0.129820317029953, |
|
"logits/rejected": -0.09683764725923538, |
|
"logps/chosen": -612.4053955078125, |
|
"logps/rejected": -995.4520263671875, |
|
"loss": 0.2801, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.644944667816162, |
|
"rewards/margins": 4.22878885269165, |
|
"rewards/rejected": -6.8737335205078125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07079646017699115, |
|
"grad_norm": 3.5234122965177432, |
|
"learning_rate": 0.00014130434782608697, |
|
"logits/chosen": 0.4874610900878906, |
|
"logits/rejected": 0.5410028696060181, |
|
"logps/chosen": -530.3843383789062, |
|
"logps/rejected": -806.9224243164062, |
|
"loss": 0.2545, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.913825273513794, |
|
"rewards/margins": 2.779323101043701, |
|
"rewards/rejected": -4.693148136138916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07351940095302927, |
|
"grad_norm": 3.1672868272882293, |
|
"learning_rate": 0.00014673913043478264, |
|
"logits/chosen": 0.5825409293174744, |
|
"logits/rejected": 0.6227949857711792, |
|
"logps/chosen": -453.08990478515625, |
|
"logps/rejected": -778.2337036132812, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.291534423828125, |
|
"rewards/margins": 3.5406317710876465, |
|
"rewards/rejected": -4.8321661949157715, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0762423417290674, |
|
"grad_norm": 3.4653355946787867, |
|
"learning_rate": 0.00015217391304347827, |
|
"logits/chosen": 0.7842821478843689, |
|
"logits/rejected": 0.8171672821044922, |
|
"logps/chosen": -646.3670043945312, |
|
"logps/rejected": -1298.140380859375, |
|
"loss": 0.2762, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.0766921043395996, |
|
"rewards/margins": 6.976851463317871, |
|
"rewards/rejected": -10.053544998168945, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07896528250510551, |
|
"grad_norm": 4.264156688585643, |
|
"learning_rate": 0.0001576086956521739, |
|
"logits/chosen": 0.39463651180267334, |
|
"logits/rejected": 0.38154393434524536, |
|
"logps/chosen": -410.0770568847656, |
|
"logps/rejected": -697.1278686523438, |
|
"loss": 0.2482, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.6038021445274353, |
|
"rewards/margins": 3.522867202758789, |
|
"rewards/rejected": -4.126669883728027, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08168822328114364, |
|
"grad_norm": 7.548808524451859, |
|
"learning_rate": 0.00016304347826086955, |
|
"logits/chosen": 0.36318862438201904, |
|
"logits/rejected": 0.34319767355918884, |
|
"logps/chosen": -570.1300048828125, |
|
"logps/rejected": -1266.759033203125, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0891940593719482, |
|
"rewards/margins": 7.467465400695801, |
|
"rewards/rejected": -9.556659698486328, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08441116405718176, |
|
"grad_norm": 5.697122001859261, |
|
"learning_rate": 0.00016847826086956522, |
|
"logits/chosen": -0.522394597530365, |
|
"logits/rejected": -0.4420931935310364, |
|
"logps/chosen": -533.5513916015625, |
|
"logps/rejected": -984.91357421875, |
|
"loss": 0.2946, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0686821937561035, |
|
"rewards/margins": 4.774750709533691, |
|
"rewards/rejected": -6.843433380126953, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08713410483321987, |
|
"grad_norm": 5.515933709027342, |
|
"learning_rate": 0.00017391304347826088, |
|
"logits/chosen": 0.015002882108092308, |
|
"logits/rejected": 0.12198108434677124, |
|
"logps/chosen": -530.60400390625, |
|
"logps/rejected": -830.6185302734375, |
|
"loss": 0.3218, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9283926486968994, |
|
"rewards/margins": 3.346539258956909, |
|
"rewards/rejected": -5.274931907653809, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.089857045609258, |
|
"grad_norm": 20.20417518678615, |
|
"learning_rate": 0.00017934782608695652, |
|
"logits/chosen": -0.8085662722587585, |
|
"logits/rejected": -0.6665461659431458, |
|
"logps/chosen": -557.08984375, |
|
"logps/rejected": -1014.3035888671875, |
|
"loss": 0.3115, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7877311706542969, |
|
"rewards/margins": 4.699117183685303, |
|
"rewards/rejected": -6.4868483543396, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09257998638529612, |
|
"grad_norm": 19.141358392191165, |
|
"learning_rate": 0.00018478260869565218, |
|
"logits/chosen": -1.1671316623687744, |
|
"logits/rejected": -1.022890329360962, |
|
"logps/chosen": -433.53460693359375, |
|
"logps/rejected": -890.1748046875, |
|
"loss": 0.278, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.8231090307235718, |
|
"rewards/margins": 4.5598225593566895, |
|
"rewards/rejected": -5.382931709289551, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09530292716133425, |
|
"grad_norm": 5.004173601837073, |
|
"learning_rate": 0.00019021739130434782, |
|
"logits/chosen": -0.16809643805027008, |
|
"logits/rejected": 0.023858536034822464, |
|
"logps/chosen": -580.4594116210938, |
|
"logps/rejected": -948.2473754882812, |
|
"loss": 0.3074, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.90035080909729, |
|
"rewards/margins": 4.385798454284668, |
|
"rewards/rejected": -6.286149024963379, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09802586793737236, |
|
"grad_norm": 18.975782176199033, |
|
"learning_rate": 0.0001956521739130435, |
|
"logits/chosen": -0.13861560821533203, |
|
"logits/rejected": -0.05721583217382431, |
|
"logps/chosen": -495.3792419433594, |
|
"logps/rejected": -899.0491943359375, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3074277639389038, |
|
"rewards/margins": 4.709783554077148, |
|
"rewards/rejected": -6.017210960388184, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.10074880871341048, |
|
"grad_norm": 12.932097663714115, |
|
"learning_rate": 0.00019999981917872262, |
|
"logits/chosen": -0.7583842873573303, |
|
"logits/rejected": -0.7324551343917847, |
|
"logps/chosen": -502.64813232421875, |
|
"logps/rejected": -696.3719482421875, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4866981506347656, |
|
"rewards/margins": 2.141483783721924, |
|
"rewards/rejected": -3.6281819343566895, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.10347174948944861, |
|
"grad_norm": 7.538819283985503, |
|
"learning_rate": 0.000199993490502677, |
|
"logits/chosen": 1.3222413063049316, |
|
"logits/rejected": 1.3994247913360596, |
|
"logps/chosen": -552.2727661132812, |
|
"logps/rejected": -888.35009765625, |
|
"loss": 0.3377, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7047837972640991, |
|
"rewards/margins": 4.129269123077393, |
|
"rewards/rejected": -5.834052562713623, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.10619469026548672, |
|
"grad_norm": 2.698223448211145, |
|
"learning_rate": 0.0001999781214166793, |
|
"logits/chosen": -0.531406581401825, |
|
"logits/rejected": -0.29297247529029846, |
|
"logps/chosen": -579.4844970703125, |
|
"logps/rejected": -1168.0150146484375, |
|
"loss": 0.2687, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2400717735290527, |
|
"rewards/margins": 6.4643354415893555, |
|
"rewards/rejected": -8.704408645629883, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10891763104152484, |
|
"grad_norm": 2.832583238601158, |
|
"learning_rate": 0.00019995371331024835, |
|
"logits/chosen": -0.9013770818710327, |
|
"logits/rejected": -0.8614107966423035, |
|
"logps/chosen": -567.0159912109375, |
|
"logps/rejected": -1161.518310546875, |
|
"loss": 0.2585, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9346994161605835, |
|
"rewards/margins": 6.5001540184021, |
|
"rewards/rejected": -8.434852600097656, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11164057181756297, |
|
"grad_norm": 2.4836030301537275, |
|
"learning_rate": 0.00019992026839012067, |
|
"logits/chosen": -0.20795145630836487, |
|
"logits/rejected": -0.05467641353607178, |
|
"logps/chosen": -535.1635131835938, |
|
"logps/rejected": -879.9713745117188, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.5837352275848389, |
|
"rewards/margins": 3.8102355003356934, |
|
"rewards/rejected": -5.393970966339111, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11436351259360109, |
|
"grad_norm": 4.0678022085960155, |
|
"learning_rate": 0.00019987778968005106, |
|
"logits/chosen": -1.7985776662826538, |
|
"logits/rejected": -1.5679908990859985, |
|
"logps/chosen": -499.22686767578125, |
|
"logps/rejected": -949.0340576171875, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2260411977767944, |
|
"rewards/margins": 4.656464099884033, |
|
"rewards/rejected": -5.882505416870117, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11708645336963922, |
|
"grad_norm": 6.170976378523087, |
|
"learning_rate": 0.00019982628102053899, |
|
"logits/chosen": -1.7474931478500366, |
|
"logits/rejected": -1.472307562828064, |
|
"logps/chosen": -452.9400329589844, |
|
"logps/rejected": -947.8646240234375, |
|
"loss": 0.2443, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.1056087017059326, |
|
"rewards/margins": 5.24057674407959, |
|
"rewards/rejected": -6.346184730529785, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11980939414567733, |
|
"grad_norm": 2.3740869343585986, |
|
"learning_rate": 0.00019976574706848154, |
|
"logits/chosen": -1.0254762172698975, |
|
"logits/rejected": -0.9232420921325684, |
|
"logps/chosen": -577.9771728515625, |
|
"logps/rejected": -1111.5164794921875, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.2708170413970947, |
|
"rewards/margins": 5.644797325134277, |
|
"rewards/rejected": -7.915614128112793, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12253233492171545, |
|
"grad_norm": 3.062599110592225, |
|
"learning_rate": 0.00019969619329675249, |
|
"logits/chosen": -1.398494005203247, |
|
"logits/rejected": -1.2401583194732666, |
|
"logps/chosen": -548.5270385742188, |
|
"logps/rejected": -1162.383056640625, |
|
"loss": 0.3083, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.0478172302246094, |
|
"rewards/margins": 6.271994590759277, |
|
"rewards/rejected": -8.31981086730957, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12525527569775358, |
|
"grad_norm": 2.4479245244845, |
|
"learning_rate": 0.00019961762599370723, |
|
"logits/chosen": -1.366804838180542, |
|
"logits/rejected": -1.2496994733810425, |
|
"logps/chosen": -391.8729553222656, |
|
"logps/rejected": -678.1502075195312, |
|
"loss": 0.2812, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6106027364730835, |
|
"rewards/margins": 3.496123790740967, |
|
"rewards/rejected": -4.106726169586182, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1279782164737917, |
|
"grad_norm": 5.570404886434436, |
|
"learning_rate": 0.00019953005226261438, |
|
"logits/chosen": -1.8177093267440796, |
|
"logits/rejected": -1.7150996923446655, |
|
"logps/chosen": -585.9056396484375, |
|
"logps/rejected": -944.1336059570312, |
|
"loss": 0.3153, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0305967330932617, |
|
"rewards/margins": 4.166149139404297, |
|
"rewards/rejected": -6.196745872497559, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1307011572498298, |
|
"grad_norm": 4.9616313649114305, |
|
"learning_rate": 0.00019943348002101371, |
|
"logits/chosen": -0.35997989773750305, |
|
"logits/rejected": -0.54933100938797, |
|
"logps/chosen": -697.2908325195312, |
|
"logps/rejected": -1242.2154541015625, |
|
"loss": 0.2813, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.610248565673828, |
|
"rewards/margins": 5.970644474029541, |
|
"rewards/rejected": -9.580892562866211, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13342409802586794, |
|
"grad_norm": 3.2104602173488095, |
|
"learning_rate": 0.0001993279180000001, |
|
"logits/chosen": 3.2386550903320312, |
|
"logits/rejected": 2.8118205070495605, |
|
"logps/chosen": -657.9302978515625, |
|
"logps/rejected": -1343.6512451171875, |
|
"loss": 0.304, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.4300479888916016, |
|
"rewards/margins": 7.140336036682129, |
|
"rewards/rejected": -10.57038402557373, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13614703880190607, |
|
"grad_norm": 3.40241770940377, |
|
"learning_rate": 0.00019921337574343423, |
|
"logits/chosen": -1.1426218748092651, |
|
"logits/rejected": -0.986798107624054, |
|
"logps/chosen": -434.1651916503906, |
|
"logps/rejected": -757.93115234375, |
|
"loss": 0.2695, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8630467653274536, |
|
"rewards/margins": 3.374795436859131, |
|
"rewards/rejected": -4.237841606140137, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13614703880190607, |
|
"eval_logits/chosen": -1.5326930284500122, |
|
"eval_logits/rejected": -1.3998456001281738, |
|
"eval_logps/chosen": -383.6803283691406, |
|
"eval_logps/rejected": -745.71533203125, |
|
"eval_loss": 0.2652795910835266, |
|
"eval_rewards/accuracies": 0.8679706454277039, |
|
"eval_rewards/chosen": -0.4398553669452667, |
|
"eval_rewards/margins": 4.098072052001953, |
|
"eval_rewards/rejected": -4.537927627563477, |
|
"eval_runtime": 3754.4478, |
|
"eval_samples_per_second": 1.306, |
|
"eval_steps_per_second": 0.109, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13886997957794417, |
|
"grad_norm": 5.474127540729177, |
|
"learning_rate": 0.00019908986360707981, |
|
"logits/chosen": -1.5042083263397217, |
|
"logits/rejected": -1.2466522455215454, |
|
"logps/chosen": -523.0069580078125, |
|
"logps/rejected": -1227.4290771484375, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2869787216186523, |
|
"rewards/margins": 7.605735778808594, |
|
"rewards/rejected": -9.89271354675293, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 7.744668859821086, |
|
"learning_rate": 0.00019895739275766717, |
|
"logits/chosen": -1.6189693212509155, |
|
"logits/rejected": -1.3868303298950195, |
|
"logps/chosen": -627.96826171875, |
|
"logps/rejected": -1323.58837890625, |
|
"loss": 0.2436, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.1383042335510254, |
|
"rewards/margins": 7.207145690917969, |
|
"rewards/rejected": -10.345449447631836, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14431586113002043, |
|
"grad_norm": 4.7297851415537995, |
|
"learning_rate": 0.00019881597517188378, |
|
"logits/chosen": -1.7887611389160156, |
|
"logits/rejected": -1.6225858926773071, |
|
"logps/chosen": -476.73077392578125, |
|
"logps/rejected": -840.6845703125, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9709323644638062, |
|
"rewards/margins": 4.59786319732666, |
|
"rewards/rejected": -5.568795204162598, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14703880190605853, |
|
"grad_norm": 5.496852164954777, |
|
"learning_rate": 0.00019866562363529146, |
|
"logits/chosen": -0.9063366055488586, |
|
"logits/rejected": -0.7964249849319458, |
|
"logps/chosen": -589.5694580078125, |
|
"logps/rejected": -1222.3099365234375, |
|
"loss": 0.3257, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.250840425491333, |
|
"rewards/margins": 6.9674835205078125, |
|
"rewards/rejected": -9.218323707580566, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14976174268209666, |
|
"grad_norm": 2.955258018366531, |
|
"learning_rate": 0.00019850635174117033, |
|
"logits/chosen": -1.3758379220962524, |
|
"logits/rejected": -1.2850220203399658, |
|
"logps/chosen": -562.1868286132812, |
|
"logps/rejected": -1195.6204833984375, |
|
"loss": 0.2985, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2212133407592773, |
|
"rewards/margins": 7.197710990905762, |
|
"rewards/rejected": -9.418924331665039, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1524846834581348, |
|
"grad_norm": 3.245125438369836, |
|
"learning_rate": 0.00019833817388928985, |
|
"logits/chosen": -1.1794776916503906, |
|
"logits/rejected": -0.9993559122085571, |
|
"logps/chosen": -571.94921875, |
|
"logps/rejected": -1167.773681640625, |
|
"loss": 0.3072, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.339094638824463, |
|
"rewards/margins": 6.1757402420043945, |
|
"rewards/rejected": -8.5148344039917, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15520762423417292, |
|
"grad_norm": 2.604195604750394, |
|
"learning_rate": 0.00019816110528460713, |
|
"logits/chosen": -1.2078096866607666, |
|
"logits/rejected": -0.7949275970458984, |
|
"logps/chosen": -589.8275756835938, |
|
"logps/rejected": -1255.671875, |
|
"loss": 0.3152, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.489520311355591, |
|
"rewards/margins": 6.91791296005249, |
|
"rewards/rejected": -9.40743350982666, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15793056501021102, |
|
"grad_norm": 14.441606451933733, |
|
"learning_rate": 0.00019797516193589194, |
|
"logits/chosen": -1.5580374002456665, |
|
"logits/rejected": -1.270991325378418, |
|
"logps/chosen": -670.9024658203125, |
|
"logps/rejected": -1422.287353515625, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1384835243225098, |
|
"rewards/margins": 7.682137966156006, |
|
"rewards/rejected": -10.8206205368042, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16065350578624915, |
|
"grad_norm": 3.9273831546290814, |
|
"learning_rate": 0.00019778036065427965, |
|
"logits/chosen": -1.3679311275482178, |
|
"logits/rejected": -1.3166406154632568, |
|
"logps/chosen": -571.2752685546875, |
|
"logps/rejected": -921.2325439453125, |
|
"loss": 0.7785, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.133291006088257, |
|
"rewards/margins": 4.120671272277832, |
|
"rewards/rejected": -6.253961563110352, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16337644656228728, |
|
"grad_norm": 4.005186765164421, |
|
"learning_rate": 0.00019757671905175117, |
|
"logits/chosen": -0.47542038559913635, |
|
"logits/rejected": -0.4297063946723938, |
|
"logps/chosen": -464.59820556640625, |
|
"logps/rejected": -1003.8577880859375, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9565982818603516, |
|
"rewards/margins": 5.590369701385498, |
|
"rewards/rejected": -7.54696798324585, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16609938733832538, |
|
"grad_norm": 2.806683383323669, |
|
"learning_rate": 0.00019736425553954065, |
|
"logits/chosen": -1.535628318786621, |
|
"logits/rejected": -1.4295036792755127, |
|
"logps/chosen": -740.3170776367188, |
|
"logps/rejected": -1282.82861328125, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.8235206604003906, |
|
"rewards/margins": 5.897660732269287, |
|
"rewards/rejected": -9.721181869506836, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1688223281143635, |
|
"grad_norm": 4.278979765897441, |
|
"learning_rate": 0.00019714298932647098, |
|
"logits/chosen": -0.43291616439819336, |
|
"logits/rejected": -0.4710071086883545, |
|
"logps/chosen": -545.1385498046875, |
|
"logps/rejected": -900.1130981445312, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2823760509490967, |
|
"rewards/margins": 3.3586456775665283, |
|
"rewards/rejected": -5.641021251678467, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17154526889040164, |
|
"grad_norm": 11.430682840390572, |
|
"learning_rate": 0.0001969129404172172, |
|
"logits/chosen": 0.8560006022453308, |
|
"logits/rejected": 0.7378341555595398, |
|
"logps/chosen": -513.2482299804688, |
|
"logps/rejected": -754.9190063476562, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5915935039520264, |
|
"rewards/margins": 2.9012575149536133, |
|
"rewards/rejected": -4.492851257324219, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17426820966643974, |
|
"grad_norm": 2.199875368036885, |
|
"learning_rate": 0.00019667412961049755, |
|
"logits/chosen": -0.1992878019809723, |
|
"logits/rejected": -0.40254640579223633, |
|
"logps/chosen": -433.77484130859375, |
|
"logps/rejected": -813.1231689453125, |
|
"loss": 0.3492, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7354617118835449, |
|
"rewards/margins": 4.359702110290527, |
|
"rewards/rejected": -5.0951642990112305, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 4.4540211798567, |
|
"learning_rate": 0.00019642657849719354, |
|
"logits/chosen": 0.4657669961452484, |
|
"logits/rejected": 0.18967926502227783, |
|
"logps/chosen": -536.6248168945312, |
|
"logps/rejected": -1097.6444091796875, |
|
"loss": 0.3092, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8782879114151, |
|
"rewards/margins": 5.84519624710083, |
|
"rewards/rejected": -7.723484992980957, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.179714091218516, |
|
"grad_norm": 3.0889028203904334, |
|
"learning_rate": 0.0001961703094583975, |
|
"logits/chosen": -1.8331960439682007, |
|
"logits/rejected": -1.687788963317871, |
|
"logps/chosen": -544.7129516601562, |
|
"logps/rejected": -981.7268676757812, |
|
"loss": 0.3037, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7984678745269775, |
|
"rewards/margins": 4.749049186706543, |
|
"rewards/rejected": -6.5475172996521, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1824370319945541, |
|
"grad_norm": 7.960275160298456, |
|
"learning_rate": 0.00019590534566338934, |
|
"logits/chosen": 0.07616959512233734, |
|
"logits/rejected": 0.03734927624464035, |
|
"logps/chosen": -528.64501953125, |
|
"logps/rejected": -872.9537963867188, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.8545458316802979, |
|
"rewards/margins": 3.848759412765503, |
|
"rewards/rejected": -5.703305721282959, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18515997277059223, |
|
"grad_norm": 12.196026749043849, |
|
"learning_rate": 0.0001956317110675417, |
|
"logits/chosen": -1.7587692737579346, |
|
"logits/rejected": -1.3882538080215454, |
|
"logps/chosen": -594.3480224609375, |
|
"logps/rejected": -1230.926513671875, |
|
"loss": 0.3005, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.64725923538208, |
|
"rewards/margins": 6.312607765197754, |
|
"rewards/rejected": -8.959867477416992, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18788291354663036, |
|
"grad_norm": 9.39312929425058, |
|
"learning_rate": 0.00019534943041015423, |
|
"logits/chosen": -2.2219159603118896, |
|
"logits/rejected": -2.1489291191101074, |
|
"logps/chosen": -371.2558288574219, |
|
"logps/rejected": -592.716796875, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6846780180931091, |
|
"rewards/margins": 2.5522751808166504, |
|
"rewards/rejected": -3.2369537353515625, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.1906058543226685, |
|
"grad_norm": 2.8421606651782394, |
|
"learning_rate": 0.0001950585292122169, |
|
"logits/chosen": 0.6412663459777832, |
|
"logits/rejected": 0.4461473524570465, |
|
"logps/chosen": -528.5960693359375, |
|
"logps/rejected": -914.9078369140625, |
|
"loss": 0.3292, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2477450370788574, |
|
"rewards/margins": 3.9819042682647705, |
|
"rewards/rejected": -6.229649066925049, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1933287950987066, |
|
"grad_norm": 6.372719336489545, |
|
"learning_rate": 0.00019475903377410252, |
|
"logits/chosen": 3.104797840118408, |
|
"logits/rejected": 3.0445351600646973, |
|
"logps/chosen": -678.0279541015625, |
|
"logps/rejected": -1072.2177734375, |
|
"loss": 0.7254, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.2950127124786377, |
|
"rewards/margins": 4.643362998962402, |
|
"rewards/rejected": -7.938374996185303, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19605173587474473, |
|
"grad_norm": 3.626617866099579, |
|
"learning_rate": 0.00019445097117318913, |
|
"logits/chosen": -1.6091477870941162, |
|
"logits/rejected": -1.535391926765442, |
|
"logps/chosen": -503.35784912109375, |
|
"logps/rejected": -848.8382568359375, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6061153411865234, |
|
"rewards/margins": 4.038764953613281, |
|
"rewards/rejected": -5.644880294799805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19877467665078286, |
|
"grad_norm": 4.243327644286249, |
|
"learning_rate": 0.00019413436926141175, |
|
"logits/chosen": -0.8332279324531555, |
|
"logits/rejected": -0.8175574541091919, |
|
"logps/chosen": -737.3671875, |
|
"logps/rejected": -1388.236572265625, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.6704049110412598, |
|
"rewards/margins": 6.894456386566162, |
|
"rewards/rejected": -10.564861297607422, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.20149761742682096, |
|
"grad_norm": 5.900207156659361, |
|
"learning_rate": 0.00019380925666274444, |
|
"logits/chosen": -0.7356165647506714, |
|
"logits/rejected": -0.7930446267127991, |
|
"logps/chosen": -599.1915283203125, |
|
"logps/rejected": -1108.341552734375, |
|
"loss": 0.3328, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.732137680053711, |
|
"rewards/margins": 5.603833198547363, |
|
"rewards/rejected": -8.335970878601074, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2042205582028591, |
|
"grad_norm": 3.825224843937841, |
|
"learning_rate": 0.0001934756627706122, |
|
"logits/chosen": -1.3480488061904907, |
|
"logits/rejected": -1.160653829574585, |
|
"logps/chosen": -441.1055603027344, |
|
"logps/rejected": -832.5628662109375, |
|
"loss": 0.3124, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.4829859733581543, |
|
"rewards/margins": 3.520029067993164, |
|
"rewards/rejected": -5.003015041351318, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.20694349897889722, |
|
"grad_norm": 5.110356705719763, |
|
"learning_rate": 0.00019313361774523385, |
|
"logits/chosen": -2.0760579109191895, |
|
"logits/rejected": -2.1237432956695557, |
|
"logps/chosen": -549.3860473632812, |
|
"logps/rejected": -1049.9378662109375, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1024250984191895, |
|
"rewards/margins": 5.283135414123535, |
|
"rewards/rejected": -7.385560035705566, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20966643975493532, |
|
"grad_norm": 2.4636308668975317, |
|
"learning_rate": 0.00019278315251089486, |
|
"logits/chosen": -1.732892632484436, |
|
"logits/rejected": -1.539317011833191, |
|
"logps/chosen": -472.11651611328125, |
|
"logps/rejected": -894.4132690429688, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7663090229034424, |
|
"rewards/margins": 4.492798805236816, |
|
"rewards/rejected": -6.2591071128845215, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21238938053097345, |
|
"grad_norm": 11.617472243164942, |
|
"learning_rate": 0.0001924242987531517, |
|
"logits/chosen": -1.3720897436141968, |
|
"logits/rejected": -1.4372451305389404, |
|
"logps/chosen": -642.5867919921875, |
|
"logps/rejected": -1076.4737548828125, |
|
"loss": 0.7831, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8324971199035645, |
|
"rewards/margins": 4.546250343322754, |
|
"rewards/rejected": -7.378748416900635, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21511232130701158, |
|
"grad_norm": 4.800064106894111, |
|
"learning_rate": 0.0001920570889159672, |
|
"logits/chosen": -2.131412982940674, |
|
"logits/rejected": -2.0672669410705566, |
|
"logps/chosen": -502.35125732421875, |
|
"logps/rejected": -979.3629150390625, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5898116827011108, |
|
"rewards/margins": 6.003064155578613, |
|
"rewards/rejected": -7.592875003814697, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21783526208304968, |
|
"grad_norm": 7.9969773800091, |
|
"learning_rate": 0.0001916815561987771, |
|
"logits/chosen": -1.6771615743637085, |
|
"logits/rejected": -1.6893478631973267, |
|
"logps/chosen": -475.9574279785156, |
|
"logps/rejected": -858.0924072265625, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2917953729629517, |
|
"rewards/margins": 4.0918192863464355, |
|
"rewards/rejected": -5.383615016937256, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2205582028590878, |
|
"grad_norm": 15.598299647751306, |
|
"learning_rate": 0.00019129773455348864, |
|
"logits/chosen": -2.2555699348449707, |
|
"logits/rejected": -2.179759979248047, |
|
"logps/chosen": -422.21075439453125, |
|
"logps/rejected": -853.91259765625, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0770437717437744, |
|
"rewards/margins": 4.805014610290527, |
|
"rewards/rejected": -5.882058143615723, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22328114363512594, |
|
"grad_norm": 5.334288389286907, |
|
"learning_rate": 0.00019090565868141096, |
|
"logits/chosen": -0.569889485836029, |
|
"logits/rejected": -0.6646596193313599, |
|
"logps/chosen": -913.2365112304688, |
|
"logps/rejected": -1756.2760009765625, |
|
"loss": 1.2924, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.7498884201049805, |
|
"rewards/margins": 8.836221694946289, |
|
"rewards/rejected": -14.586111068725586, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22600408441116407, |
|
"grad_norm": 9.424443385582844, |
|
"learning_rate": 0.0001905053640301176, |
|
"logits/chosen": 1.087628722190857, |
|
"logits/rejected": 0.9164185523986816, |
|
"logps/chosen": -554.4573974609375, |
|
"logps/rejected": -922.4230346679688, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7598752975463867, |
|
"rewards/margins": 4.093839168548584, |
|
"rewards/rejected": -5.853714942932129, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22872702518720217, |
|
"grad_norm": 31.748839971929247, |
|
"learning_rate": 0.0001900968867902419, |
|
"logits/chosen": 1.860044240951538, |
|
"logits/rejected": 1.750836968421936, |
|
"logps/chosen": -501.2557678222656, |
|
"logps/rejected": -742.8480224609375, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5709228515625, |
|
"rewards/margins": 2.8358662128448486, |
|
"rewards/rejected": -4.4067888259887695, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2314499659632403, |
|
"grad_norm": 5.037429450172698, |
|
"learning_rate": 0.00018968026389220498, |
|
"logits/chosen": 1.901974081993103, |
|
"logits/rejected": 1.9345325231552124, |
|
"logps/chosen": -473.47100830078125, |
|
"logps/rejected": -704.6774291992188, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1274769306182861, |
|
"rewards/margins": 3.0902841091156006, |
|
"rewards/rejected": -4.217761039733887, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23417290673927843, |
|
"grad_norm": 4.938998989868357, |
|
"learning_rate": 0.0001892555330028766, |
|
"logits/chosen": 1.3002218008041382, |
|
"logits/rejected": 1.288474202156067, |
|
"logps/chosen": -481.8773498535156, |
|
"logps/rejected": -835.0935668945312, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5530319213867188, |
|
"rewards/margins": 4.073337078094482, |
|
"rewards/rejected": -5.626368522644043, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23689584751531653, |
|
"grad_norm": 4.653639204830587, |
|
"learning_rate": 0.00018882273252217004, |
|
"logits/chosen": 0.8681214451789856, |
|
"logits/rejected": 0.951123058795929, |
|
"logps/chosen": -576.9916381835938, |
|
"logps/rejected": -988.4410400390625, |
|
"loss": 0.2979, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7273517847061157, |
|
"rewards/margins": 4.580703258514404, |
|
"rewards/rejected": -6.3080549240112305, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23961878829135466, |
|
"grad_norm": 3.633959384080648, |
|
"learning_rate": 0.0001883819015795701, |
|
"logits/chosen": 0.8870223760604858, |
|
"logits/rejected": 0.8765641450881958, |
|
"logps/chosen": -463.49993896484375, |
|
"logps/rejected": -962.3264770507812, |
|
"loss": 0.3212, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.3259989023208618, |
|
"rewards/margins": 5.314154148101807, |
|
"rewards/rejected": -6.640152931213379, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2423417290673928, |
|
"grad_norm": 4.332123847510828, |
|
"learning_rate": 0.00018793308003059572, |
|
"logits/chosen": 1.1213561296463013, |
|
"logits/rejected": 1.1425080299377441, |
|
"logps/chosen": -459.064453125, |
|
"logps/rejected": -865.5914306640625, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.331207036972046, |
|
"rewards/margins": 4.389949798583984, |
|
"rewards/rejected": -5.721157073974609, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2450646698434309, |
|
"grad_norm": 15.682490544625567, |
|
"learning_rate": 0.00018747630845319612, |
|
"logits/chosen": 1.2807202339172363, |
|
"logits/rejected": 1.3666961193084717, |
|
"logps/chosen": -704.359375, |
|
"logps/rejected": -1433.5845947265625, |
|
"loss": 0.3848, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.641261577606201, |
|
"rewards/margins": 7.442375183105469, |
|
"rewards/rejected": -11.083636283874512, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24778761061946902, |
|
"grad_norm": 11.750100569612796, |
|
"learning_rate": 0.00018701162814408278, |
|
"logits/chosen": 0.8161400556564331, |
|
"logits/rejected": 0.7485871315002441, |
|
"logps/chosen": -518.4786987304688, |
|
"logps/rejected": -798.63720703125, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.984863042831421, |
|
"rewards/margins": 2.9147756099700928, |
|
"rewards/rejected": -4.8996381759643555, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25051055139550715, |
|
"grad_norm": 7.000935714029327, |
|
"learning_rate": 0.00018653908111499533, |
|
"logits/chosen": -0.31201204657554626, |
|
"logits/rejected": -0.2849845290184021, |
|
"logps/chosen": -527.2504272460938, |
|
"logps/rejected": -787.3291015625, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6963837146759033, |
|
"rewards/margins": 3.1402206420898438, |
|
"rewards/rejected": -4.836604118347168, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2532334921715453, |
|
"grad_norm": 11.18800926600743, |
|
"learning_rate": 0.00018605871008890346, |
|
"logits/chosen": -1.294745683670044, |
|
"logits/rejected": -1.240708351135254, |
|
"logps/chosen": -511.86309814453125, |
|
"logps/rejected": -744.6692504882812, |
|
"loss": 1.0639, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8128286600112915, |
|
"rewards/margins": 2.8213400840759277, |
|
"rewards/rejected": -4.634169101715088, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2559564329475834, |
|
"grad_norm": 10.556179766281154, |
|
"learning_rate": 0.0001855705584961443, |
|
"logits/chosen": 0.7220875024795532, |
|
"logits/rejected": 0.6994001269340515, |
|
"logps/chosen": -607.748291015625, |
|
"logps/rejected": -1030.421875, |
|
"loss": 0.451, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.9004902839660645, |
|
"rewards/margins": 4.076137542724609, |
|
"rewards/rejected": -6.976628303527832, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2586793737236215, |
|
"grad_norm": 6.13682401768759, |
|
"learning_rate": 0.00018507467047049593, |
|
"logits/chosen": 0.3612140715122223, |
|
"logits/rejected": 0.24034667015075684, |
|
"logps/chosen": -518.5841674804688, |
|
"logps/rejected": -700.7687377929688, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6922804117202759, |
|
"rewards/margins": 2.2763285636901855, |
|
"rewards/rejected": -3.968608856201172, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2614023144996596, |
|
"grad_norm": 6.121773697604543, |
|
"learning_rate": 0.0001845710908451872, |
|
"logits/chosen": 0.5088990330696106, |
|
"logits/rejected": 0.33550935983657837, |
|
"logps/chosen": -541.6002197265625, |
|
"logps/rejected": -806.7115478515625, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0940399169921875, |
|
"rewards/margins": 3.1160244941711426, |
|
"rewards/rejected": -5.21006441116333, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26412525527569775, |
|
"grad_norm": 10.899985502581202, |
|
"learning_rate": 0.00018405986514884434, |
|
"logits/chosen": 2.749624252319336, |
|
"logits/rejected": 2.3466954231262207, |
|
"logps/chosen": -582.1544799804688, |
|
"logps/rejected": -924.7894287109375, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.315011501312256, |
|
"rewards/margins": 3.5922577381134033, |
|
"rewards/rejected": -5.907269477844238, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2668481960517359, |
|
"grad_norm": 4.048659705337102, |
|
"learning_rate": 0.00018354103960137473, |
|
"logits/chosen": 1.1572563648223877, |
|
"logits/rejected": 1.267608404159546, |
|
"logps/chosen": -582.2197875976562, |
|
"logps/rejected": -861.1555786132812, |
|
"loss": 0.3092, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.239684820175171, |
|
"rewards/margins": 3.4138379096984863, |
|
"rewards/rejected": -5.65352201461792, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.269571136827774, |
|
"grad_norm": 6.445129082017715, |
|
"learning_rate": 0.00018301466110978826, |
|
"logits/chosen": 5.172424793243408, |
|
"logits/rejected": 5.469472885131836, |
|
"logps/chosen": -583.6631469726562, |
|
"logps/rejected": -822.27783203125, |
|
"loss": 0.3662, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.1274476051330566, |
|
"rewards/margins": 3.208089828491211, |
|
"rewards/rejected": -5.335537910461426, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27229407760381213, |
|
"grad_norm": 6.566218610450217, |
|
"learning_rate": 0.00018248077726395635, |
|
"logits/chosen": 6.2127532958984375, |
|
"logits/rejected": 5.775629043579102, |
|
"logps/chosen": -699.6742553710938, |
|
"logps/rejected": -1013.6728515625, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.4475739002227783, |
|
"rewards/margins": 3.560396671295166, |
|
"rewards/rejected": -7.007970333099365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27229407760381213, |
|
"eval_logits/chosen": 4.7867841720581055, |
|
"eval_logits/rejected": 4.172375202178955, |
|
"eval_logps/chosen": -599.8698120117188, |
|
"eval_logps/rejected": -1004.0470581054688, |
|
"eval_loss": 0.3151506781578064, |
|
"eval_rewards/accuracies": 0.8514670133590698, |
|
"eval_rewards/chosen": -2.601750135421753, |
|
"eval_rewards/margins": 4.519495487213135, |
|
"eval_rewards/rejected": -7.121245384216309, |
|
"eval_runtime": 3748.1009, |
|
"eval_samples_per_second": 1.308, |
|
"eval_steps_per_second": 0.109, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27501701837985026, |
|
"grad_norm": 4.1859048562805325, |
|
"learning_rate": 0.0001819394363323093, |
|
"logits/chosen": 2.4643194675445557, |
|
"logits/rejected": 2.2225146293640137, |
|
"logps/chosen": -545.6774291992188, |
|
"logps/rejected": -996.73681640625, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.89382004737854, |
|
"rewards/margins": 5.465787410736084, |
|
"rewards/rejected": -7.3596086502075195, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27773995915588834, |
|
"grad_norm": 4.563487274752729, |
|
"learning_rate": 0.00018139068725747253, |
|
"logits/chosen": 1.0098166465759277, |
|
"logits/rejected": 1.0116276741027832, |
|
"logps/chosen": -602.1691284179688, |
|
"logps/rejected": -1046.082763671875, |
|
"loss": 0.2743, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5139992237091064, |
|
"rewards/margins": 4.713437557220459, |
|
"rewards/rejected": -7.2274370193481445, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.28046289993192647, |
|
"grad_norm": 7.5233407893974364, |
|
"learning_rate": 0.0001808345796518415, |
|
"logits/chosen": -0.4183047413825989, |
|
"logits/rejected": -0.31315118074417114, |
|
"logps/chosen": -532.0780029296875, |
|
"logps/rejected": -1177.1412353515625, |
|
"loss": 0.2154, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.2215864658355713, |
|
"rewards/margins": 6.905013084411621, |
|
"rewards/rejected": -9.12660026550293, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2831858407079646, |
|
"grad_norm": 5.735221398988511, |
|
"learning_rate": 0.00018027116379309638, |
|
"logits/chosen": -0.38075047731399536, |
|
"logits/rejected": -0.2400140017271042, |
|
"logps/chosen": -578.424072265625, |
|
"logps/rejected": -1119.6865234375, |
|
"loss": 0.3202, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.2283248901367188, |
|
"rewards/margins": 5.826390743255615, |
|
"rewards/rejected": -8.054716110229492, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2859087814840027, |
|
"grad_norm": 4.097854425465531, |
|
"learning_rate": 0.00017970049061965637, |
|
"logits/chosen": 3.559316635131836, |
|
"logits/rejected": 3.129002571105957, |
|
"logps/chosen": -522.4072265625, |
|
"logps/rejected": -1131.6197509765625, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.1318278312683105, |
|
"rewards/margins": 6.1658806800842285, |
|
"rewards/rejected": -8.297708511352539, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28863172226004086, |
|
"grad_norm": 5.557225788607541, |
|
"learning_rate": 0.00017912261172607437, |
|
"logits/chosen": 0.9803678393363953, |
|
"logits/rejected": 0.749468207359314, |
|
"logps/chosen": -525.6317138671875, |
|
"logps/rejected": -1126.9403076171875, |
|
"loss": 0.2935, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0150177478790283, |
|
"rewards/margins": 5.855062484741211, |
|
"rewards/rejected": -7.87007999420166, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.291354663036079, |
|
"grad_norm": 16.881151266803318, |
|
"learning_rate": 0.00017853757935837235, |
|
"logits/chosen": 2.2658114433288574, |
|
"logits/rejected": 2.1108152866363525, |
|
"logps/chosen": -653.8731689453125, |
|
"logps/rejected": -1232.9185791015625, |
|
"loss": 0.2515, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.2572929859161377, |
|
"rewards/margins": 6.0115251541137695, |
|
"rewards/rejected": -9.268818855285645, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29407760381211706, |
|
"grad_norm": 2.9796008894176103, |
|
"learning_rate": 0.00017794544640931773, |
|
"logits/chosen": 0.10931304842233658, |
|
"logits/rejected": 0.12252505123615265, |
|
"logps/chosen": -558.3709106445312, |
|
"logps/rejected": -942.2255859375, |
|
"loss": 0.3009, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7539688348770142, |
|
"rewards/margins": 4.423993110656738, |
|
"rewards/rejected": -6.177962303161621, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2968005445881552, |
|
"grad_norm": 4.281633296458022, |
|
"learning_rate": 0.00017734626641364147, |
|
"logits/chosen": 0.3914720416069031, |
|
"logits/rejected": 0.32610705494880676, |
|
"logps/chosen": -486.5399475097656, |
|
"logps/rejected": -891.3079833984375, |
|
"loss": 0.2547, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6498864889144897, |
|
"rewards/margins": 4.376126289367676, |
|
"rewards/rejected": -6.026012420654297, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2995234853641933, |
|
"grad_norm": 5.5930892470284475, |
|
"learning_rate": 0.00017674009354319778, |
|
"logits/chosen": 0.5930287837982178, |
|
"logits/rejected": 0.653694748878479, |
|
"logps/chosen": -603.6002807617188, |
|
"logps/rejected": -1086.633544921875, |
|
"loss": 0.3199, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.18953800201416, |
|
"rewards/margins": 5.238548755645752, |
|
"rewards/rejected": -7.428086757659912, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.30224642614023145, |
|
"grad_norm": 2.722087080665883, |
|
"learning_rate": 0.00017612698260206666, |
|
"logits/chosen": 0.37087658047676086, |
|
"logits/rejected": 0.4003225266933441, |
|
"logps/chosen": -492.9803771972656, |
|
"logps/rejected": -1075.6131591796875, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6157805919647217, |
|
"rewards/margins": 6.397913932800293, |
|
"rewards/rejected": -8.013693809509277, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3049693669162696, |
|
"grad_norm": 14.979862803572216, |
|
"learning_rate": 0.00017550698902159896, |
|
"logits/chosen": 1.7447624206542969, |
|
"logits/rejected": 1.8597869873046875, |
|
"logps/chosen": -696.9720458984375, |
|
"logps/rejected": -1302.992431640625, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.522810697555542, |
|
"rewards/margins": 6.992275238037109, |
|
"rewards/rejected": -10.51508617401123, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 3.147793738872875, |
|
"learning_rate": 0.00017488016885540484, |
|
"logits/chosen": 3.254767656326294, |
|
"logits/rejected": 2.9010090827941895, |
|
"logps/chosen": -507.4468688964844, |
|
"logps/rejected": -1095.754638671875, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7343683242797852, |
|
"rewards/margins": 5.961197853088379, |
|
"rewards/rejected": -7.695566654205322, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.31041524846834584, |
|
"grad_norm": 3.677842943487348, |
|
"learning_rate": 0.00017424657877428596, |
|
"logits/chosen": -0.7234563231468201, |
|
"logits/rejected": -0.5526185035705566, |
|
"logps/chosen": -540.0379638671875, |
|
"logps/rejected": -1126.988037109375, |
|
"loss": 0.3271, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7782533168792725, |
|
"rewards/margins": 6.2880048751831055, |
|
"rewards/rejected": -8.066259384155273, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3131381892443839, |
|
"grad_norm": 10.43709043871757, |
|
"learning_rate": 0.0001736062760611119, |
|
"logits/chosen": 0.05422030761837959, |
|
"logits/rejected": 0.14549703896045685, |
|
"logps/chosen": -657.1055908203125, |
|
"logps/rejected": -1551.24560546875, |
|
"loss": 0.3395, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.0819263458251953, |
|
"rewards/margins": 9.380105018615723, |
|
"rewards/rejected": -12.462031364440918, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31586113002042204, |
|
"grad_norm": 5.657046152510863, |
|
"learning_rate": 0.00017295931860564117, |
|
"logits/chosen": -0.35942110419273376, |
|
"logits/rejected": -0.25374308228492737, |
|
"logps/chosen": -654.0399169921875, |
|
"logps/rejected": -1307.819580078125, |
|
"loss": 0.2725, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.1632027626037598, |
|
"rewards/margins": 6.858729362487793, |
|
"rewards/rejected": -10.021931648254395, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3185840707964602, |
|
"grad_norm": 6.835124252986284, |
|
"learning_rate": 0.0001723057648992875, |
|
"logits/chosen": -1.796087622642517, |
|
"logits/rejected": -1.386889934539795, |
|
"logps/chosen": -551.5918579101562, |
|
"logps/rejected": -1379.042724609375, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.4278125762939453, |
|
"rewards/margins": 8.520901679992676, |
|
"rewards/rejected": -10.948714256286621, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3213070115724983, |
|
"grad_norm": 3.841956624946803, |
|
"learning_rate": 0.00017164567402983152, |
|
"logits/chosen": -1.5889203548431396, |
|
"logits/rejected": -1.3325690031051636, |
|
"logps/chosen": -705.3286743164062, |
|
"logps/rejected": -1427.21484375, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.543011426925659, |
|
"rewards/margins": 7.749202728271484, |
|
"rewards/rejected": -11.292215347290039, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32402995234853643, |
|
"grad_norm": 7.006267955561037, |
|
"learning_rate": 0.00017097910567607865, |
|
"logits/chosen": -1.8383554220199585, |
|
"logits/rejected": -1.5112298727035522, |
|
"logps/chosen": -626.0816650390625, |
|
"logps/rejected": -1356.2662353515625, |
|
"loss": 0.3225, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.867626667022705, |
|
"rewards/margins": 7.212944984436035, |
|
"rewards/rejected": -10.080572128295898, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32675289312457456, |
|
"grad_norm": 6.2805032054390075, |
|
"learning_rate": 0.0001703061201024636, |
|
"logits/chosen": -1.2520471811294556, |
|
"logits/rejected": -0.8927146792411804, |
|
"logps/chosen": -705.08154296875, |
|
"logps/rejected": -1572.4884033203125, |
|
"loss": 0.22, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.847522020339966, |
|
"rewards/margins": 8.92027473449707, |
|
"rewards/rejected": -12.76779556274414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32947583390061264, |
|
"grad_norm": 5.6676894500481065, |
|
"learning_rate": 0.0001696267781536018, |
|
"logits/chosen": -1.0109606981277466, |
|
"logits/rejected": -0.7403501868247986, |
|
"logps/chosen": -720.68798828125, |
|
"logps/rejected": -1489.216064453125, |
|
"loss": 0.2525, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.673945665359497, |
|
"rewards/margins": 8.27706527709961, |
|
"rewards/rejected": -11.951011657714844, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33219877467665077, |
|
"grad_norm": 6.806373569251935, |
|
"learning_rate": 0.0001689411412487885, |
|
"logits/chosen": -1.2114444971084595, |
|
"logits/rejected": -0.9269537925720215, |
|
"logps/chosen": -660.3043823242188, |
|
"logps/rejected": -1289.1474609375, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.7403571605682373, |
|
"rewards/margins": 6.5076584815979, |
|
"rewards/rejected": -9.248014450073242, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3349217154526889, |
|
"grad_norm": 1.6484966634408573, |
|
"learning_rate": 0.00016824927137644587, |
|
"logits/chosen": -1.5764806270599365, |
|
"logits/rejected": -1.3572378158569336, |
|
"logps/chosen": -622.459228515625, |
|
"logps/rejected": -1207.199951171875, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.8502438068389893, |
|
"rewards/margins": 6.140684604644775, |
|
"rewards/rejected": -8.990928649902344, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.337644656228727, |
|
"grad_norm": 3.1773340218228983, |
|
"learning_rate": 0.00016755123108851843, |
|
"logits/chosen": -0.8344672918319702, |
|
"logits/rejected": -0.7575286030769348, |
|
"logps/chosen": -557.8627319335938, |
|
"logps/rejected": -1094.7261962890625, |
|
"loss": 0.2617, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.271852970123291, |
|
"rewards/margins": 6.080749988555908, |
|
"rewards/rejected": -8.352602005004883, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.34036759700476515, |
|
"grad_norm": 7.197978920136861, |
|
"learning_rate": 0.00016684708349481804, |
|
"logits/chosen": -1.293826937675476, |
|
"logits/rejected": -1.111547589302063, |
|
"logps/chosen": -684.3062744140625, |
|
"logps/rejected": -1335.118896484375, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.4491424560546875, |
|
"rewards/margins": 6.6166839599609375, |
|
"rewards/rejected": -10.065826416015625, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3430905377808033, |
|
"grad_norm": 8.542463349127416, |
|
"learning_rate": 0.00016613689225731789, |
|
"logits/chosen": -1.3532450199127197, |
|
"logits/rejected": -1.0953752994537354, |
|
"logps/chosen": -587.2614135742188, |
|
"logps/rejected": -1286.6195068359375, |
|
"loss": 0.2996, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.7271084785461426, |
|
"rewards/margins": 6.817942142486572, |
|
"rewards/rejected": -9.545049667358398, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3458134785568414, |
|
"grad_norm": 2.1114546108326517, |
|
"learning_rate": 0.00016542072158439691, |
|
"logits/chosen": -1.4009244441986084, |
|
"logits/rejected": -1.2579541206359863, |
|
"logps/chosen": -632.0369873046875, |
|
"logps/rejected": -1034.799072265625, |
|
"loss": 0.2368, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2211804389953613, |
|
"rewards/margins": 4.827291011810303, |
|
"rewards/rejected": -7.048470973968506, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3485364193328795, |
|
"grad_norm": 24.697563459789354, |
|
"learning_rate": 0.00016469863622503477, |
|
"logits/chosen": -1.9558353424072266, |
|
"logits/rejected": -1.6197192668914795, |
|
"logps/chosen": -671.5903930664062, |
|
"logps/rejected": -1494.4512939453125, |
|
"loss": 0.2951, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.403165340423584, |
|
"rewards/margins": 8.787665367126465, |
|
"rewards/rejected": -12.190831184387207, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3512593601089176, |
|
"grad_norm": 265.08504869734134, |
|
"learning_rate": 0.00016397070146295778, |
|
"logits/chosen": 0.3683956265449524, |
|
"logits/rejected": 0.6786172389984131, |
|
"logps/chosen": -579.5296630859375, |
|
"logps/rejected": -1230.6458740234375, |
|
"loss": 0.8783, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.450284481048584, |
|
"rewards/margins": 7.403939723968506, |
|
"rewards/rejected": -9.85422420501709, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 54.97903067864863, |
|
"learning_rate": 0.00016323698311073668, |
|
"logits/chosen": 1.4707801342010498, |
|
"logits/rejected": 1.2925649881362915, |
|
"logps/chosen": -1907.9742431640625, |
|
"logps/rejected": -1993.4407958984375, |
|
"loss": 3.2747, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -15.379717826843262, |
|
"rewards/margins": 1.4004836082458496, |
|
"rewards/rejected": -16.780200958251953, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3567052416609939, |
|
"grad_norm": 8.592614500780192, |
|
"learning_rate": 0.0001624975475038365, |
|
"logits/chosen": -1.2588945627212524, |
|
"logits/rejected": -1.2224012613296509, |
|
"logps/chosen": -716.8282470703125, |
|
"logps/rejected": -945.8975830078125, |
|
"loss": 0.843, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.887277126312256, |
|
"rewards/margins": 2.443674087524414, |
|
"rewards/rejected": -6.330951690673828, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.359428182437032, |
|
"grad_norm": 5.311974463877925, |
|
"learning_rate": 0.0001617524614946192, |
|
"logits/chosen": -1.1290063858032227, |
|
"logits/rejected": -1.064706563949585, |
|
"logps/chosen": -629.7766723632812, |
|
"logps/rejected": -904.5687255859375, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5980165004730225, |
|
"rewards/margins": 3.1313178539276123, |
|
"rewards/rejected": -5.729334831237793, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.36215112321307014, |
|
"grad_norm": 6.474086591026475, |
|
"learning_rate": 0.00016100179244629952, |
|
"logits/chosen": -1.5740928649902344, |
|
"logits/rejected": -1.4228092432022095, |
|
"logps/chosen": -764.6334228515625, |
|
"logps/rejected": -1126.4373779296875, |
|
"loss": 0.4008, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.025094032287598, |
|
"rewards/margins": 4.259903907775879, |
|
"rewards/rejected": -8.284997940063477, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3648740639891082, |
|
"grad_norm": 5.477184678397902, |
|
"learning_rate": 0.00016024560822685467, |
|
"logits/chosen": -2.164599657058716, |
|
"logits/rejected": -2.0423648357391357, |
|
"logps/chosen": -634.7586669921875, |
|
"logps/rejected": -1000.6339721679688, |
|
"loss": 0.2964, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5424180030822754, |
|
"rewards/margins": 4.107353210449219, |
|
"rewards/rejected": -6.649771213531494, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36759700476514634, |
|
"grad_norm": 11.647999715649028, |
|
"learning_rate": 0.0001594839772028884, |
|
"logits/chosen": 1.426068663597107, |
|
"logits/rejected": 1.4375708103179932, |
|
"logps/chosen": -649.6060791015625, |
|
"logps/rejected": -1172.4080810546875, |
|
"loss": 0.3104, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.2215964794158936, |
|
"rewards/margins": 5.544508457183838, |
|
"rewards/rejected": -8.766103744506836, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.37031994554118447, |
|
"grad_norm": 11.349006083438557, |
|
"learning_rate": 0.00015871696823345, |
|
"logits/chosen": -1.0609710216522217, |
|
"logits/rejected": -0.9911941289901733, |
|
"logps/chosen": -605.7052001953125, |
|
"logps/rejected": -1159.8377685546875, |
|
"loss": 0.2463, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.4982380867004395, |
|
"rewards/margins": 5.881691932678223, |
|
"rewards/rejected": -8.37993049621582, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3730428863172226, |
|
"grad_norm": 11.295379259266975, |
|
"learning_rate": 0.00015794465066380867, |
|
"logits/chosen": -1.2733609676361084, |
|
"logits/rejected": -1.1467927694320679, |
|
"logps/chosen": -688.9752197265625, |
|
"logps/rejected": -1283.682373046875, |
|
"loss": 0.3466, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.429932117462158, |
|
"rewards/margins": 6.115570545196533, |
|
"rewards/rejected": -9.545502662658691, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37576582709326073, |
|
"grad_norm": 12.797060807657392, |
|
"learning_rate": 0.00015716709431918413, |
|
"logits/chosen": -1.3164949417114258, |
|
"logits/rejected": -1.233361840248108, |
|
"logps/chosen": -816.4059448242188, |
|
"logps/rejected": -1399.9384765625, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.322115898132324, |
|
"rewards/margins": 6.451016426086426, |
|
"rewards/rejected": -10.77313232421875, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37848876786929886, |
|
"grad_norm": 10.821771896690832, |
|
"learning_rate": 0.0001563843694984336, |
|
"logits/chosen": 0.735268771648407, |
|
"logits/rejected": 0.6736514568328857, |
|
"logps/chosen": -542.029296875, |
|
"logps/rejected": -1001.3328857421875, |
|
"loss": 0.3622, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.9188995361328125, |
|
"rewards/margins": 5.033745288848877, |
|
"rewards/rejected": -6.952645301818848, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.381211708645337, |
|
"grad_norm": 6.621280422308756, |
|
"learning_rate": 0.00015559654696769627, |
|
"logits/chosen": 3.1346821784973145, |
|
"logits/rejected": 2.477602481842041, |
|
"logps/chosen": -491.7142028808594, |
|
"logps/rejected": -1117.6937255859375, |
|
"loss": 0.2835, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.8081684112548828, |
|
"rewards/margins": 6.549437522888184, |
|
"rewards/rejected": -8.357605934143066, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38393464942137506, |
|
"grad_norm": 7.483334853774204, |
|
"learning_rate": 0.00015480369795399507, |
|
"logits/chosen": 0.8859611749649048, |
|
"logits/rejected": 0.6168441772460938, |
|
"logps/chosen": -614.8277587890625, |
|
"logps/rejected": -1257.08056640625, |
|
"loss": 0.3414, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.5697829723358154, |
|
"rewards/margins": 6.640227317810059, |
|
"rewards/rejected": -9.210010528564453, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3866575901974132, |
|
"grad_norm": 8.321072241953082, |
|
"learning_rate": 0.0001540058941387973, |
|
"logits/chosen": -1.7052541971206665, |
|
"logits/rejected": -1.3722909688949585, |
|
"logps/chosen": -543.1255493164062, |
|
"logps/rejected": -1270.39013671875, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.073045253753662, |
|
"rewards/margins": 7.566277980804443, |
|
"rewards/rejected": -9.639322280883789, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3893805309734513, |
|
"grad_norm": 9.566652205072009, |
|
"learning_rate": 0.00015320320765153367, |
|
"logits/chosen": -0.6746680736541748, |
|
"logits/rejected": -0.31939640641212463, |
|
"logps/chosen": -646.5062255859375, |
|
"logps/rejected": -1472.7542724609375, |
|
"loss": 0.2957, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.08526873588562, |
|
"rewards/margins": 9.02385139465332, |
|
"rewards/rejected": -12.10912036895752, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.39210347174948945, |
|
"grad_norm": 5.674672936267165, |
|
"learning_rate": 0.00015239571106307728, |
|
"logits/chosen": -1.5170918703079224, |
|
"logits/rejected": -1.3616416454315186, |
|
"logps/chosen": -488.8348693847656, |
|
"logps/rejected": -978.12158203125, |
|
"loss": 0.3775, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6571521759033203, |
|
"rewards/margins": 5.227241516113281, |
|
"rewards/rejected": -6.88439416885376, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3948264125255276, |
|
"grad_norm": 14.456689496224328, |
|
"learning_rate": 0.0001515834773791824, |
|
"logits/chosen": -1.5392897129058838, |
|
"logits/rejected": -1.2314963340759277, |
|
"logps/chosen": -568.02685546875, |
|
"logps/rejected": -1188.26171875, |
|
"loss": 1.526, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.578427791595459, |
|
"rewards/margins": 6.594627380371094, |
|
"rewards/rejected": -9.173055648803711, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3975493533015657, |
|
"grad_norm": 24.108809069668514, |
|
"learning_rate": 0.000150766580033884, |
|
"logits/chosen": -0.8912268877029419, |
|
"logits/rejected": -0.7923849821090698, |
|
"logps/chosen": -611.3818359375, |
|
"logps/rejected": -1145.431884765625, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.100924253463745, |
|
"rewards/margins": 5.812254428863525, |
|
"rewards/rejected": -8.913179397583008, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4002722940776038, |
|
"grad_norm": 5.034388636648245, |
|
"learning_rate": 0.00014994509288285862, |
|
"logits/chosen": -1.4466712474822998, |
|
"logits/rejected": -1.3241074085235596, |
|
"logps/chosen": -714.8709716796875, |
|
"logps/rejected": -1214.1171875, |
|
"loss": 0.3186, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.5827248096466064, |
|
"rewards/margins": 5.621241092681885, |
|
"rewards/rejected": -9.20396614074707, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4029952348536419, |
|
"grad_norm": 10.420645648436944, |
|
"learning_rate": 0.00014911909019674704, |
|
"logits/chosen": -1.1877351999282837, |
|
"logits/rejected": -0.9257787466049194, |
|
"logps/chosen": -613.208251953125, |
|
"logps/rejected": -1327.60400390625, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.045889139175415, |
|
"rewards/margins": 7.302800178527832, |
|
"rewards/rejected": -10.348688125610352, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.40571817562968004, |
|
"grad_norm": 51.90536060739862, |
|
"learning_rate": 0.00014828864665443952, |
|
"logits/chosen": -1.4053256511688232, |
|
"logits/rejected": -1.2433897256851196, |
|
"logps/chosen": -557.5704345703125, |
|
"logps/rejected": -1206.54345703125, |
|
"loss": 0.2779, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4509506225585938, |
|
"rewards/margins": 6.966989040374756, |
|
"rewards/rejected": -9.417940139770508, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4084411164057182, |
|
"grad_norm": 14.361486530833302, |
|
"learning_rate": 0.0001474538373363241, |
|
"logits/chosen": -1.412669062614441, |
|
"logits/rejected": -1.3897775411605835, |
|
"logps/chosen": -564.7686767578125, |
|
"logps/rejected": -837.3571166992188, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.930058479309082, |
|
"rewards/margins": 3.367487668991089, |
|
"rewards/rejected": -5.29754638671875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4084411164057182, |
|
"eval_logits/chosen": -1.6537649631500244, |
|
"eval_logits/rejected": -1.519540786743164, |
|
"eval_logps/chosen": -582.22412109375, |
|
"eval_logps/rejected": -1098.727783203125, |
|
"eval_loss": 0.4872981011867523, |
|
"eval_rewards/accuracies": 0.7854523062705994, |
|
"eval_rewards/chosen": -2.425293445587158, |
|
"eval_rewards/margins": 5.642757415771484, |
|
"eval_rewards/rejected": -8.068050384521484, |
|
"eval_runtime": 3785.6353, |
|
"eval_samples_per_second": 1.295, |
|
"eval_steps_per_second": 0.108, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4111640571817563, |
|
"grad_norm": 9.318696202923965, |
|
"learning_rate": 0.0001466147377174985, |
|
"logits/chosen": -0.715455174446106, |
|
"logits/rejected": -0.6652237176895142, |
|
"logps/chosen": -572.0481567382812, |
|
"logps/rejected": -1031.051513671875, |
|
"loss": 0.418, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4588992595672607, |
|
"rewards/margins": 4.753371238708496, |
|
"rewards/rejected": -7.212270259857178, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41388699795779443, |
|
"grad_norm": 19.325914422583597, |
|
"learning_rate": 0.00014577142366094644, |
|
"logits/chosen": -0.05393592268228531, |
|
"logits/rejected": 0.009328609332442284, |
|
"logps/chosen": -620.9105224609375, |
|
"logps/rejected": -1113.5853271484375, |
|
"loss": 0.4078, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.8703293800354004, |
|
"rewards/margins": 5.491124629974365, |
|
"rewards/rejected": -8.36145305633545, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41660993873383256, |
|
"grad_norm": 12.358286061777378, |
|
"learning_rate": 0.00014492397141067887, |
|
"logits/chosen": 0.14358481764793396, |
|
"logits/rejected": 0.03318501263856888, |
|
"logps/chosen": -594.7238159179688, |
|
"logps/rejected": -1354.75537109375, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.9422993659973145, |
|
"rewards/margins": 7.6811628341674805, |
|
"rewards/rejected": -10.623462677001953, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41933287950987064, |
|
"grad_norm": 6.2960027951032, |
|
"learning_rate": 0.00014407245758484095, |
|
"logits/chosen": -0.20292606949806213, |
|
"logits/rejected": -0.11837242543697357, |
|
"logps/chosen": -618.982666015625, |
|
"logps/rejected": -1294.1939697265625, |
|
"loss": 0.3578, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.7013134956359863, |
|
"rewards/margins": 7.379813194274902, |
|
"rewards/rejected": -10.08112621307373, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.42205582028590877, |
|
"grad_norm": 9.817192732437798, |
|
"learning_rate": 0.00014321695916878454, |
|
"logits/chosen": -0.2398807555437088, |
|
"logits/rejected": -0.1428002268075943, |
|
"logps/chosen": -564.1031494140625, |
|
"logps/rejected": -1201.117431640625, |
|
"loss": 0.3268, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.393275737762451, |
|
"rewards/margins": 6.322265625, |
|
"rewards/rejected": -8.715542793273926, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4247787610619469, |
|
"grad_norm": 5.209206228944598, |
|
"learning_rate": 0.00014235755350810853, |
|
"logits/chosen": 0.9619453549385071, |
|
"logits/rejected": 1.1703174114227295, |
|
"logps/chosen": -648.5733642578125, |
|
"logps/rejected": -1496.5716552734375, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.6705708503723145, |
|
"rewards/margins": 8.732547760009766, |
|
"rewards/rejected": -11.403119087219238, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.427501701837985, |
|
"grad_norm": 4.077144779224901, |
|
"learning_rate": 0.0001414943183016655, |
|
"logits/chosen": 2.7115371227264404, |
|
"logits/rejected": 2.399930953979492, |
|
"logps/chosen": -606.0468139648438, |
|
"logps/rejected": -1309.142333984375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.912951707839966, |
|
"rewards/margins": 7.471800327301025, |
|
"rewards/rejected": -10.38475227355957, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.43022464261402316, |
|
"grad_norm": 17.422115506001596, |
|
"learning_rate": 0.0001406273315945374, |
|
"logits/chosen": -0.34676986932754517, |
|
"logits/rejected": -0.28898870944976807, |
|
"logps/chosen": -653.0081787109375, |
|
"logps/rejected": -1026.3204345703125, |
|
"loss": 0.3029, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.7177176475524902, |
|
"rewards/margins": 3.7674331665039062, |
|
"rewards/rejected": -6.4851508140563965, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4329475833900613, |
|
"grad_norm": 4.86972424076265, |
|
"learning_rate": 0.00013975667177097914, |
|
"logits/chosen": -0.1830269992351532, |
|
"logits/rejected": -0.24179303646087646, |
|
"logps/chosen": -635.9149169921875, |
|
"logps/rejected": -1108.5006103515625, |
|
"loss": 0.3134, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.9789676666259766, |
|
"rewards/margins": 4.767451286315918, |
|
"rewards/rejected": -7.7464189529418945, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43567052416609936, |
|
"grad_norm": 3.596472735756183, |
|
"learning_rate": 0.00013888241754733208, |
|
"logits/chosen": -0.6846984028816223, |
|
"logits/rejected": -0.6952911019325256, |
|
"logps/chosen": -489.0577087402344, |
|
"logps/rejected": -890.4197998046875, |
|
"loss": 0.2965, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5424995422363281, |
|
"rewards/margins": 4.706803321838379, |
|
"rewards/rejected": -6.249302387237549, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4383934649421375, |
|
"grad_norm": 8.198013945041934, |
|
"learning_rate": 0.0001380046479649073, |
|
"logits/chosen": -1.4048243761062622, |
|
"logits/rejected": -1.1473358869552612, |
|
"logps/chosen": -611.8831176757812, |
|
"logps/rejected": -1111.0921630859375, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5834736824035645, |
|
"rewards/margins": 5.021603584289551, |
|
"rewards/rejected": -7.605076789855957, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4411164057181756, |
|
"grad_norm": 9.566449101001135, |
|
"learning_rate": 0.00013712344238283933, |
|
"logits/chosen": -0.8325891494750977, |
|
"logits/rejected": -0.7076963186264038, |
|
"logps/chosen": -621.9786376953125, |
|
"logps/rejected": -1179.3758544921875, |
|
"loss": 0.2623, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.860924005508423, |
|
"rewards/margins": 6.293280601501465, |
|
"rewards/rejected": -9.154203414916992, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.44383934649421375, |
|
"grad_norm": 5.767063661996225, |
|
"learning_rate": 0.00013623888047091146, |
|
"logits/chosen": 1.593488335609436, |
|
"logits/rejected": 1.6555019617080688, |
|
"logps/chosen": -590.9708862304688, |
|
"logps/rejected": -1078.3956298828125, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.6324057579040527, |
|
"rewards/margins": 5.353615760803223, |
|
"rewards/rejected": -7.986021518707275, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4465622872702519, |
|
"grad_norm": 41.00318315603557, |
|
"learning_rate": 0.00013535104220235261, |
|
"logits/chosen": 1.8902591466903687, |
|
"logits/rejected": 2.1262030601501465, |
|
"logps/chosen": -876.5345458984375, |
|
"logps/rejected": -1053.7332763671875, |
|
"loss": 1.1928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.313698768615723, |
|
"rewards/margins": 2.054201602935791, |
|
"rewards/rejected": -7.367900848388672, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44928522804629, |
|
"grad_norm": 26.088451825695778, |
|
"learning_rate": 0.00013446000784660714, |
|
"logits/chosen": 1.4706940650939941, |
|
"logits/rejected": 1.519578218460083, |
|
"logps/chosen": -631.876953125, |
|
"logps/rejected": -1176.569091796875, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.078620672225952, |
|
"rewards/margins": 5.974215507507324, |
|
"rewards/rejected": -9.052835464477539, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.45200816882232814, |
|
"grad_norm": 33.28614487362792, |
|
"learning_rate": 0.00013356585796207756, |
|
"logits/chosen": 2.1754348278045654, |
|
"logits/rejected": 2.0363101959228516, |
|
"logps/chosen": -577.9168701171875, |
|
"logps/rejected": -1242.7994384765625, |
|
"loss": 0.2998, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.5749011039733887, |
|
"rewards/margins": 7.280187129974365, |
|
"rewards/rejected": -9.855088233947754, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4547311095983662, |
|
"grad_norm": 10.427182612989903, |
|
"learning_rate": 0.0001326686733888413, |
|
"logits/chosen": 0.6365132331848145, |
|
"logits/rejected": 0.6580590605735779, |
|
"logps/chosen": -702.520263671875, |
|
"logps/rejected": -1273.900390625, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.290257692337036, |
|
"rewards/margins": 5.8721795082092285, |
|
"rewards/rejected": -9.162436485290527, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45745405037440434, |
|
"grad_norm": 4.282070776156857, |
|
"learning_rate": 0.000131768535241342, |
|
"logits/chosen": -0.5329915285110474, |
|
"logits/rejected": -0.3240591287612915, |
|
"logps/chosen": -792.3297119140625, |
|
"logps/rejected": -1584.3717041015625, |
|
"loss": 0.2377, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.805436134338379, |
|
"rewards/margins": 8.310664176940918, |
|
"rewards/rejected": -13.11609935760498, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.46017699115044247, |
|
"grad_norm": 15.921033696679123, |
|
"learning_rate": 0.00013086552490105573, |
|
"logits/chosen": -0.6514681577682495, |
|
"logits/rejected": -0.5372867584228516, |
|
"logps/chosen": -537.2027587890625, |
|
"logps/rejected": -1119.0516357421875, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.059156894683838, |
|
"rewards/margins": 6.330155372619629, |
|
"rewards/rejected": -8.389312744140625, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4628999319264806, |
|
"grad_norm": 16.08695691527921, |
|
"learning_rate": 0.00012995972400913367, |
|
"logits/chosen": 0.9507571458816528, |
|
"logits/rejected": 1.0892035961151123, |
|
"logps/chosen": -657.4198608398438, |
|
"logps/rejected": -1084.3997802734375, |
|
"loss": 0.3032, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.760071039199829, |
|
"rewards/margins": 4.952934265136719, |
|
"rewards/rejected": -7.713004112243652, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46562287270251873, |
|
"grad_norm": 15.03505603114339, |
|
"learning_rate": 0.00012905121445902067, |
|
"logits/chosen": -0.09099654108285904, |
|
"logits/rejected": 0.08855719864368439, |
|
"logps/chosen": -666.9413452148438, |
|
"logps/rejected": -1238.0989990234375, |
|
"loss": 0.3466, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.3812332153320312, |
|
"rewards/margins": 5.85336971282959, |
|
"rewards/rejected": -9.234602928161621, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46834581347855686, |
|
"grad_norm": 5.776431534520819, |
|
"learning_rate": 0.00012814007838905128, |
|
"logits/chosen": -0.9203442335128784, |
|
"logits/rejected": -0.8418833017349243, |
|
"logps/chosen": -545.5592651367188, |
|
"logps/rejected": -1109.281005859375, |
|
"loss": 0.2509, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.17928147315979, |
|
"rewards/margins": 6.00477409362793, |
|
"rewards/rejected": -8.184054374694824, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.471068754254595, |
|
"grad_norm": 17.307649652391138, |
|
"learning_rate": 0.0001272263981750237, |
|
"logits/chosen": -1.3016124963760376, |
|
"logits/rejected": -1.0513460636138916, |
|
"logps/chosen": -614.2965087890625, |
|
"logps/rejected": -1212.0992431640625, |
|
"loss": 0.3222, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.9695653915405273, |
|
"rewards/margins": 6.4604692459106445, |
|
"rewards/rejected": -9.430034637451172, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.47379169503063306, |
|
"grad_norm": 6.686259280088899, |
|
"learning_rate": 0.00012631025642275212, |
|
"logits/chosen": -0.5886275172233582, |
|
"logits/rejected": -0.3992864489555359, |
|
"logps/chosen": -639.0484619140625, |
|
"logps/rejected": -1379.347412109375, |
|
"loss": 0.3295, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.267850160598755, |
|
"rewards/margins": 7.818935394287109, |
|
"rewards/rejected": -11.086786270141602, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4765146358066712, |
|
"grad_norm": 8.332371540931518, |
|
"learning_rate": 0.00012539173596059849, |
|
"logits/chosen": -0.30099961161613464, |
|
"logits/rejected": -0.0007561176898889244, |
|
"logps/chosen": -628.400146484375, |
|
"logps/rejected": -1538.23974609375, |
|
"loss": 0.4098, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.416485548019409, |
|
"rewards/margins": 9.312593460083008, |
|
"rewards/rejected": -12.72907829284668, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.4792375765827093, |
|
"grad_norm": 3.1397396537158566, |
|
"learning_rate": 0.00012447091983198367, |
|
"logits/chosen": -1.1242637634277344, |
|
"logits/rejected": -0.7798986434936523, |
|
"logps/chosen": -593.5460205078125, |
|
"logps/rejected": -1356.296875, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.9524972438812256, |
|
"rewards/margins": 7.784090995788574, |
|
"rewards/rejected": -10.736586570739746, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.48196051735874745, |
|
"grad_norm": 8.522392208842632, |
|
"learning_rate": 0.0001235478912878799, |
|
"logits/chosen": -0.9284283518791199, |
|
"logits/rejected": -0.6892914772033691, |
|
"logps/chosen": -696.424560546875, |
|
"logps/rejected": -1392.125, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.4410488605499268, |
|
"rewards/margins": 7.498281002044678, |
|
"rewards/rejected": -10.939330101013184, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4846834581347856, |
|
"grad_norm": 6.034875346009079, |
|
"learning_rate": 0.00012262273377928375, |
|
"logits/chosen": -1.1968759298324585, |
|
"logits/rejected": -0.9019950032234192, |
|
"logps/chosen": -613.7315673828125, |
|
"logps/rejected": -1290.041748046875, |
|
"loss": 0.3008, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.713265895843506, |
|
"rewards/margins": 6.849099636077881, |
|
"rewards/rejected": -9.562365531921387, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4874063989108237, |
|
"grad_norm": 9.60806575188711, |
|
"learning_rate": 0.00012169553094967146, |
|
"logits/chosen": -1.1181106567382812, |
|
"logits/rejected": -0.7999989986419678, |
|
"logps/chosen": -614.52001953125, |
|
"logps/rejected": -1425.6143798828125, |
|
"loss": 0.2867, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.5628480911254883, |
|
"rewards/margins": 8.526115417480469, |
|
"rewards/rejected": -11.088964462280273, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4901293396868618, |
|
"grad_norm": 21.702603936533567, |
|
"learning_rate": 0.00012076636662743672, |
|
"logits/chosen": 1.323265790939331, |
|
"logits/rejected": 1.2390327453613281, |
|
"logps/chosen": -779.384521484375, |
|
"logps/rejected": -1323.75390625, |
|
"loss": 0.2856, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.8515422344207764, |
|
"rewards/margins": 5.356608867645264, |
|
"rewards/rejected": -9.208149909973145, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4928522804628999, |
|
"grad_norm": 8.429242968799562, |
|
"learning_rate": 0.0001198353248183118, |
|
"logits/chosen": 2.109199285507202, |
|
"logits/rejected": 1.8442319631576538, |
|
"logps/chosen": -719.3864135742188, |
|
"logps/rejected": -1434.2828369140625, |
|
"loss": 0.1701, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.646466016769409, |
|
"rewards/margins": 7.611660957336426, |
|
"rewards/rejected": -11.258125305175781, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49557522123893805, |
|
"grad_norm": 6.886953380619344, |
|
"learning_rate": 0.0001189024896977724, |
|
"logits/chosen": 0.2526296079158783, |
|
"logits/rejected": 0.22320647537708282, |
|
"logps/chosen": -618.1570434570312, |
|
"logps/rejected": -1169.676513671875, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.075936794281006, |
|
"rewards/margins": 5.886470317840576, |
|
"rewards/rejected": -8.962407112121582, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4982981620149762, |
|
"grad_norm": 8.040343434429271, |
|
"learning_rate": 0.00011796794560342754, |
|
"logits/chosen": -0.7386595606803894, |
|
"logits/rejected": -0.6110578775405884, |
|
"logps/chosen": -660.8449096679688, |
|
"logps/rejected": -1218.1505126953125, |
|
"loss": 0.3243, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.8239331245422363, |
|
"rewards/margins": 6.139927387237549, |
|
"rewards/rejected": -8.963861465454102, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5010211027910143, |
|
"grad_norm": 8.563143413366994, |
|
"learning_rate": 0.0001170317770273946, |
|
"logits/chosen": -1.0322635173797607, |
|
"logits/rejected": -0.8018537759780884, |
|
"logps/chosen": -529.4595336914062, |
|
"logps/rejected": -1181.968505859375, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1946306228637695, |
|
"rewards/margins": 6.752659797668457, |
|
"rewards/rejected": -8.947290420532227, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5037440435670524, |
|
"grad_norm": 3.5682662969072747, |
|
"learning_rate": 0.00011609406860866023, |
|
"logits/chosen": -1.1348320245742798, |
|
"logits/rejected": -1.0071537494659424, |
|
"logps/chosen": -569.1886596679688, |
|
"logps/rejected": -1152.37255859375, |
|
"loss": 0.2472, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.218945026397705, |
|
"rewards/margins": 6.368105888366699, |
|
"rewards/rejected": -8.58704948425293, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5064669843430906, |
|
"grad_norm": 7.758172151259579, |
|
"learning_rate": 0.00011515490512542833, |
|
"logits/chosen": -1.3560707569122314, |
|
"logits/rejected": -1.0845423936843872, |
|
"logps/chosen": -561.7158813476562, |
|
"logps/rejected": -1203.3419189453125, |
|
"loss": 0.2311, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.237051486968994, |
|
"rewards/margins": 7.016552925109863, |
|
"rewards/rejected": -9.2536039352417, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5091899251191286, |
|
"grad_norm": 4.493807328662436, |
|
"learning_rate": 0.00011421437148745502, |
|
"logits/chosen": -1.0917279720306396, |
|
"logits/rejected": -0.9747053384780884, |
|
"logps/chosen": -645.5095825195312, |
|
"logps/rejected": -1229.0311279296875, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.5472307205200195, |
|
"rewards/margins": 6.673137664794922, |
|
"rewards/rejected": -9.220368385314941, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5119128658951668, |
|
"grad_norm": 6.201461431472656, |
|
"learning_rate": 0.00011327255272837221, |
|
"logits/chosen": -0.2544959485530853, |
|
"logits/rejected": -0.12365889549255371, |
|
"logps/chosen": -622.212890625, |
|
"logps/rejected": -1287.2431640625, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9083447456359863, |
|
"rewards/margins": 6.886693000793457, |
|
"rewards/rejected": -9.795038223266602, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5146358066712049, |
|
"grad_norm": 6.2832423410312925, |
|
"learning_rate": 0.00011232953399799957, |
|
"logits/chosen": 0.22300061583518982, |
|
"logits/rejected": 0.48449450731277466, |
|
"logps/chosen": -777.6243286132812, |
|
"logps/rejected": -1476.5068359375, |
|
"loss": 0.2152, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.214239120483398, |
|
"rewards/margins": 7.588281154632568, |
|
"rewards/rejected": -11.802520751953125, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.517358747447243, |
|
"grad_norm": 9.37402734675468, |
|
"learning_rate": 0.0001113854005546461, |
|
"logits/chosen": -0.2933140695095062, |
|
"logits/rejected": -0.16291429102420807, |
|
"logps/chosen": -761.1182861328125, |
|
"logps/rejected": -1387.656494140625, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.257036209106445, |
|
"rewards/margins": 6.1938581466674805, |
|
"rewards/rejected": -10.450895309448242, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5200816882232812, |
|
"grad_norm": 7.183032873292223, |
|
"learning_rate": 0.00011044023775740204, |
|
"logits/chosen": -0.7535658478736877, |
|
"logits/rejected": -0.5872179269790649, |
|
"logps/chosen": -660.9326171875, |
|
"logps/rejected": -1360.95458984375, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.0175588130950928, |
|
"rewards/margins": 7.479903221130371, |
|
"rewards/rejected": -10.497461318969727, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5228046289993192, |
|
"grad_norm": 15.307025263438828, |
|
"learning_rate": 0.00010949413105842147, |
|
"logits/chosen": -0.6107400059700012, |
|
"logits/rejected": -0.3973458409309387, |
|
"logps/chosen": -550.6314086914062, |
|
"logps/rejected": -1316.4263916015625, |
|
"loss": 0.2485, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.392124652862549, |
|
"rewards/margins": 8.194581985473633, |
|
"rewards/rejected": -10.586706161499023, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5255275697753574, |
|
"grad_norm": 4.269502876033707, |
|
"learning_rate": 0.0001085471659951967, |
|
"logits/chosen": 0.16908931732177734, |
|
"logits/rejected": 0.4170844554901123, |
|
"logps/chosen": -687.2650756835938, |
|
"logps/rejected": -1543.733154296875, |
|
"loss": 0.1662, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3644537925720215, |
|
"rewards/margins": 8.979719161987305, |
|
"rewards/rejected": -12.344173431396484, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5282505105513955, |
|
"grad_norm": 9.311111235120233, |
|
"learning_rate": 0.00010759942818282454, |
|
"logits/chosen": 0.7541287541389465, |
|
"logits/rejected": 0.9952371716499329, |
|
"logps/chosen": -687.6876220703125, |
|
"logps/rejected": -1431.085693359375, |
|
"loss": 0.2171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.5470268726348877, |
|
"rewards/margins": 7.848145484924316, |
|
"rewards/rejected": -11.395172119140625, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 6.088442848295023, |
|
"learning_rate": 0.00010665100330626625, |
|
"logits/chosen": -0.1732356995344162, |
|
"logits/rejected": 0.006719267461448908, |
|
"logps/chosen": -520.822265625, |
|
"logps/rejected": -1267.14501953125, |
|
"loss": 0.2347, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.0856547355651855, |
|
"rewards/margins": 7.704294681549072, |
|
"rewards/rejected": -9.789949417114258, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5336963921034718, |
|
"grad_norm": 7.6561162101042, |
|
"learning_rate": 0.0001057019771126004, |
|
"logits/chosen": -0.6633592844009399, |
|
"logits/rejected": -0.38759341835975647, |
|
"logps/chosen": -532.0450439453125, |
|
"logps/rejected": -1239.3480224609375, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.8515552282333374, |
|
"rewards/margins": 7.4962158203125, |
|
"rewards/rejected": -9.347769737243652, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5364193328795098, |
|
"grad_norm": 3.313605249754064, |
|
"learning_rate": 0.0001047524354032707, |
|
"logits/chosen": -0.9916669130325317, |
|
"logits/rejected": -0.6737325191497803, |
|
"logps/chosen": -566.5112915039062, |
|
"logps/rejected": -1290.538818359375, |
|
"loss": 0.2498, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.237109661102295, |
|
"rewards/margins": 8.159578323364258, |
|
"rewards/rejected": -10.396687507629395, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.539142273655548, |
|
"grad_norm": 9.196141505536001, |
|
"learning_rate": 0.0001038024640263287, |
|
"logits/chosen": -0.6640629768371582, |
|
"logits/rejected": -0.3816087543964386, |
|
"logps/chosen": -674.1700439453125, |
|
"logps/rejected": -1490.5645751953125, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.1683545112609863, |
|
"rewards/margins": 8.756607055664062, |
|
"rewards/rejected": -11.924962997436523, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5418652144315861, |
|
"grad_norm": 6.910953624436588, |
|
"learning_rate": 0.00010285214886867198, |
|
"logits/chosen": -1.0215368270874023, |
|
"logits/rejected": -0.815405011177063, |
|
"logps/chosen": -664.8277587890625, |
|
"logps/rejected": -1331.6253662109375, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.9869272708892822, |
|
"rewards/margins": 7.080696105957031, |
|
"rewards/rejected": -10.067623138427734, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5445881552076243, |
|
"grad_norm": 8.829016742458412, |
|
"learning_rate": 0.00010190157584827953, |
|
"logits/chosen": -0.7232739329338074, |
|
"logits/rejected": -0.5292662978172302, |
|
"logps/chosen": -668.0047607421875, |
|
"logps/rejected": -1450.50830078125, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.479228973388672, |
|
"rewards/margins": 8.136119842529297, |
|
"rewards/rejected": -11.615349769592285, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5445881552076243, |
|
"eval_logits/chosen": -0.37419021129608154, |
|
"eval_logits/rejected": -0.188820019364357, |
|
"eval_logps/chosen": -735.248779296875, |
|
"eval_logps/rejected": -1603.610595703125, |
|
"eval_loss": 0.20030494034290314, |
|
"eval_rewards/accuracies": 0.9089242219924927, |
|
"eval_rewards/chosen": -3.9555394649505615, |
|
"eval_rewards/margins": 9.161340713500977, |
|
"eval_rewards/rejected": -13.1168794631958, |
|
"eval_runtime": 3761.3505, |
|
"eval_samples_per_second": 1.304, |
|
"eval_steps_per_second": 0.109, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5473110959836623, |
|
"grad_norm": 5.076536520943281, |
|
"learning_rate": 0.00010095083090644364, |
|
"logits/chosen": -0.6943289637565613, |
|
"logits/rejected": -0.5084115266799927, |
|
"logps/chosen": -745.46435546875, |
|
"logps/rejected": -1585.6259765625, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.852506637573242, |
|
"rewards/margins": 9.036561012268066, |
|
"rewards/rejected": -12.889068603515625, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5500340367597005, |
|
"grad_norm": 5.364296538757404, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": -1.3891135454177856, |
|
"logits/rejected": -1.2110545635223389, |
|
"logps/chosen": -719.6419677734375, |
|
"logps/rejected": -1496.1673583984375, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.265209674835205, |
|
"rewards/margins": 8.223184585571289, |
|
"rewards/rejected": -11.48839282989502, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5527569775357386, |
|
"grad_norm": 11.808959554828267, |
|
"learning_rate": 9.904916909355638e-05, |
|
"logits/chosen": -1.5816423892974854, |
|
"logits/rejected": -1.1652607917785645, |
|
"logps/chosen": -539.0081176757812, |
|
"logps/rejected": -1492.573486328125, |
|
"loss": 0.2031, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.16530704498291, |
|
"rewards/margins": 9.950845718383789, |
|
"rewards/rejected": -12.116150856018066, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5554799183117767, |
|
"grad_norm": 8.952558589174538, |
|
"learning_rate": 9.809842415172048e-05, |
|
"logits/chosen": -1.3569689989089966, |
|
"logits/rejected": -1.1630733013153076, |
|
"logps/chosen": -703.2092895507812, |
|
"logps/rejected": -1485.3570556640625, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.354710817337036, |
|
"rewards/margins": 8.277021408081055, |
|
"rewards/rejected": -11.631732940673828, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5582028590878149, |
|
"grad_norm": 8.492633179657727, |
|
"learning_rate": 9.714785113132801e-05, |
|
"logits/chosen": -0.8783491849899292, |
|
"logits/rejected": -0.5752300024032593, |
|
"logps/chosen": -723.2975463867188, |
|
"logps/rejected": -1613.511962890625, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7593815326690674, |
|
"rewards/margins": 9.781450271606445, |
|
"rewards/rejected": -13.54083251953125, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5609257998638529, |
|
"grad_norm": 5.586376860873606, |
|
"learning_rate": 9.619753597367134e-05, |
|
"logits/chosen": -1.0634355545043945, |
|
"logits/rejected": -0.6238435506820679, |
|
"logps/chosen": -619.2913208007812, |
|
"logps/rejected": -1520.8214111328125, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.2205727100372314, |
|
"rewards/margins": 9.413098335266113, |
|
"rewards/rejected": -12.633668899536133, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5636487406398911, |
|
"grad_norm": 4.45704315988639, |
|
"learning_rate": 9.52475645967293e-05, |
|
"logits/chosen": -0.9397182464599609, |
|
"logits/rejected": -0.5685716271400452, |
|
"logps/chosen": -667.0559692382812, |
|
"logps/rejected": -1488.3125, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.539982318878174, |
|
"rewards/margins": 8.752443313598633, |
|
"rewards/rejected": -12.292426109313965, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5663716814159292, |
|
"grad_norm": 7.7241013263377445, |
|
"learning_rate": 9.429802288739963e-05, |
|
"logits/chosen": -0.5390239953994751, |
|
"logits/rejected": -0.31946104764938354, |
|
"logps/chosen": -702.8306884765625, |
|
"logps/rejected": -1439.090087890625, |
|
"loss": 0.1863, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.5139050483703613, |
|
"rewards/margins": 7.537717342376709, |
|
"rewards/rejected": -11.05162239074707, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5690946221919673, |
|
"grad_norm": 980.6412639778208, |
|
"learning_rate": 9.334899669373379e-05, |
|
"logits/chosen": 0.06543504446744919, |
|
"logits/rejected": 0.195405513048172, |
|
"logps/chosen": -813.8810424804688, |
|
"logps/rejected": -1287.7030029296875, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.83243465423584, |
|
"rewards/margins": 5.0349626541137695, |
|
"rewards/rejected": -9.86739730834961, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5718175629680055, |
|
"grad_norm": 13.897944998252893, |
|
"learning_rate": 9.240057181717546e-05, |
|
"logits/chosen": -0.2763592004776001, |
|
"logits/rejected": -0.15278960764408112, |
|
"logps/chosen": -681.8336791992188, |
|
"logps/rejected": -1307.804931640625, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.6334025859832764, |
|
"rewards/margins": 6.592775821685791, |
|
"rewards/rejected": -10.226178169250488, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5745405037440435, |
|
"grad_norm": 8.530907247381231, |
|
"learning_rate": 9.145283400480333e-05, |
|
"logits/chosen": -0.15677616000175476, |
|
"logits/rejected": -0.0704054981470108, |
|
"logps/chosen": -658.6497192382812, |
|
"logps/rejected": -1345.2955322265625, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.112351655960083, |
|
"rewards/margins": 6.875027656555176, |
|
"rewards/rejected": -9.987378120422363, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5772634445200817, |
|
"grad_norm": 16.381271243779008, |
|
"learning_rate": 9.050586894157854e-05, |
|
"logits/chosen": -0.28872478008270264, |
|
"logits/rejected": -0.1264825314283371, |
|
"logps/chosen": -806.0684814453125, |
|
"logps/rejected": -1522.5067138671875, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.404202461242676, |
|
"rewards/margins": 7.904346466064453, |
|
"rewards/rejected": -12.308548927307129, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5799863852961198, |
|
"grad_norm": 11.196451592798955, |
|
"learning_rate": 8.955976224259798e-05, |
|
"logits/chosen": 0.03705821558833122, |
|
"logits/rejected": 0.18372680246829987, |
|
"logps/chosen": -716.6239013671875, |
|
"logps/rejected": -1472.212646484375, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.9464664459228516, |
|
"rewards/margins": 8.058629989624023, |
|
"rewards/rejected": -12.005096435546875, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.582709326072158, |
|
"grad_norm": 33.293697813605284, |
|
"learning_rate": 8.86145994453539e-05, |
|
"logits/chosen": -0.03169569373130798, |
|
"logits/rejected": 0.13412004709243774, |
|
"logps/chosen": -664.3828125, |
|
"logps/rejected": -1275.538330078125, |
|
"loss": 0.2639, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.8449416160583496, |
|
"rewards/margins": 6.907110691070557, |
|
"rewards/rejected": -9.752052307128906, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.585432266848196, |
|
"grad_norm": 7.031252506857269, |
|
"learning_rate": 8.767046600200045e-05, |
|
"logits/chosen": 0.9719666242599487, |
|
"logits/rejected": 1.0802761316299438, |
|
"logps/chosen": -654.38916015625, |
|
"logps/rejected": -1507.3211669921875, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.204228639602661, |
|
"rewards/margins": 8.715561866760254, |
|
"rewards/rejected": -11.919790267944336, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5881552076242341, |
|
"grad_norm": 98.49722190943557, |
|
"learning_rate": 8.672744727162781e-05, |
|
"logits/chosen": 1.4611481428146362, |
|
"logits/rejected": 1.6459019184112549, |
|
"logps/chosen": -720.2965698242188, |
|
"logps/rejected": -1626.4840087890625, |
|
"loss": 3.8797, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.969226360321045, |
|
"rewards/margins": 9.662763595581055, |
|
"rewards/rejected": -13.631990432739258, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5908781484002723, |
|
"grad_norm": 81.8395749005472, |
|
"learning_rate": 8.578562851254501e-05, |
|
"logits/chosen": 1.584269404411316, |
|
"logits/rejected": 1.420672059059143, |
|
"logps/chosen": -908.0494995117188, |
|
"logps/rejected": -2301.003173828125, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.922436714172363, |
|
"rewards/margins": 13.946557998657227, |
|
"rewards/rejected": -19.868993759155273, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5936010891763104, |
|
"grad_norm": 37.76082762966783, |
|
"learning_rate": 8.484509487457172e-05, |
|
"logits/chosen": 2.651890993118286, |
|
"logits/rejected": 2.771029472351074, |
|
"logps/chosen": -1107.451904296875, |
|
"logps/rejected": -1360.6392822265625, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -7.304482936859131, |
|
"rewards/margins": 3.189244508743286, |
|
"rewards/rejected": -10.49372673034668, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5963240299523486, |
|
"grad_norm": 9.755340455132105, |
|
"learning_rate": 8.390593139133975e-05, |
|
"logits/chosen": 2.368619918823242, |
|
"logits/rejected": 1.9935481548309326, |
|
"logps/chosen": -682.6423950195312, |
|
"logps/rejected": -1282.08935546875, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.698317050933838, |
|
"rewards/margins": 6.121725559234619, |
|
"rewards/rejected": -9.820043563842773, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5990469707283866, |
|
"grad_norm": 18.76492804372937, |
|
"learning_rate": 8.296822297260541e-05, |
|
"logits/chosen": 1.6552093029022217, |
|
"logits/rejected": 1.6404740810394287, |
|
"logps/chosen": -648.8519287109375, |
|
"logps/rejected": -1346.6575927734375, |
|
"loss": 0.2895, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.9380698204040527, |
|
"rewards/margins": 7.239809989929199, |
|
"rewards/rejected": -10.17788028717041, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6017699115044248, |
|
"grad_norm": 10.336382337282298, |
|
"learning_rate": 8.203205439657248e-05, |
|
"logits/chosen": 0.8887385129928589, |
|
"logits/rejected": 1.1281851530075073, |
|
"logps/chosen": -658.0531005859375, |
|
"logps/rejected": -1374.1337890625, |
|
"loss": 1.5843, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1537742614746094, |
|
"rewards/margins": 7.757615566253662, |
|
"rewards/rejected": -10.911389350891113, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6044928522804629, |
|
"grad_norm": 4.623988802456331, |
|
"learning_rate": 8.109751030222765e-05, |
|
"logits/chosen": -0.07704190909862518, |
|
"logits/rejected": 0.13484761118888855, |
|
"logps/chosen": -662.59033203125, |
|
"logps/rejected": -1480.857177734375, |
|
"loss": 0.371, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.091067314147949, |
|
"rewards/margins": 8.361741065979004, |
|
"rewards/rejected": -11.452808380126953, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.607215793056501, |
|
"grad_norm": 11.914279665453476, |
|
"learning_rate": 8.016467518168821e-05, |
|
"logits/chosen": 0.3015810549259186, |
|
"logits/rejected": 0.36731666326522827, |
|
"logps/chosen": -736.3297729492188, |
|
"logps/rejected": -1540.991943359375, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.8391730785369873, |
|
"rewards/margins": 8.72679328918457, |
|
"rewards/rejected": -12.56596565246582, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6099387338325392, |
|
"grad_norm": 24.117684567012283, |
|
"learning_rate": 7.923363337256328e-05, |
|
"logits/chosen": 0.606380820274353, |
|
"logits/rejected": 0.8258590698242188, |
|
"logps/chosen": -774.5369873046875, |
|
"logps/rejected": -1497.198974609375, |
|
"loss": 1.462, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.283402442932129, |
|
"rewards/margins": 7.879556179046631, |
|
"rewards/rejected": -12.162959098815918, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6126616746085772, |
|
"grad_norm": 7.619277045213005, |
|
"learning_rate": 7.830446905032858e-05, |
|
"logits/chosen": -0.02835053764283657, |
|
"logits/rejected": 0.22348585724830627, |
|
"logps/chosen": -702.3712768554688, |
|
"logps/rejected": -1577.526611328125, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.479196548461914, |
|
"rewards/margins": 9.12803840637207, |
|
"rewards/rejected": -12.6072359085083, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 14.605221968669897, |
|
"learning_rate": 7.737726622071628e-05, |
|
"logits/chosen": -0.7005853652954102, |
|
"logits/rejected": -0.37476682662963867, |
|
"logps/chosen": -575.2031860351562, |
|
"logps/rejected": -1441.398681640625, |
|
"loss": 0.2372, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.820068359375, |
|
"rewards/margins": 8.712976455688477, |
|
"rewards/rejected": -11.533044815063477, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6181075561606535, |
|
"grad_norm": 8.259350540995301, |
|
"learning_rate": 7.645210871212014e-05, |
|
"logits/chosen": -0.33096233010292053, |
|
"logits/rejected": -0.10710246860980988, |
|
"logps/chosen": -687.130126953125, |
|
"logps/rejected": -1422.7803955078125, |
|
"loss": 0.2, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.4392876625061035, |
|
"rewards/margins": 7.6333770751953125, |
|
"rewards/rejected": -11.072665214538574, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6208304969366917, |
|
"grad_norm": 11.61901143825517, |
|
"learning_rate": 7.552908016801632e-05, |
|
"logits/chosen": -0.26237788796424866, |
|
"logits/rejected": 0.03336496278643608, |
|
"logps/chosen": -818.0942993164062, |
|
"logps/rejected": -1831.728515625, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.504512310028076, |
|
"rewards/margins": 10.295443534851074, |
|
"rewards/rejected": -14.799957275390625, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6235534377127298, |
|
"grad_norm": 11.742306666819731, |
|
"learning_rate": 7.460826403940155e-05, |
|
"logits/chosen": -0.6621205806732178, |
|
"logits/rejected": -0.40963059663772583, |
|
"logps/chosen": -665.1314697265625, |
|
"logps/rejected": -1418.714111328125, |
|
"loss": 0.2204, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.196091413497925, |
|
"rewards/margins": 8.150285720825195, |
|
"rewards/rejected": -11.346376419067383, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6262763784887678, |
|
"grad_norm": 8.175831739988089, |
|
"learning_rate": 7.368974357724789e-05, |
|
"logits/chosen": -0.9455803036689758, |
|
"logits/rejected": -0.6427022218704224, |
|
"logps/chosen": -634.2056884765625, |
|
"logps/rejected": -1466.7425537109375, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.8602559566497803, |
|
"rewards/margins": 8.551790237426758, |
|
"rewards/rejected": -11.412044525146484, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.628999319264806, |
|
"grad_norm": 11.08633926544173, |
|
"learning_rate": 7.277360182497633e-05, |
|
"logits/chosen": -0.14578841626644135, |
|
"logits/rejected": 0.062481798231601715, |
|
"logps/chosen": -709.3231201171875, |
|
"logps/rejected": -1652.924072265625, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.703045606613159, |
|
"rewards/margins": 9.57800006866455, |
|
"rewards/rejected": -13.281045913696289, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6317222600408441, |
|
"grad_norm": 18.419415877088248, |
|
"learning_rate": 7.185992161094876e-05, |
|
"logits/chosen": 0.5623148679733276, |
|
"logits/rejected": 0.7446078062057495, |
|
"logps/chosen": -778.8319091796875, |
|
"logps/rejected": -1452.923583984375, |
|
"loss": 0.2594, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.7897286415100098, |
|
"rewards/margins": 7.021533966064453, |
|
"rewards/rejected": -10.811262130737305, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6344452008168823, |
|
"grad_norm": 7.760961578008244, |
|
"learning_rate": 7.094878554097935e-05, |
|
"logits/chosen": 0.16852492094039917, |
|
"logits/rejected": 0.37618380784988403, |
|
"logps/chosen": -627.3386840820312, |
|
"logps/rejected": -1635.9002685546875, |
|
"loss": 0.1427, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.179767370223999, |
|
"rewards/margins": 10.47213363647461, |
|
"rewards/rejected": -13.651899337768555, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6371681415929203, |
|
"grad_norm": 6.902496772019522, |
|
"learning_rate": 7.004027599086635e-05, |
|
"logits/chosen": -0.08031632751226425, |
|
"logits/rejected": 0.10308752208948135, |
|
"logps/chosen": -617.7669677734375, |
|
"logps/rejected": -1439.9415283203125, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.939218044281006, |
|
"rewards/margins": 8.878007888793945, |
|
"rewards/rejected": -11.817225456237793, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.6398910823689585, |
|
"grad_norm": 4.644941374043179, |
|
"learning_rate": 6.913447509894431e-05, |
|
"logits/chosen": 0.3528628647327423, |
|
"logits/rejected": 0.6444572806358337, |
|
"logps/chosen": -709.1551513671875, |
|
"logps/rejected": -1517.9844970703125, |
|
"loss": 0.2281, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.4242711067199707, |
|
"rewards/margins": 8.883430480957031, |
|
"rewards/rejected": -12.307700157165527, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6426140231449966, |
|
"grad_norm": 10.647837733586355, |
|
"learning_rate": 6.823146475865808e-05, |
|
"logits/chosen": 0.5684790015220642, |
|
"logits/rejected": 0.799397349357605, |
|
"logps/chosen": -809.0391845703125, |
|
"logps/rejected": -1618.5943603515625, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.392528057098389, |
|
"rewards/margins": 8.570115089416504, |
|
"rewards/rejected": -12.962644577026367, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6453369639210347, |
|
"grad_norm": 4.992656274243724, |
|
"learning_rate": 6.73313266111587e-05, |
|
"logits/chosen": 0.6541573405265808, |
|
"logits/rejected": 0.8468329310417175, |
|
"logps/chosen": -720.4848022460938, |
|
"logps/rejected": -1716.3724365234375, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.6888911724090576, |
|
"rewards/margins": 10.465368270874023, |
|
"rewards/rejected": -14.154258728027344, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6480599046970729, |
|
"grad_norm": 144.2262824604598, |
|
"learning_rate": 6.643414203792245e-05, |
|
"logits/chosen": 0.5966984629631042, |
|
"logits/rejected": 0.8201514482498169, |
|
"logps/chosen": -945.392578125, |
|
"logps/rejected": -1575.818359375, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.658278465270996, |
|
"rewards/margins": 6.733407020568848, |
|
"rewards/rejected": -12.391685485839844, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6507828454731109, |
|
"grad_norm": 10.385459412302742, |
|
"learning_rate": 6.553999215339289e-05, |
|
"logits/chosen": -0.4542000889778137, |
|
"logits/rejected": -0.23481395840644836, |
|
"logps/chosen": -672.037841796875, |
|
"logps/rejected": -1340.35791015625, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.1981143951416016, |
|
"rewards/margins": 7.2478790283203125, |
|
"rewards/rejected": -10.445992469787598, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6535057862491491, |
|
"grad_norm": 3.4283866597659953, |
|
"learning_rate": 6.464895779764741e-05, |
|
"logits/chosen": -0.3537091910839081, |
|
"logits/rejected": -0.12687480449676514, |
|
"logps/chosen": -703.0349731445312, |
|
"logps/rejected": -1308.7872314453125, |
|
"loss": 0.247, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.5580172538757324, |
|
"rewards/margins": 6.497230529785156, |
|
"rewards/rejected": -10.05524730682373, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6562287270251872, |
|
"grad_norm": 10.682248923459898, |
|
"learning_rate": 6.376111952908859e-05, |
|
"logits/chosen": -0.14068874716758728, |
|
"logits/rejected": 0.12782877683639526, |
|
"logps/chosen": -731.6837158203125, |
|
"logps/rejected": -1395.9217529296875, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.983330249786377, |
|
"rewards/margins": 7.223794460296631, |
|
"rewards/rejected": -11.207123756408691, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6589516678012253, |
|
"grad_norm": 7.845658542414211, |
|
"learning_rate": 6.287655761716067e-05, |
|
"logits/chosen": -0.6590530276298523, |
|
"logits/rejected": -0.5280757546424866, |
|
"logps/chosen": -560.03173828125, |
|
"logps/rejected": -1205.8927001953125, |
|
"loss": 0.1641, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.356152296066284, |
|
"rewards/margins": 7.194516181945801, |
|
"rewards/rejected": -9.550667762756348, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6616746085772635, |
|
"grad_norm": 8.940033970881544, |
|
"learning_rate": 6.199535203509272e-05, |
|
"logits/chosen": -0.6741346120834351, |
|
"logits/rejected": -0.5807133316993713, |
|
"logps/chosen": -559.6777954101562, |
|
"logps/rejected": -1138.4764404296875, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.8906910419464111, |
|
"rewards/margins": 6.3634185791015625, |
|
"rewards/rejected": -8.254109382629395, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6643975493533015, |
|
"grad_norm": 13.766342422082177, |
|
"learning_rate": 6.111758245266794e-05, |
|
"logits/chosen": -0.4139633774757385, |
|
"logits/rejected": -0.12003821134567261, |
|
"logps/chosen": -607.3410034179688, |
|
"logps/rejected": -1390.0321044921875, |
|
"loss": 0.153, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.9113223552703857, |
|
"rewards/margins": 7.944007873535156, |
|
"rewards/rejected": -10.855331420898438, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6671204901293397, |
|
"grad_norm": 14.138045726244465, |
|
"learning_rate": 6.02433282290209e-05, |
|
"logits/chosen": -0.36063870787620544, |
|
"logits/rejected": -0.06381018459796906, |
|
"logps/chosen": -699.7347412109375, |
|
"logps/rejected": -1470.082275390625, |
|
"loss": 0.1933, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.4292755126953125, |
|
"rewards/margins": 8.251054763793945, |
|
"rewards/rejected": -11.680330276489258, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6698434309053778, |
|
"grad_norm": 3.280339771466081, |
|
"learning_rate": 5.937266840546265e-05, |
|
"logits/chosen": -1.0655449628829956, |
|
"logits/rejected": -0.8338848352432251, |
|
"logps/chosen": -624.5250244140625, |
|
"logps/rejected": -1253.6978759765625, |
|
"loss": 1.1411, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6232266426086426, |
|
"rewards/margins": 6.86643123626709, |
|
"rewards/rejected": -9.48965835571289, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.672566371681416, |
|
"grad_norm": 4.570905374864043, |
|
"learning_rate": 5.850568169833449e-05, |
|
"logits/chosen": -0.7835721969604492, |
|
"logits/rejected": -0.49934712052345276, |
|
"logps/chosen": -576.0851440429688, |
|
"logps/rejected": -1278.7838134765625, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.4718785285949707, |
|
"rewards/margins": 7.4020280838012695, |
|
"rewards/rejected": -9.873908042907715, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.675289312457454, |
|
"grad_norm": 11.384731637325952, |
|
"learning_rate": 5.76424464918915e-05, |
|
"logits/chosen": -0.3228727877140045, |
|
"logits/rejected": -0.060401104390621185, |
|
"logps/chosen": -685.0753173828125, |
|
"logps/rejected": -1462.6395263671875, |
|
"loss": 0.8961, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.290065050125122, |
|
"rewards/margins": 7.7338151931762695, |
|
"rewards/rejected": -11.023880004882812, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6780122532334921, |
|
"grad_norm": 24.041375030089217, |
|
"learning_rate": 5.678304083121546e-05, |
|
"logits/chosen": -0.5586646199226379, |
|
"logits/rejected": -0.27681785821914673, |
|
"logps/chosen": -995.1302490234375, |
|
"logps/rejected": -1378.470947265625, |
|
"loss": 1.1643, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.8445916175842285, |
|
"rewards/margins": 4.3523359298706055, |
|
"rewards/rejected": -11.196928024291992, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6807351940095303, |
|
"grad_norm": 11.286366339467362, |
|
"learning_rate": 5.592754241515908e-05, |
|
"logits/chosen": -0.8155454397201538, |
|
"logits/rejected": -0.5854301452636719, |
|
"logps/chosen": -649.5323486328125, |
|
"logps/rejected": -1438.3155517578125, |
|
"loss": 0.1778, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.1901750564575195, |
|
"rewards/margins": 8.069450378417969, |
|
"rewards/rejected": -11.259625434875488, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6807351940095303, |
|
"eval_logits/chosen": -0.96079421043396, |
|
"eval_logits/rejected": -0.711790919303894, |
|
"eval_logps/chosen": -687.1464233398438, |
|
"eval_logps/rejected": -1489.6011962890625, |
|
"eval_loss": 0.2004450261592865, |
|
"eval_rewards/accuracies": 0.9242053627967834, |
|
"eval_rewards/chosen": -3.4745161533355713, |
|
"eval_rewards/margins": 8.502269744873047, |
|
"eval_rewards/rejected": -11.976785659790039, |
|
"eval_runtime": 3748.5252, |
|
"eval_samples_per_second": 1.308, |
|
"eval_steps_per_second": 0.109, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6834581347855684, |
|
"grad_norm": 6.190600718672074, |
|
"learning_rate": 5.507602858932113e-05, |
|
"logits/chosen": -0.7578543424606323, |
|
"logits/rejected": -0.48180899024009705, |
|
"logps/chosen": -655.7686767578125, |
|
"logps/rejected": -1475.280517578125, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.3294525146484375, |
|
"rewards/margins": 8.292399406433105, |
|
"rewards/rejected": -11.62185287475586, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6861810755616066, |
|
"grad_norm": 5.615801182575983, |
|
"learning_rate": 5.4228576339053586e-05, |
|
"logits/chosen": -0.6604496836662292, |
|
"logits/rejected": -0.3469547629356384, |
|
"logps/chosen": -687.5326538085938, |
|
"logps/rejected": -1508.2755126953125, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2411141395568848, |
|
"rewards/margins": 8.564860343933105, |
|
"rewards/rejected": -11.805974960327148, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6889040163376446, |
|
"grad_norm": 9.926521527274184, |
|
"learning_rate": 5.3385262282501535e-05, |
|
"logits/chosen": -0.3594810962677002, |
|
"logits/rejected": -0.052802037447690964, |
|
"logps/chosen": -595.9348754882812, |
|
"logps/rejected": -1410.1544189453125, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.8217885494232178, |
|
"rewards/margins": 8.592623710632324, |
|
"rewards/rejected": -11.414411544799805, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6916269571136828, |
|
"grad_norm": 6.281514980730311, |
|
"learning_rate": 5.254616266367591e-05, |
|
"logits/chosen": -0.4293007254600525, |
|
"logits/rejected": -0.24068334698677063, |
|
"logps/chosen": -549.3060302734375, |
|
"logps/rejected": -1288.567626953125, |
|
"loss": 0.1586, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.1384198665618896, |
|
"rewards/margins": 7.993442535400391, |
|
"rewards/rejected": -10.131863594055176, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6943498978897209, |
|
"grad_norm": 6.11278397464638, |
|
"learning_rate": 5.171135334556047e-05, |
|
"logits/chosen": 0.2943124771118164, |
|
"logits/rejected": 0.4400175213813782, |
|
"logps/chosen": -577.3480224609375, |
|
"logps/rejected": -1332.40625, |
|
"loss": 0.1633, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.670849084854126, |
|
"rewards/margins": 7.999800682067871, |
|
"rewards/rejected": -10.670649528503418, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.697072838665759, |
|
"grad_norm": 9.008079572058884, |
|
"learning_rate": 5.088090980325297e-05, |
|
"logits/chosen": 1.101324439048767, |
|
"logits/rejected": 1.0822101831436157, |
|
"logps/chosen": -674.9332275390625, |
|
"logps/rejected": -1506.8800048828125, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.356271743774414, |
|
"rewards/margins": 8.419809341430664, |
|
"rewards/rejected": -11.776080131530762, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6997957794417972, |
|
"grad_norm": 9.425933719708931, |
|
"learning_rate": 5.005490711714139e-05, |
|
"logits/chosen": 0.4512532353401184, |
|
"logits/rejected": 0.5302013158798218, |
|
"logps/chosen": -619.0537719726562, |
|
"logps/rejected": -1460.2489013671875, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.9995157718658447, |
|
"rewards/margins": 8.4155855178833, |
|
"rewards/rejected": -11.41510009765625, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7025187202178352, |
|
"grad_norm": 10.533473413081708, |
|
"learning_rate": 4.9233419966116036e-05, |
|
"logits/chosen": -0.4281206727027893, |
|
"logits/rejected": -0.11214808374643326, |
|
"logps/chosen": -540.2472534179688, |
|
"logps/rejected": -1485.8243408203125, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3536593914031982, |
|
"rewards/margins": 9.66262149810791, |
|
"rewards/rejected": -12.016282081604004, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7052416609938734, |
|
"grad_norm": 2.6130275891308385, |
|
"learning_rate": 4.8416522620817627e-05, |
|
"logits/chosen": -0.7308027148246765, |
|
"logits/rejected": -0.41089239716529846, |
|
"logps/chosen": -613.0734252929688, |
|
"logps/rejected": -1534.1898193359375, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.856041431427002, |
|
"rewards/margins": 9.61817741394043, |
|
"rewards/rejected": -12.474218368530273, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 38.945197564012595, |
|
"learning_rate": 4.7604288936922735e-05, |
|
"logits/chosen": -0.625209629535675, |
|
"logits/rejected": -0.34178656339645386, |
|
"logps/chosen": -627.1326904296875, |
|
"logps/rejected": -1564.62158203125, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.122101306915283, |
|
"rewards/margins": 10.082366943359375, |
|
"rewards/rejected": -13.2044677734375, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7106875425459497, |
|
"grad_norm": 3.161769729178617, |
|
"learning_rate": 4.6796792348466356e-05, |
|
"logits/chosen": -0.6596896648406982, |
|
"logits/rejected": -0.32370975613594055, |
|
"logps/chosen": -559.852783203125, |
|
"logps/rejected": -1473.3958740234375, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.7754173278808594, |
|
"rewards/margins": 9.06057357788086, |
|
"rewards/rejected": -11.835991859436035, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7134104833219878, |
|
"grad_norm": 12.02339802433789, |
|
"learning_rate": 4.599410586120272e-05, |
|
"logits/chosen": -0.44842538237571716, |
|
"logits/rejected": -0.05267402529716492, |
|
"logps/chosen": -645.791259765625, |
|
"logps/rejected": -1563.593994140625, |
|
"loss": 0.1445, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.7652764320373535, |
|
"rewards/margins": 9.446809768676758, |
|
"rewards/rejected": -12.212087631225586, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7161334240980258, |
|
"grad_norm": 11.056020877129452, |
|
"learning_rate": 4.5196302046004926e-05, |
|
"logits/chosen": -0.5120202302932739, |
|
"logits/rejected": -0.22029462456703186, |
|
"logps/chosen": -651.0948486328125, |
|
"logps/rejected": -1455.8291015625, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2218902111053467, |
|
"rewards/margins": 8.378633499145508, |
|
"rewards/rejected": -11.600522994995117, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.718856364874064, |
|
"grad_norm": 3.682848580956075, |
|
"learning_rate": 4.4403453032303764e-05, |
|
"logits/chosen": -0.27802684903144836, |
|
"logits/rejected": -0.03195106238126755, |
|
"logps/chosen": -679.2535400390625, |
|
"logps/rejected": -1628.7232666015625, |
|
"loss": 0.1481, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.0372445583343506, |
|
"rewards/margins": 9.984166145324707, |
|
"rewards/rejected": -13.021410942077637, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.7215793056501021, |
|
"grad_norm": 7.257808137219564, |
|
"learning_rate": 4.3615630501566384e-05, |
|
"logits/chosen": -0.1426171511411667, |
|
"logits/rejected": 0.09189265221357346, |
|
"logps/chosen": -624.9664306640625, |
|
"logps/rejected": -1382.918701171875, |
|
"loss": 0.1858, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.731658935546875, |
|
"rewards/margins": 8.023258209228516, |
|
"rewards/rejected": -10.75491714477539, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7243022464261403, |
|
"grad_norm": 11.218556371279355, |
|
"learning_rate": 4.283290568081591e-05, |
|
"logits/chosen": -0.15059207379817963, |
|
"logits/rejected": -0.008915537968277931, |
|
"logps/chosen": -594.3723754882812, |
|
"logps/rejected": -1270.779052734375, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.626492977142334, |
|
"rewards/margins": 6.870394706726074, |
|
"rewards/rejected": -9.49688720703125, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7270251872021783, |
|
"grad_norm": 15.714016618081846, |
|
"learning_rate": 4.2055349336191366e-05, |
|
"logits/chosen": 0.12408769130706787, |
|
"logits/rejected": 0.4155445098876953, |
|
"logps/chosen": -705.4385375976562, |
|
"logps/rejected": -1534.524169921875, |
|
"loss": 0.1837, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.389605760574341, |
|
"rewards/margins": 8.947419166564941, |
|
"rewards/rejected": -12.337023735046387, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7297481279782164, |
|
"grad_norm": 5.674729134005591, |
|
"learning_rate": 4.128303176655002e-05, |
|
"logits/chosen": -0.021259818226099014, |
|
"logits/rejected": 0.36636242270469666, |
|
"logps/chosen": -713.9142456054688, |
|
"logps/rejected": -1694.980712890625, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.674048662185669, |
|
"rewards/margins": 10.161161422729492, |
|
"rewards/rejected": -13.835209846496582, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7324710687542546, |
|
"grad_norm": 12.609521570286844, |
|
"learning_rate": 4.051602279711163e-05, |
|
"logits/chosen": -0.2557820677757263, |
|
"logits/rejected": 0.07670646160840988, |
|
"logps/chosen": -731.0612182617188, |
|
"logps/rejected": -1623.459716796875, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.7107300758361816, |
|
"rewards/margins": 9.202885627746582, |
|
"rewards/rejected": -12.913615226745605, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7351940095302927, |
|
"grad_norm": 5.52670434124597, |
|
"learning_rate": 3.975439177314533e-05, |
|
"logits/chosen": -0.024277815595269203, |
|
"logits/rejected": 0.2917264699935913, |
|
"logps/chosen": -684.9026489257812, |
|
"logps/rejected": -1733.6109619140625, |
|
"loss": 0.1331, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.6988205909729004, |
|
"rewards/margins": 11.1884765625, |
|
"rewards/rejected": -14.887295722961426, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7379169503063309, |
|
"grad_norm": 3.5661251305369905, |
|
"learning_rate": 3.8998207553700506e-05, |
|
"logits/chosen": 0.26047295331954956, |
|
"logits/rejected": 0.5690510869026184, |
|
"logps/chosen": -759.6395263671875, |
|
"logps/rejected": -1651.3079833984375, |
|
"loss": 0.1748, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.069083213806152, |
|
"rewards/margins": 9.588483810424805, |
|
"rewards/rejected": -13.657567977905273, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7406398910823689, |
|
"grad_norm": 12.452203870449546, |
|
"learning_rate": 3.824753850538082e-05, |
|
"logits/chosen": 0.25679439306259155, |
|
"logits/rejected": 0.6026118993759155, |
|
"logps/chosen": -745.6700439453125, |
|
"logps/rejected": -1589.962890625, |
|
"loss": 0.1568, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.7858002185821533, |
|
"rewards/margins": 9.034173011779785, |
|
"rewards/rejected": -12.819973945617676, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7433628318584071, |
|
"grad_norm": 8.031642884115076, |
|
"learning_rate": 3.750245249616352e-05, |
|
"logits/chosen": 0.2537817358970642, |
|
"logits/rejected": 0.6085513830184937, |
|
"logps/chosen": -645.0103759765625, |
|
"logps/rejected": -1783.4013671875, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.4503912925720215, |
|
"rewards/margins": 11.608026504516602, |
|
"rewards/rejected": -15.058418273925781, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7460857726344452, |
|
"grad_norm": 14.569525144419739, |
|
"learning_rate": 3.6763016889263344e-05, |
|
"logits/chosen": 0.15033717453479767, |
|
"logits/rejected": 0.47281932830810547, |
|
"logps/chosen": -663.0330810546875, |
|
"logps/rejected": -1513.896728515625, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.0524895191192627, |
|
"rewards/margins": 9.203824043273926, |
|
"rewards/rejected": -12.256312370300293, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7488087134104833, |
|
"grad_norm": 62.65981235550733, |
|
"learning_rate": 3.602929853704223e-05, |
|
"logits/chosen": -0.08105476200580597, |
|
"logits/rejected": 0.24570266902446747, |
|
"logps/chosen": -862.7356567382812, |
|
"logps/rejected": -1566.857177734375, |
|
"loss": 0.8887, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.279505252838135, |
|
"rewards/margins": 7.579603672027588, |
|
"rewards/rejected": -12.859109878540039, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7515316541865215, |
|
"grad_norm": 6.616456930014495, |
|
"learning_rate": 3.5301363774965256e-05, |
|
"logits/chosen": -0.20161032676696777, |
|
"logits/rejected": 0.048817120492458344, |
|
"logps/chosen": -653.1944580078125, |
|
"logps/rejected": -1532.51025390625, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.062190294265747, |
|
"rewards/margins": 8.815936088562012, |
|
"rewards/rejected": -11.878127098083496, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7542545949625595, |
|
"grad_norm": 5.792226112009077, |
|
"learning_rate": 3.457927841560311e-05, |
|
"logits/chosen": -0.4394923150539398, |
|
"logits/rejected": -0.09831082820892334, |
|
"logps/chosen": -581.9075927734375, |
|
"logps/rejected": -1791.471435546875, |
|
"loss": 0.1458, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.953946352005005, |
|
"rewards/margins": 12.19267749786377, |
|
"rewards/rejected": -15.146624565124512, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7569775357385977, |
|
"grad_norm": 10.191012036267383, |
|
"learning_rate": 3.3863107742682144e-05, |
|
"logits/chosen": -0.5570166707038879, |
|
"logits/rejected": -0.26251545548439026, |
|
"logps/chosen": -670.0498046875, |
|
"logps/rejected": -1517.240478515625, |
|
"loss": 0.1453, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.960145950317383, |
|
"rewards/margins": 9.034058570861816, |
|
"rewards/rejected": -11.994203567504883, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7597004765146358, |
|
"grad_norm": 5.4213794129480215, |
|
"learning_rate": 3.315291650518197e-05, |
|
"logits/chosen": -0.3676304221153259, |
|
"logits/rejected": -0.056665968149900436, |
|
"logps/chosen": -603.147705078125, |
|
"logps/rejected": -1735.9398193359375, |
|
"loss": 0.1155, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.912886381149292, |
|
"rewards/margins": 11.69940185546875, |
|
"rewards/rejected": -14.612287521362305, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.762423417290674, |
|
"grad_norm": 12.375680844171878, |
|
"learning_rate": 3.2448768911481576e-05, |
|
"logits/chosen": -0.09998101741075516, |
|
"logits/rejected": 0.16582295298576355, |
|
"logps/chosen": -644.6640625, |
|
"logps/rejected": -1918.529296875, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1426656246185303, |
|
"rewards/margins": 13.003247261047363, |
|
"rewards/rejected": -16.14591407775879, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.765146358066712, |
|
"grad_norm": 5.975694994931154, |
|
"learning_rate": 3.175072862355415e-05, |
|
"logits/chosen": 0.23628827929496765, |
|
"logits/rejected": 0.4834356904029846, |
|
"logps/chosen": -655.3341064453125, |
|
"logps/rejected": -1792.5072021484375, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.2562522888183594, |
|
"rewards/margins": 11.580477714538574, |
|
"rewards/rejected": -14.836729049682617, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7678692988427501, |
|
"grad_norm": 12.23511220826118, |
|
"learning_rate": 3.105885875121152e-05, |
|
"logits/chosen": 0.279664009809494, |
|
"logits/rejected": 0.541578471660614, |
|
"logps/chosen": -647.7738037109375, |
|
"logps/rejected": -1687.0943603515625, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.340040922164917, |
|
"rewards/margins": 10.875506401062012, |
|
"rewards/rejected": -14.215548515319824, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7705922396187883, |
|
"grad_norm": 6.939190892672432, |
|
"learning_rate": 3.0373221846398235e-05, |
|
"logits/chosen": 0.2148459255695343, |
|
"logits/rejected": 0.46170759201049805, |
|
"logps/chosen": -640.2728881835938, |
|
"logps/rejected": -1714.49609375, |
|
"loss": 0.127, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.9620585441589355, |
|
"rewards/margins": 11.328829765319824, |
|
"rewards/rejected": -14.290887832641602, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7733151803948264, |
|
"grad_norm": 6.269444512733244, |
|
"learning_rate": 2.9693879897536436e-05, |
|
"logits/chosen": 0.27309393882751465, |
|
"logits/rejected": 0.5137401223182678, |
|
"logps/chosen": -668.1610107421875, |
|
"logps/rejected": -1638.63671875, |
|
"loss": 0.1535, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3333866596221924, |
|
"rewards/margins": 10.211597442626953, |
|
"rewards/rejected": -13.54498291015625, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7760381211708646, |
|
"grad_norm": 7.741925527096101, |
|
"learning_rate": 2.9020894323921366e-05, |
|
"logits/chosen": 0.25359398126602173, |
|
"logits/rejected": 0.5846693515777588, |
|
"logps/chosen": -644.554443359375, |
|
"logps/rejected": -1534.7540283203125, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.090136766433716, |
|
"rewards/margins": 9.402518272399902, |
|
"rewards/rejected": -12.492655754089355, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7787610619469026, |
|
"grad_norm": 17.942064241858763, |
|
"learning_rate": 2.8354325970168484e-05, |
|
"logits/chosen": 0.39829185605049133, |
|
"logits/rejected": 0.6095653772354126, |
|
"logps/chosen": -619.0804443359375, |
|
"logps/rejected": -1742.217041015625, |
|
"loss": 0.1313, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.099508047103882, |
|
"rewards/margins": 11.940356254577637, |
|
"rewards/rejected": -15.039861679077148, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7814840027229408, |
|
"grad_norm": 11.47804437386106, |
|
"learning_rate": 2.7694235100712518e-05, |
|
"logits/chosen": 0.4158736765384674, |
|
"logits/rejected": 0.6686142683029175, |
|
"logps/chosen": -601.0423583984375, |
|
"logps/rejected": -1758.963623046875, |
|
"loss": 0.1718, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.962646007537842, |
|
"rewards/margins": 12.120287895202637, |
|
"rewards/rejected": -15.082934379577637, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7842069434989789, |
|
"grad_norm": 11.324300295147452, |
|
"learning_rate": 2.7040681394358813e-05, |
|
"logits/chosen": 0.26672840118408203, |
|
"logits/rejected": 0.49848708510398865, |
|
"logps/chosen": -687.2901611328125, |
|
"logps/rejected": -1745.1597900390625, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.3730876445770264, |
|
"rewards/margins": 10.44471263885498, |
|
"rewards/rejected": -13.817800521850586, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.786929884275017, |
|
"grad_norm": 3.8898657638742176, |
|
"learning_rate": 2.6393723938888125e-05, |
|
"logits/chosen": 0.1845589578151703, |
|
"logits/rejected": 0.44024285674095154, |
|
"logps/chosen": -611.0074462890625, |
|
"logps/rejected": -1503.866455078125, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.838397741317749, |
|
"rewards/margins": 9.593734741210938, |
|
"rewards/rejected": -12.432132720947266, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7896528250510552, |
|
"grad_norm": 7.214623467902022, |
|
"learning_rate": 2.5753421225714057e-05, |
|
"logits/chosen": 0.11631506681442261, |
|
"logits/rejected": 0.3261148929595947, |
|
"logps/chosen": -659.8120727539062, |
|
"logps/rejected": -1533.37939453125, |
|
"loss": 0.1904, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.959886074066162, |
|
"rewards/margins": 9.48987865447998, |
|
"rewards/rejected": -12.449766159057617, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7923757658270932, |
|
"grad_norm": 14.811426408672398, |
|
"learning_rate": 2.5119831144595163e-05, |
|
"logits/chosen": 0.250567227602005, |
|
"logits/rejected": 0.4331508278846741, |
|
"logps/chosen": -606.6591796875, |
|
"logps/rejected": -1445.5570068359375, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.611093521118164, |
|
"rewards/margins": 9.088173866271973, |
|
"rewards/rejected": -11.699268341064453, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7950987066031314, |
|
"grad_norm": 1.3533811803679652, |
|
"learning_rate": 2.4493010978401064e-05, |
|
"logits/chosen": 0.1440388262271881, |
|
"logits/rejected": 0.2874930799007416, |
|
"logps/chosen": -635.5841674804688, |
|
"logps/rejected": -1497.6400146484375, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.7259573936462402, |
|
"rewards/margins": 9.025880813598633, |
|
"rewards/rejected": -11.751837730407715, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7978216473791695, |
|
"grad_norm": 8.320155633358658, |
|
"learning_rate": 2.3873017397933327e-05, |
|
"logits/chosen": 0.33122745156288147, |
|
"logits/rejected": 0.5511559247970581, |
|
"logps/chosen": -595.3291015625, |
|
"logps/rejected": -1565.5882568359375, |
|
"loss": 0.1784, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.964611530303955, |
|
"rewards/margins": 9.899821281433105, |
|
"rewards/rejected": -12.864433288574219, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.8005445881552076, |
|
"grad_norm": 11.153673486628335, |
|
"learning_rate": 2.3259906456802216e-05, |
|
"logits/chosen": 0.14781682193279266, |
|
"logits/rejected": 0.2881123125553131, |
|
"logps/chosen": -578.9710083007812, |
|
"logps/rejected": -1423.344970703125, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.7508277893066406, |
|
"rewards/margins": 8.510377883911133, |
|
"rewards/rejected": -11.261204719543457, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.8032675289312458, |
|
"grad_norm": 6.886878526143292, |
|
"learning_rate": 2.265373358635856e-05, |
|
"logits/chosen": -0.09965401142835617, |
|
"logits/rejected": -0.01610557734966278, |
|
"logps/chosen": -640.9449462890625, |
|
"logps/rejected": -1365.075439453125, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.95725154876709, |
|
"rewards/margins": 7.233819007873535, |
|
"rewards/rejected": -10.191070556640625, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.8059904697072838, |
|
"grad_norm": 16.611108572392084, |
|
"learning_rate": 2.205455359068227e-05, |
|
"logits/chosen": -0.021208569407463074, |
|
"logits/rejected": 0.1042444258928299, |
|
"logps/chosen": -568.1573486328125, |
|
"logps/rejected": -1511.6607666015625, |
|
"loss": 0.1583, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.5029826164245605, |
|
"rewards/margins": 9.983243942260742, |
|
"rewards/rejected": -12.486227035522461, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.808713410483322, |
|
"grad_norm": 8.57165227758473, |
|
"learning_rate": 2.146242064162767e-05, |
|
"logits/chosen": -0.03554140776395798, |
|
"logits/rejected": 0.11067160218954086, |
|
"logps/chosen": -583.32568359375, |
|
"logps/rejected": -1428.696533203125, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.437119960784912, |
|
"rewards/margins": 8.866755485534668, |
|
"rewards/rejected": -11.303875923156738, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.8114363512593601, |
|
"grad_norm": 6.1603778868761285, |
|
"learning_rate": 2.0877388273925646e-05, |
|
"logits/chosen": 0.10540245473384857, |
|
"logits/rejected": 0.2626660466194153, |
|
"logps/chosen": -588.477783203125, |
|
"logps/rejected": -1473.778076171875, |
|
"loss": 0.1282, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.5758628845214844, |
|
"rewards/margins": 9.339284896850586, |
|
"rewards/rejected": -11.91514778137207, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8141592920353983, |
|
"grad_norm": 1.9188133882396965, |
|
"learning_rate": 2.029950938034364e-05, |
|
"logits/chosen": 0.17124342918395996, |
|
"logits/rejected": 0.24657151103019714, |
|
"logps/chosen": -604.7764282226562, |
|
"logps/rejected": -1434.573974609375, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6486189365386963, |
|
"rewards/margins": 8.698214530944824, |
|
"rewards/rejected": -11.346834182739258, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8168822328114363, |
|
"grad_norm": 6.713017303993807, |
|
"learning_rate": 1.9728836206903656e-05, |
|
"logits/chosen": 0.5552121996879578, |
|
"logits/rejected": 0.6636002063751221, |
|
"logps/chosen": -636.4107666015625, |
|
"logps/rejected": -1567.7227783203125, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9134068489074707, |
|
"rewards/margins": 9.883702278137207, |
|
"rewards/rejected": -12.797107696533203, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8168822328114363, |
|
"eval_logits/chosen": 0.5428004264831543, |
|
"eval_logits/rejected": 0.672680675983429, |
|
"eval_logps/chosen": -648.9738159179688, |
|
"eval_logps/rejected": -1576.696044921875, |
|
"eval_loss": 0.1451936662197113, |
|
"eval_rewards/accuracies": 0.9339853525161743, |
|
"eval_rewards/chosen": -3.092790126800537, |
|
"eval_rewards/margins": 9.754944801330566, |
|
"eval_rewards/rejected": -12.847735404968262, |
|
"eval_runtime": 3746.5832, |
|
"eval_samples_per_second": 1.309, |
|
"eval_steps_per_second": 0.109, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8196051735874744, |
|
"grad_norm": 3.6241792804364183, |
|
"learning_rate": 1.9165420348158526e-05, |
|
"logits/chosen": 0.6930165886878967, |
|
"logits/rejected": 0.7744175791740417, |
|
"logps/chosen": -633.44287109375, |
|
"logps/rejected": -1447.761474609375, |
|
"loss": 0.324, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.974301815032959, |
|
"rewards/margins": 8.308740615844727, |
|
"rewards/rejected": -11.283042907714844, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.8223281143635126, |
|
"grad_norm": 7.97257246955101, |
|
"learning_rate": 1.8609312742527497e-05, |
|
"logits/chosen": 0.3581380248069763, |
|
"logits/rejected": 0.5654556155204773, |
|
"logps/chosen": -625.2705688476562, |
|
"logps/rejected": -1440.2318115234375, |
|
"loss": 0.1714, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.855855941772461, |
|
"rewards/margins": 8.565881729125977, |
|
"rewards/rejected": -11.42173957824707, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.8250510551395507, |
|
"grad_norm": 13.920925895284425, |
|
"learning_rate": 1.8060563667690712e-05, |
|
"logits/chosen": 0.4211028218269348, |
|
"logits/rejected": 0.5939645171165466, |
|
"logps/chosen": -573.2506103515625, |
|
"logps/rejected": -1506.34814453125, |
|
"loss": 0.1703, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.502913236618042, |
|
"rewards/margins": 9.767515182495117, |
|
"rewards/rejected": -12.270427703857422, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8277739959155889, |
|
"grad_norm": 3.7906065265561137, |
|
"learning_rate": 1.7519222736043662e-05, |
|
"logits/chosen": 0.3115430474281311, |
|
"logits/rejected": 0.48292437195777893, |
|
"logps/chosen": -542.3753662109375, |
|
"logps/rejected": -1483.460205078125, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.395129680633545, |
|
"rewards/margins": 9.773895263671875, |
|
"rewards/rejected": -12.169026374816895, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8304969366916269, |
|
"grad_norm": 9.127305003183238, |
|
"learning_rate": 1.6985338890211745e-05, |
|
"logits/chosen": 0.314956933259964, |
|
"logits/rejected": 0.4118734896183014, |
|
"logps/chosen": -588.65185546875, |
|
"logps/rejected": -1375.0704345703125, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.656867504119873, |
|
"rewards/margins": 7.795752048492432, |
|
"rewards/rejected": -10.452619552612305, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8332198774676651, |
|
"grad_norm": 12.735372802298448, |
|
"learning_rate": 1.645896039862529e-05, |
|
"logits/chosen": 0.4811887741088867, |
|
"logits/rejected": 0.5578995943069458, |
|
"logps/chosen": -617.8807983398438, |
|
"logps/rejected": -1505.7147216796875, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.9311892986297607, |
|
"rewards/margins": 9.137002944946289, |
|
"rewards/rejected": -12.068192481994629, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8359428182437032, |
|
"grad_norm": 7.068145484671497, |
|
"learning_rate": 1.5940134851155697e-05, |
|
"logits/chosen": 0.6108947992324829, |
|
"logits/rejected": 0.6557289361953735, |
|
"logps/chosen": -655.98828125, |
|
"logps/rejected": -1595.331298828125, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.7303531169891357, |
|
"rewards/margins": 9.733122825622559, |
|
"rewards/rejected": -12.46347713470459, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8386657590197413, |
|
"grad_norm": 4.310739879114603, |
|
"learning_rate": 1.542890915481282e-05, |
|
"logits/chosen": 0.7832988500595093, |
|
"logits/rejected": 0.9027649164199829, |
|
"logps/chosen": -618.2164306640625, |
|
"logps/rejected": -1467.2933349609375, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.913681745529175, |
|
"rewards/margins": 9.126543045043945, |
|
"rewards/rejected": -12.040224075317383, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8413886997957795, |
|
"grad_norm": 4.180426497638915, |
|
"learning_rate": 1.4925329529504073e-05, |
|
"logits/chosen": 1.0196577310562134, |
|
"logits/rejected": 1.1263244152069092, |
|
"logps/chosen": -634.84716796875, |
|
"logps/rejected": -1566.780517578125, |
|
"loss": 0.1516, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2844977378845215, |
|
"rewards/margins": 9.819615364074707, |
|
"rewards/rejected": -13.10411262512207, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8441116405718175, |
|
"grad_norm": 7.403131719337164, |
|
"learning_rate": 1.4429441503855722e-05, |
|
"logits/chosen": 0.7300751209259033, |
|
"logits/rejected": 0.9075593948364258, |
|
"logps/chosen": -662.4650268554688, |
|
"logps/rejected": -1523.392333984375, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.188582181930542, |
|
"rewards/margins": 9.216440200805664, |
|
"rewards/rejected": -12.405021667480469, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8468345813478557, |
|
"grad_norm": 5.705211140347224, |
|
"learning_rate": 1.3941289911096566e-05, |
|
"logits/chosen": 0.8525094985961914, |
|
"logits/rejected": 0.8802255392074585, |
|
"logps/chosen": -633.8055419921875, |
|
"logps/rejected": -1591.3004150390625, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3043887615203857, |
|
"rewards/margins": 9.925670623779297, |
|
"rewards/rejected": -13.230059623718262, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.8495575221238938, |
|
"grad_norm": 4.045894996195174, |
|
"learning_rate": 1.3460918885004658e-05, |
|
"logits/chosen": 0.5131864547729492, |
|
"logits/rejected": 0.5491036772727966, |
|
"logps/chosen": -568.0333862304688, |
|
"logps/rejected": -1503.20263671875, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.693911552429199, |
|
"rewards/margins": 9.109321594238281, |
|
"rewards/rejected": -11.80323314666748, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.852280462899932, |
|
"grad_norm": 12.066715885022818, |
|
"learning_rate": 1.2988371855917225e-05, |
|
"logits/chosen": 0.37565484642982483, |
|
"logits/rejected": 0.5007588863372803, |
|
"logps/chosen": -703.4903564453125, |
|
"logps/rejected": -1430.76171875, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.1784768104553223, |
|
"rewards/margins": 7.968869686126709, |
|
"rewards/rejected": -11.147346496582031, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.85500340367597, |
|
"grad_norm": 10.07953482925563, |
|
"learning_rate": 1.2523691546803873e-05, |
|
"logits/chosen": 0.49442654848098755, |
|
"logits/rejected": 0.5744796395301819, |
|
"logps/chosen": -650.1203002929688, |
|
"logps/rejected": -1487.5498046875, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.1168293952941895, |
|
"rewards/margins": 8.435214042663574, |
|
"rewards/rejected": -11.552044868469238, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8577263444520081, |
|
"grad_norm": 7.211644812057612, |
|
"learning_rate": 1.206691996940431e-05, |
|
"logits/chosen": 0.4109431207180023, |
|
"logits/rejected": 0.517845630645752, |
|
"logps/chosen": -614.5814208984375, |
|
"logps/rejected": -1589.5028076171875, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.7576184272766113, |
|
"rewards/margins": 10.276385307312012, |
|
"rewards/rejected": -13.034006118774414, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8604492852280463, |
|
"grad_norm": 8.86779635631434, |
|
"learning_rate": 1.161809842042988e-05, |
|
"logits/chosen": 0.31158551573753357, |
|
"logits/rejected": 0.431427538394928, |
|
"logps/chosen": -564.8810424804688, |
|
"logps/rejected": -1479.360107421875, |
|
"loss": 2.1565, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.4318184852600098, |
|
"rewards/margins": 9.508817672729492, |
|
"rewards/rejected": -11.94063663482666, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8631722260040844, |
|
"grad_norm": 3.5558132087968035, |
|
"learning_rate": 1.1177267477829978e-05, |
|
"logits/chosen": 0.24956552684307098, |
|
"logits/rejected": 0.44387301802635193, |
|
"logps/chosen": -596.3125610351562, |
|
"logps/rejected": -1585.163330078125, |
|
"loss": 0.1356, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.810048818588257, |
|
"rewards/margins": 10.357275009155273, |
|
"rewards/rejected": -13.167322158813477, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8658951667801226, |
|
"grad_norm": 5.862065971308642, |
|
"learning_rate": 1.0744466997123425e-05, |
|
"logits/chosen": 0.3437689542770386, |
|
"logits/rejected": 0.5175299644470215, |
|
"logps/chosen": -621.8297119140625, |
|
"logps/rejected": -1538.585693359375, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.5700974464416504, |
|
"rewards/margins": 9.909372329711914, |
|
"rewards/rejected": -12.479471206665039, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8686181075561606, |
|
"grad_norm": 3.24472342611504, |
|
"learning_rate": 1.0319736107795041e-05, |
|
"logits/chosen": 0.37868422269821167, |
|
"logits/rejected": 0.6068054437637329, |
|
"logps/chosen": -572.27685546875, |
|
"logps/rejected": -1345.9105224609375, |
|
"loss": 0.155, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.400740623474121, |
|
"rewards/margins": 8.348073959350586, |
|
"rewards/rejected": -10.748812675476074, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8713410483321987, |
|
"grad_norm": 2.744692426135948, |
|
"learning_rate": 9.903113209758096e-06, |
|
"logits/chosen": 0.2929421067237854, |
|
"logits/rejected": 0.5335865616798401, |
|
"logps/chosen": -701.8385009765625, |
|
"logps/rejected": -1510.951171875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.2702605724334717, |
|
"rewards/margins": 8.927230834960938, |
|
"rewards/rejected": -12.197491645812988, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8740639891082369, |
|
"grad_norm": 5.900535069753502, |
|
"learning_rate": 9.494635969882426e-06, |
|
"logits/chosen": 0.4422897696495056, |
|
"logits/rejected": 0.5750479102134705, |
|
"logps/chosen": -573.0560302734375, |
|
"logps/rejected": -1374.738037109375, |
|
"loss": 0.1683, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.55328106880188, |
|
"rewards/margins": 8.676356315612793, |
|
"rewards/rejected": -11.229637145996094, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.876786929884275, |
|
"grad_norm": 3.44462979022299, |
|
"learning_rate": 9.094341318589072e-06, |
|
"logits/chosen": 0.42226654291152954, |
|
"logits/rejected": 0.5428343415260315, |
|
"logps/chosen": -581.0510864257812, |
|
"logps/rejected": -1369.337158203125, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.3742847442626953, |
|
"rewards/margins": 8.42089557647705, |
|
"rewards/rejected": -10.79517936706543, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8795098706603132, |
|
"grad_norm": 5.608307171603524, |
|
"learning_rate": 8.702265446511382e-06, |
|
"logits/chosen": 0.3774926960468292, |
|
"logits/rejected": 0.5087335705757141, |
|
"logps/chosen": -514.2108154296875, |
|
"logps/rejected": -1304.4666748046875, |
|
"loss": 0.1778, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.0655770301818848, |
|
"rewards/margins": 8.323145866394043, |
|
"rewards/rejected": -10.38872241973877, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8822328114363512, |
|
"grad_norm": 5.455708732235766, |
|
"learning_rate": 8.31844380122292e-06, |
|
"logits/chosen": 0.47580593824386597, |
|
"logits/rejected": 0.6076167821884155, |
|
"logps/chosen": -556.3704833984375, |
|
"logps/rejected": -1279.912841796875, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.3415322303771973, |
|
"rewards/margins": 7.390820503234863, |
|
"rewards/rejected": -9.732351303100586, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 7.554518467559218, |
|
"learning_rate": 7.942911084032788e-06, |
|
"logits/chosen": 0.4682241976261139, |
|
"logits/rejected": 0.6157165765762329, |
|
"logps/chosen": -655.5394287109375, |
|
"logps/rejected": -1494.0238037109375, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.6178641319274902, |
|
"rewards/margins": 8.754510879516602, |
|
"rewards/rejected": -11.372376441955566, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8876786929884275, |
|
"grad_norm": 5.600989385971434, |
|
"learning_rate": 7.575701246848299e-06, |
|
"logits/chosen": 0.48880425095558167, |
|
"logits/rejected": 0.6227352619171143, |
|
"logps/chosen": -593.4500122070312, |
|
"logps/rejected": -1501.2703857421875, |
|
"loss": 0.1127, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.473632335662842, |
|
"rewards/margins": 9.577296257019043, |
|
"rewards/rejected": -12.050928115844727, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8904016337644656, |
|
"grad_norm": 13.431074173165474, |
|
"learning_rate": 7.216847489105161e-06, |
|
"logits/chosen": 0.5438786149024963, |
|
"logits/rejected": 0.6403484344482422, |
|
"logps/chosen": -599.2393798828125, |
|
"logps/rejected": -1532.2991943359375, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9075636863708496, |
|
"rewards/margins": 9.528191566467285, |
|
"rewards/rejected": -12.43575382232666, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8931245745405038, |
|
"grad_norm": 6.918619567772649, |
|
"learning_rate": 6.866382254766157e-06, |
|
"logits/chosen": 0.619216799736023, |
|
"logits/rejected": 0.8395193815231323, |
|
"logps/chosen": -658.65283203125, |
|
"logps/rejected": -1515.1939697265625, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2254257202148438, |
|
"rewards/margins": 8.87635326385498, |
|
"rewards/rejected": -12.10177993774414, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8958475153165418, |
|
"grad_norm": 6.648028443759932, |
|
"learning_rate": 6.5243372293878e-06, |
|
"logits/chosen": 0.49252304434776306, |
|
"logits/rejected": 0.8068822026252747, |
|
"logps/chosen": -607.2750854492188, |
|
"logps/rejected": -1523.320556640625, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.453411817550659, |
|
"rewards/margins": 9.811650276184082, |
|
"rewards/rejected": -12.265061378479004, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.89857045609258, |
|
"grad_norm": 2.539210647381528, |
|
"learning_rate": 6.190743337255589e-06, |
|
"logits/chosen": 0.6140366792678833, |
|
"logits/rejected": 0.7768694162368774, |
|
"logps/chosen": -614.919921875, |
|
"logps/rejected": -1382.4276123046875, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.821394205093384, |
|
"rewards/margins": 8.077497482299805, |
|
"rewards/rejected": -10.89889144897461, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9012933968686181, |
|
"grad_norm": 10.794465693165817, |
|
"learning_rate": 5.865630738588268e-06, |
|
"logits/chosen": 0.3767799735069275, |
|
"logits/rejected": 0.5967041850090027, |
|
"logps/chosen": -656.5824584960938, |
|
"logps/rejected": -1527.9437255859375, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.898848056793213, |
|
"rewards/margins": 9.388232231140137, |
|
"rewards/rejected": -12.287079811096191, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.9040163376446563, |
|
"grad_norm": 5.263799813316893, |
|
"learning_rate": 5.5490288268108866e-06, |
|
"logits/chosen": 0.4702087342739105, |
|
"logits/rejected": 0.6225636005401611, |
|
"logps/chosen": -599.8109130859375, |
|
"logps/rejected": -1474.5819091796875, |
|
"loss": 0.1201, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4712748527526855, |
|
"rewards/margins": 9.180947303771973, |
|
"rewards/rejected": -11.652222633361816, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.9067392784206944, |
|
"grad_norm": 6.067392778625339, |
|
"learning_rate": 5.2409662258974925e-06, |
|
"logits/chosen": 0.4685233533382416, |
|
"logits/rejected": 0.7201848030090332, |
|
"logps/chosen": -561.8470458984375, |
|
"logps/rejected": -1426.10009765625, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.4505324363708496, |
|
"rewards/margins": 9.327180862426758, |
|
"rewards/rejected": -11.777711868286133, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.9094622191967324, |
|
"grad_norm": 4.0896710788723745, |
|
"learning_rate": 4.941470787783131e-06, |
|
"logits/chosen": 0.42010945081710815, |
|
"logits/rejected": 0.6404047012329102, |
|
"logps/chosen": -609.3140869140625, |
|
"logps/rejected": -1468.7435302734375, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.7196056842803955, |
|
"rewards/margins": 8.689676284790039, |
|
"rewards/rejected": -11.409282684326172, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.9121851599727706, |
|
"grad_norm": 2.4478379172465448, |
|
"learning_rate": 4.6505695898457655e-06, |
|
"logits/chosen": 0.4856113791465759, |
|
"logits/rejected": 0.6557452082633972, |
|
"logps/chosen": -576.8583984375, |
|
"logps/rejected": -1425.530517578125, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.381040096282959, |
|
"rewards/margins": 8.655874252319336, |
|
"rewards/rejected": -11.036913871765137, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9149081007488087, |
|
"grad_norm": 6.006212378190146, |
|
"learning_rate": 4.368288932458309e-06, |
|
"logits/chosen": 0.4439225196838379, |
|
"logits/rejected": 0.6169265508651733, |
|
"logps/chosen": -598.0299072265625, |
|
"logps/rejected": -1448.4476318359375, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.50980544090271, |
|
"rewards/margins": 8.920835494995117, |
|
"rewards/rejected": -11.430641174316406, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9176310415248469, |
|
"grad_norm": 2.801540016758875, |
|
"learning_rate": 4.09465433661067e-06, |
|
"logits/chosen": 0.417350709438324, |
|
"logits/rejected": 0.6509106755256653, |
|
"logps/chosen": -574.1402587890625, |
|
"logps/rejected": -1419.3201904296875, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.46280837059021, |
|
"rewards/margins": 9.459114074707031, |
|
"rewards/rejected": -11.92192268371582, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9203539823008849, |
|
"grad_norm": 6.571082473335941, |
|
"learning_rate": 3.829690541602504e-06, |
|
"logits/chosen": 0.3773556351661682, |
|
"logits/rejected": 0.6057129502296448, |
|
"logps/chosen": -554.6580200195312, |
|
"logps/rejected": -1534.7188720703125, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.384308338165283, |
|
"rewards/margins": 10.432950973510742, |
|
"rewards/rejected": -12.817258834838867, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 10.724095904793385, |
|
"learning_rate": 3.573421502806462e-06, |
|
"logits/chosen": 0.43141230940818787, |
|
"logits/rejected": 0.5827267169952393, |
|
"logps/chosen": -629.783935546875, |
|
"logps/rejected": -1560.9981689453125, |
|
"loss": 0.1687, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6314046382904053, |
|
"rewards/margins": 9.693445205688477, |
|
"rewards/rejected": -12.324849128723145, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.9257998638529612, |
|
"grad_norm": 3.3756489869976214, |
|
"learning_rate": 3.325870389502439e-06, |
|
"logits/chosen": 0.4125029444694519, |
|
"logits/rejected": 0.6377438902854919, |
|
"logps/chosen": -630.9293823242188, |
|
"logps/rejected": -1469.781005859375, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.6700332164764404, |
|
"rewards/margins": 9.24914836883545, |
|
"rewards/rejected": -11.919180870056152, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9285228046289993, |
|
"grad_norm": 7.788178458703957, |
|
"learning_rate": 3.0870595827828365e-06, |
|
"logits/chosen": 0.3888288140296936, |
|
"logits/rejected": 0.6043727993965149, |
|
"logps/chosen": -689.7025146484375, |
|
"logps/rejected": -1592.350341796875, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.963463068008423, |
|
"rewards/margins": 9.724987983703613, |
|
"rewards/rejected": -12.688450813293457, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9312457454050375, |
|
"grad_norm": 7.147218526064257, |
|
"learning_rate": 2.857010673529015e-06, |
|
"logits/chosen": 0.44625091552734375, |
|
"logits/rejected": 0.6315649747848511, |
|
"logps/chosen": -627.4977416992188, |
|
"logps/rejected": -1486.74169921875, |
|
"loss": 0.1246, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.8249270915985107, |
|
"rewards/margins": 8.886324882507324, |
|
"rewards/rejected": -11.711252212524414, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9339686861810755, |
|
"grad_norm": 9.153788579430927, |
|
"learning_rate": 2.6357444604593662e-06, |
|
"logits/chosen": 0.34807997941970825, |
|
"logits/rejected": 0.5339373350143433, |
|
"logps/chosen": -595.520751953125, |
|
"logps/rejected": -1477.987548828125, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.6145668029785156, |
|
"rewards/margins": 9.307913780212402, |
|
"rewards/rejected": -11.922479629516602, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9366916269571137, |
|
"grad_norm": 7.602358676825036, |
|
"learning_rate": 2.4232809482488406e-06, |
|
"logits/chosen": 0.23554039001464844, |
|
"logits/rejected": 0.47686901688575745, |
|
"logps/chosen": -558.9524536132812, |
|
"logps/rejected": -1546.791259765625, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.5332367420196533, |
|
"rewards/margins": 10.079355239868164, |
|
"rewards/rejected": -12.612590789794922, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9394145677331518, |
|
"grad_norm": 16.49701246380974, |
|
"learning_rate": 2.219639345720359e-06, |
|
"logits/chosen": 0.35037535429000854, |
|
"logits/rejected": 0.5514084100723267, |
|
"logps/chosen": -589.4315185546875, |
|
"logps/rejected": -1492.39892578125, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.627023220062256, |
|
"rewards/margins": 9.265153884887695, |
|
"rewards/rejected": -11.892175674438477, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.94213750850919, |
|
"grad_norm": 6.307435314088876, |
|
"learning_rate": 2.02483806410807e-06, |
|
"logits/chosen": 0.451333612203598, |
|
"logits/rejected": 0.5924814343452454, |
|
"logps/chosen": -598.7140502929688, |
|
"logps/rejected": -1415.045654296875, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.440220355987549, |
|
"rewards/margins": 8.552030563354492, |
|
"rewards/rejected": -10.992252349853516, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.944860449285228, |
|
"grad_norm": 6.338529057616752, |
|
"learning_rate": 1.8388947153929027e-06, |
|
"logits/chosen": 0.36880573630332947, |
|
"logits/rejected": 0.5762465596199036, |
|
"logps/chosen": -576.0185546875, |
|
"logps/rejected": -1487.611572265625, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.8171238899230957, |
|
"rewards/margins": 9.56772518157959, |
|
"rewards/rejected": -12.384849548339844, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9475833900612661, |
|
"grad_norm": 8.68745236022594, |
|
"learning_rate": 1.661826110710163e-06, |
|
"logits/chosen": 0.39139264822006226, |
|
"logits/rejected": 0.6284823417663574, |
|
"logps/chosen": -512.65380859375, |
|
"logps/rejected": -1482.1314697265625, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.0515763759613037, |
|
"rewards/margins": 9.894174575805664, |
|
"rewards/rejected": -11.94575023651123, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9503063308373043, |
|
"grad_norm": 8.323294909328977, |
|
"learning_rate": 1.493648258829694e-06, |
|
"logits/chosen": 0.37607231736183167, |
|
"logits/rejected": 0.4850333333015442, |
|
"logps/chosen": -595.7239990234375, |
|
"logps/rejected": -1427.0201416015625, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6649580001831055, |
|
"rewards/margins": 8.330266952514648, |
|
"rewards/rejected": -10.99522590637207, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9530292716133424, |
|
"grad_norm": 4.716570485344555, |
|
"learning_rate": 1.3343763647085339e-06, |
|
"logits/chosen": 0.33481714129447937, |
|
"logits/rejected": 0.5425733327865601, |
|
"logps/chosen": -607.2645263671875, |
|
"logps/rejected": -1544.1453857421875, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5669472217559814, |
|
"rewards/margins": 9.806219100952148, |
|
"rewards/rejected": -12.373165130615234, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9530292716133424, |
|
"eval_logits/chosen": 0.30259910225868225, |
|
"eval_logits/rejected": 0.5002096891403198, |
|
"eval_logps/chosen": -609.8344116210938, |
|
"eval_logps/rejected": -1531.6849365234375, |
|
"eval_loss": 0.13275307416915894, |
|
"eval_rewards/accuracies": 0.9382640719413757, |
|
"eval_rewards/chosen": -2.7013959884643555, |
|
"eval_rewards/margins": 9.69622802734375, |
|
"eval_rewards/rejected": -12.397622108459473, |
|
"eval_runtime": 3747.3913, |
|
"eval_samples_per_second": 1.308, |
|
"eval_steps_per_second": 0.109, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9557522123893806, |
|
"grad_norm": 10.506491372972281, |
|
"learning_rate": 1.1840248281162037e-06, |
|
"logits/chosen": 0.27225154638290405, |
|
"logits/rejected": 0.4373188018798828, |
|
"logps/chosen": -638.9410400390625, |
|
"logps/rejected": -1560.869873046875, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.927722454071045, |
|
"rewards/margins": 9.307165145874023, |
|
"rewards/rejected": -12.234888076782227, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9584751531654186, |
|
"grad_norm": 8.343867171985208, |
|
"learning_rate": 1.0426072423328382e-06, |
|
"logits/chosen": 0.341633141040802, |
|
"logits/rejected": 0.48726290464401245, |
|
"logps/chosen": -583.33837890625, |
|
"logps/rejected": -1415.044677734375, |
|
"loss": 0.1578, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.5397531986236572, |
|
"rewards/margins": 8.36491870880127, |
|
"rewards/rejected": -10.904672622680664, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9611980939414567, |
|
"grad_norm": 11.471597562026771, |
|
"learning_rate": 9.101363929201911e-07, |
|
"logits/chosen": 0.24860472977161407, |
|
"logits/rejected": 0.4540013372898102, |
|
"logps/chosen": -581.4928588867188, |
|
"logps/rejected": -1512.778076171875, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.657729148864746, |
|
"rewards/margins": 9.633256912231445, |
|
"rewards/rejected": -12.290987014770508, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9639210347174949, |
|
"grad_norm": 4.763516291437868, |
|
"learning_rate": 7.8662425656576e-07, |
|
"logits/chosen": 0.25861018896102905, |
|
"logits/rejected": 0.4950624406337738, |
|
"logps/chosen": -541.2784423828125, |
|
"logps/rejected": -1484.020263671875, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7152905464172363, |
|
"rewards/margins": 9.658266067504883, |
|
"rewards/rejected": -12.373555183410645, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.966643975493533, |
|
"grad_norm": 9.11040786986144, |
|
"learning_rate": 6.720819999999073e-07, |
|
"logits/chosen": 0.3221897482872009, |
|
"logits/rejected": 0.49063223600387573, |
|
"logps/chosen": -623.95703125, |
|
"logps/rejected": -1448.3956298828125, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.757344961166382, |
|
"rewards/margins": 8.484246253967285, |
|
"rewards/rejected": -11.241591453552246, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9693669162695712, |
|
"grad_norm": 3.065251251534584, |
|
"learning_rate": 5.665199789862907e-07, |
|
"logits/chosen": 0.39759618043899536, |
|
"logits/rejected": 0.5658711791038513, |
|
"logps/chosen": -583.7330932617188, |
|
"logps/rejected": -1471.863037109375, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.722357988357544, |
|
"rewards/margins": 8.768855094909668, |
|
"rewards/rejected": -11.491212844848633, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9720898570456092, |
|
"grad_norm": 5.12401841638688, |
|
"learning_rate": 4.6994773738563424e-07, |
|
"logits/chosen": 0.3610488176345825, |
|
"logits/rejected": 0.6028701663017273, |
|
"logps/chosen": -619.63916015625, |
|
"logps/rejected": -1411.327392578125, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.48909068107605, |
|
"rewards/margins": 8.581472396850586, |
|
"rewards/rejected": -11.070563316345215, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9748127978216474, |
|
"grad_norm": 8.810544444486831, |
|
"learning_rate": 3.823740062928072e-07, |
|
"logits/chosen": 0.4455558657646179, |
|
"logits/rejected": 0.6806224584579468, |
|
"logps/chosen": -567.6795654296875, |
|
"logps/rejected": -1397.3406982421875, |
|
"loss": 0.1164, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.5231568813323975, |
|
"rewards/margins": 8.459074020385742, |
|
"rewards/rejected": -10.982232093811035, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9775357385976855, |
|
"grad_norm": 6.610645146397345, |
|
"learning_rate": 3.0380670324752227e-07, |
|
"logits/chosen": 0.3287045657634735, |
|
"logits/rejected": 0.5522847771644592, |
|
"logps/chosen": -532.489501953125, |
|
"logps/rejected": -1528.7987060546875, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.387678623199463, |
|
"rewards/margins": 10.268462181091309, |
|
"rewards/rejected": -12.65614128112793, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.9802586793737236, |
|
"grad_norm": 9.186519711207744, |
|
"learning_rate": 2.3425293151845273e-07, |
|
"logits/chosen": 0.2782616913318634, |
|
"logits/rejected": 0.4124705195426941, |
|
"logps/chosen": -625.5531005859375, |
|
"logps/rejected": -1455.448974609375, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.585552453994751, |
|
"rewards/margins": 9.286867141723633, |
|
"rewards/rejected": -11.872419357299805, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9829816201497618, |
|
"grad_norm": 6.4113152609936455, |
|
"learning_rate": 1.73718979461035e-07, |
|
"logits/chosen": 0.335416316986084, |
|
"logits/rejected": 0.5422466397285461, |
|
"logps/chosen": -594.4679565429688, |
|
"logps/rejected": -1484.909423828125, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.729745864868164, |
|
"rewards/margins": 9.36650562286377, |
|
"rewards/rejected": -12.096250534057617, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9857045609257998, |
|
"grad_norm": 10.555918036454049, |
|
"learning_rate": 1.222103199489455e-07, |
|
"logits/chosen": 0.3650972247123718, |
|
"logits/rejected": 0.5400117635726929, |
|
"logps/chosen": -631.2184448242188, |
|
"logps/rejected": -1561.655029296875, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.8069605827331543, |
|
"rewards/margins": 9.688605308532715, |
|
"rewards/rejected": -12.495567321777344, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.988427501701838, |
|
"grad_norm": 9.85027247573772, |
|
"learning_rate": 7.973160987931883e-08, |
|
"logits/chosen": 0.3716769516468048, |
|
"logits/rejected": 0.5796463489532471, |
|
"logps/chosen": -593.5032958984375, |
|
"logps/rejected": -1529.673095703125, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.664257049560547, |
|
"rewards/margins": 9.706274032592773, |
|
"rewards/rejected": -12.370530128479004, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9911504424778761, |
|
"grad_norm": 7.384129391564848, |
|
"learning_rate": 4.6286689751662285e-08, |
|
"logits/chosen": 0.2735041081905365, |
|
"logits/rejected": 0.4770600199699402, |
|
"logps/chosen": -557.375, |
|
"logps/rejected": -1553.71142578125, |
|
"loss": 1.5963, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.3559460639953613, |
|
"rewards/margins": 10.080205917358398, |
|
"rewards/rejected": -12.436152458190918, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9938733832539143, |
|
"grad_norm": 2.0640752724213027, |
|
"learning_rate": 2.1878583320722457e-08, |
|
"logits/chosen": 0.3391203284263611, |
|
"logits/rejected": 0.5715040564537048, |
|
"logps/chosen": -543.0274658203125, |
|
"logps/rejected": -1483.306640625, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.347174644470215, |
|
"rewards/margins": 9.779848098754883, |
|
"rewards/rejected": -12.127023696899414, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9965963240299524, |
|
"grad_norm": 16.99590231865479, |
|
"learning_rate": 6.50949732301509e-09, |
|
"logits/chosen": 0.3131711483001709, |
|
"logits/rejected": 0.47414079308509827, |
|
"logps/chosen": -641.8659057617188, |
|
"logps/rejected": -1475.5911865234375, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.771031141281128, |
|
"rewards/margins": 8.959924697875977, |
|
"rewards/rejected": -11.730956077575684, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9993192648059904, |
|
"grad_norm": 4.2674518887673685, |
|
"learning_rate": 1.8082127736240851e-10, |
|
"logits/chosen": 0.28179001808166504, |
|
"logits/rejected": 0.5392038226127625, |
|
"logps/chosen": -625.225830078125, |
|
"logps/rejected": -1491.476806640625, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.782802104949951, |
|
"rewards/margins": 9.378962516784668, |
|
"rewards/rejected": -12.161764144897461, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9998638529611981, |
|
"step": 3672, |
|
"total_flos": 0.0, |
|
"train_loss": 0.32278433931516665, |
|
"train_runtime": 141933.118, |
|
"train_samples_per_second": 0.621, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3672, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |