|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.982222222222222, |
|
"eval_steps": 1, |
|
"global_step": 336, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.011851851851851851, |
|
"grad_norm": 44.10000740195015, |
|
"learning_rate": 1.4705882352941176e-08, |
|
"logits/chosen": -1.1635093688964844, |
|
"logits/rejected": -0.9440154433250427, |
|
"logps/chosen": -26.389511108398438, |
|
"logps/rejected": -42.156002044677734, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.023703703703703703, |
|
"grad_norm": 45.622821831639094, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": -0.8899029493331909, |
|
"logits/rejected": -0.9265471696853638, |
|
"logps/chosen": -24.45637321472168, |
|
"logps/rejected": -38.72291564941406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.035555555555555556, |
|
"grad_norm": 41.287867804704256, |
|
"learning_rate": 4.411764705882353e-08, |
|
"logits/chosen": -0.9218576550483704, |
|
"logits/rejected": -0.8510868549346924, |
|
"logps/chosen": -23.573394775390625, |
|
"logps/rejected": -31.830120086669922, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05094228684902191, |
|
"rewards/margins": 0.055795177817344666, |
|
"rewards/rejected": -0.004852890968322754, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 41.148615147033524, |
|
"learning_rate": 5.88235294117647e-08, |
|
"logits/chosen": -0.8889421820640564, |
|
"logits/rejected": -0.7832293510437012, |
|
"logps/chosen": -27.102622985839844, |
|
"logps/rejected": -32.83424377441406, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005930736660957336, |
|
"rewards/margins": 0.013045087456703186, |
|
"rewards/rejected": -0.00711435079574585, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.05925925925925926, |
|
"grad_norm": 41.57192528486562, |
|
"learning_rate": 7.352941176470588e-08, |
|
"logits/chosen": -0.8269144296646118, |
|
"logits/rejected": -0.8342342376708984, |
|
"logps/chosen": -26.83285903930664, |
|
"logps/rejected": -33.845359802246094, |
|
"loss": 0.7004, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.002873659133911133, |
|
"rewards/margins": 0.0599842369556427, |
|
"rewards/rejected": -0.05711057782173157, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07111111111111111, |
|
"grad_norm": 42.964145384550164, |
|
"learning_rate": 8.823529411764706e-08, |
|
"logits/chosen": -0.9288309216499329, |
|
"logits/rejected": -0.9066528677940369, |
|
"logps/chosen": -31.687969207763672, |
|
"logps/rejected": -35.163841247558594, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005395621061325073, |
|
"rewards/margins": -0.016778230667114258, |
|
"rewards/rejected": 0.02217385172843933, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.08296296296296296, |
|
"grad_norm": 38.3846396537961, |
|
"learning_rate": 1.0294117647058822e-07, |
|
"logits/chosen": -0.9132620096206665, |
|
"logits/rejected": -0.7912867665290833, |
|
"logps/chosen": -24.47614860534668, |
|
"logps/rejected": -32.74094009399414, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02499394118785858, |
|
"rewards/margins": 0.010348424315452576, |
|
"rewards/rejected": 0.014645516872406006, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 43.34894792705672, |
|
"learning_rate": 1.176470588235294e-07, |
|
"logits/chosen": -0.8170281648635864, |
|
"logits/rejected": -0.8093118667602539, |
|
"logps/chosen": -21.367229461669922, |
|
"logps/rejected": -30.556249618530273, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021743685007095337, |
|
"rewards/margins": 0.05349762737751007, |
|
"rewards/rejected": -0.031753942370414734, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 44.768590418142296, |
|
"learning_rate": 1.3235294117647057e-07, |
|
"logits/chosen": -0.8906874656677246, |
|
"logits/rejected": -0.8358623385429382, |
|
"logps/chosen": -27.88587760925293, |
|
"logps/rejected": -30.677749633789062, |
|
"loss": 0.7014, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00475698709487915, |
|
"rewards/margins": 0.035931557416915894, |
|
"rewards/rejected": -0.031174570322036743, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.11851851851851852, |
|
"grad_norm": 41.386325746824284, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -1.0302842855453491, |
|
"logits/rejected": -0.8634576201438904, |
|
"logps/chosen": -28.216838836669922, |
|
"logps/rejected": -38.4200553894043, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02494041621685028, |
|
"rewards/margins": 0.07226283848285675, |
|
"rewards/rejected": -0.04732242226600647, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13037037037037036, |
|
"grad_norm": 42.87170433913047, |
|
"learning_rate": 1.6176470588235293e-07, |
|
"logits/chosen": -0.8272039890289307, |
|
"logits/rejected": -0.8201614618301392, |
|
"logps/chosen": -24.542991638183594, |
|
"logps/rejected": -33.56885528564453, |
|
"loss": 0.7016, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.04099439084529877, |
|
"rewards/margins": 0.02981768548488617, |
|
"rewards/rejected": 0.011176705360412598, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 41.54515829050869, |
|
"learning_rate": 1.764705882352941e-07, |
|
"logits/chosen": -0.8868040442466736, |
|
"logits/rejected": -0.8360949158668518, |
|
"logps/chosen": -29.391693115234375, |
|
"logps/rejected": -39.35624694824219, |
|
"loss": 0.6989, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.020398467779159546, |
|
"rewards/margins": 0.034372299909591675, |
|
"rewards/rejected": -0.013973832130432129, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.15407407407407409, |
|
"grad_norm": 40.960317074043914, |
|
"learning_rate": 1.9117647058823527e-07, |
|
"logits/chosen": -0.9931007623672485, |
|
"logits/rejected": -0.9051375985145569, |
|
"logps/chosen": -21.935997009277344, |
|
"logps/rejected": -29.908475875854492, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.006315797567367554, |
|
"rewards/margins": 0.032275840640068054, |
|
"rewards/rejected": -0.03859163820743561, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.16592592592592592, |
|
"grad_norm": 46.37678646749312, |
|
"learning_rate": 2.0588235294117645e-07, |
|
"logits/chosen": -0.736880898475647, |
|
"logits/rejected": -0.6582351326942444, |
|
"logps/chosen": -28.070615768432617, |
|
"logps/rejected": -37.080623626708984, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0168197900056839, |
|
"rewards/margins": 0.020758137106895447, |
|
"rewards/rejected": -0.037577927112579346, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 38.418772787456916, |
|
"learning_rate": 2.2058823529411763e-07, |
|
"logits/chosen": -0.8958194851875305, |
|
"logits/rejected": -0.8823959827423096, |
|
"logps/chosen": -24.240140914916992, |
|
"logps/rejected": -36.511985778808594, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.010584741830825806, |
|
"rewards/margins": 0.013375014066696167, |
|
"rewards/rejected": -0.023959755897521973, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 39.784578323473944, |
|
"learning_rate": 2.352941176470588e-07, |
|
"logits/chosen": -1.044739007949829, |
|
"logits/rejected": -0.9721382260322571, |
|
"logps/chosen": -24.203937530517578, |
|
"logps/rejected": -38.13182830810547, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.008091084659099579, |
|
"rewards/margins": 0.03979543596506119, |
|
"rewards/rejected": -0.04788652062416077, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.20148148148148148, |
|
"grad_norm": 38.68619452262893, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.9131325483322144, |
|
"logits/rejected": -0.9099739193916321, |
|
"logps/chosen": -23.27505874633789, |
|
"logps/rejected": -25.550016403198242, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04493655264377594, |
|
"rewards/margins": 0.11525127291679382, |
|
"rewards/rejected": -0.07031472027301788, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 38.89353521239618, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"logits/chosen": -1.1501476764678955, |
|
"logits/rejected": -1.0104213953018188, |
|
"logps/chosen": -28.398540496826172, |
|
"logps/rejected": -40.202754974365234, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0002931952476501465, |
|
"rewards/margins": 0.1416773796081543, |
|
"rewards/rejected": -0.14197057485580444, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.22518518518518518, |
|
"grad_norm": 39.73809119940035, |
|
"learning_rate": 2.7941176470588235e-07, |
|
"logits/chosen": -0.6393623948097229, |
|
"logits/rejected": -0.5715636014938354, |
|
"logps/chosen": -23.02471160888672, |
|
"logps/rejected": -29.500215530395508, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.012419655919075012, |
|
"rewards/margins": 0.04500822722911835, |
|
"rewards/rejected": -0.032588571310043335, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 36.24445457135461, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.090634822845459, |
|
"logits/rejected": -1.0109808444976807, |
|
"logps/chosen": -22.518497467041016, |
|
"logps/rejected": -28.288860321044922, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.004169940948486328, |
|
"rewards/margins": 0.08663815259933472, |
|
"rewards/rejected": -0.08246821165084839, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24888888888888888, |
|
"grad_norm": 37.48843626542997, |
|
"learning_rate": 3.088235294117647e-07, |
|
"logits/chosen": -0.9397974610328674, |
|
"logits/rejected": -0.8281663060188293, |
|
"logps/chosen": -29.923145294189453, |
|
"logps/rejected": -37.80279541015625, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.010303795337677002, |
|
"rewards/margins": 0.1979476809501648, |
|
"rewards/rejected": -0.2082514762878418, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.2607407407407407, |
|
"grad_norm": 37.17562909629751, |
|
"learning_rate": 3.2352941176470586e-07, |
|
"logits/chosen": -0.8852977752685547, |
|
"logits/rejected": -0.8319816589355469, |
|
"logps/chosen": -23.00829315185547, |
|
"logps/rejected": -28.55397605895996, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.001584082841873169, |
|
"rewards/margins": 0.16339415311813354, |
|
"rewards/rejected": -0.1649782359600067, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2725925925925926, |
|
"grad_norm": 35.98779991504583, |
|
"learning_rate": 3.3823529411764707e-07, |
|
"logits/chosen": -0.7651995420455933, |
|
"logits/rejected": -0.7312899827957153, |
|
"logps/chosen": -31.04439926147461, |
|
"logps/rejected": -37.98454284667969, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.023012787103652954, |
|
"rewards/margins": 0.09663936495780945, |
|
"rewards/rejected": -0.1196521520614624, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 36.65037386431617, |
|
"learning_rate": 3.529411764705882e-07, |
|
"logits/chosen": -0.9652918577194214, |
|
"logits/rejected": -0.9185481071472168, |
|
"logps/chosen": -30.223522186279297, |
|
"logps/rejected": -34.86516189575195, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.025013744831085205, |
|
"rewards/margins": 0.17298102378845215, |
|
"rewards/rejected": -0.19799476861953735, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 36.325213836190166, |
|
"learning_rate": 3.6764705882352943e-07, |
|
"logits/chosen": -0.8377700448036194, |
|
"logits/rejected": -0.7563367486000061, |
|
"logps/chosen": -19.788166046142578, |
|
"logps/rejected": -32.94764709472656, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.00691574439406395, |
|
"rewards/margins": 0.249167799949646, |
|
"rewards/rejected": -0.25608354806900024, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.30814814814814817, |
|
"grad_norm": 33.21927808914221, |
|
"learning_rate": 3.8235294117647053e-07, |
|
"logits/chosen": -0.9247075319290161, |
|
"logits/rejected": -0.9600427746772766, |
|
"logps/chosen": -22.75655746459961, |
|
"logps/rejected": -33.42902374267578, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.012206077575683594, |
|
"rewards/margins": 0.18068939447402954, |
|
"rewards/rejected": -0.16848331689834595, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 33.919179281256405, |
|
"learning_rate": 3.9705882352941174e-07, |
|
"logits/chosen": -1.0090656280517578, |
|
"logits/rejected": -0.8680551052093506, |
|
"logps/chosen": -27.313983917236328, |
|
"logps/rejected": -32.803958892822266, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006254285573959351, |
|
"rewards/margins": 0.3513309061527252, |
|
"rewards/rejected": -0.34507662057876587, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 32.88800631538997, |
|
"learning_rate": 4.117647058823529e-07, |
|
"logits/chosen": -0.7507399320602417, |
|
"logits/rejected": -0.6654347777366638, |
|
"logps/chosen": -33.17474365234375, |
|
"logps/rejected": -37.52992248535156, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.018849045038223267, |
|
"rewards/margins": 0.5149893164634705, |
|
"rewards/rejected": -0.5338383913040161, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3437037037037037, |
|
"grad_norm": 32.187131672205425, |
|
"learning_rate": 4.264705882352941e-07, |
|
"logits/chosen": -0.9114011526107788, |
|
"logits/rejected": -0.7332407236099243, |
|
"logps/chosen": -27.552963256835938, |
|
"logps/rejected": -33.381103515625, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.034624576568603516, |
|
"rewards/margins": 0.2657691240310669, |
|
"rewards/rejected": -0.3003937005996704, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 31.43465207056781, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -1.080330491065979, |
|
"logits/rejected": -1.018049716949463, |
|
"logps/chosen": -24.93523406982422, |
|
"logps/rejected": -33.0054817199707, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.08209644258022308, |
|
"rewards/margins": 0.2938240170478821, |
|
"rewards/rejected": -0.37592047452926636, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3674074074074074, |
|
"grad_norm": 29.8319071034986, |
|
"learning_rate": 4.5588235294117646e-07, |
|
"logits/chosen": -0.7354201078414917, |
|
"logits/rejected": -0.5976296663284302, |
|
"logps/chosen": -20.997676849365234, |
|
"logps/rejected": -32.08062744140625, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.06063076853752136, |
|
"rewards/margins": 0.5570548176765442, |
|
"rewards/rejected": -0.6176855564117432, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 32.37223816472854, |
|
"learning_rate": 4.705882352941176e-07, |
|
"logits/chosen": -0.9014286398887634, |
|
"logits/rejected": -0.868757963180542, |
|
"logps/chosen": -23.115407943725586, |
|
"logps/rejected": -39.159507751464844, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0811956524848938, |
|
"rewards/margins": 0.4106258153915405, |
|
"rewards/rejected": -0.4918214678764343, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.39111111111111113, |
|
"grad_norm": 30.829704180417103, |
|
"learning_rate": 4.852941176470588e-07, |
|
"logits/chosen": -0.8415942788124084, |
|
"logits/rejected": -0.826940655708313, |
|
"logps/chosen": -25.28696060180664, |
|
"logps/rejected": -36.247039794921875, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11577820032835007, |
|
"rewards/margins": 0.5010173916816711, |
|
"rewards/rejected": -0.616795539855957, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.40296296296296297, |
|
"grad_norm": 34.240433832755805, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.071217656135559, |
|
"logits/rejected": -0.8587817549705505, |
|
"logps/chosen": -23.079936981201172, |
|
"logps/rejected": -32.364227294921875, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06819352507591248, |
|
"rewards/margins": 0.4207611680030823, |
|
"rewards/rejected": -0.48895469307899475, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4148148148148148, |
|
"grad_norm": 31.79158261280939, |
|
"learning_rate": 4.999864732969518e-07, |
|
"logits/chosen": -1.041569471359253, |
|
"logits/rejected": -0.9538137912750244, |
|
"logps/chosen": -29.438274383544922, |
|
"logps/rejected": -35.4671745300293, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10224419832229614, |
|
"rewards/margins": 0.5241090059280396, |
|
"rewards/rejected": -0.6263532042503357, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 28.443303374361268, |
|
"learning_rate": 4.999458946515807e-07, |
|
"logits/chosen": -1.1223492622375488, |
|
"logits/rejected": -1.040766954421997, |
|
"logps/chosen": -32.29949951171875, |
|
"logps/rejected": -41.46755599975586, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0577593594789505, |
|
"rewards/margins": 0.6482563018798828, |
|
"rewards/rejected": -0.7060155868530273, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.43851851851851853, |
|
"grad_norm": 30.648927045340578, |
|
"learning_rate": 4.998782684550491e-07, |
|
"logits/chosen": -0.9065847992897034, |
|
"logits/rejected": -0.8718705177307129, |
|
"logps/chosen": -21.124893188476562, |
|
"logps/rejected": -39.29669952392578, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.09413473308086395, |
|
"rewards/margins": 0.6028537154197693, |
|
"rewards/rejected": -0.6969884634017944, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.45037037037037037, |
|
"grad_norm": 29.437195830990852, |
|
"learning_rate": 4.997836020254328e-07, |
|
"logits/chosen": -0.9325073957443237, |
|
"logits/rejected": -0.8846120238304138, |
|
"logps/chosen": -27.168790817260742, |
|
"logps/rejected": -36.877262115478516, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15598426759243011, |
|
"rewards/margins": 0.6510501503944397, |
|
"rewards/rejected": -0.807034432888031, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.4622222222222222, |
|
"grad_norm": 28.44428517855095, |
|
"learning_rate": 4.996619056069291e-07, |
|
"logits/chosen": -0.8960347771644592, |
|
"logits/rejected": -0.8378150463104248, |
|
"logps/chosen": -28.43727684020996, |
|
"logps/rejected": -40.62827682495117, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.17505469918251038, |
|
"rewards/margins": 0.8592283725738525, |
|
"rewards/rejected": -1.0342830419540405, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 29.94537092561941, |
|
"learning_rate": 4.995131923687487e-07, |
|
"logits/chosen": -0.9718501567840576, |
|
"logits/rejected": -0.8560028076171875, |
|
"logps/chosen": -29.755184173583984, |
|
"logps/rejected": -37.2801399230957, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.11063119769096375, |
|
"rewards/margins": 0.7615076899528503, |
|
"rewards/rejected": -0.8721388578414917, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48592592592592593, |
|
"grad_norm": 26.638351450808948, |
|
"learning_rate": 4.993374784036901e-07, |
|
"logits/chosen": -1.006788969039917, |
|
"logits/rejected": -0.8062241077423096, |
|
"logps/chosen": -27.824739456176758, |
|
"logps/rejected": -37.465415954589844, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16224287450313568, |
|
"rewards/margins": 0.9281247854232788, |
|
"rewards/rejected": -1.0903676748275757, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.49777777777777776, |
|
"grad_norm": 29.703403664234436, |
|
"learning_rate": 4.991347827263982e-07, |
|
"logits/chosen": -1.0439155101776123, |
|
"logits/rejected": -0.8992699384689331, |
|
"logps/chosen": -28.927303314208984, |
|
"logps/rejected": -42.13187026977539, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13541710376739502, |
|
"rewards/margins": 0.8769669532775879, |
|
"rewards/rejected": -1.0123839378356934, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5096296296296297, |
|
"grad_norm": 25.194805243065485, |
|
"learning_rate": 4.989051272713069e-07, |
|
"logits/chosen": -0.9479715824127197, |
|
"logits/rejected": -0.808491051197052, |
|
"logps/chosen": -30.748804092407227, |
|
"logps/rejected": -48.32786178588867, |
|
"loss": 0.4055, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0868428647518158, |
|
"rewards/margins": 1.7449877262115479, |
|
"rewards/rejected": -1.8318307399749756, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 28.50704779191256, |
|
"learning_rate": 4.986485368902656e-07, |
|
"logits/chosen": -1.003732681274414, |
|
"logits/rejected": -0.9534778594970703, |
|
"logps/chosen": -25.17104148864746, |
|
"logps/rejected": -36.80795669555664, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15720072388648987, |
|
"rewards/margins": 0.7120774984359741, |
|
"rewards/rejected": -0.8692781925201416, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 26.654378912528262, |
|
"learning_rate": 4.983650393498489e-07, |
|
"logits/chosen": -0.9796334505081177, |
|
"logits/rejected": -0.8810800313949585, |
|
"logps/chosen": -34.67963790893555, |
|
"logps/rejected": -37.48582077026367, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22126227617263794, |
|
"rewards/margins": 1.05548095703125, |
|
"rewards/rejected": -1.2767431735992432, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5451851851851852, |
|
"grad_norm": 25.91641243212481, |
|
"learning_rate": 4.980546653283537e-07, |
|
"logits/chosen": -1.1144230365753174, |
|
"logits/rejected": -0.9187833666801453, |
|
"logps/chosen": -27.469764709472656, |
|
"logps/rejected": -42.77268981933594, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20582953095436096, |
|
"rewards/margins": 1.8931379318237305, |
|
"rewards/rejected": -2.0989675521850586, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.557037037037037, |
|
"grad_norm": 27.616713081396448, |
|
"learning_rate": 4.977174484124775e-07, |
|
"logits/chosen": -0.9438971877098083, |
|
"logits/rejected": -0.9460131525993347, |
|
"logps/chosen": -28.729183197021484, |
|
"logps/rejected": -30.642105102539062, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13018304109573364, |
|
"rewards/margins": 0.8073292970657349, |
|
"rewards/rejected": -0.9375122785568237, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 26.228638287015333, |
|
"learning_rate": 4.97353425093685e-07, |
|
"logits/chosen": -1.2007321119308472, |
|
"logits/rejected": -1.0530825853347778, |
|
"logps/chosen": -25.535133361816406, |
|
"logps/rejected": -35.96273422241211, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.14477074146270752, |
|
"rewards/margins": 1.4705314636230469, |
|
"rewards/rejected": -1.615302324295044, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.5807407407407408, |
|
"grad_norm": 27.447706308710917, |
|
"learning_rate": 4.96962634764259e-07, |
|
"logits/chosen": -1.0324229001998901, |
|
"logits/rejected": -1.000633955001831, |
|
"logps/chosen": -31.232351303100586, |
|
"logps/rejected": -40.054874420166016, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.37540578842163086, |
|
"rewards/margins": 0.9162301421165466, |
|
"rewards/rejected": -1.2916358709335327, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 27.537626334544292, |
|
"learning_rate": 4.965451197130372e-07, |
|
"logits/chosen": -1.0934017896652222, |
|
"logits/rejected": -0.9698958396911621, |
|
"logps/chosen": -25.604278564453125, |
|
"logps/rejected": -41.89402770996094, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11558225750923157, |
|
"rewards/margins": 1.3389551639556885, |
|
"rewards/rejected": -1.4545375108718872, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6044444444444445, |
|
"grad_norm": 26.396954082977054, |
|
"learning_rate": 4.961009251208367e-07, |
|
"logits/chosen": -1.071451187133789, |
|
"logits/rejected": -0.9166553616523743, |
|
"logps/chosen": -21.116607666015625, |
|
"logps/rejected": -34.15024948120117, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06339044868946075, |
|
"rewards/margins": 1.8111618757247925, |
|
"rewards/rejected": -1.8745522499084473, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 24.23948883073191, |
|
"learning_rate": 4.956300990555643e-07, |
|
"logits/chosen": -1.0040934085845947, |
|
"logits/rejected": -0.8644249439239502, |
|
"logps/chosen": -24.51968002319336, |
|
"logps/rejected": -32.15287399291992, |
|
"loss": 0.3977, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1651010513305664, |
|
"rewards/margins": 1.301413655281067, |
|
"rewards/rejected": -1.4665147066116333, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6281481481481481, |
|
"grad_norm": 29.178528579105812, |
|
"learning_rate": 4.951326924670147e-07, |
|
"logits/chosen": -0.8935304880142212, |
|
"logits/rejected": -0.9188090562820435, |
|
"logps/chosen": -29.823339462280273, |
|
"logps/rejected": -42.743675231933594, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.31552594900131226, |
|
"rewards/margins": 1.0024209022521973, |
|
"rewards/rejected": -1.3179469108581543, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 24.40363992735679, |
|
"learning_rate": 4.94608759181358e-07, |
|
"logits/chosen": -0.9994638562202454, |
|
"logits/rejected": -0.8031306266784668, |
|
"logps/chosen": -32.72019577026367, |
|
"logps/rejected": -39.62814712524414, |
|
"loss": 0.3302, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1551201343536377, |
|
"rewards/margins": 1.3950880765914917, |
|
"rewards/rejected": -1.5502082109451294, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.6518518518518519, |
|
"grad_norm": 27.80698317557724, |
|
"learning_rate": 4.940583558953137e-07, |
|
"logits/chosen": -1.1568812131881714, |
|
"logits/rejected": -1.083202838897705, |
|
"logps/chosen": -28.588844299316406, |
|
"logps/rejected": -46.40166091918945, |
|
"loss": 0.4196, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.32859814167022705, |
|
"rewards/margins": 1.721780776977539, |
|
"rewards/rejected": -2.0503790378570557, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 27.03342498011367, |
|
"learning_rate": 4.934815421700164e-07, |
|
"logits/chosen": -0.9664996266365051, |
|
"logits/rejected": -0.9351974725723267, |
|
"logps/chosen": -25.929637908935547, |
|
"logps/rejected": -36.615997314453125, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14317776262760162, |
|
"rewards/margins": 1.6834478378295898, |
|
"rewards/rejected": -1.8266258239746094, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.6755555555555556, |
|
"grad_norm": 27.024427262923552, |
|
"learning_rate": 4.928783804245699e-07, |
|
"logits/chosen": -0.8274962902069092, |
|
"logits/rejected": -0.745110273361206, |
|
"logps/chosen": -32.589447021484375, |
|
"logps/rejected": -34.72138977050781, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.42406025528907776, |
|
"rewards/margins": 0.8041820526123047, |
|
"rewards/rejected": -1.2282423973083496, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.6874074074074074, |
|
"grad_norm": 24.14506468826234, |
|
"learning_rate": 4.922489359292927e-07, |
|
"logits/chosen": -0.920275866985321, |
|
"logits/rejected": -0.7754595279693604, |
|
"logps/chosen": -30.828351974487305, |
|
"logps/rejected": -49.377220153808594, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3007601797580719, |
|
"rewards/margins": 2.031721830368042, |
|
"rewards/rejected": -2.33248233795166, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.6992592592592592, |
|
"grad_norm": 22.656374640286362, |
|
"learning_rate": 4.915932767986551e-07, |
|
"logits/chosen": -1.103749394416809, |
|
"logits/rejected": -1.0164357423782349, |
|
"logps/chosen": -26.017108917236328, |
|
"logps/rejected": -43.8387565612793, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.2933482527732849, |
|
"rewards/margins": 1.7674319744110107, |
|
"rewards/rejected": -2.0607800483703613, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 24.99314823194104, |
|
"learning_rate": 4.909114739839079e-07, |
|
"logits/chosen": -0.9634025692939758, |
|
"logits/rejected": -0.9252867102622986, |
|
"logps/chosen": -23.952117919921875, |
|
"logps/rejected": -34.92929458618164, |
|
"loss": 0.3598, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1855652928352356, |
|
"rewards/margins": 1.803605079650879, |
|
"rewards/rejected": -1.9891700744628906, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7229629629629629, |
|
"grad_norm": 22.905046033248826, |
|
"learning_rate": 4.902036012654048e-07, |
|
"logits/chosen": -0.7937788963317871, |
|
"logits/rejected": -0.7061766982078552, |
|
"logps/chosen": -22.034412384033203, |
|
"logps/rejected": -33.86552047729492, |
|
"loss": 0.3401, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3443925678730011, |
|
"rewards/margins": 1.395371913909912, |
|
"rewards/rejected": -1.73976469039917, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.7348148148148148, |
|
"grad_norm": 25.28725048216447, |
|
"learning_rate": 4.894697352446182e-07, |
|
"logits/chosen": -1.0165841579437256, |
|
"logits/rejected": -1.0237828493118286, |
|
"logps/chosen": -24.306283950805664, |
|
"logps/rejected": -39.6012077331543, |
|
"loss": 0.3453, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.09554791450500488, |
|
"rewards/margins": 1.746566891670227, |
|
"rewards/rejected": -1.8421146869659424, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.7466666666666667, |
|
"grad_norm": 27.905008683571545, |
|
"learning_rate": 4.887099553358501e-07, |
|
"logits/chosen": -1.087665319442749, |
|
"logits/rejected": -0.9620079398155212, |
|
"logps/chosen": -29.117008209228516, |
|
"logps/rejected": -37.334896087646484, |
|
"loss": 0.3946, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.18033871054649353, |
|
"rewards/margins": 1.7729251384735107, |
|
"rewards/rejected": -1.953263759613037, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 26.563175740341975, |
|
"learning_rate": 4.879243437576383e-07, |
|
"logits/chosen": -1.0562440156936646, |
|
"logits/rejected": -0.8816579580307007, |
|
"logps/chosen": -23.48358726501465, |
|
"logps/rejected": -34.346927642822266, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.34856918454170227, |
|
"rewards/margins": 1.5337965488433838, |
|
"rewards/rejected": -1.8823657035827637, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.7703703703703704, |
|
"grad_norm": 28.950708662099014, |
|
"learning_rate": 4.871129855238588e-07, |
|
"logits/chosen": -1.031766653060913, |
|
"logits/rejected": -1.0294549465179443, |
|
"logps/chosen": -31.139263153076172, |
|
"logps/rejected": -41.21425247192383, |
|
"loss": 0.3715, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3050842881202698, |
|
"rewards/margins": 1.8005170822143555, |
|
"rewards/rejected": -2.1056013107299805, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.7822222222222223, |
|
"grad_norm": 27.546216408337372, |
|
"learning_rate": 4.862759684345269e-07, |
|
"logits/chosen": -1.203002691268921, |
|
"logits/rejected": -1.0988627672195435, |
|
"logps/chosen": -29.396411895751953, |
|
"logps/rejected": -35.40150833129883, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12675023078918457, |
|
"rewards/margins": 2.0646703243255615, |
|
"rewards/rejected": -2.191420316696167, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.794074074074074, |
|
"grad_norm": 23.894441975814534, |
|
"learning_rate": 4.854133830662955e-07, |
|
"logits/chosen": -0.9780765771865845, |
|
"logits/rejected": -0.8497614860534668, |
|
"logps/chosen": -28.06260871887207, |
|
"logps/rejected": -34.55665588378906, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6227935552597046, |
|
"rewards/margins": 2.2487592697143555, |
|
"rewards/rejected": -2.8715527057647705, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 30.617173652616593, |
|
"learning_rate": 4.845253227626536e-07, |
|
"logits/chosen": -1.0398799180984497, |
|
"logits/rejected": -0.907300591468811, |
|
"logps/chosen": -41.52682876586914, |
|
"logps/rejected": -43.311920166015625, |
|
"loss": 0.4022, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7630512714385986, |
|
"rewards/margins": 1.217781662940979, |
|
"rewards/rejected": -1.9808329343795776, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.8177777777777778, |
|
"grad_norm": 24.025263203043526, |
|
"learning_rate": 4.836118836238252e-07, |
|
"logits/chosen": -1.1331119537353516, |
|
"logits/rejected": -1.0378354787826538, |
|
"logps/chosen": -27.220407485961914, |
|
"logps/rejected": -41.87384796142578, |
|
"loss": 0.3431, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.057599157094955444, |
|
"rewards/margins": 1.6851834058761597, |
|
"rewards/rejected": -1.7427825927734375, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.8296296296296296, |
|
"grad_norm": 23.34599437673964, |
|
"learning_rate": 4.826731644963704e-07, |
|
"logits/chosen": -1.0917811393737793, |
|
"logits/rejected": -1.0149914026260376, |
|
"logps/chosen": -25.583330154418945, |
|
"logps/rejected": -33.85319900512695, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5846102237701416, |
|
"rewards/margins": 1.9573626518249512, |
|
"rewards/rejected": -2.5419728755950928, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8414814814814815, |
|
"grad_norm": 24.42006807604626, |
|
"learning_rate": 4.817092669624882e-07, |
|
"logits/chosen": -1.0650672912597656, |
|
"logits/rejected": -0.9445031881332397, |
|
"logps/chosen": -22.825862884521484, |
|
"logps/rejected": -33.60643768310547, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11327299475669861, |
|
"rewards/margins": 2.1697635650634766, |
|
"rewards/rejected": -2.283036708831787, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 24.54245031605526, |
|
"learning_rate": 4.807202953290243e-07, |
|
"logits/chosen": -1.1544904708862305, |
|
"logits/rejected": -0.9994347095489502, |
|
"logps/chosen": -23.641387939453125, |
|
"logps/rejected": -38.42119216918945, |
|
"loss": 0.3599, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23269107937812805, |
|
"rewards/margins": 2.1029093265533447, |
|
"rewards/rejected": -2.3356003761291504, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.8651851851851852, |
|
"grad_norm": 25.210130682755583, |
|
"learning_rate": 4.797063566161834e-07, |
|
"logits/chosen": -0.9285881519317627, |
|
"logits/rejected": -0.8881164789199829, |
|
"logps/chosen": -31.189298629760742, |
|
"logps/rejected": -35.99159622192383, |
|
"loss": 0.3768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.41402971744537354, |
|
"rewards/margins": 1.2696895599365234, |
|
"rewards/rejected": -1.6837193965911865, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.8770370370370371, |
|
"grad_norm": 22.99038510220094, |
|
"learning_rate": 4.786675605459487e-07, |
|
"logits/chosen": -1.1656837463378906, |
|
"logits/rejected": -1.1220611333847046, |
|
"logps/chosen": -28.37079620361328, |
|
"logps/rejected": -45.16815185546875, |
|
"loss": 0.3318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2702009975910187, |
|
"rewards/margins": 2.232954978942871, |
|
"rewards/rejected": -2.5031557083129883, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 24.11796136324434, |
|
"learning_rate": 4.776040195302079e-07, |
|
"logits/chosen": -1.112859845161438, |
|
"logits/rejected": -0.9862438440322876, |
|
"logps/chosen": -22.272464752197266, |
|
"logps/rejected": -35.39492416381836, |
|
"loss": 0.3439, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.34517136216163635, |
|
"rewards/margins": 2.139002561569214, |
|
"rewards/rejected": -2.4841737747192383, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 29.8497129464844, |
|
"learning_rate": 4.76515848658589e-07, |
|
"logits/chosen": -1.182924747467041, |
|
"logits/rejected": -1.0297247171401978, |
|
"logps/chosen": -30.078699111938477, |
|
"logps/rejected": -39.582275390625, |
|
"loss": 0.3452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5807650089263916, |
|
"rewards/margins": 2.0797762870788574, |
|
"rewards/rejected": -2.660541534423828, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.9125925925925926, |
|
"grad_norm": 25.533689636810493, |
|
"learning_rate": 4.754031656860059e-07, |
|
"logits/chosen": -1.0601996183395386, |
|
"logits/rejected": -0.968002200126648, |
|
"logps/chosen": -25.98404312133789, |
|
"logps/rejected": -29.14290428161621, |
|
"loss": 0.3515, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17048078775405884, |
|
"rewards/margins": 1.8824352025985718, |
|
"rewards/rejected": -2.0529160499572754, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.9244444444444444, |
|
"grad_norm": 21.394058422904486, |
|
"learning_rate": 4.74266091019916e-07, |
|
"logits/chosen": -1.1088751554489136, |
|
"logits/rejected": -0.9137270450592041, |
|
"logps/chosen": -28.85074806213379, |
|
"logps/rejected": -34.893470764160156, |
|
"loss": 0.2988, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05692651867866516, |
|
"rewards/margins": 1.6240626573562622, |
|
"rewards/rejected": -1.6809892654418945, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.9362962962962963, |
|
"grad_norm": 25.697276730733257, |
|
"learning_rate": 4.7310474770728996e-07, |
|
"logits/chosen": -1.2263762950897217, |
|
"logits/rejected": -1.1397736072540283, |
|
"logps/chosen": -28.09562873840332, |
|
"logps/rejected": -35.75029754638672, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18649393320083618, |
|
"rewards/margins": 1.1695051193237305, |
|
"rewards/rejected": -1.3559989929199219, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 21.662832078683152, |
|
"learning_rate": 4.719192614212969e-07, |
|
"logits/chosen": -0.9513252377510071, |
|
"logits/rejected": -0.9007601141929626, |
|
"logps/chosen": -34.18433380126953, |
|
"logps/rejected": -53.043609619140625, |
|
"loss": 0.2814, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.33905377984046936, |
|
"rewards/margins": 2.0920355319976807, |
|
"rewards/rejected": -2.431089401245117, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 24.69839835625674, |
|
"learning_rate": 4.707097604477045e-07, |
|
"logits/chosen": -1.1311062574386597, |
|
"logits/rejected": -0.9999745488166809, |
|
"logps/chosen": -32.54650115966797, |
|
"logps/rejected": -34.888450622558594, |
|
"loss": 0.3278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.328436940908432, |
|
"rewards/margins": 1.7844316959381104, |
|
"rewards/rejected": -2.112868547439575, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.9718518518518519, |
|
"grad_norm": 21.411242391551657, |
|
"learning_rate": 4.694763756709967e-07, |
|
"logits/chosen": -1.1982715129852295, |
|
"logits/rejected": -1.1674623489379883, |
|
"logps/chosen": -28.029937744140625, |
|
"logps/rejected": -37.19408416748047, |
|
"loss": 0.2882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3920401930809021, |
|
"rewards/margins": 1.973564624786377, |
|
"rewards/rejected": -2.365604877471924, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.9837037037037037, |
|
"grad_norm": 21.7744311573738, |
|
"learning_rate": 4.6821924056021053e-07, |
|
"logits/chosen": -1.0800765752792358, |
|
"logits/rejected": -0.9170486330986023, |
|
"logps/chosen": -22.360857009887695, |
|
"logps/rejected": -41.66752624511719, |
|
"loss": 0.3088, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21913698315620422, |
|
"rewards/margins": 2.08003830909729, |
|
"rewards/rejected": -2.299175262451172, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 24.355082987137063, |
|
"learning_rate": 4.669384911544926e-07, |
|
"logits/chosen": -1.06318199634552, |
|
"logits/rejected": -1.0848791599273682, |
|
"logps/chosen": -24.275285720825195, |
|
"logps/rejected": -37.596893310546875, |
|
"loss": 0.3674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2712962031364441, |
|
"rewards/margins": 1.7089827060699463, |
|
"rewards/rejected": -1.9802789688110352, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.0074074074074073, |
|
"grad_norm": 22.616093539594576, |
|
"learning_rate": 4.6563426604837817e-07, |
|
"logits/chosen": -1.2081141471862793, |
|
"logits/rejected": -0.9877020716667175, |
|
"logps/chosen": -34.070823669433594, |
|
"logps/rejected": -40.52888107299805, |
|
"loss": 0.2829, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10701459646224976, |
|
"rewards/margins": 3.043393611907959, |
|
"rewards/rejected": -3.1504077911376953, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.0192592592592593, |
|
"grad_norm": 16.700104066458838, |
|
"learning_rate": 4.6430670637679294e-07, |
|
"logits/chosen": -1.0600411891937256, |
|
"logits/rejected": -0.8425652384757996, |
|
"logps/chosen": -22.52095603942871, |
|
"logps/rejected": -33.55463409423828, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14321041107177734, |
|
"rewards/margins": 2.309981346130371, |
|
"rewards/rejected": -2.1667709350585938, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.031111111111111, |
|
"grad_norm": 15.240586856186553, |
|
"learning_rate": 4.629559557997804e-07, |
|
"logits/chosen": -1.3102786540985107, |
|
"logits/rejected": -1.143240213394165, |
|
"logps/chosen": -31.257415771484375, |
|
"logps/rejected": -47.26383590698242, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7230758666992188, |
|
"rewards/margins": 3.270418882369995, |
|
"rewards/rejected": -3.993495225906372, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 14.157542057104557, |
|
"learning_rate": 4.615821604869563e-07, |
|
"logits/chosen": -1.094043254852295, |
|
"logits/rejected": -0.8985757827758789, |
|
"logps/chosen": -28.409828186035156, |
|
"logps/rejected": -47.5828971862793, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.18681968748569489, |
|
"rewards/margins": 3.8075270652770996, |
|
"rewards/rejected": -3.9943466186523438, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.0548148148148149, |
|
"grad_norm": 17.38420675108177, |
|
"learning_rate": 4.6018546910169067e-07, |
|
"logits/chosen": -1.0334746837615967, |
|
"logits/rejected": -0.9715449810028076, |
|
"logps/chosen": -25.995702743530273, |
|
"logps/rejected": -38.42037582397461, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.36298614740371704, |
|
"rewards/margins": 2.626688241958618, |
|
"rewards/rejected": -2.9896743297576904, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 16.906629376553013, |
|
"learning_rate": 4.5876603278502027e-07, |
|
"logits/chosen": -1.0619425773620605, |
|
"logits/rejected": -0.9389445781707764, |
|
"logps/chosen": -28.09102439880371, |
|
"logps/rejected": -51.08159255981445, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0606449693441391, |
|
"rewards/margins": 3.6463186740875244, |
|
"rewards/rejected": -3.5856735706329346, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0785185185185184, |
|
"grad_norm": 18.96732689014115, |
|
"learning_rate": 4.573240051392935e-07, |
|
"logits/chosen": -0.9454656839370728, |
|
"logits/rejected": -0.9307714700698853, |
|
"logps/chosen": -26.379640579223633, |
|
"logps/rejected": -37.363258361816406, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.14031583070755005, |
|
"rewards/margins": 2.1791586875915527, |
|
"rewards/rejected": -2.319474458694458, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 16.671437504434632, |
|
"learning_rate": 4.5585954221154853e-07, |
|
"logits/chosen": -1.3018877506256104, |
|
"logits/rejected": -1.1478052139282227, |
|
"logps/chosen": -25.605445861816406, |
|
"logps/rejected": -44.80401611328125, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03381985425949097, |
|
"rewards/margins": 3.086803436279297, |
|
"rewards/rejected": -3.1206235885620117, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.1022222222222222, |
|
"grad_norm": 16.654640941302485, |
|
"learning_rate": 4.5437280247662646e-07, |
|
"logits/chosen": -1.0023672580718994, |
|
"logits/rejected": -0.9070078134536743, |
|
"logps/chosen": -29.185150146484375, |
|
"logps/rejected": -37.990234375, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18216750025749207, |
|
"rewards/margins": 2.39959716796875, |
|
"rewards/rejected": -2.5817646980285645, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.114074074074074, |
|
"grad_norm": 16.12699044310946, |
|
"learning_rate": 4.528639468200226e-07, |
|
"logits/chosen": -1.1345858573913574, |
|
"logits/rejected": -1.107000470161438, |
|
"logps/chosen": -28.13390350341797, |
|
"logps/rejected": -36.65238571166992, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.10749045014381409, |
|
"rewards/margins": 2.2392215728759766, |
|
"rewards/rejected": -2.1317310333251953, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.125925925925926, |
|
"grad_norm": 14.378767798932659, |
|
"learning_rate": 4.5133313852047613e-07, |
|
"logits/chosen": -1.058295726776123, |
|
"logits/rejected": -1.0083810091018677, |
|
"logps/chosen": -27.640762329101562, |
|
"logps/rejected": -42.5653076171875, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1484062671661377, |
|
"rewards/margins": 2.2676548957824707, |
|
"rewards/rejected": -2.119248390197754, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 20.808144652094654, |
|
"learning_rate": 4.4978054323230144e-07, |
|
"logits/chosen": -1.0242708921432495, |
|
"logits/rejected": -0.9334837198257446, |
|
"logps/chosen": -24.16075897216797, |
|
"logps/rejected": -34.90480041503906, |
|
"loss": 0.241, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.17029838263988495, |
|
"rewards/margins": 2.3052542209625244, |
|
"rewards/rejected": -2.134955406188965, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.1496296296296296, |
|
"grad_norm": 14.579273235897853, |
|
"learning_rate": 4.482063289674618e-07, |
|
"logits/chosen": -1.0504794120788574, |
|
"logits/rejected": -0.9864072799682617, |
|
"logps/chosen": -25.85841178894043, |
|
"logps/rejected": -44.5855598449707, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2036604881286621, |
|
"rewards/margins": 3.2001941204071045, |
|
"rewards/rejected": -2.9965333938598633, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.1614814814814816, |
|
"grad_norm": 14.479069724776132, |
|
"learning_rate": 4.466106660773884e-07, |
|
"logits/chosen": -1.2236568927764893, |
|
"logits/rejected": -1.0246343612670898, |
|
"logps/chosen": -30.013458251953125, |
|
"logps/rejected": -40.343631744384766, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.13774560391902924, |
|
"rewards/margins": 2.9517884254455566, |
|
"rewards/rejected": -3.089534044265747, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.1733333333333333, |
|
"grad_norm": 16.052170855559773, |
|
"learning_rate": 4.44993727234546e-07, |
|
"logits/chosen": -1.102075457572937, |
|
"logits/rejected": -0.9819889664649963, |
|
"logps/chosen": -30.00847816467285, |
|
"logps/rejected": -35.746273040771484, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23734648525714874, |
|
"rewards/margins": 2.4544928073883057, |
|
"rewards/rejected": -2.6918392181396484, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 14.71406650743676, |
|
"learning_rate": 4.4335568741374695e-07, |
|
"logits/chosen": -1.3955886363983154, |
|
"logits/rejected": -1.1072180271148682, |
|
"logps/chosen": -29.151214599609375, |
|
"logps/rejected": -35.26973342895508, |
|
"loss": 0.1753, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24000686407089233, |
|
"rewards/margins": 2.8170034885406494, |
|
"rewards/rejected": -2.576996326446533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1970370370370371, |
|
"grad_norm": 15.185117866368294, |
|
"learning_rate": 4.4169672387321735e-07, |
|
"logits/chosen": -0.9774000644683838, |
|
"logits/rejected": -0.8965713977813721, |
|
"logps/chosen": -28.971498489379883, |
|
"logps/rejected": -42.8656120300293, |
|
"loss": 0.1719, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.21834176778793335, |
|
"rewards/margins": 3.638746738433838, |
|
"rewards/rejected": -3.4204049110412598, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.208888888888889, |
|
"grad_norm": 19.818913364910017, |
|
"learning_rate": 4.4001701613541454e-07, |
|
"logits/chosen": -0.9378620982170105, |
|
"logits/rejected": -0.8033993244171143, |
|
"logps/chosen": -25.265066146850586, |
|
"logps/rejected": -34.76940155029297, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.2527243196964264, |
|
"rewards/margins": 2.660951852798462, |
|
"rewards/rejected": -2.4082274436950684, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.2207407407407407, |
|
"grad_norm": 17.017386662283865, |
|
"learning_rate": 4.383167459676008e-07, |
|
"logits/chosen": -1.101958155632019, |
|
"logits/rejected": -1.0334186553955078, |
|
"logps/chosen": -27.581031799316406, |
|
"logps/rejected": -41.83063507080078, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.033310309052467346, |
|
"rewards/margins": 2.295804262161255, |
|
"rewards/rejected": -2.2624940872192383, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 15.01263977310487, |
|
"learning_rate": 4.365960973621734e-07, |
|
"logits/chosen": -1.261305570602417, |
|
"logits/rejected": -1.1650094985961914, |
|
"logps/chosen": -21.846336364746094, |
|
"logps/rejected": -38.35143280029297, |
|
"loss": 0.1664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08976972103118896, |
|
"rewards/margins": 2.9284555912017822, |
|
"rewards/rejected": -2.838685989379883, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.2444444444444445, |
|
"grad_norm": 15.499811043472015, |
|
"learning_rate": 4.348552565167542e-07, |
|
"logits/chosen": -0.9682034850120544, |
|
"logits/rejected": -0.8779630064964294, |
|
"logps/chosen": -26.32052993774414, |
|
"logps/rejected": -33.074302673339844, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.027928471565246582, |
|
"rewards/margins": 2.689946413040161, |
|
"rewards/rejected": -2.717874765396118, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.2562962962962962, |
|
"grad_norm": 16.751326465749557, |
|
"learning_rate": 4.330944118140406e-07, |
|
"logits/chosen": -0.9463189840316772, |
|
"logits/rejected": -0.8563187718391418, |
|
"logps/chosen": -29.297607421875, |
|
"logps/rejected": -38.705177307128906, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.17362913489341736, |
|
"rewards/margins": 2.7915725708007812, |
|
"rewards/rejected": -2.617943286895752, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.268148148148148, |
|
"grad_norm": 13.213493074609195, |
|
"learning_rate": 4.313137538014198e-07, |
|
"logits/chosen": -1.0986582040786743, |
|
"logits/rejected": -0.9737260937690735, |
|
"logps/chosen": -25.97295570373535, |
|
"logps/rejected": -27.29983901977539, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4464994966983795, |
|
"rewards/margins": 2.5145790576934814, |
|
"rewards/rejected": -2.0680792331695557, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 21.550277344518772, |
|
"learning_rate": 4.295134751703492e-07, |
|
"logits/chosen": -0.9147591590881348, |
|
"logits/rejected": -0.8136166334152222, |
|
"logps/chosen": -39.372562408447266, |
|
"logps/rejected": -40.19895935058594, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10953384637832642, |
|
"rewards/margins": 3.0302987098693848, |
|
"rewards/rejected": -3.1398324966430664, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.2918518518518518, |
|
"grad_norm": 15.95008980481358, |
|
"learning_rate": 4.276937707355044e-07, |
|
"logits/chosen": -1.119678020477295, |
|
"logits/rejected": -0.9529648423194885, |
|
"logps/chosen": -29.550357818603516, |
|
"logps/rejected": -40.979732513427734, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23076438903808594, |
|
"rewards/margins": 3.9992775917053223, |
|
"rewards/rejected": -3.7685132026672363, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.3037037037037038, |
|
"grad_norm": 14.896618310434517, |
|
"learning_rate": 4.2585483741369755e-07, |
|
"logits/chosen": -1.1377118825912476, |
|
"logits/rejected": -1.0649988651275635, |
|
"logps/chosen": -20.728757858276367, |
|
"logps/rejected": -42.846527099609375, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1395069807767868, |
|
"rewards/margins": 3.1843342781066895, |
|
"rewards/rejected": -3.3238413333892822, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3155555555555556, |
|
"grad_norm": 15.287898186475319, |
|
"learning_rate": 4.239968742025684e-07, |
|
"logits/chosen": -0.9551693797111511, |
|
"logits/rejected": -0.8516461253166199, |
|
"logps/chosen": -22.917587280273438, |
|
"logps/rejected": -43.595619201660156, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1407267451286316, |
|
"rewards/margins": 3.2762203216552734, |
|
"rewards/rejected": -3.4169468879699707, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 13.13930765742771, |
|
"learning_rate": 4.2212008215905e-07, |
|
"logits/chosen": -1.309780240058899, |
|
"logits/rejected": -1.1697163581848145, |
|
"logps/chosen": -23.579864501953125, |
|
"logps/rejected": -39.38568115234375, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12219972908496857, |
|
"rewards/margins": 2.721135139465332, |
|
"rewards/rejected": -2.843334913253784, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.3392592592592591, |
|
"grad_norm": 16.93467958306283, |
|
"learning_rate": 4.2022466437761154e-07, |
|
"logits/chosen": -1.0195517539978027, |
|
"logits/rejected": -0.9710554480552673, |
|
"logps/chosen": -27.96396255493164, |
|
"logps/rejected": -39.36810302734375, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.13629719614982605, |
|
"rewards/margins": 1.8954023122787476, |
|
"rewards/rejected": -2.0316996574401855, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.3511111111111112, |
|
"grad_norm": 16.185982425906115, |
|
"learning_rate": 4.18310825968281e-07, |
|
"logits/chosen": -1.085777997970581, |
|
"logits/rejected": -1.0098400115966797, |
|
"logps/chosen": -31.38774871826172, |
|
"logps/rejected": -44.18259811401367, |
|
"loss": 0.1856, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.33986663818359375, |
|
"rewards/margins": 3.2784461975097656, |
|
"rewards/rejected": -3.618312358856201, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.362962962962963, |
|
"grad_norm": 14.370437677602862, |
|
"learning_rate": 4.1637877403444923e-07, |
|
"logits/chosen": -1.1370917558670044, |
|
"logits/rejected": -1.076406478881836, |
|
"logps/chosen": -21.368831634521484, |
|
"logps/rejected": -37.987247467041016, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27133771777153015, |
|
"rewards/margins": 3.750422716140747, |
|
"rewards/rejected": -3.4790849685668945, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 14.315285669788084, |
|
"learning_rate": 4.144287176504582e-07, |
|
"logits/chosen": -1.0781633853912354, |
|
"logits/rejected": -0.9295682907104492, |
|
"logps/chosen": -27.247238159179688, |
|
"logps/rejected": -39.297607421875, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07360666990280151, |
|
"rewards/margins": 2.9343483448028564, |
|
"rewards/rejected": -2.860741376876831, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.3866666666666667, |
|
"grad_norm": 13.224703617010858, |
|
"learning_rate": 4.1246086783897713e-07, |
|
"logits/chosen": -1.143677830696106, |
|
"logits/rejected": -1.035298228263855, |
|
"logps/chosen": -21.692089080810547, |
|
"logps/rejected": -39.77001953125, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11114715039730072, |
|
"rewards/margins": 4.035545825958252, |
|
"rewards/rejected": -3.924398422241211, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.3985185185185185, |
|
"grad_norm": 13.386330467851073, |
|
"learning_rate": 4.104754375481664e-07, |
|
"logits/chosen": -1.1449244022369385, |
|
"logits/rejected": -1.0441653728485107, |
|
"logps/chosen": -24.610374450683594, |
|
"logps/rejected": -36.322635650634766, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.24931076169013977, |
|
"rewards/margins": 2.860081195831299, |
|
"rewards/rejected": -3.1093921661376953, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.4103703703703703, |
|
"grad_norm": 17.903128810468665, |
|
"learning_rate": 4.084726416286337e-07, |
|
"logits/chosen": -1.1355631351470947, |
|
"logits/rejected": -1.0569454431533813, |
|
"logps/chosen": -22.172731399536133, |
|
"logps/rejected": -38.71437072753906, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004118114709854126, |
|
"rewards/margins": 3.1719160079956055, |
|
"rewards/rejected": -3.1760339736938477, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 14.325608299731273, |
|
"learning_rate": 4.0645269681018434e-07, |
|
"logits/chosen": -1.2059547901153564, |
|
"logits/rejected": -1.132045030593872, |
|
"logps/chosen": -24.006052017211914, |
|
"logps/rejected": -37.643314361572266, |
|
"loss": 0.1583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3129858076572418, |
|
"rewards/margins": 2.9086873531341553, |
|
"rewards/rejected": -3.221672773361206, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.434074074074074, |
|
"grad_norm": 13.002484277938684, |
|
"learning_rate": 4.044158216783684e-07, |
|
"logits/chosen": -1.369994044303894, |
|
"logits/rejected": -1.179801344871521, |
|
"logps/chosen": -28.838666915893555, |
|
"logps/rejected": -49.269287109375, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.21779999136924744, |
|
"rewards/margins": 4.504581928253174, |
|
"rewards/rejected": -4.722381591796875, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.445925925925926, |
|
"grad_norm": 16.113792921785464, |
|
"learning_rate": 4.0236223665082605e-07, |
|
"logits/chosen": -1.1226955652236938, |
|
"logits/rejected": -1.0712882280349731, |
|
"logps/chosen": -21.75322151184082, |
|
"logps/rejected": -35.07586669921875, |
|
"loss": 0.1625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1918860822916031, |
|
"rewards/margins": 3.3565304279327393, |
|
"rewards/rejected": -3.164644479751587, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.4577777777777778, |
|
"grad_norm": 11.423804755471494, |
|
"learning_rate": 4.0029216395343617e-07, |
|
"logits/chosen": -1.0564236640930176, |
|
"logits/rejected": -0.9565566778182983, |
|
"logps/chosen": -27.292240142822266, |
|
"logps/rejected": -41.23828887939453, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.42787694931030273, |
|
"rewards/margins": 3.2124743461608887, |
|
"rewards/rejected": -3.6403515338897705, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 11.96487396864106, |
|
"learning_rate": 3.982058275962682e-07, |
|
"logits/chosen": -1.2627426385879517, |
|
"logits/rejected": -1.163001298904419, |
|
"logps/chosen": -20.64603614807129, |
|
"logps/rejected": -39.54261016845703, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.4101359248161316, |
|
"rewards/margins": 2.903512716293335, |
|
"rewards/rejected": -2.4933767318725586, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 13.800579072803204, |
|
"learning_rate": 3.9610345334934094e-07, |
|
"logits/chosen": -1.2117929458618164, |
|
"logits/rejected": -0.9392006993293762, |
|
"logps/chosen": -28.66204071044922, |
|
"logps/rejected": -40.63731002807617, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1593039333820343, |
|
"rewards/margins": 3.4954304695129395, |
|
"rewards/rejected": -3.3361263275146484, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.4933333333333334, |
|
"grad_norm": 12.680404338446278, |
|
"learning_rate": 3.939852687181915e-07, |
|
"logits/chosen": -1.1634321212768555, |
|
"logits/rejected": -1.0764764547348022, |
|
"logps/chosen": -24.423765182495117, |
|
"logps/rejected": -45.39548873901367, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17568892240524292, |
|
"rewards/margins": 4.0248494148254395, |
|
"rewards/rejected": -3.8491601943969727, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.5051851851851852, |
|
"grad_norm": 13.14161578490378, |
|
"learning_rate": 3.9185150291925585e-07, |
|
"logits/chosen": -1.0429072380065918, |
|
"logits/rejected": -1.0684268474578857, |
|
"logps/chosen": -26.456886291503906, |
|
"logps/rejected": -39.13412094116211, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.43806853890419006, |
|
"rewards/margins": 3.234588146209717, |
|
"rewards/rejected": -3.672656536102295, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 14.252517134892512, |
|
"learning_rate": 3.8970238685506486e-07, |
|
"logits/chosen": -1.0745394229888916, |
|
"logits/rejected": -1.0680888891220093, |
|
"logps/chosen": -26.106287002563477, |
|
"logps/rejected": -45.78963088989258, |
|
"loss": 0.1535, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.10547050833702087, |
|
"rewards/margins": 3.6777379512786865, |
|
"rewards/rejected": -3.5722672939300537, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.528888888888889, |
|
"grad_norm": 13.410270453749325, |
|
"learning_rate": 3.8753815308925685e-07, |
|
"logits/chosen": -1.3084537982940674, |
|
"logits/rejected": -1.1879018545150757, |
|
"logps/chosen": -22.162595748901367, |
|
"logps/rejected": -42.90380096435547, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.29345619678497314, |
|
"rewards/margins": 3.8301730155944824, |
|
"rewards/rejected": -4.123629570007324, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.5407407407407407, |
|
"grad_norm": 16.65901363698597, |
|
"learning_rate": 3.8535903582141184e-07, |
|
"logits/chosen": -1.1705418825149536, |
|
"logits/rejected": -1.053526520729065, |
|
"logps/chosen": -22.083023071289062, |
|
"logps/rejected": -43.40499496459961, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09943583607673645, |
|
"rewards/margins": 3.597656011581421, |
|
"rewards/rejected": -3.498220443725586, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.5525925925925925, |
|
"grad_norm": 15.81048973784746, |
|
"learning_rate": 3.8316527086170727e-07, |
|
"logits/chosen": -1.1002339124679565, |
|
"logits/rejected": -0.9635283946990967, |
|
"logps/chosen": -22.6536865234375, |
|
"logps/rejected": -35.75001907348633, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.08621586859226227, |
|
"rewards/margins": 3.319308280944824, |
|
"rewards/rejected": -3.2330923080444336, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 13.934303626010081, |
|
"learning_rate": 3.809570956054003e-07, |
|
"logits/chosen": -1.2058043479919434, |
|
"logits/rejected": -1.1326546669006348, |
|
"logps/chosen": -20.698150634765625, |
|
"logps/rejected": -43.496559143066406, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10407552123069763, |
|
"rewards/margins": 4.241490364074707, |
|
"rewards/rejected": -4.3455657958984375, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.5762962962962963, |
|
"grad_norm": 13.808397445470401, |
|
"learning_rate": 3.787347490071389e-07, |
|
"logits/chosen": -1.2017699480056763, |
|
"logits/rejected": -1.1394281387329102, |
|
"logps/chosen": -29.24155044555664, |
|
"logps/rejected": -45.46855163574219, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3030049204826355, |
|
"rewards/margins": 3.9124467372894287, |
|
"rewards/rejected": -4.215451240539551, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.5881481481481483, |
|
"grad_norm": 13.152290267087837, |
|
"learning_rate": 3.764984715551031e-07, |
|
"logits/chosen": -1.1422480344772339, |
|
"logits/rejected": -1.053503155708313, |
|
"logps/chosen": -20.119190216064453, |
|
"logps/rejected": -41.04280090332031, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04641704261302948, |
|
"rewards/margins": 3.333278179168701, |
|
"rewards/rejected": -3.379695415496826, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 12.384641280044091, |
|
"learning_rate": 3.7424850524498113e-07, |
|
"logits/chosen": -1.1235531568527222, |
|
"logits/rejected": -1.016575574874878, |
|
"logps/chosen": -23.927431106567383, |
|
"logps/rejected": -38.624183654785156, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.145728200674057, |
|
"rewards/margins": 3.4623892307281494, |
|
"rewards/rejected": -3.6081173419952393, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 13.297788267005293, |
|
"learning_rate": 3.7198509355378207e-07, |
|
"logits/chosen": -1.1904593706130981, |
|
"logits/rejected": -1.0650973320007324, |
|
"logps/chosen": -30.460954666137695, |
|
"logps/rejected": -35.29721450805664, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5998407602310181, |
|
"rewards/margins": 2.190915822982788, |
|
"rewards/rejected": -2.7907564640045166, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.6237037037037036, |
|
"grad_norm": 17.654879145447634, |
|
"learning_rate": 3.6970848141348855e-07, |
|
"logits/chosen": -1.2997840642929077, |
|
"logits/rejected": -1.1812993288040161, |
|
"logps/chosen": -29.659500122070312, |
|
"logps/rejected": -39.244354248046875, |
|
"loss": 0.1878, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.24524670839309692, |
|
"rewards/margins": 3.048208713531494, |
|
"rewards/rejected": -3.2934556007385254, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.6355555555555554, |
|
"grad_norm": 9.713259026639975, |
|
"learning_rate": 3.6741891518455146e-07, |
|
"logits/chosen": -1.0600968599319458, |
|
"logits/rejected": -0.9694119691848755, |
|
"logps/chosen": -26.941146850585938, |
|
"logps/rejected": -45.241539001464844, |
|
"loss": 0.099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2543194591999054, |
|
"rewards/margins": 3.474762201309204, |
|
"rewards/rejected": -3.729081392288208, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.6474074074074074, |
|
"grad_norm": 11.146298314879976, |
|
"learning_rate": 3.6511664262923094e-07, |
|
"logits/chosen": -1.1857203245162964, |
|
"logits/rejected": -1.1235812902450562, |
|
"logps/chosen": -20.542293548583984, |
|
"logps/rejected": -38.22064971923828, |
|
"loss": 0.1272, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.22452278435230255, |
|
"rewards/margins": 3.8128674030303955, |
|
"rewards/rejected": -4.037390232086182, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 11.77226347660767, |
|
"learning_rate": 3.6280191288478435e-07, |
|
"logits/chosen": -1.2729771137237549, |
|
"logits/rejected": -1.1265182495117188, |
|
"logps/chosen": -26.0278377532959, |
|
"logps/rejected": -44.57939147949219, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20267322659492493, |
|
"rewards/margins": 3.678438901901245, |
|
"rewards/rejected": -3.8811120986938477, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.6711111111111112, |
|
"grad_norm": 12.442016266819769, |
|
"learning_rate": 3.604749764365069e-07, |
|
"logits/chosen": -1.1912599802017212, |
|
"logits/rejected": -1.084775686264038, |
|
"logps/chosen": -20.05962371826172, |
|
"logps/rejected": -39.900665283203125, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07022520899772644, |
|
"rewards/margins": 3.820122718811035, |
|
"rewards/rejected": -3.890347957611084, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.682962962962963, |
|
"grad_norm": 14.443169294013128, |
|
"learning_rate": 3.5813608509062526e-07, |
|
"logits/chosen": -0.998296856880188, |
|
"logits/rejected": -1.11066472530365, |
|
"logps/chosen": -26.359149932861328, |
|
"logps/rejected": -48.0468635559082, |
|
"loss": 0.1386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2483871877193451, |
|
"rewards/margins": 3.9444689750671387, |
|
"rewards/rejected": -4.192856311798096, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.6948148148148148, |
|
"grad_norm": 12.88438627763912, |
|
"learning_rate": 3.557854919470491e-07, |
|
"logits/chosen": -1.1343494653701782, |
|
"logits/rejected": -1.1029855012893677, |
|
"logps/chosen": -32.05289077758789, |
|
"logps/rejected": -38.77518081665039, |
|
"loss": 0.1465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08383223414421082, |
|
"rewards/margins": 2.9178643226623535, |
|
"rewards/rejected": -3.001697063446045, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 12.409012501344572, |
|
"learning_rate": 3.5342345137198206e-07, |
|
"logits/chosen": -1.0480347871780396, |
|
"logits/rejected": -0.9312314391136169, |
|
"logps/chosen": -30.324771881103516, |
|
"logps/rejected": -36.17607116699219, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2758581340312958, |
|
"rewards/margins": 2.6668765544891357, |
|
"rewards/rejected": -2.942734718322754, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.7185185185185186, |
|
"grad_norm": 14.582949797718573, |
|
"learning_rate": 3.510502189703954e-07, |
|
"logits/chosen": -0.97275710105896, |
|
"logits/rejected": -0.7612693905830383, |
|
"logps/chosen": -28.907245635986328, |
|
"logps/rejected": -45.605037689208984, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35651320219039917, |
|
"rewards/margins": 4.817986011505127, |
|
"rewards/rejected": -5.17449951171875, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.7303703703703703, |
|
"grad_norm": 13.66922326611715, |
|
"learning_rate": 3.486660515583691e-07, |
|
"logits/chosen": -1.1288774013519287, |
|
"logits/rejected": -1.1245758533477783, |
|
"logps/chosen": -23.699264526367188, |
|
"logps/rejected": -42.97127914428711, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11036735773086548, |
|
"rewards/margins": 4.373822212219238, |
|
"rewards/rejected": -4.263454914093018, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.7422222222222223, |
|
"grad_norm": 13.037114765866198, |
|
"learning_rate": 3.4627120713529983e-07, |
|
"logits/chosen": -0.9598813056945801, |
|
"logits/rejected": -0.8330179452896118, |
|
"logps/chosen": -22.383928298950195, |
|
"logps/rejected": -45.0758171081543, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.034085407853126526, |
|
"rewards/margins": 4.736968040466309, |
|
"rewards/rejected": -4.771053314208984, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 10.872543956486167, |
|
"learning_rate": 3.438659448559825e-07, |
|
"logits/chosen": -1.1963474750518799, |
|
"logits/rejected": -1.0486239194869995, |
|
"logps/chosen": -27.349458694458008, |
|
"logps/rejected": -48.23403549194336, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17232058942317963, |
|
"rewards/margins": 4.531591892242432, |
|
"rewards/rejected": -4.703912734985352, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.765925925925926, |
|
"grad_norm": 10.7720279947233, |
|
"learning_rate": 3.414505250025659e-07, |
|
"logits/chosen": -0.9560255408287048, |
|
"logits/rejected": -1.0075461864471436, |
|
"logps/chosen": -30.97559928894043, |
|
"logps/rejected": -42.89778518676758, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.011755384504795074, |
|
"rewards/margins": 3.706606864929199, |
|
"rewards/rejected": -3.718362331390381, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 14.01453220823484, |
|
"learning_rate": 3.390252089563867e-07, |
|
"logits/chosen": -1.167525291442871, |
|
"logits/rejected": -1.008201241493225, |
|
"logps/chosen": -24.03421401977539, |
|
"logps/rejected": -37.12451171875, |
|
"loss": 0.147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05710184574127197, |
|
"rewards/margins": 3.54923939704895, |
|
"rewards/rejected": -3.492137908935547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.7896296296296297, |
|
"grad_norm": 18.40124537105695, |
|
"learning_rate": 3.3659025916968475e-07, |
|
"logits/chosen": -1.1562587022781372, |
|
"logits/rejected": -1.0596400499343872, |
|
"logps/chosen": -27.828075408935547, |
|
"logps/rejected": -50.78956985473633, |
|
"loss": 0.1666, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4149998426437378, |
|
"rewards/margins": 4.103493690490723, |
|
"rewards/rejected": -4.51849365234375, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 15.187471450574751, |
|
"learning_rate": 3.3414593913720155e-07, |
|
"logits/chosen": -1.1149495840072632, |
|
"logits/rejected": -0.9014438986778259, |
|
"logps/chosen": -24.957393646240234, |
|
"logps/rejected": -38.273773193359375, |
|
"loss": 0.1572, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.3299483358860016, |
|
"rewards/margins": 3.5365545749664307, |
|
"rewards/rejected": -3.206606388092041, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.8133333333333335, |
|
"grad_norm": 11.786430269793136, |
|
"learning_rate": 3.3169251336766697e-07, |
|
"logits/chosen": -1.0765142440795898, |
|
"logits/rejected": -0.9713940620422363, |
|
"logps/chosen": -23.6178035736084, |
|
"logps/rejected": -36.39717102050781, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5628844499588013, |
|
"rewards/margins": 3.1841235160827637, |
|
"rewards/rejected": -3.7470080852508545, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.8251851851851852, |
|
"grad_norm": 15.707535366344572, |
|
"learning_rate": 3.2923024735517567e-07, |
|
"logits/chosen": -1.2396905422210693, |
|
"logits/rejected": -1.13885498046875, |
|
"logps/chosen": -25.60649871826172, |
|
"logps/rejected": -41.11204147338867, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.42897889018058777, |
|
"rewards/margins": 3.3137550354003906, |
|
"rewards/rejected": -3.742733955383301, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.837037037037037, |
|
"grad_norm": 16.002573607260132, |
|
"learning_rate": 3.2675940755045713e-07, |
|
"logits/chosen": -1.1592830419540405, |
|
"logits/rejected": -1.069584846496582, |
|
"logps/chosen": -34.06727600097656, |
|
"logps/rejected": -54.026817321777344, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43196994066238403, |
|
"rewards/margins": 4.310949802398682, |
|
"rewards/rejected": -4.742919445037842, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 14.394511048135854, |
|
"learning_rate": 3.242802613320418e-07, |
|
"logits/chosen": -1.0737497806549072, |
|
"logits/rejected": -0.9672637581825256, |
|
"logps/chosen": -27.148597717285156, |
|
"logps/rejected": -41.859004974365234, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2204400897026062, |
|
"rewards/margins": 3.8589026927948, |
|
"rewards/rejected": -4.079343318939209, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.8607407407407406, |
|
"grad_norm": 13.068510095436686, |
|
"learning_rate": 3.217930769773275e-07, |
|
"logits/chosen": -1.2130502462387085, |
|
"logits/rejected": -1.0399776697158813, |
|
"logps/chosen": -20.487337112426758, |
|
"logps/rejected": -35.530582427978516, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.049159154295921326, |
|
"rewards/margins": 4.460110187530518, |
|
"rewards/rejected": -4.410951614379883, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.8725925925925926, |
|
"grad_norm": 12.727841490377434, |
|
"learning_rate": 3.1929812363354764e-07, |
|
"logits/chosen": -1.1142170429229736, |
|
"logits/rejected": -0.979875385761261, |
|
"logps/chosen": -25.325483322143555, |
|
"logps/rejected": -46.20812225341797, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1650889664888382, |
|
"rewards/margins": 4.539978504180908, |
|
"rewards/rejected": -4.7050676345825195, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.8844444444444446, |
|
"grad_norm": 13.783921189406176, |
|
"learning_rate": 3.167956712886463e-07, |
|
"logits/chosen": -1.0069048404693604, |
|
"logits/rejected": -0.9355603456497192, |
|
"logps/chosen": -29.581226348876953, |
|
"logps/rejected": -37.52265167236328, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.499575138092041, |
|
"rewards/margins": 2.6125097274780273, |
|
"rewards/rejected": -3.1120848655700684, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 12.862775831490238, |
|
"learning_rate": 3.142859907420615e-07, |
|
"logits/chosen": -1.0252788066864014, |
|
"logits/rejected": -1.0804516077041626, |
|
"logps/chosen": -24.711009979248047, |
|
"logps/rejected": -42.78890609741211, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3273608684539795, |
|
"rewards/margins": 3.2098522186279297, |
|
"rewards/rejected": -3.5372135639190674, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.9081481481481481, |
|
"grad_norm": 11.856116486125906, |
|
"learning_rate": 3.117693535754213e-07, |
|
"logits/chosen": -1.069286823272705, |
|
"logits/rejected": -0.9155316948890686, |
|
"logps/chosen": -23.146581649780273, |
|
"logps/rejected": -43.31779479980469, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.038980498909950256, |
|
"rewards/margins": 4.636472702026367, |
|
"rewards/rejected": -4.597492218017578, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 15.032149567521808, |
|
"learning_rate": 3.092460321231547e-07, |
|
"logits/chosen": -1.0839258432388306, |
|
"logits/rejected": -1.006733775138855, |
|
"logps/chosen": -24.381574630737305, |
|
"logps/rejected": -40.473060607910156, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.02435511350631714, |
|
"rewards/margins": 4.535048007965088, |
|
"rewards/rejected": -4.559402942657471, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.9318518518518517, |
|
"grad_norm": 14.8363884279284, |
|
"learning_rate": 3.0671629944302164e-07, |
|
"logits/chosen": -1.0501927137374878, |
|
"logits/rejected": -0.9243767261505127, |
|
"logps/chosen": -27.61357879638672, |
|
"logps/rejected": -36.362586975097656, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.23934195935726166, |
|
"rewards/margins": 3.6352920532226562, |
|
"rewards/rejected": -3.8746337890625, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 12.238985051757798, |
|
"learning_rate": 3.0418042928656415e-07, |
|
"logits/chosen": -1.1459879875183105, |
|
"logits/rejected": -0.9831377267837524, |
|
"logps/chosen": -23.33287811279297, |
|
"logps/rejected": -43.29710006713867, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.029119372367858887, |
|
"rewards/margins": 4.274390697479248, |
|
"rewards/rejected": -4.3035101890563965, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.9555555555555557, |
|
"grad_norm": 16.045991654119778, |
|
"learning_rate": 3.016386960694827e-07, |
|
"logits/chosen": -1.0820094347000122, |
|
"logits/rejected": -0.9164285063743591, |
|
"logps/chosen": -29.36737823486328, |
|
"logps/rejected": -45.8538818359375, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5107632875442505, |
|
"rewards/margins": 3.8868861198425293, |
|
"rewards/rejected": -4.39764928817749, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.9674074074074075, |
|
"grad_norm": 15.658417100599408, |
|
"learning_rate": 2.990913748419411e-07, |
|
"logits/chosen": -1.1057474613189697, |
|
"logits/rejected": -1.0400460958480835, |
|
"logps/chosen": -32.17692565917969, |
|
"logps/rejected": -43.858551025390625, |
|
"loss": 0.1491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.028441503643989563, |
|
"rewards/margins": 3.5364620685577393, |
|
"rewards/rejected": -3.5080206394195557, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.9792592592592593, |
|
"grad_norm": 17.182247947721276, |
|
"learning_rate": 2.9653874125880167e-07, |
|
"logits/chosen": -1.1606206893920898, |
|
"logits/rejected": -1.0265402793884277, |
|
"logps/chosen": -24.273101806640625, |
|
"logps/rejected": -43.97246551513672, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.012486815452575684, |
|
"rewards/margins": 3.4821486473083496, |
|
"rewards/rejected": -3.4946351051330566, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 8.93976424369471, |
|
"learning_rate": 2.9398107154979634e-07, |
|
"logits/chosen": -1.1381988525390625, |
|
"logits/rejected": -1.03400456905365, |
|
"logps/chosen": -21.53853416442871, |
|
"logps/rejected": -48.0505256652832, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05716177821159363, |
|
"rewards/margins": 4.557176113128662, |
|
"rewards/rejected": -4.500014305114746, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.002962962962963, |
|
"grad_norm": 11.949224405327886, |
|
"learning_rate": 2.9141864248963427e-07, |
|
"logits/chosen": -1.2692681550979614, |
|
"logits/rejected": -1.0146331787109375, |
|
"logps/chosen": -27.361726760864258, |
|
"logps/rejected": -35.84319305419922, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.12528757750988007, |
|
"rewards/margins": 4.429131984710693, |
|
"rewards/rejected": -4.303844451904297, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.0148148148148146, |
|
"grad_norm": 7.858640781523143, |
|
"learning_rate": 2.8885173136805125e-07, |
|
"logits/chosen": -1.1425201892852783, |
|
"logits/rejected": -1.0211284160614014, |
|
"logps/chosen": -26.627113342285156, |
|
"logps/rejected": -51.298709869384766, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16640473902225494, |
|
"rewards/margins": 4.273306846618652, |
|
"rewards/rejected": -4.439712047576904, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.026666666666667, |
|
"grad_norm": 5.791091337239758, |
|
"learning_rate": 2.862806159598032e-07, |
|
"logits/chosen": -1.246085286140442, |
|
"logits/rejected": -1.1816462278366089, |
|
"logps/chosen": -23.06086540222168, |
|
"logps/rejected": -39.5461540222168, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21991188824176788, |
|
"rewards/margins": 4.500458717346191, |
|
"rewards/rejected": -4.28054666519165, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 8.464583168455022, |
|
"learning_rate": 2.837055744946072e-07, |
|
"logits/chosen": -0.9950094819068909, |
|
"logits/rejected": -0.9867933392524719, |
|
"logps/chosen": -20.085613250732422, |
|
"logps/rejected": -39.374183654785156, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23747408390045166, |
|
"rewards/margins": 4.035274505615234, |
|
"rewards/rejected": -3.797800064086914, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.0503703703703704, |
|
"grad_norm": 7.246388422688696, |
|
"learning_rate": 2.811268856270332e-07, |
|
"logits/chosen": -1.149637222290039, |
|
"logits/rejected": -1.1608506441116333, |
|
"logps/chosen": -22.0140380859375, |
|
"logps/rejected": -42.8390998840332, |
|
"loss": 0.081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34959834814071655, |
|
"rewards/margins": 4.302677154541016, |
|
"rewards/rejected": -3.9530792236328125, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.062222222222222, |
|
"grad_norm": 7.340518516395049, |
|
"learning_rate": 2.7854482840634965e-07, |
|
"logits/chosen": -1.2548686265945435, |
|
"logits/rejected": -1.127457618713379, |
|
"logps/chosen": -21.352310180664062, |
|
"logps/rejected": -43.30939483642578, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04353713244199753, |
|
"rewards/margins": 5.536983013153076, |
|
"rewards/rejected": -5.49344539642334, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.074074074074074, |
|
"grad_norm": 9.753614692470563, |
|
"learning_rate": 2.759596822463267e-07, |
|
"logits/chosen": -1.1281955242156982, |
|
"logits/rejected": -0.9843631386756897, |
|
"logps/chosen": -28.948612213134766, |
|
"logps/rejected": -37.4376335144043, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19420504570007324, |
|
"rewards/margins": 3.627711772918701, |
|
"rewards/rejected": -3.8219170570373535, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 6.267240444464727, |
|
"learning_rate": 2.73371726895e-07, |
|
"logits/chosen": -1.1884928941726685, |
|
"logits/rejected": -1.0611791610717773, |
|
"logps/chosen": -29.869997024536133, |
|
"logps/rejected": -49.20811462402344, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011849135160446167, |
|
"rewards/margins": 4.665461540222168, |
|
"rewards/rejected": -4.6536126136779785, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.097777777777778, |
|
"grad_norm": 6.022136138537939, |
|
"learning_rate": 2.7078124240439793e-07, |
|
"logits/chosen": -1.1008820533752441, |
|
"logits/rejected": -0.9790475368499756, |
|
"logps/chosen": -29.616289138793945, |
|
"logps/rejected": -57.20648193359375, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5932025909423828, |
|
"rewards/margins": 6.053226947784424, |
|
"rewards/rejected": -6.64642858505249, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.1096296296296297, |
|
"grad_norm": 6.379960194971949, |
|
"learning_rate": 2.68188509100236e-07, |
|
"logits/chosen": -1.0663186311721802, |
|
"logits/rejected": -0.994686484336853, |
|
"logps/chosen": -26.227067947387695, |
|
"logps/rejected": -50.95429229736328, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13309167325496674, |
|
"rewards/margins": 4.429349422454834, |
|
"rewards/rejected": -4.562440872192383, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.1214814814814815, |
|
"grad_norm": 7.642435740805011, |
|
"learning_rate": 2.6559380755158206e-07, |
|
"logits/chosen": -1.1984007358551025, |
|
"logits/rejected": -1.1312189102172852, |
|
"logps/chosen": -29.640098571777344, |
|
"logps/rejected": -48.15163040161133, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03109852969646454, |
|
"rewards/margins": 4.580999851226807, |
|
"rewards/rejected": -4.61209774017334, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 7.309302464370304, |
|
"learning_rate": 2.629974185404951e-07, |
|
"logits/chosen": -1.232039451599121, |
|
"logits/rejected": -1.1574738025665283, |
|
"logps/chosen": -24.592525482177734, |
|
"logps/rejected": -58.08824157714844, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.607149600982666, |
|
"rewards/margins": 5.233615875244141, |
|
"rewards/rejected": -5.840765476226807, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.145185185185185, |
|
"grad_norm": 7.918401262658898, |
|
"learning_rate": 2.603996230316402e-07, |
|
"logits/chosen": -1.1730706691741943, |
|
"logits/rejected": -1.1893783807754517, |
|
"logps/chosen": -20.52701187133789, |
|
"logps/rejected": -32.62423324584961, |
|
"loss": 0.093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2257765233516693, |
|
"rewards/margins": 3.6342880725860596, |
|
"rewards/rejected": -3.4085114002227783, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.157037037037037, |
|
"grad_norm": 8.076843746703107, |
|
"learning_rate": 2.5780070214188474e-07, |
|
"logits/chosen": -1.2444607019424438, |
|
"logits/rejected": -1.1096103191375732, |
|
"logps/chosen": -33.14277267456055, |
|
"logps/rejected": -46.21152114868164, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3877983093261719, |
|
"rewards/margins": 3.8209316730499268, |
|
"rewards/rejected": -4.2087297439575195, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.168888888888889, |
|
"grad_norm": 7.21014521039241, |
|
"learning_rate": 2.552009371098778e-07, |
|
"logits/chosen": -1.132177472114563, |
|
"logits/rejected": -1.0657352209091187, |
|
"logps/chosen": -27.557518005371094, |
|
"logps/rejected": -44.8818473815918, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.03888387978076935, |
|
"rewards/margins": 4.166874885559082, |
|
"rewards/rejected": -4.205758571624756, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 7.31003315950285, |
|
"learning_rate": 2.5260060926561604e-07, |
|
"logits/chosen": -1.1547397375106812, |
|
"logits/rejected": -1.0553665161132812, |
|
"logps/chosen": -22.003814697265625, |
|
"logps/rejected": -42.98273849487305, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14262710511684418, |
|
"rewards/margins": 4.841116905212402, |
|
"rewards/rejected": -4.9837446212768555, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.1925925925925926, |
|
"grad_norm": 8.27339627937372, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.2605483531951904, |
|
"logits/rejected": -1.0690468549728394, |
|
"logps/chosen": -28.908740997314453, |
|
"logps/rejected": -40.10096740722656, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.43739017844200134, |
|
"rewards/margins": 3.4248218536376953, |
|
"rewards/rejected": -3.8622121810913086, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.2044444444444444, |
|
"grad_norm": 8.253216832927258, |
|
"learning_rate": 2.4739939073438393e-07, |
|
"logits/chosen": -1.3061436414718628, |
|
"logits/rejected": -1.1886006593704224, |
|
"logps/chosen": -33.44011688232422, |
|
"logps/rejected": -46.8795166015625, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5726553201675415, |
|
"rewards/margins": 3.8218576908111572, |
|
"rewards/rejected": -4.39451265335083, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.216296296296296, |
|
"grad_norm": 7.807015119173489, |
|
"learning_rate": 2.4479906289012216e-07, |
|
"logits/chosen": -1.345091462135315, |
|
"logits/rejected": -1.0644184350967407, |
|
"logps/chosen": -25.767536163330078, |
|
"logps/rejected": -41.148502349853516, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5413724780082703, |
|
"rewards/margins": 4.547415256500244, |
|
"rewards/rejected": -4.006042957305908, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 8.268473966183542, |
|
"learning_rate": 2.421992978581152e-07, |
|
"logits/chosen": -1.2509685754776, |
|
"logits/rejected": -1.1202762126922607, |
|
"logps/chosen": -26.480911254882812, |
|
"logps/rejected": -41.798858642578125, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4163511097431183, |
|
"rewards/margins": 4.190377235412598, |
|
"rewards/rejected": -4.606728553771973, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 6.31545694362126, |
|
"learning_rate": 2.3960037696835987e-07, |
|
"logits/chosen": -0.9931889772415161, |
|
"logits/rejected": -0.9487002491950989, |
|
"logps/chosen": -23.28666877746582, |
|
"logps/rejected": -45.82819366455078, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22770199179649353, |
|
"rewards/margins": 5.227255344390869, |
|
"rewards/rejected": -5.454957008361816, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.251851851851852, |
|
"grad_norm": 8.198173492670941, |
|
"learning_rate": 2.3700258145950493e-07, |
|
"logits/chosen": -1.2542146444320679, |
|
"logits/rejected": -1.296125888824463, |
|
"logps/chosen": -23.325332641601562, |
|
"logps/rejected": -42.396663665771484, |
|
"loss": 0.074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37019920349121094, |
|
"rewards/margins": 4.538805961608887, |
|
"rewards/rejected": -4.909005165100098, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.2637037037037038, |
|
"grad_norm": 6.252335723496194, |
|
"learning_rate": 2.3440619244841794e-07, |
|
"logits/chosen": -1.0998159646987915, |
|
"logits/rejected": -1.0990605354309082, |
|
"logps/chosen": -24.507465362548828, |
|
"logps/rejected": -36.9913330078125, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07138313353061676, |
|
"rewards/margins": 3.743443727493286, |
|
"rewards/rejected": -3.67206072807312, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 5.937599917562406, |
|
"learning_rate": 2.3181149089976404e-07, |
|
"logits/chosen": -1.1160556077957153, |
|
"logits/rejected": -0.9888994693756104, |
|
"logps/chosen": -25.562957763671875, |
|
"logps/rejected": -44.06254959106445, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11315414309501648, |
|
"rewards/margins": 5.40950345993042, |
|
"rewards/rejected": -5.52265739440918, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.2874074074074073, |
|
"grad_norm": 8.140792637653023, |
|
"learning_rate": 2.2921875759560207e-07, |
|
"logits/chosen": -1.2146611213684082, |
|
"logits/rejected": -1.1461243629455566, |
|
"logps/chosen": -36.22383499145508, |
|
"logps/rejected": -46.22894287109375, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8142991065979004, |
|
"rewards/margins": 4.104118347167969, |
|
"rewards/rejected": -4.918417930603027, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 2.299259259259259, |
|
"grad_norm": 7.224664725024332, |
|
"learning_rate": 2.2662827310499995e-07, |
|
"logits/chosen": -1.0874426364898682, |
|
"logits/rejected": -0.9829124212265015, |
|
"logps/chosen": -24.988603591918945, |
|
"logps/rejected": -42.57012939453125, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10310526937246323, |
|
"rewards/margins": 5.060862064361572, |
|
"rewards/rejected": -4.957756996154785, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 2.311111111111111, |
|
"grad_norm": 7.027603500584767, |
|
"learning_rate": 2.2404031775367332e-07, |
|
"logits/chosen": -1.1362197399139404, |
|
"logits/rejected": -1.0883052349090576, |
|
"logps/chosen": -24.717567443847656, |
|
"logps/rejected": -43.55390167236328, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07100862264633179, |
|
"rewards/margins": 4.886796474456787, |
|
"rewards/rejected": -4.815788269042969, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 6.3481105853123, |
|
"learning_rate": 2.2145517159365043e-07, |
|
"logits/chosen": -1.2440788745880127, |
|
"logits/rejected": -1.0895586013793945, |
|
"logps/chosen": -27.22349739074707, |
|
"logps/rejected": -39.78349304199219, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12459969520568848, |
|
"rewards/margins": 4.133920192718506, |
|
"rewards/rejected": -4.0093207359313965, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.334814814814815, |
|
"grad_norm": 8.448014970739372, |
|
"learning_rate": 2.1887311437296684e-07, |
|
"logits/chosen": -1.2059340476989746, |
|
"logits/rejected": -1.1843221187591553, |
|
"logps/chosen": -22.853811264038086, |
|
"logps/rejected": -32.71154022216797, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47247427701950073, |
|
"rewards/margins": 3.9881787300109863, |
|
"rewards/rejected": -3.51570463180542, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 2.3466666666666667, |
|
"grad_norm": 8.053586024276273, |
|
"learning_rate": 2.162944255053928e-07, |
|
"logits/chosen": -1.1554303169250488, |
|
"logits/rejected": -1.0401800870895386, |
|
"logps/chosen": -20.67418670654297, |
|
"logps/rejected": -37.24845504760742, |
|
"loss": 0.0809, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2763448655605316, |
|
"rewards/margins": 4.477565288543701, |
|
"rewards/rejected": -4.201220512390137, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.3585185185185185, |
|
"grad_norm": 7.516398498619182, |
|
"learning_rate": 2.137193840401968e-07, |
|
"logits/chosen": -1.1824381351470947, |
|
"logits/rejected": -1.1074461936950684, |
|
"logps/chosen": -28.55365562438965, |
|
"logps/rejected": -41.09587478637695, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2404264211654663, |
|
"rewards/margins": 3.8725597858428955, |
|
"rewards/rejected": -3.6321334838867188, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 5.954177017572196, |
|
"learning_rate": 2.1114826863194878e-07, |
|
"logits/chosen": -1.24180269241333, |
|
"logits/rejected": -1.0925354957580566, |
|
"logps/chosen": -28.197025299072266, |
|
"logps/rejected": -46.81939697265625, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15689772367477417, |
|
"rewards/margins": 5.056156635284424, |
|
"rewards/rejected": -5.213054180145264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.3822222222222225, |
|
"grad_norm": 5.991252280343694, |
|
"learning_rate": 2.0858135751036568e-07, |
|
"logits/chosen": -1.222536325454712, |
|
"logits/rejected": -1.1197445392608643, |
|
"logps/chosen": -32.660709381103516, |
|
"logps/rejected": -46.89257049560547, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014814764261245728, |
|
"rewards/margins": 5.237975120544434, |
|
"rewards/rejected": -5.223160743713379, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 2.3940740740740742, |
|
"grad_norm": 6.65615573416704, |
|
"learning_rate": 2.060189284502037e-07, |
|
"logits/chosen": -1.1877946853637695, |
|
"logits/rejected": -1.1109426021575928, |
|
"logps/chosen": -25.55805206298828, |
|
"logps/rejected": -44.239295959472656, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14644792675971985, |
|
"rewards/margins": 4.83575439453125, |
|
"rewards/rejected": -4.689306259155273, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 2.405925925925926, |
|
"grad_norm": 6.275499946646439, |
|
"learning_rate": 2.0346125874119838e-07, |
|
"logits/chosen": -1.132055401802063, |
|
"logits/rejected": -1.0429214239120483, |
|
"logps/chosen": -24.973257064819336, |
|
"logps/rejected": -42.17146682739258, |
|
"loss": 0.071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11066167056560516, |
|
"rewards/margins": 4.5910515785217285, |
|
"rewards/rejected": -4.7017130851745605, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 7.65769891944596, |
|
"learning_rate": 2.0090862515805895e-07, |
|
"logits/chosen": -1.0738351345062256, |
|
"logits/rejected": -0.8972642421722412, |
|
"logps/chosen": -33.31107711791992, |
|
"logps/rejected": -41.709693908691406, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21268025040626526, |
|
"rewards/margins": 4.644548416137695, |
|
"rewards/rejected": -4.857229232788086, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 2.4296296296296296, |
|
"grad_norm": 7.640686179230129, |
|
"learning_rate": 1.983613039305173e-07, |
|
"logits/chosen": -1.2996752262115479, |
|
"logits/rejected": -1.12294340133667, |
|
"logps/chosen": -18.794048309326172, |
|
"logps/rejected": -45.74852752685547, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16848334670066833, |
|
"rewards/margins": 4.915053367614746, |
|
"rewards/rejected": -5.0835371017456055, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.4414814814814814, |
|
"grad_norm": 7.524471411959897, |
|
"learning_rate": 1.9581957071343588e-07, |
|
"logits/chosen": -1.0391274690628052, |
|
"logits/rejected": -0.9014835357666016, |
|
"logps/chosen": -33.915252685546875, |
|
"logps/rejected": -57.86189270019531, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4224894046783447, |
|
"rewards/margins": 4.6927666664123535, |
|
"rewards/rejected": -5.115255832672119, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 2.453333333333333, |
|
"grad_norm": 6.9279059385356305, |
|
"learning_rate": 1.9328370055697832e-07, |
|
"logits/chosen": -1.1469345092773438, |
|
"logits/rejected": -0.9380808472633362, |
|
"logps/chosen": -24.10541343688965, |
|
"logps/rejected": -44.4921760559082, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29525160789489746, |
|
"rewards/margins": 4.89801549911499, |
|
"rewards/rejected": -4.602763652801514, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 6.54091678469529, |
|
"learning_rate": 1.907539678768453e-07, |
|
"logits/chosen": -1.1986242532730103, |
|
"logits/rejected": -1.1000490188598633, |
|
"logps/chosen": -22.64141273498535, |
|
"logps/rejected": -53.74283981323242, |
|
"loss": 0.068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23207074403762817, |
|
"rewards/margins": 5.020073413848877, |
|
"rewards/rejected": -5.2521443367004395, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 2.477037037037037, |
|
"grad_norm": 7.3835745720901365, |
|
"learning_rate": 1.8823064642457876e-07, |
|
"logits/chosen": -1.1322101354599, |
|
"logits/rejected": -1.0012404918670654, |
|
"logps/chosen": -25.564584732055664, |
|
"logps/rejected": -52.52565002441406, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2706539034843445, |
|
"rewards/margins": 4.544902801513672, |
|
"rewards/rejected": -4.815556526184082, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 2.488888888888889, |
|
"grad_norm": 6.037126217772019, |
|
"learning_rate": 1.8571400925793852e-07, |
|
"logits/chosen": -1.32914137840271, |
|
"logits/rejected": -1.199539303779602, |
|
"logps/chosen": -27.011600494384766, |
|
"logps/rejected": -42.806114196777344, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05861341953277588, |
|
"rewards/margins": 4.113726615905762, |
|
"rewards/rejected": -4.055113315582275, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.5007407407407407, |
|
"grad_norm": 6.792003028800643, |
|
"learning_rate": 1.8320432871135376e-07, |
|
"logits/chosen": -0.9643785357475281, |
|
"logits/rejected": -0.8642684817314148, |
|
"logps/chosen": -32.56407928466797, |
|
"logps/rejected": -48.981529235839844, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09449410438537598, |
|
"rewards/margins": 4.1956257820129395, |
|
"rewards/rejected": -4.2901201248168945, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 6.652434536599441, |
|
"learning_rate": 1.8070187636645237e-07, |
|
"logits/chosen": -1.1183323860168457, |
|
"logits/rejected": -1.0643121004104614, |
|
"logps/chosen": -23.476839065551758, |
|
"logps/rejected": -46.453697204589844, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05248948931694031, |
|
"rewards/margins": 4.479131698608398, |
|
"rewards/rejected": -4.426641941070557, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.5244444444444447, |
|
"grad_norm": 6.873490871799767, |
|
"learning_rate": 1.782069230226725e-07, |
|
"logits/chosen": -0.9355219602584839, |
|
"logits/rejected": -0.8760642409324646, |
|
"logps/chosen": -26.840740203857422, |
|
"logps/rejected": -46.565147399902344, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27372512221336365, |
|
"rewards/margins": 4.666114330291748, |
|
"rewards/rejected": -4.9398393630981445, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 2.536296296296296, |
|
"grad_norm": 6.477809311744379, |
|
"learning_rate": 1.7571973866795813e-07, |
|
"logits/chosen": -1.3275456428527832, |
|
"logits/rejected": -1.1785155534744263, |
|
"logps/chosen": -19.671016693115234, |
|
"logps/rejected": -40.520137786865234, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06844872236251831, |
|
"rewards/margins": 4.899576663970947, |
|
"rewards/rejected": -4.8311285972595215, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.5481481481481483, |
|
"grad_norm": 4.940048002831371, |
|
"learning_rate": 1.7324059244954292e-07, |
|
"logits/chosen": -1.461755633354187, |
|
"logits/rejected": -1.3273966312408447, |
|
"logps/chosen": -23.988277435302734, |
|
"logps/rejected": -35.3886604309082, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3582080900669098, |
|
"rewards/margins": 4.515974044799805, |
|
"rewards/rejected": -4.874181747436523, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 9.005658987409907, |
|
"learning_rate": 1.7076975264482433e-07, |
|
"logits/chosen": -1.2200323343276978, |
|
"logits/rejected": -1.0738322734832764, |
|
"logps/chosen": -22.159700393676758, |
|
"logps/rejected": -41.98440170288086, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023108944296836853, |
|
"rewards/margins": 4.085160255432129, |
|
"rewards/rejected": -4.062050819396973, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 2.571851851851852, |
|
"grad_norm": 6.115258133963013, |
|
"learning_rate": 1.6830748663233303e-07, |
|
"logits/chosen": -1.135589599609375, |
|
"logits/rejected": -1.0998283624649048, |
|
"logps/chosen": -22.15255355834961, |
|
"logps/rejected": -39.37363815307617, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2580828368663788, |
|
"rewards/margins": 4.229098796844482, |
|
"rewards/rejected": -4.487181186676025, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 2.5837037037037036, |
|
"grad_norm": 7.594741719247832, |
|
"learning_rate": 1.6585406086279846e-07, |
|
"logits/chosen": -1.3007519245147705, |
|
"logits/rejected": -1.258547306060791, |
|
"logps/chosen": -29.01621437072754, |
|
"logps/rejected": -51.67272186279297, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06502366065979004, |
|
"rewards/margins": 5.339412212371826, |
|
"rewards/rejected": -5.274388313293457, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 2.5955555555555554, |
|
"grad_norm": 5.212981266507165, |
|
"learning_rate": 1.6340974083031523e-07, |
|
"logits/chosen": -1.2680379152297974, |
|
"logits/rejected": -1.2023954391479492, |
|
"logps/chosen": -25.777963638305664, |
|
"logps/rejected": -38.38170623779297, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02264055609703064, |
|
"rewards/margins": 3.978463649749756, |
|
"rewards/rejected": -3.9558229446411133, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 5.672295808616577, |
|
"learning_rate": 1.6097479104361326e-07, |
|
"logits/chosen": -1.2693517208099365, |
|
"logits/rejected": -1.2250739336013794, |
|
"logps/chosen": -21.100271224975586, |
|
"logps/rejected": -41.79471969604492, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08915658295154572, |
|
"rewards/margins": 4.542138576507568, |
|
"rewards/rejected": -4.452981948852539, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.6192592592592594, |
|
"grad_norm": 6.347499166452346, |
|
"learning_rate": 1.5854947499743413e-07, |
|
"logits/chosen": -1.0178323984146118, |
|
"logits/rejected": -0.9484214186668396, |
|
"logps/chosen": -18.72942543029785, |
|
"logps/rejected": -43.50739288330078, |
|
"loss": 0.058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00246235728263855, |
|
"rewards/margins": 5.049181938171387, |
|
"rewards/rejected": -5.046720027923584, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 2.631111111111111, |
|
"grad_norm": 7.517395617419555, |
|
"learning_rate": 1.5613405514401757e-07, |
|
"logits/chosen": -1.3176552057266235, |
|
"logits/rejected": -1.2037431001663208, |
|
"logps/chosen": -23.663074493408203, |
|
"logps/rejected": -38.63740158081055, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46497219800949097, |
|
"rewards/margins": 3.637241840362549, |
|
"rewards/rejected": -4.1022138595581055, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 2.642962962962963, |
|
"grad_norm": 5.580464995595371, |
|
"learning_rate": 1.537287928647002e-07, |
|
"logits/chosen": -1.1343742609024048, |
|
"logits/rejected": -1.0372800827026367, |
|
"logps/chosen": -24.60474395751953, |
|
"logps/rejected": -35.45951843261719, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19021296501159668, |
|
"rewards/margins": 3.999257802963257, |
|
"rewards/rejected": -4.1894707679748535, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 6.8709626079577175, |
|
"learning_rate": 1.513339484416309e-07, |
|
"logits/chosen": -1.1663920879364014, |
|
"logits/rejected": -1.151513695716858, |
|
"logps/chosen": -34.081424713134766, |
|
"logps/rejected": -52.950035095214844, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6248299479484558, |
|
"rewards/margins": 5.083865165710449, |
|
"rewards/rejected": -5.708695411682129, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 5.241170241551687, |
|
"learning_rate": 1.489497810296046e-07, |
|
"logits/chosen": -1.1173107624053955, |
|
"logits/rejected": -1.0356335639953613, |
|
"logps/chosen": -23.928882598876953, |
|
"logps/rejected": -59.75672912597656, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.154710054397583, |
|
"rewards/margins": 6.490203857421875, |
|
"rewards/rejected": -6.644913673400879, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.6785185185185183, |
|
"grad_norm": 6.118871434229746, |
|
"learning_rate": 1.4657654862801797e-07, |
|
"logits/chosen": -1.1692712306976318, |
|
"logits/rejected": -1.1598937511444092, |
|
"logps/chosen": -21.213607788085938, |
|
"logps/rejected": -43.659019470214844, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0038762539625167847, |
|
"rewards/margins": 4.2396321296691895, |
|
"rewards/rejected": -4.243508338928223, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 2.6903703703703705, |
|
"grad_norm": 6.573686325728602, |
|
"learning_rate": 1.4421450805295082e-07, |
|
"logits/chosen": -1.3742166757583618, |
|
"logits/rejected": -1.2483296394348145, |
|
"logps/chosen": -26.414283752441406, |
|
"logps/rejected": -36.898033142089844, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4164190888404846, |
|
"rewards/margins": 3.4958038330078125, |
|
"rewards/rejected": -3.0793848037719727, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 6.627117841873176, |
|
"learning_rate": 1.418639149093748e-07, |
|
"logits/chosen": -1.252882719039917, |
|
"logits/rejected": -1.1287035942077637, |
|
"logps/chosen": -27.196077346801758, |
|
"logps/rejected": -36.04934310913086, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3088911473751068, |
|
"rewards/margins": 3.228538990020752, |
|
"rewards/rejected": -3.5374302864074707, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 2.714074074074074, |
|
"grad_norm": 5.7383606439736985, |
|
"learning_rate": 1.3952502356349323e-07, |
|
"logits/chosen": -1.134902000427246, |
|
"logits/rejected": -1.048799753189087, |
|
"logps/chosen": -24.576427459716797, |
|
"logps/rejected": -45.68292236328125, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00011165440082550049, |
|
"rewards/margins": 5.5121378898620605, |
|
"rewards/rejected": -5.512249946594238, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 2.725925925925926, |
|
"grad_norm": 7.011579914365523, |
|
"learning_rate": 1.371980871152157e-07, |
|
"logits/chosen": -1.0634100437164307, |
|
"logits/rejected": -0.9104180335998535, |
|
"logps/chosen": -29.859907150268555, |
|
"logps/rejected": -50.70886993408203, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16749510169029236, |
|
"rewards/margins": 5.730169773101807, |
|
"rewards/rejected": -5.5626749992370605, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.7377777777777776, |
|
"grad_norm": 6.154624592473375, |
|
"learning_rate": 1.3488335737076911e-07, |
|
"logits/chosen": -1.196423888206482, |
|
"logits/rejected": -1.0755786895751953, |
|
"logps/chosen": -22.506702423095703, |
|
"logps/rejected": -31.105947494506836, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14943012595176697, |
|
"rewards/margins": 3.1946725845336914, |
|
"rewards/rejected": -3.344102621078491, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 5.957255330795934, |
|
"learning_rate": 1.3258108481544847e-07, |
|
"logits/chosen": -1.1230725049972534, |
|
"logits/rejected": -1.0154623985290527, |
|
"logps/chosen": -32.393314361572266, |
|
"logps/rejected": -46.890968322753906, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3677994906902313, |
|
"rewards/margins": 4.284073829650879, |
|
"rewards/rejected": -4.651873588562012, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 2.7614814814814816, |
|
"grad_norm": 7.438230804694601, |
|
"learning_rate": 1.3029151858651143e-07, |
|
"logits/chosen": -1.351361632347107, |
|
"logits/rejected": -1.2523919343948364, |
|
"logps/chosen": -21.477752685546875, |
|
"logps/rejected": -47.73276138305664, |
|
"loss": 0.072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33381107449531555, |
|
"rewards/margins": 5.271888256072998, |
|
"rewards/rejected": -5.60569953918457, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 2.7733333333333334, |
|
"grad_norm": 6.539977486206468, |
|
"learning_rate": 1.2801490644621788e-07, |
|
"logits/chosen": -0.9469627141952515, |
|
"logits/rejected": -0.7967553734779358, |
|
"logps/chosen": -29.131805419921875, |
|
"logps/rejected": -47.47956085205078, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4405498802661896, |
|
"rewards/margins": 4.784643650054932, |
|
"rewards/rejected": -5.225193500518799, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 2.785185185185185, |
|
"grad_norm": 5.650929076564459, |
|
"learning_rate": 1.257514947550189e-07, |
|
"logits/chosen": -1.1391454935073853, |
|
"logits/rejected": -0.9985545873641968, |
|
"logps/chosen": -19.8972110748291, |
|
"logps/rejected": -33.077980041503906, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12327444553375244, |
|
"rewards/margins": 4.0027875900268555, |
|
"rewards/rejected": -3.8795135021209717, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 7.402429067879936, |
|
"learning_rate": 1.2350152844489688e-07, |
|
"logits/chosen": -1.1549052000045776, |
|
"logits/rejected": -0.9909151792526245, |
|
"logps/chosen": -30.456247329711914, |
|
"logps/rejected": -48.731536865234375, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4372573494911194, |
|
"rewards/margins": 4.651694297790527, |
|
"rewards/rejected": -5.088951587677002, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.8088888888888888, |
|
"grad_norm": 6.734173424308296, |
|
"learning_rate": 1.2126525099286108e-07, |
|
"logits/chosen": -1.180855631828308, |
|
"logits/rejected": -1.2272781133651733, |
|
"logps/chosen": -28.35424041748047, |
|
"logps/rejected": -48.205318450927734, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47708311676979065, |
|
"rewards/margins": 5.187458515167236, |
|
"rewards/rejected": -5.664542198181152, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 2.8207407407407405, |
|
"grad_norm": 6.387888892476844, |
|
"learning_rate": 1.1904290439459971e-07, |
|
"logits/chosen": -1.1783702373504639, |
|
"logits/rejected": -1.0934996604919434, |
|
"logps/chosen": -23.247806549072266, |
|
"logps/rejected": -42.38697814941406, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.272434800863266, |
|
"rewards/margins": 3.895128011703491, |
|
"rewards/rejected": -4.167562484741211, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 2.8325925925925928, |
|
"grad_norm": 5.6141759750684015, |
|
"learning_rate": 1.1683472913829284e-07, |
|
"logits/chosen": -1.2703089714050293, |
|
"logits/rejected": -1.1347819566726685, |
|
"logps/chosen": -36.7236213684082, |
|
"logps/rejected": -49.431922912597656, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3381836414337158, |
|
"rewards/margins": 4.108426094055176, |
|
"rewards/rejected": -4.446609973907471, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 5.932933616519591, |
|
"learning_rate": 1.146409641785882e-07, |
|
"logits/chosen": -1.1102083921432495, |
|
"logits/rejected": -1.0604140758514404, |
|
"logps/chosen": -27.76748275756836, |
|
"logps/rejected": -34.07774353027344, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.2582487463951111, |
|
"rewards/margins": 2.856698513031006, |
|
"rewards/rejected": -3.1149468421936035, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.8562962962962963, |
|
"grad_norm": 6.7530047905552735, |
|
"learning_rate": 1.1246184691074314e-07, |
|
"logits/chosen": -1.2408270835876465, |
|
"logits/rejected": -1.1994930505752563, |
|
"logps/chosen": -28.50021743774414, |
|
"logps/rejected": -49.54254150390625, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0546003133058548, |
|
"rewards/margins": 6.17899227142334, |
|
"rewards/rejected": -6.124391555786133, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 2.868148148148148, |
|
"grad_norm": 7.401984494431854, |
|
"learning_rate": 1.1029761314493518e-07, |
|
"logits/chosen": -1.3563504219055176, |
|
"logits/rejected": -1.2836796045303345, |
|
"logps/chosen": -29.872364044189453, |
|
"logps/rejected": -42.799747467041016, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22778728604316711, |
|
"rewards/margins": 5.134041786193848, |
|
"rewards/rejected": -5.3618292808532715, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 7.471266580413762, |
|
"learning_rate": 1.0814849708074414e-07, |
|
"logits/chosen": -1.128278136253357, |
|
"logits/rejected": -0.9680910706520081, |
|
"logps/chosen": -38.86433792114258, |
|
"logps/rejected": -47.132667541503906, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.25867849588394165, |
|
"rewards/margins": 4.5064005851745605, |
|
"rewards/rejected": -4.247722625732422, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 6.390593039880407, |
|
"learning_rate": 1.0601473128180854e-07, |
|
"logits/chosen": -1.2510465383529663, |
|
"logits/rejected": -1.100001573562622, |
|
"logps/chosen": -33.47804260253906, |
|
"logps/rejected": -41.27080154418945, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0875249058008194, |
|
"rewards/margins": 4.39518404006958, |
|
"rewards/rejected": -4.307658672332764, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.9037037037037035, |
|
"grad_norm": 8.267732345292577, |
|
"learning_rate": 1.0389654665065908e-07, |
|
"logits/chosen": -1.1220481395721436, |
|
"logits/rejected": -1.0034825801849365, |
|
"logps/chosen": -24.331592559814453, |
|
"logps/rejected": -41.46772003173828, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.26567134261131287, |
|
"rewards/margins": 4.682834148406982, |
|
"rewards/rejected": -4.948505401611328, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.9155555555555557, |
|
"grad_norm": 7.488610652410469, |
|
"learning_rate": 1.0179417240373182e-07, |
|
"logits/chosen": -1.176962971687317, |
|
"logits/rejected": -1.1089400053024292, |
|
"logps/chosen": -34.5350341796875, |
|
"logps/rejected": -56.02618408203125, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9151340126991272, |
|
"rewards/margins": 5.155758380889893, |
|
"rewards/rejected": -6.070892333984375, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 2.9274074074074075, |
|
"grad_norm": 6.376533768492628, |
|
"learning_rate": 9.970783604656383e-08, |
|
"logits/chosen": -1.3059768676757812, |
|
"logits/rejected": -1.0361342430114746, |
|
"logps/chosen": -28.046321868896484, |
|
"logps/rejected": -48.62135696411133, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13395918905735016, |
|
"rewards/margins": 5.584090232849121, |
|
"rewards/rejected": -5.718049049377441, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 7.764371689739165, |
|
"learning_rate": 9.763776334917398e-08, |
|
"logits/chosen": -1.3117642402648926, |
|
"logits/rejected": -1.1723650693893433, |
|
"logps/chosen": -28.31963348388672, |
|
"logps/rejected": -37.416561126708984, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4260867238044739, |
|
"rewards/margins": 2.7781217098236084, |
|
"rewards/rejected": -3.2042083740234375, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.951111111111111, |
|
"grad_norm": 6.603531713615615, |
|
"learning_rate": 9.558417832163162e-08, |
|
"logits/chosen": -1.0509438514709473, |
|
"logits/rejected": -1.1028845310211182, |
|
"logps/chosen": -29.35840606689453, |
|
"logps/rejected": -39.08806610107422, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06763426959514618, |
|
"rewards/margins": 4.334118843078613, |
|
"rewards/rejected": -4.401752948760986, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 6.641636931789762, |
|
"learning_rate": 9.354730318981561e-08, |
|
"logits/chosen": -1.269490122795105, |
|
"logits/rejected": -1.1995911598205566, |
|
"logps/chosen": -23.048587799072266, |
|
"logps/rejected": -41.5166015625, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22312739491462708, |
|
"rewards/margins": 4.370500564575195, |
|
"rewards/rejected": -4.5936279296875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.974814814814815, |
|
"grad_norm": 5.554303148575841, |
|
"learning_rate": 9.15273583713663e-08, |
|
"logits/chosen": -1.2579662799835205, |
|
"logits/rejected": -1.0015959739685059, |
|
"logps/chosen": -31.479568481445312, |
|
"logps/rejected": -56.00233459472656, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5022794008255005, |
|
"rewards/margins": 6.369531154632568, |
|
"rewards/rejected": -6.871809959411621, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 5.613495199138643, |
|
"learning_rate": 8.95245624518336e-08, |
|
"logits/chosen": -1.2209105491638184, |
|
"logits/rejected": -1.217021107673645, |
|
"logps/chosen": -25.06351089477539, |
|
"logps/rejected": -47.17867660522461, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4572719633579254, |
|
"rewards/margins": 4.925302028656006, |
|
"rewards/rejected": -5.382573127746582, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 2.9985185185185186, |
|
"grad_norm": 5.721091066167364, |
|
"learning_rate": 8.753913216102285e-08, |
|
"logits/chosen": -1.257638931274414, |
|
"logits/rejected": -1.1348259449005127, |
|
"logps/chosen": -28.36161231994629, |
|
"logps/rejected": -52.211952209472656, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5001235604286194, |
|
"rewards/margins": 4.936119079589844, |
|
"rewards/rejected": -5.436242580413818, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 3.0103703703703704, |
|
"grad_norm": 6.164342961198106, |
|
"learning_rate": 8.557128234954189e-08, |
|
"logits/chosen": -1.16610848903656, |
|
"logits/rejected": -1.0525445938110352, |
|
"logps/chosen": -19.37337875366211, |
|
"logps/rejected": -44.04081344604492, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3864176273345947, |
|
"rewards/margins": 5.418819427490234, |
|
"rewards/rejected": -5.805237293243408, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 3.022222222222222, |
|
"grad_norm": 4.836985245782948, |
|
"learning_rate": 8.362122596555088e-08, |
|
"logits/chosen": -1.1399970054626465, |
|
"logits/rejected": -0.9710614681243896, |
|
"logps/chosen": -23.326759338378906, |
|
"logps/rejected": -46.79590606689453, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2866979241371155, |
|
"rewards/margins": 6.633603572845459, |
|
"rewards/rejected": -6.3469061851501465, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.034074074074074, |
|
"grad_norm": 5.427568975360207, |
|
"learning_rate": 8.16891740317189e-08, |
|
"logits/chosen": -1.2294830083847046, |
|
"logits/rejected": -1.1226603984832764, |
|
"logps/chosen": -23.196685791015625, |
|
"logps/rejected": -38.58136749267578, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16551783680915833, |
|
"rewards/margins": 4.354981899261475, |
|
"rewards/rejected": -4.520500183105469, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.0459259259259257, |
|
"grad_norm": 5.924541071404178, |
|
"learning_rate": 7.977533562238838e-08, |
|
"logits/chosen": -1.1663788557052612, |
|
"logits/rejected": -1.1404701471328735, |
|
"logps/chosen": -26.776004791259766, |
|
"logps/rejected": -50.571266174316406, |
|
"loss": 0.059, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.18248632550239563, |
|
"rewards/margins": 5.887378692626953, |
|
"rewards/rejected": -6.069864273071289, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 3.057777777777778, |
|
"grad_norm": 4.128731375178606, |
|
"learning_rate": 7.787991784094999e-08, |
|
"logits/chosen": -1.2448476552963257, |
|
"logits/rejected": -1.0964651107788086, |
|
"logps/chosen": -29.85052490234375, |
|
"logps/rejected": -62.34690856933594, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4742766320705414, |
|
"rewards/margins": 5.80230712890625, |
|
"rewards/rejected": -6.276583671569824, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.0696296296296297, |
|
"grad_norm": 6.5179983840331825, |
|
"learning_rate": 7.60031257974316e-08, |
|
"logits/chosen": -1.1081359386444092, |
|
"logits/rejected": -1.0185449123382568, |
|
"logps/chosen": -23.463979721069336, |
|
"logps/rejected": -50.03909683227539, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14104682207107544, |
|
"rewards/margins": 5.487791538238525, |
|
"rewards/rejected": -5.628839015960693, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 3.0814814814814815, |
|
"grad_norm": 6.670813820042167, |
|
"learning_rate": 7.414516258630244e-08, |
|
"logits/chosen": -1.0931766033172607, |
|
"logits/rejected": -0.9176234602928162, |
|
"logps/chosen": -35.09284210205078, |
|
"logps/rejected": -56.267723083496094, |
|
"loss": 0.0615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34566253423690796, |
|
"rewards/margins": 5.847842216491699, |
|
"rewards/rejected": -6.193504810333252, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0933333333333333, |
|
"grad_norm": 4.886020098171949, |
|
"learning_rate": 7.230622926449564e-08, |
|
"logits/chosen": -1.2389843463897705, |
|
"logits/rejected": -1.1709716320037842, |
|
"logps/chosen": -23.021934509277344, |
|
"logps/rejected": -42.478797912597656, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25014615058898926, |
|
"rewards/margins": 5.359426975250244, |
|
"rewards/rejected": -5.6095733642578125, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 3.105185185185185, |
|
"grad_norm": 5.470390367743688, |
|
"learning_rate": 7.048652482965078e-08, |
|
"logits/chosen": -1.250532865524292, |
|
"logits/rejected": -1.098189353942871, |
|
"logps/chosen": -33.6146354675293, |
|
"logps/rejected": -41.64539337158203, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16313320398330688, |
|
"rewards/margins": 4.307170391082764, |
|
"rewards/rejected": -4.470303535461426, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 3.117037037037037, |
|
"grad_norm": 4.836356334775007, |
|
"learning_rate": 6.868624619858021e-08, |
|
"logits/chosen": -1.4147872924804688, |
|
"logits/rejected": -1.4524210691452026, |
|
"logps/chosen": -28.40629768371582, |
|
"logps/rejected": -56.72626495361328, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03009369969367981, |
|
"rewards/margins": 5.194394111633301, |
|
"rewards/rejected": -5.164300918579102, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 3.128888888888889, |
|
"grad_norm": 4.750367218060603, |
|
"learning_rate": 6.690558818595943e-08, |
|
"logits/chosen": -1.2358546257019043, |
|
"logits/rejected": -1.1999270915985107, |
|
"logps/chosen": -25.05208969116211, |
|
"logps/rejected": -48.712806701660156, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.637313723564148, |
|
"rewards/margins": 5.243877410888672, |
|
"rewards/rejected": -5.881191253662109, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.140740740740741, |
|
"grad_norm": 4.863983890990079, |
|
"learning_rate": 6.514474348324581e-08, |
|
"logits/chosen": -1.2671034336090088, |
|
"logits/rejected": -1.1254373788833618, |
|
"logps/chosen": -32.094966888427734, |
|
"logps/rejected": -52.297821044921875, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4840225875377655, |
|
"rewards/margins": 5.210573196411133, |
|
"rewards/rejected": -5.6945953369140625, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.1525925925925926, |
|
"grad_norm": 6.337695693323137, |
|
"learning_rate": 6.340390263782655e-08, |
|
"logits/chosen": -1.2698873281478882, |
|
"logits/rejected": -1.172045111656189, |
|
"logps/chosen": -24.47865867614746, |
|
"logps/rejected": -54.05537796020508, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3584892153739929, |
|
"rewards/margins": 5.420907020568848, |
|
"rewards/rejected": -5.779396057128906, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 3.1644444444444444, |
|
"grad_norm": 6.315515433549729, |
|
"learning_rate": 6.168325403239913e-08, |
|
"logits/chosen": -1.2651307582855225, |
|
"logits/rejected": -1.1162527799606323, |
|
"logps/chosen": -19.784488677978516, |
|
"logps/rejected": -40.73728942871094, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27537021040916443, |
|
"rewards/margins": 5.313858985900879, |
|
"rewards/rejected": -5.038488388061523, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 3.176296296296296, |
|
"grad_norm": 4.582040973118046, |
|
"learning_rate": 5.998298386458545e-08, |
|
"logits/chosen": -1.0796051025390625, |
|
"logits/rejected": -1.0264118909835815, |
|
"logps/chosen": -27.581031799316406, |
|
"logps/rejected": -49.427703857421875, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022144198417663574, |
|
"rewards/margins": 5.080024719238281, |
|
"rewards/rejected": -5.057880878448486, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 3.188148148148148, |
|
"grad_norm": 4.952404939534042, |
|
"learning_rate": 5.830327612678265e-08, |
|
"logits/chosen": -1.0570693016052246, |
|
"logits/rejected": -1.0790140628814697, |
|
"logps/chosen": -27.206192016601562, |
|
"logps/rejected": -52.819984436035156, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8033032417297363, |
|
"rewards/margins": 4.914515495300293, |
|
"rewards/rejected": -5.717819690704346, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 5.724512806119854, |
|
"learning_rate": 5.6644312586253044e-08, |
|
"logits/chosen": -1.0734919309616089, |
|
"logits/rejected": -1.0849241018295288, |
|
"logps/chosen": -41.63764572143555, |
|
"logps/rejected": -48.729576110839844, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17860059440135956, |
|
"rewards/margins": 4.616375923156738, |
|
"rewards/rejected": -4.794977188110352, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.211851851851852, |
|
"grad_norm": 5.991455888598502, |
|
"learning_rate": 5.5006272765454056e-08, |
|
"logits/chosen": -1.2988901138305664, |
|
"logits/rejected": -1.1308969259262085, |
|
"logps/chosen": -22.436080932617188, |
|
"logps/rejected": -34.09817123413086, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.005201712250709534, |
|
"rewards/margins": 3.529590606689453, |
|
"rewards/rejected": -3.534792423248291, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 3.2237037037037037, |
|
"grad_norm": 5.413534996418431, |
|
"learning_rate": 5.338933392261158e-08, |
|
"logits/chosen": -1.222093105316162, |
|
"logits/rejected": -1.1171449422836304, |
|
"logps/chosen": -26.16643714904785, |
|
"logps/rejected": -42.16415023803711, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20804953575134277, |
|
"rewards/margins": 5.081421852111816, |
|
"rewards/rejected": -5.2894721031188965, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 3.2355555555555555, |
|
"grad_norm": 5.91458057536832, |
|
"learning_rate": 5.1793671032538206e-08, |
|
"logits/chosen": -1.2229275703430176, |
|
"logits/rejected": -1.3230491876602173, |
|
"logps/chosen": -23.901247024536133, |
|
"logps/rejected": -45.79841995239258, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.171402707695961, |
|
"rewards/margins": 4.954162120819092, |
|
"rewards/rejected": -5.125565052032471, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 3.2474074074074073, |
|
"grad_norm": 5.22719369235926, |
|
"learning_rate": 5.021945676769859e-08, |
|
"logits/chosen": -1.2852232456207275, |
|
"logits/rejected": -1.2391951084136963, |
|
"logps/chosen": -20.282339096069336, |
|
"logps/rejected": -42.286293029785156, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.014406859874725342, |
|
"rewards/margins": 4.513213157653809, |
|
"rewards/rejected": -4.5276198387146, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 3.259259259259259, |
|
"grad_norm": 5.73422178803048, |
|
"learning_rate": 4.866686147952387e-08, |
|
"logits/chosen": -1.0481388568878174, |
|
"logits/rejected": -0.9910224676132202, |
|
"logps/chosen": -31.128089904785156, |
|
"logps/rejected": -48.627586364746094, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11383038759231567, |
|
"rewards/margins": 4.8710784912109375, |
|
"rewards/rejected": -4.757248401641846, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.2711111111111113, |
|
"grad_norm": 5.655456397723797, |
|
"learning_rate": 4.71360531799774e-08, |
|
"logits/chosen": -1.1052677631378174, |
|
"logits/rejected": -1.0184680223464966, |
|
"logps/chosen": -36.36450958251953, |
|
"logps/rejected": -51.73442840576172, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5170344114303589, |
|
"rewards/margins": 5.25890588760376, |
|
"rewards/rejected": -5.775939464569092, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.282962962962963, |
|
"grad_norm": 4.996738283026781, |
|
"learning_rate": 4.562719752337349e-08, |
|
"logits/chosen": -1.266676664352417, |
|
"logits/rejected": -1.1158446073532104, |
|
"logps/chosen": -33.958919525146484, |
|
"logps/rejected": -66.85248565673828, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6868615746498108, |
|
"rewards/margins": 6.892773628234863, |
|
"rewards/rejected": -7.579635143280029, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 3.294814814814815, |
|
"grad_norm": 5.1730881424971535, |
|
"learning_rate": 4.4140457788451434e-08, |
|
"logits/chosen": -1.3682211637496948, |
|
"logits/rejected": -1.2177406549453735, |
|
"logps/chosen": -23.593040466308594, |
|
"logps/rejected": -43.28880310058594, |
|
"loss": 0.047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21666671335697174, |
|
"rewards/margins": 4.890883445739746, |
|
"rewards/rejected": -4.674216270446777, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 3.3066666666666666, |
|
"grad_norm": 4.729619192449929, |
|
"learning_rate": 4.267599486070647e-08, |
|
"logits/chosen": -1.2258741855621338, |
|
"logits/rejected": -1.1649140119552612, |
|
"logps/chosen": -31.068470001220703, |
|
"logps/rejected": -36.381038665771484, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24162916839122772, |
|
"rewards/margins": 4.573906898498535, |
|
"rewards/rejected": -4.8155364990234375, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 3.3185185185185184, |
|
"grad_norm": 5.122216550777693, |
|
"learning_rate": 4.1233967214979764e-08, |
|
"logits/chosen": -1.198957920074463, |
|
"logits/rejected": -1.06025230884552, |
|
"logps/chosen": -33.02262496948242, |
|
"logps/rejected": -41.4984130859375, |
|
"loss": 0.049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23480704426765442, |
|
"rewards/margins": 3.949801445007324, |
|
"rewards/rejected": -3.714993953704834, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.33037037037037, |
|
"grad_norm": 4.305629596628497, |
|
"learning_rate": 3.9814530898309356e-08, |
|
"logits/chosen": -1.0878995656967163, |
|
"logits/rejected": -1.0379247665405273, |
|
"logps/chosen": -27.192787170410156, |
|
"logps/rejected": -46.65719223022461, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.029019802808761597, |
|
"rewards/margins": 5.454700946807861, |
|
"rewards/rejected": -5.483720779418945, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 3.3422222222222224, |
|
"grad_norm": 5.169778953020736, |
|
"learning_rate": 3.8417839513043646e-08, |
|
"logits/chosen": -1.2834384441375732, |
|
"logits/rejected": -1.2438150644302368, |
|
"logps/chosen": -30.712045669555664, |
|
"logps/rejected": -37.924110412597656, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27224576473236084, |
|
"rewards/margins": 3.7623844146728516, |
|
"rewards/rejected": -4.034629821777344, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 3.354074074074074, |
|
"grad_norm": 6.097603815404355, |
|
"learning_rate": 3.704404420021956e-08, |
|
"logits/chosen": -1.1656073331832886, |
|
"logits/rejected": -0.950996994972229, |
|
"logps/chosen": -27.072315216064453, |
|
"logps/rejected": -46.62635040283203, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.001886114478111267, |
|
"rewards/margins": 5.3247785568237305, |
|
"rewards/rejected": -5.326663970947266, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 3.365925925925926, |
|
"grad_norm": 5.599744322780303, |
|
"learning_rate": 3.569329362320708e-08, |
|
"logits/chosen": -1.015643835067749, |
|
"logits/rejected": -0.936226487159729, |
|
"logps/chosen": -21.00103187561035, |
|
"logps/rejected": -49.05156326293945, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10715761035680771, |
|
"rewards/margins": 5.087098121643066, |
|
"rewards/rejected": -5.19425630569458, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.3777777777777778, |
|
"grad_norm": 5.412291665519436, |
|
"learning_rate": 3.436573395162179e-08, |
|
"logits/chosen": -1.2125096321105957, |
|
"logits/rejected": -1.0717750787734985, |
|
"logps/chosen": -26.21784782409668, |
|
"logps/rejected": -44.80372619628906, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4322938024997711, |
|
"rewards/margins": 4.486114025115967, |
|
"rewards/rejected": -4.918407440185547, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.3896296296296295, |
|
"grad_norm": 5.068684864066647, |
|
"learning_rate": 3.306150884550732e-08, |
|
"logits/chosen": -1.306767225265503, |
|
"logits/rejected": -1.136150598526001, |
|
"logps/chosen": -28.90319061279297, |
|
"logps/rejected": -48.472164154052734, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3896186947822571, |
|
"rewards/margins": 4.801982879638672, |
|
"rewards/rejected": -5.191601753234863, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 3.4014814814814813, |
|
"grad_norm": 6.257371157657287, |
|
"learning_rate": 3.17807594397895e-08, |
|
"logits/chosen": -1.2118041515350342, |
|
"logits/rejected": -1.007792353630066, |
|
"logps/chosen": -26.383615493774414, |
|
"logps/rejected": -46.10572052001953, |
|
"loss": 0.0542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43773341178894043, |
|
"rewards/margins": 5.824153900146484, |
|
"rewards/rejected": -6.261887550354004, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 3.413333333333333, |
|
"grad_norm": 5.353883051519317, |
|
"learning_rate": 3.052362432900332e-08, |
|
"logits/chosen": -1.447021245956421, |
|
"logits/rejected": -1.2934633493423462, |
|
"logps/chosen": -25.619125366210938, |
|
"logps/rejected": -42.07542037963867, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3246709406375885, |
|
"rewards/margins": 7.011876106262207, |
|
"rewards/rejected": -6.687204360961914, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.4251851851851853, |
|
"grad_norm": 5.399450219209751, |
|
"learning_rate": 2.9290239552295538e-08, |
|
"logits/chosen": -1.0401594638824463, |
|
"logits/rejected": -1.0249950885772705, |
|
"logps/chosen": -32.01249313354492, |
|
"logps/rejected": -38.693145751953125, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22764620184898376, |
|
"rewards/margins": 4.879059314727783, |
|
"rewards/rejected": -4.6514129638671875, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 3.437037037037037, |
|
"grad_norm": 5.942445036249677, |
|
"learning_rate": 2.8080738578703052e-08, |
|
"logits/chosen": -1.2160862684249878, |
|
"logits/rejected": -1.1057730913162231, |
|
"logps/chosen": -26.857769012451172, |
|
"logps/rejected": -49.42009735107422, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0421622097492218, |
|
"rewards/margins": 7.015720844268799, |
|
"rewards/rejected": -6.973557472229004, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.448888888888889, |
|
"grad_norm": 4.452390830898345, |
|
"learning_rate": 2.6895252292709974e-08, |
|
"logits/chosen": -1.0676244497299194, |
|
"logits/rejected": -1.078723669052124, |
|
"logps/chosen": -31.738510131835938, |
|
"logps/rejected": -45.86015319824219, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5407127737998962, |
|
"rewards/margins": 4.980555057525635, |
|
"rewards/rejected": -5.521267890930176, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 3.4607407407407407, |
|
"grad_norm": 6.147853636678421, |
|
"learning_rate": 2.5733908980083984e-08, |
|
"logits/chosen": -1.2384705543518066, |
|
"logits/rejected": -1.112764835357666, |
|
"logps/chosen": -26.170108795166016, |
|
"logps/rejected": -45.731956481933594, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5723249316215515, |
|
"rewards/margins": 4.402219772338867, |
|
"rewards/rejected": -4.974545001983643, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 3.4725925925925925, |
|
"grad_norm": 5.914419745435524, |
|
"learning_rate": 2.4596834313994037e-08, |
|
"logits/chosen": -1.1161627769470215, |
|
"logits/rejected": -1.0215301513671875, |
|
"logps/chosen": -28.129005432128906, |
|
"logps/rejected": -33.972686767578125, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1716342568397522, |
|
"rewards/margins": 4.373476028442383, |
|
"rewards/rejected": -4.201840877532959, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 3.4844444444444447, |
|
"grad_norm": 4.7471616018558285, |
|
"learning_rate": 2.3484151341411018e-08, |
|
"logits/chosen": -1.1082960367202759, |
|
"logits/rejected": -1.0436348915100098, |
|
"logps/chosen": -20.280670166015625, |
|
"logps/rejected": -46.68223190307617, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2738330066204071, |
|
"rewards/margins": 5.264364719390869, |
|
"rewards/rejected": -5.5381975173950195, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 3.4962962962962965, |
|
"grad_norm": 4.718228569099853, |
|
"learning_rate": 2.23959804697921e-08, |
|
"logits/chosen": -1.0989983081817627, |
|
"logits/rejected": -1.0200862884521484, |
|
"logps/chosen": -28.536529541015625, |
|
"logps/rejected": -44.35844421386719, |
|
"loss": 0.039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01516886055469513, |
|
"rewards/margins": 5.231680870056152, |
|
"rewards/rejected": -5.216512680053711, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 3.5081481481481482, |
|
"grad_norm": 4.412160626992289, |
|
"learning_rate": 2.1332439454051277e-08, |
|
"logits/chosen": -1.0349336862564087, |
|
"logits/rejected": -0.9772415161132812, |
|
"logps/chosen": -24.290695190429688, |
|
"logps/rejected": -34.85298538208008, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006944596767425537, |
|
"rewards/margins": 3.6680963039398193, |
|
"rewards/rejected": -3.661151647567749, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 5.698184998134574, |
|
"learning_rate": 2.029364338381656e-08, |
|
"logits/chosen": -1.373365879058838, |
|
"logits/rejected": -1.2929483652114868, |
|
"logps/chosen": -34.31553649902344, |
|
"logps/rejected": -35.5068359375, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09695194661617279, |
|
"rewards/margins": 3.730624198913574, |
|
"rewards/rejected": -3.8275763988494873, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 3.531851851851852, |
|
"grad_norm": 5.166813211580323, |
|
"learning_rate": 1.9279704670975726e-08, |
|
"logits/chosen": -1.0577523708343506, |
|
"logits/rejected": -0.9344998598098755, |
|
"logps/chosen": -25.05517578125, |
|
"logps/rejected": -48.95963668823242, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17365989089012146, |
|
"rewards/margins": 4.252870559692383, |
|
"rewards/rejected": -4.426530838012695, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 3.5437037037037036, |
|
"grad_norm": 4.976330098589956, |
|
"learning_rate": 1.829073303751172e-08, |
|
"logits/chosen": -1.071714162826538, |
|
"logits/rejected": -1.0084483623504639, |
|
"logps/chosen": -20.396150588989258, |
|
"logps/rejected": -38.729373931884766, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11510992050170898, |
|
"rewards/margins": 5.170332431793213, |
|
"rewards/rejected": -5.2854413986206055, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 4.623020185136485, |
|
"learning_rate": 1.732683550362954e-08, |
|
"logits/chosen": -1.06589674949646, |
|
"logits/rejected": -1.0053000450134277, |
|
"logps/chosen": -33.81154251098633, |
|
"logps/rejected": -48.16522216796875, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0654190182685852, |
|
"rewards/margins": 4.919932842254639, |
|
"rewards/rejected": -4.985352516174316, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.5674074074074076, |
|
"grad_norm": 4.373917316257469, |
|
"learning_rate": 1.6388116376174765e-08, |
|
"logits/chosen": -1.1930819749832153, |
|
"logits/rejected": -1.1007626056671143, |
|
"logps/chosen": -24.583969116210938, |
|
"logps/rejected": -48.29629898071289, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5538195371627808, |
|
"rewards/margins": 5.18541145324707, |
|
"rewards/rejected": -5.739231109619141, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 3.5792592592592594, |
|
"grad_norm": 4.944808160984247, |
|
"learning_rate": 1.5474677237346468e-08, |
|
"logits/chosen": -1.1952768564224243, |
|
"logits/rejected": -1.1539109945297241, |
|
"logps/chosen": -29.354717254638672, |
|
"logps/rejected": -49.623294830322266, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31121665239334106, |
|
"rewards/margins": 4.585163116455078, |
|
"rewards/rejected": -4.896379470825195, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 3.591111111111111, |
|
"grad_norm": 6.237582774941322, |
|
"learning_rate": 1.4586616933704527e-08, |
|
"logits/chosen": -1.0483250617980957, |
|
"logits/rejected": -1.0512489080429077, |
|
"logps/chosen": -36.7315788269043, |
|
"logps/rejected": -52.41490173339844, |
|
"loss": 0.063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.060242414474487305, |
|
"rewards/margins": 5.002007484436035, |
|
"rewards/rejected": -4.941765308380127, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 3.602962962962963, |
|
"grad_norm": 5.366887328514776, |
|
"learning_rate": 1.372403156547311e-08, |
|
"logits/chosen": -1.2591538429260254, |
|
"logits/rejected": -1.1872644424438477, |
|
"logps/chosen": -22.69057273864746, |
|
"logps/rejected": -38.499332427978516, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3308228552341461, |
|
"rewards/margins": 4.569196701049805, |
|
"rewards/rejected": -4.900019645690918, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 3.6148148148148147, |
|
"grad_norm": 4.383186056032288, |
|
"learning_rate": 1.2887014476141212e-08, |
|
"logits/chosen": -1.1302443742752075, |
|
"logits/rejected": -1.1017392873764038, |
|
"logps/chosen": -27.243087768554688, |
|
"logps/rejected": -47.09513473510742, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2740994989871979, |
|
"rewards/margins": 6.748981475830078, |
|
"rewards/rejected": -6.474882125854492, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 3.626666666666667, |
|
"grad_norm": 5.520520861273014, |
|
"learning_rate": 1.2075656242361732e-08, |
|
"logits/chosen": -1.1834189891815186, |
|
"logits/rejected": -1.0502477884292603, |
|
"logps/chosen": -24.07543182373047, |
|
"logps/rejected": -44.05875778198242, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1085430383682251, |
|
"rewards/margins": 4.616766452789307, |
|
"rewards/rejected": -4.725309371948242, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 3.6385185185185183, |
|
"grad_norm": 4.5846368218080045, |
|
"learning_rate": 1.1290044664149873e-08, |
|
"logits/chosen": -1.0908325910568237, |
|
"logits/rejected": -1.0090572834014893, |
|
"logps/chosen": -32.33647918701172, |
|
"logps/rejected": -47.15243148803711, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17506128549575806, |
|
"rewards/margins": 4.792283058166504, |
|
"rewards/rejected": -4.967344284057617, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 3.6503703703703705, |
|
"grad_norm": 5.28209891846498, |
|
"learning_rate": 1.0530264755381824e-08, |
|
"logits/chosen": -1.2786378860473633, |
|
"logits/rejected": -1.3132318258285522, |
|
"logps/chosen": -26.759113311767578, |
|
"logps/rejected": -41.227149963378906, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08898818492889404, |
|
"rewards/margins": 3.8004322052001953, |
|
"rewards/rejected": -3.8894202709198, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 3.6622222222222223, |
|
"grad_norm": 4.960907388580732, |
|
"learning_rate": 9.796398734595284e-09, |
|
"logits/chosen": -1.1778481006622314, |
|
"logits/rejected": -1.181472897529602, |
|
"logps/chosen": -20.444726943969727, |
|
"logps/rejected": -33.29534149169922, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2902683913707733, |
|
"rewards/margins": 3.6233019828796387, |
|
"rewards/rejected": -3.9135704040527344, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 3.674074074074074, |
|
"grad_norm": 5.737646906284586, |
|
"learning_rate": 9.088526016092141e-09, |
|
"logits/chosen": -1.1990212202072144, |
|
"logits/rejected": -1.1145985126495361, |
|
"logps/chosen": -23.687454223632812, |
|
"logps/rejected": -40.095672607421875, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.447526752948761, |
|
"rewards/margins": 5.467113018035889, |
|
"rewards/rejected": -5.019586086273193, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.685925925925926, |
|
"grad_norm": 5.7150399704998245, |
|
"learning_rate": 8.40672320134489e-09, |
|
"logits/chosen": -1.146994948387146, |
|
"logits/rejected": -0.9583498239517212, |
|
"logps/chosen": -27.36312484741211, |
|
"logps/rejected": -43.72743225097656, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11620418727397919, |
|
"rewards/margins": 5.6578192710876465, |
|
"rewards/rejected": -5.774023056030273, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 3.6977777777777776, |
|
"grad_norm": 4.67711156350355, |
|
"learning_rate": 7.751064070707247e-09, |
|
"logits/chosen": -1.3420299291610718, |
|
"logits/rejected": -1.3341833353042603, |
|
"logps/chosen": -31.239133834838867, |
|
"logps/rejected": -41.84351348876953, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3695347011089325, |
|
"rewards/margins": 4.75352668762207, |
|
"rewards/rejected": -4.3839921951293945, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.70962962962963, |
|
"grad_norm": 5.331465549642304, |
|
"learning_rate": 7.12161957543006e-09, |
|
"logits/chosen": -1.1273610591888428, |
|
"logits/rejected": -1.1161746978759766, |
|
"logps/chosen": -37.207733154296875, |
|
"logps/rejected": -61.19139862060547, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4315568804740906, |
|
"rewards/margins": 5.310615539550781, |
|
"rewards/rejected": -5.742172718048096, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 3.7214814814814816, |
|
"grad_norm": 5.478798851131127, |
|
"learning_rate": 6.518457829983559e-09, |
|
"logits/chosen": -1.3124021291732788, |
|
"logits/rejected": -1.2279609441757202, |
|
"logps/chosen": -34.83631896972656, |
|
"logps/rejected": -44.276790618896484, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18586915731430054, |
|
"rewards/margins": 3.245110511779785, |
|
"rewards/rejected": -3.4309799671173096, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 4.7427648272619, |
|
"learning_rate": 5.9416441046862555e-09, |
|
"logits/chosen": -1.1716216802597046, |
|
"logits/rejected": -1.2297029495239258, |
|
"logps/chosen": -21.677108764648438, |
|
"logps/rejected": -35.96882247924805, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.003640979528427124, |
|
"rewards/margins": 3.7295522689819336, |
|
"rewards/rejected": -3.7259111404418945, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 3.745185185185185, |
|
"grad_norm": 5.760686688528461, |
|
"learning_rate": 5.3912408186420064e-09, |
|
"logits/chosen": -1.038623332977295, |
|
"logits/rejected": -0.9665778875350952, |
|
"logps/chosen": -27.82607650756836, |
|
"logps/rejected": -35.596378326416016, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08153516054153442, |
|
"rewards/margins": 4.407654762268066, |
|
"rewards/rejected": -4.326119422912598, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 3.757037037037037, |
|
"grad_norm": 4.629475217167777, |
|
"learning_rate": 4.867307532985227e-09, |
|
"logits/chosen": -1.2615653276443481, |
|
"logits/rejected": -1.1494407653808594, |
|
"logps/chosen": -40.15790557861328, |
|
"logps/rejected": -60.7736701965332, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6902495622634888, |
|
"rewards/margins": 5.601743698120117, |
|
"rewards/rejected": -6.291993141174316, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 3.7688888888888887, |
|
"grad_norm": 6.454886951587756, |
|
"learning_rate": 4.369900944435734e-09, |
|
"logits/chosen": -1.0968234539031982, |
|
"logits/rejected": -1.026517391204834, |
|
"logps/chosen": -31.793502807617188, |
|
"logps/rejected": -60.37879180908203, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31835824251174927, |
|
"rewards/margins": 5.233622074127197, |
|
"rewards/rejected": -5.551980495452881, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.7807407407407405, |
|
"grad_norm": 5.37027735834608, |
|
"learning_rate": 3.899074879163244e-09, |
|
"logits/chosen": -1.2527568340301514, |
|
"logits/rejected": -1.0810654163360596, |
|
"logps/chosen": -24.402645111083984, |
|
"logps/rejected": -39.67679977416992, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4089929461479187, |
|
"rewards/margins": 4.298420429229736, |
|
"rewards/rejected": -4.707413196563721, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 3.7925925925925927, |
|
"grad_norm": 5.568233279162257, |
|
"learning_rate": 3.4548802869627804e-09, |
|
"logits/chosen": -1.291711688041687, |
|
"logits/rejected": -1.2471994161605835, |
|
"logps/chosen": -31.061437606811523, |
|
"logps/rejected": -49.516639709472656, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07120761275291443, |
|
"rewards/margins": 3.723219394683838, |
|
"rewards/rejected": -3.794426918029785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.8044444444444445, |
|
"grad_norm": 6.171276653233977, |
|
"learning_rate": 3.037365235741024e-09, |
|
"logits/chosen": -1.3342313766479492, |
|
"logits/rejected": -1.187886357307434, |
|
"logps/chosen": -24.079877853393555, |
|
"logps/rejected": -38.28224182128906, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4851805865764618, |
|
"rewards/margins": 4.108402252197266, |
|
"rewards/rejected": -4.593582630157471, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 3.8162962962962963, |
|
"grad_norm": 6.301615641450496, |
|
"learning_rate": 2.6465749063149245e-09, |
|
"logits/chosen": -1.4614932537078857, |
|
"logits/rejected": -1.3210101127624512, |
|
"logps/chosen": -24.112567901611328, |
|
"logps/rejected": -51.42138671875, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5791712999343872, |
|
"rewards/margins": 6.299165725708008, |
|
"rewards/rejected": -6.8783369064331055, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 3.828148148148148, |
|
"grad_norm": 6.87975838997433, |
|
"learning_rate": 2.282551587522441e-09, |
|
"logits/chosen": -1.406750202178955, |
|
"logits/rejected": -1.3338254690170288, |
|
"logps/chosen": -22.056568145751953, |
|
"logps/rejected": -34.89329147338867, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25121578574180603, |
|
"rewards/margins": 4.385520935058594, |
|
"rewards/rejected": -4.636736869812012, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 4.966352446635051, |
|
"learning_rate": 1.9453346716462316e-09, |
|
"logits/chosen": -1.211751937866211, |
|
"logits/rejected": -1.1320858001708984, |
|
"logps/chosen": -27.62029457092285, |
|
"logps/rejected": -32.46119689941406, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004868373274803162, |
|
"rewards/margins": 3.807752847671509, |
|
"rewards/rejected": -3.802884578704834, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 3.851851851851852, |
|
"grad_norm": 5.653095930506065, |
|
"learning_rate": 1.6349606501509794e-09, |
|
"logits/chosen": -1.1088950634002686, |
|
"logits/rejected": -0.9607290029525757, |
|
"logps/chosen": -28.395509719848633, |
|
"logps/rejected": -34.00682830810547, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15473833680152893, |
|
"rewards/margins": 3.99048113822937, |
|
"rewards/rejected": -3.835742473602295, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.863703703703704, |
|
"grad_norm": 5.3966444428734945, |
|
"learning_rate": 1.351463109734441e-09, |
|
"logits/chosen": -1.3495458364486694, |
|
"logits/rejected": -1.0097894668579102, |
|
"logps/chosen": -22.80147933959961, |
|
"logps/rejected": -41.809940338134766, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19359610974788666, |
|
"rewards/margins": 5.500581741333008, |
|
"rewards/rejected": -5.694178104400635, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 3.8755555555555556, |
|
"grad_norm": 5.006770074945758, |
|
"learning_rate": 1.0948727286930192e-09, |
|
"logits/chosen": -1.1479936838150024, |
|
"logits/rejected": -0.9590707421302795, |
|
"logps/chosen": -27.08885955810547, |
|
"logps/rejected": -40.10725402832031, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17611512541770935, |
|
"rewards/margins": 3.613635540008545, |
|
"rewards/rejected": -3.4375205039978027, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 3.8874074074074074, |
|
"grad_norm": 6.085390667471827, |
|
"learning_rate": 8.652172736017816e-10, |
|
"logits/chosen": -1.1275379657745361, |
|
"logits/rejected": -1.116228461265564, |
|
"logps/chosen": -33.487083435058594, |
|
"logps/rejected": -52.050228118896484, |
|
"loss": 0.0628, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16681703925132751, |
|
"rewards/margins": 4.557419776916504, |
|
"rewards/rejected": -4.724237442016602, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.899259259259259, |
|
"grad_norm": 6.597375260168904, |
|
"learning_rate": 6.625215963098896e-10, |
|
"logits/chosen": -1.234811782836914, |
|
"logits/rejected": -1.1153168678283691, |
|
"logps/chosen": -27.0404052734375, |
|
"logps/rejected": -34.0019416809082, |
|
"loss": 0.065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32952964305877686, |
|
"rewards/margins": 4.547809600830078, |
|
"rewards/rejected": -4.8773393630981445, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 3.911111111111111, |
|
"grad_norm": 5.399445593167999, |
|
"learning_rate": 4.868076312512515e-10, |
|
"logits/chosen": -1.1961758136749268, |
|
"logits/rejected": -1.034976840019226, |
|
"logps/chosen": -22.31209945678711, |
|
"logps/rejected": -44.69541931152344, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18147775530815125, |
|
"rewards/margins": 5.29000186920166, |
|
"rewards/rejected": -5.108523845672607, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.9229629629629628, |
|
"grad_norm": 4.687101989180421, |
|
"learning_rate": 3.3809439307086463e-10, |
|
"logits/chosen": -1.204687237739563, |
|
"logits/rejected": -1.126007318496704, |
|
"logps/chosen": -24.837623596191406, |
|
"logps/rejected": -40.658023834228516, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2791484594345093, |
|
"rewards/margins": 4.140464782714844, |
|
"rewards/rejected": -3.8613169193267822, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 3.934814814814815, |
|
"grad_norm": 5.7183873880444045, |
|
"learning_rate": 2.1639797456723952e-10, |
|
"logits/chosen": -1.2559609413146973, |
|
"logits/rejected": -1.0792549848556519, |
|
"logps/chosen": -35.796287536621094, |
|
"logps/rejected": -46.229820251464844, |
|
"loss": 0.049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015116512775421143, |
|
"rewards/margins": 5.156147480010986, |
|
"rewards/rejected": -5.141030311584473, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 3.9466666666666668, |
|
"grad_norm": 4.801576190645628, |
|
"learning_rate": 1.21731544950876e-10, |
|
"logits/chosen": -1.227901816368103, |
|
"logits/rejected": -1.2207201719284058, |
|
"logps/chosen": -31.329517364501953, |
|
"logps/rejected": -51.822059631347656, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12939153611660004, |
|
"rewards/margins": 5.30501651763916, |
|
"rewards/rejected": -5.434407711029053, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 3.9585185185185185, |
|
"grad_norm": 5.366333281325966, |
|
"learning_rate": 5.4105348419264394e-11, |
|
"logits/chosen": -1.474123239517212, |
|
"logits/rejected": -1.370969295501709, |
|
"logps/chosen": -21.29511260986328, |
|
"logps/rejected": -37.816551208496094, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.26586639881134033, |
|
"rewards/margins": 4.2116827964782715, |
|
"rewards/rejected": -3.9458167552948, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 3.9703703703703703, |
|
"grad_norm": 4.961233689259609, |
|
"learning_rate": 1.3526703048216682e-11, |
|
"logits/chosen": -1.2672888040542603, |
|
"logits/rejected": -1.0974268913269043, |
|
"logps/chosen": -25.828834533691406, |
|
"logps/rejected": -52.68805694580078, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10839378833770752, |
|
"rewards/margins": 6.133199691772461, |
|
"rewards/rejected": -6.024805545806885, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 3.982222222222222, |
|
"grad_norm": 5.7068180002610625, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.3201903104782104, |
|
"logits/rejected": -1.2799780368804932, |
|
"logps/chosen": -26.542402267456055, |
|
"logps/rejected": -42.164154052734375, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13592669367790222, |
|
"rewards/margins": 4.714659690856934, |
|
"rewards/rejected": -4.578732967376709, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 3.982222222222222, |
|
"step": 336, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19470643034825721, |
|
"train_runtime": 59934.0013, |
|
"train_samples_per_second": 0.72, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 336, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |