{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995206136145733, "eval_steps": 50, "global_step": 695, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.0014381591562799617, "grad_norm": 19.134574294723603, "learning_rate": 7.142857142857144e-08, "logits": -1.475965142250061, "logps": -86.5583724975586, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_simple": 0.3958333432674408, "step": 1 }, { "dpo_loss": 0.6926321983337402, "epoch": 0.007190795781399808, "grad_norm": 20.478323427973958, "learning_rate": 3.5714285714285716e-07, "logits": -1.5228310823440552, "logps": -85.78047943115234, "loss": 0.6923, "objective": 0.6926321983337402, "ranking_simple": 0.5625, "step": 5 }, { "dpo_loss": 0.6972719430923462, "epoch": 0.014381591562799617, "grad_norm": 18.41490700600558, "learning_rate": 7.142857142857143e-07, "logits": -1.5104866027832031, "logps": -85.7982406616211, "loss": 0.6947, "objective": 0.6972719430923462, "ranking_simple": 0.47083333134651184, "step": 10 }, { "dpo_loss": 0.6982707977294922, "epoch": 0.021572387344199424, "grad_norm": 18.09593626635207, "learning_rate": 1.0714285714285714e-06, "logits": -1.5033611059188843, "logps": -84.19438171386719, "loss": 0.6937, "objective": 0.6982707977294922, "ranking_simple": 0.5208333134651184, "step": 15 }, { "dpo_loss": 0.6746294498443604, "epoch": 0.028763183125599234, "grad_norm": 16.989375292416742, "learning_rate": 1.4285714285714286e-06, "logits": -1.4866502285003662, "logps": -83.14163970947266, "loss": 0.6849, "objective": 0.6746294498443604, "ranking_simple": 0.46666666865348816, "step": 20 }, { "dpo_loss": 0.6969509720802307, "epoch": 0.03595397890699904, "grad_norm": 20.67293748562133, "learning_rate": 1.7857142857142859e-06, "logits": -1.5258924961090088, "logps": -85.8170166015625, "loss": 0.6851, "objective": 0.6969509720802307, "ranking_simple": 0.4791666567325592, "step": 25 }, { "dpo_loss": 0.6758856177330017, "epoch": 0.04314477468839885, "grad_norm": 18.426008220098865, "learning_rate": 2.1428571428571427e-06, "logits": -1.4745742082595825, "logps": -83.44979095458984, "loss": 0.6952, "objective": 0.6758856177330017, "ranking_simple": 0.4625000059604645, "step": 30 }, { "dpo_loss": 0.6962794065475464, "epoch": 0.050335570469798654, "grad_norm": 17.9032967512842, "learning_rate": 2.5e-06, "logits": -1.4695559740066528, "logps": -84.0098648071289, "loss": 0.6801, "objective": 0.6962794065475464, "ranking_simple": 0.512499988079071, "step": 35 }, { "dpo_loss": 0.6491132378578186, "epoch": 0.05752636625119847, "grad_norm": 19.914404961281203, "learning_rate": 2.8571428571428573e-06, "logits": -1.5081710815429688, "logps": -87.46798706054688, "loss": 0.6554, "objective": 0.6491132378578186, "ranking_simple": 0.5291666388511658, "step": 40 }, { "dpo_loss": 0.6948689818382263, "epoch": 0.06471716203259828, "grad_norm": 18.11322171833476, "learning_rate": 3.2142857142857147e-06, "logits": -1.5364570617675781, "logps": -93.01942443847656, "loss": 0.68, "objective": 0.6948689818382263, "ranking_simple": 0.5458333492279053, "step": 45 }, { "dpo_loss": 0.6870535016059875, "epoch": 0.07190795781399809, "grad_norm": 16.24249291651068, "learning_rate": 3.5714285714285718e-06, "logits": -1.4616867303848267, "logps": -98.69449615478516, "loss": 0.6788, "objective": 0.6870535016059875, "ranking_simple": 0.5166666507720947, "step": 50 }, { "epoch": 0.07190795781399809, "eval_dpo_loss": 0.6872744560241699, "eval_logits": -1.4424751996994019, "eval_logps": -101.08426666259766, "eval_loss": 0.6864251494407654, "eval_objective": 0.6872744560241699, "eval_ranking_simple": 0.5170270204544067, "eval_runtime": 667.7982, "eval_samples_per_second": 16.605, "eval_steps_per_second": 1.385, "step": 50 }, { "dpo_loss": 0.6663610935211182, "epoch": 0.07909875359539789, "grad_norm": 16.292750687919423, "learning_rate": 3.928571428571429e-06, "logits": -1.4286314249038696, "logps": -99.18305206298828, "loss": 0.653, "objective": 0.6663610935211182, "ranking_simple": 0.4583333432674408, "step": 55 }, { "dpo_loss": 0.6659743189811707, "epoch": 0.0862895493767977, "grad_norm": 16.962152501970213, "learning_rate": 4.2857142857142855e-06, "logits": -1.4745826721191406, "logps": -93.86702728271484, "loss": 0.6482, "objective": 0.6659743189811707, "ranking_simple": 0.5625, "step": 60 }, { "dpo_loss": 0.671344518661499, "epoch": 0.0934803451581975, "grad_norm": 14.901029294680688, "learning_rate": 4.642857142857144e-06, "logits": -1.451969027519226, "logps": -87.68860626220703, "loss": 0.6615, "objective": 0.671344518661499, "ranking_simple": 0.550000011920929, "step": 65 }, { "dpo_loss": 0.6434452533721924, "epoch": 0.10067114093959731, "grad_norm": 14.778236920418157, "learning_rate": 5e-06, "logits": -1.3780988454818726, "logps": -89.031982421875, "loss": 0.6407, "objective": 0.6434452533721924, "ranking_simple": 0.5833333134651184, "step": 70 }, { "dpo_loss": 0.6859174370765686, "epoch": 0.10786193672099713, "grad_norm": 28.274152674131496, "learning_rate": 4.99921047320825e-06, "logits": -1.4645949602127075, "logps": -88.36033630371094, "loss": 0.685, "objective": 0.6859174370765686, "ranking_simple": 0.5, "step": 75 }, { "dpo_loss": 0.6891925930976868, "epoch": 0.11505273250239693, "grad_norm": 15.764669217558497, "learning_rate": 4.996842391515045e-06, "logits": -1.4844069480895996, "logps": -95.44277954101562, "loss": 0.6754, "objective": 0.6891925930976868, "ranking_simple": 0.5625, "step": 80 }, { "dpo_loss": 0.667593777179718, "epoch": 0.12224352828379674, "grad_norm": 14.453520081391234, "learning_rate": 4.992897250651535e-06, "logits": -1.5593804121017456, "logps": -94.26665496826172, "loss": 0.6563, "objective": 0.667593777179718, "ranking_simple": 0.5166666507720947, "step": 85 }, { "dpo_loss": 0.6684335470199585, "epoch": 0.12943432406519656, "grad_norm": 13.861895733772226, "learning_rate": 4.9873775424532515e-06, "logits": -1.7217061519622803, "logps": -93.05764770507812, "loss": 0.6527, "objective": 0.6684335470199585, "ranking_simple": 0.5666666626930237, "step": 90 }, { "dpo_loss": 0.6190710663795471, "epoch": 0.13662511984659637, "grad_norm": 14.681765981277032, "learning_rate": 4.980286753286196e-06, "logits": -1.6066279411315918, "logps": -90.85689544677734, "loss": 0.619, "objective": 0.6190710663795471, "ranking_simple": 0.5375000238418579, "step": 95 }, { "dpo_loss": 0.6605519652366638, "epoch": 0.14381591562799617, "grad_norm": 15.531532836382373, "learning_rate": 4.971629361844785e-06, "logits": -1.7002263069152832, "logps": -95.20433044433594, "loss": 0.6396, "objective": 0.6605519652366638, "ranking_simple": 0.5625, "step": 100 }, { "epoch": 0.14381591562799617, "eval_dpo_loss": 0.6830798387527466, "eval_logits": -1.6394028663635254, "eval_logps": -96.63552856445312, "eval_loss": 0.6795840859413147, "eval_objective": 0.6830798387527466, "eval_ranking_simple": 0.5283783674240112, "eval_runtime": 667.5921, "eval_samples_per_second": 16.61, "eval_steps_per_second": 1.386, "step": 100 }, { "dpo_loss": 0.6575779318809509, "epoch": 0.15100671140939598, "grad_norm": 13.9124802140197, "learning_rate": 4.961410836323014e-06, "logits": -1.6909289360046387, "logps": -95.10658264160156, "loss": 0.6472, "objective": 0.6575779318809509, "ranking_simple": 0.5666666626930237, "step": 105 }, { "dpo_loss": 0.6442916393280029, "epoch": 0.15819750719079578, "grad_norm": 15.449248259744447, "learning_rate": 4.949637630960618e-06, "logits": -1.753013253211975, "logps": -100.01918029785156, "loss": 0.6257, "objective": 0.6442916393280029, "ranking_simple": 0.574999988079071, "step": 110 }, { "dpo_loss": 0.5997211337089539, "epoch": 0.1653883029721956, "grad_norm": 15.359232778269337, "learning_rate": 4.9363171819664434e-06, "logits": -1.7351175546646118, "logps": -103.54134368896484, "loss": 0.6191, "objective": 0.5997211337089539, "ranking_simple": 0.5625, "step": 115 }, { "dpo_loss": 0.6286367774009705, "epoch": 0.1725790987535954, "grad_norm": 16.18007141994783, "learning_rate": 4.921457902821578e-06, "logits": -1.6781164407730103, "logps": -102.82028198242188, "loss": 0.6188, "objective": 0.6286367774009705, "ranking_simple": 0.49166667461395264, "step": 120 }, { "dpo_loss": 0.5925440192222595, "epoch": 0.1797698945349952, "grad_norm": 16.36914258606562, "learning_rate": 4.905069178965215e-06, "logits": -1.587238073348999, "logps": -102.64461517333984, "loss": 0.6077, "objective": 0.5925440192222595, "ranking_simple": 0.6499999761581421, "step": 125 }, { "dpo_loss": 0.5784808397293091, "epoch": 0.186960690316395, "grad_norm": 13.104316399935197, "learning_rate": 4.887161361866608e-06, "logits": -1.5238862037658691, "logps": -98.6126708984375, "loss": 0.5683, "objective": 0.5784808397293091, "ranking_simple": 0.5916666388511658, "step": 130 }, { "dpo_loss": 0.5823652744293213, "epoch": 0.1941514860977948, "grad_norm": 13.32781172011542, "learning_rate": 4.867745762486862e-06, "logits": -1.4705675840377808, "logps": -96.02190399169922, "loss": 0.6094, "objective": 0.5823652744293213, "ranking_simple": 0.6041666865348816, "step": 135 }, { "dpo_loss": 0.6114974617958069, "epoch": 0.20134228187919462, "grad_norm": 14.266153939024862, "learning_rate": 4.846834644134686e-06, "logits": -1.4452903270721436, "logps": -95.41686248779297, "loss": 0.643, "objective": 0.6114974617958069, "ranking_simple": 0.5375000238418579, "step": 140 }, { "dpo_loss": 0.6222321391105652, "epoch": 0.20853307766059445, "grad_norm": 13.527433051608822, "learning_rate": 4.824441214720629e-06, "logits": -1.343421220779419, "logps": -90.63035583496094, "loss": 0.6004, "objective": 0.6222321391105652, "ranking_simple": 0.5249999761581421, "step": 145 }, { "dpo_loss": 0.6666644811630249, "epoch": 0.21572387344199426, "grad_norm": 14.387730896252268, "learning_rate": 4.800579618414677e-06, "logits": -1.3223589658737183, "logps": -89.82644653320312, "loss": 0.6428, "objective": 0.6666644811630249, "ranking_simple": 0.5625, "step": 150 }, { "epoch": 0.21572387344199426, "eval_dpo_loss": 0.6741421818733215, "eval_logits": -1.3248826265335083, "eval_logps": -90.46123504638672, "eval_loss": 0.6731191277503967, "eval_objective": 0.6741421818733215, "eval_ranking_simple": 0.5421621799468994, "eval_runtime": 667.0734, "eval_samples_per_second": 16.623, "eval_steps_per_second": 1.387, "step": 150 }, { "dpo_loss": 0.6579559445381165, "epoch": 0.22291466922339406, "grad_norm": 13.510175858510676, "learning_rate": 4.775264926712489e-06, "logits": -1.4420989751815796, "logps": -87.71839904785156, "loss": 0.6137, "objective": 0.6579559445381165, "ranking_simple": 0.5458333492279053, "step": 155 }, { "dpo_loss": 0.6222509741783142, "epoch": 0.23010546500479387, "grad_norm": 11.574891409598989, "learning_rate": 4.748513128915928e-06, "logits": -1.3550127744674683, "logps": -82.75563049316406, "loss": 0.6097, "objective": 0.6222509741783142, "ranking_simple": 0.6166666746139526, "step": 160 }, { "dpo_loss": 0.5967528820037842, "epoch": 0.23729626078619367, "grad_norm": 13.289004474817304, "learning_rate": 4.720341122033862e-06, "logits": -1.309799313545227, "logps": -83.7621078491211, "loss": 0.6238, "objective": 0.5967528820037842, "ranking_simple": 0.5916666388511658, "step": 165 }, { "dpo_loss": 0.6145275831222534, "epoch": 0.24448705656759348, "grad_norm": 13.95960374523932, "learning_rate": 4.690766700109659e-06, "logits": -1.4246125221252441, "logps": -81.2165756225586, "loss": 0.6133, "objective": 0.6145275831222534, "ranking_simple": 0.5458333492279053, "step": 170 }, { "dpo_loss": 0.6186246275901794, "epoch": 0.2516778523489933, "grad_norm": 14.582408342886906, "learning_rate": 4.659808542982089e-06, "logits": -1.4199490547180176, "logps": -83.35224914550781, "loss": 0.5772, "objective": 0.6186246275901794, "ranking_simple": 0.574999988079071, "step": 175 }, { "dpo_loss": 0.6175665259361267, "epoch": 0.2588686481303931, "grad_norm": 13.512566329291777, "learning_rate": 4.62748620448673e-06, "logits": -1.5705552101135254, "logps": -82.73717498779297, "loss": 0.5669, "objective": 0.6175665259361267, "ranking_simple": 0.5583333373069763, "step": 180 }, { "dpo_loss": 0.5879412293434143, "epoch": 0.2660594439117929, "grad_norm": 13.766981915987873, "learning_rate": 4.593820100105355e-06, "logits": -1.5317676067352295, "logps": -81.35298919677734, "loss": 0.5636, "objective": 0.5879412293434143, "ranking_simple": 0.6625000238418579, "step": 185 }, { "dpo_loss": 0.576286256313324, "epoch": 0.27325023969319273, "grad_norm": 15.043077064111454, "learning_rate": 4.558831494071069e-06, "logits": -1.59921395778656, "logps": -86.69087219238281, "loss": 0.5783, "objective": 0.576286256313324, "ranking_simple": 0.6208333373069763, "step": 190 }, { "dpo_loss": 0.535432755947113, "epoch": 0.28044103547459254, "grad_norm": 14.462798502305684, "learning_rate": 4.522542485937369e-06, "logits": -1.5289549827575684, "logps": -82.07774353027344, "loss": 0.5768, "objective": 0.535432755947113, "ranking_simple": 0.6958333253860474, "step": 195 }, { "dpo_loss": 0.664901614189148, "epoch": 0.28763183125599234, "grad_norm": 14.055996226374166, "learning_rate": 4.4849759966195885e-06, "logits": -1.4697571992874146, "logps": -84.17499542236328, "loss": 0.6108, "objective": 0.664901614189148, "ranking_simple": 0.5291666388511658, "step": 200 }, { "epoch": 0.28763183125599234, "eval_dpo_loss": 0.6794703006744385, "eval_logits": -1.406295895576477, "eval_logps": -85.59771728515625, "eval_loss": 0.6792395710945129, "eval_objective": 0.6794703006744385, "eval_ranking_simple": 0.5467567443847656, "eval_runtime": 672.3535, "eval_samples_per_second": 16.493, "eval_steps_per_second": 1.376, "step": 200 }, { "dpo_loss": 0.5508471131324768, "epoch": 0.29482262703739215, "grad_norm": 12.878307216053773, "learning_rate": 4.446155753917559e-06, "logits": -1.4630080461502075, "logps": -82.61222839355469, "loss": 0.5818, "objective": 0.5508471131324768, "ranking_simple": 0.6166666746139526, "step": 205 }, { "dpo_loss": 0.6260824203491211, "epoch": 0.30201342281879195, "grad_norm": 13.972188885138191, "learning_rate": 4.40610627752862e-06, "logits": -1.5031265020370483, "logps": -82.899658203125, "loss": 0.6104, "objective": 0.6260824203491211, "ranking_simple": 0.5916666388511658, "step": 210 }, { "dpo_loss": 0.5617867708206177, "epoch": 0.30920421860019176, "grad_norm": 12.206297979335497, "learning_rate": 4.364852863560456e-06, "logits": -1.5797665119171143, "logps": -81.89352416992188, "loss": 0.5669, "objective": 0.5617867708206177, "ranking_simple": 0.6083333492279053, "step": 215 }, { "dpo_loss": 0.4900062680244446, "epoch": 0.31639501438159157, "grad_norm": 13.75475301520002, "learning_rate": 4.322421568553529e-06, "logits": -1.571911096572876, "logps": -85.30619812011719, "loss": 0.5582, "objective": 0.4900062680244446, "ranking_simple": 0.6499999761581421, "step": 220 }, { "dpo_loss": 0.6220059990882874, "epoch": 0.32358581016299137, "grad_norm": 14.857535963350637, "learning_rate": 4.278839193023214e-06, "logits": -1.6418553590774536, "logps": -86.45494842529297, "loss": 0.5991, "objective": 0.6220059990882874, "ranking_simple": 0.5833333134651184, "step": 225 }, { "dpo_loss": 0.5970975160598755, "epoch": 0.3307766059443912, "grad_norm": 12.612318414450316, "learning_rate": 4.234133264532012e-06, "logits": -1.5469430685043335, "logps": -83.91010284423828, "loss": 0.5904, "objective": 0.5970975160598755, "ranking_simple": 0.574999988079071, "step": 230 }, { "dpo_loss": 0.5481938123703003, "epoch": 0.337967401725791, "grad_norm": 12.797277404018391, "learning_rate": 4.188332020302561e-06, "logits": -1.3681390285491943, "logps": -83.8482894897461, "loss": 0.5406, "objective": 0.5481938123703003, "ranking_simple": 0.6499999761581421, "step": 235 }, { "dpo_loss": 0.6017696261405945, "epoch": 0.3451581975071908, "grad_norm": 13.86217536920719, "learning_rate": 4.141464389382392e-06, "logits": -1.4286202192306519, "logps": -85.05973052978516, "loss": 0.5855, "objective": 0.6017696261405945, "ranking_simple": 0.6041666865348816, "step": 240 }, { "dpo_loss": 0.5819531679153442, "epoch": 0.3523489932885906, "grad_norm": 13.117346601405215, "learning_rate": 4.093559974371725e-06, "logits": -1.4333292245864868, "logps": -85.2267837524414, "loss": 0.5903, "objective": 0.5819531679153442, "ranking_simple": 0.5666666626930237, "step": 245 }, { "dpo_loss": 0.6078858971595764, "epoch": 0.3595397890699904, "grad_norm": 12.057808368664894, "learning_rate": 4.044649032725836e-06, "logits": -1.43559992313385, "logps": -83.82780456542969, "loss": 0.5856, "objective": 0.6078858971595764, "ranking_simple": 0.6041666865348816, "step": 250 }, { "epoch": 0.3595397890699904, "eval_dpo_loss": 0.6614840626716614, "eval_logits": -1.3857855796813965, "eval_logps": -87.80554962158203, "eval_loss": 0.6622549295425415, "eval_objective": 0.6614840626716614, "eval_ranking_simple": 0.545945942401886, "eval_runtime": 667.2429, "eval_samples_per_second": 16.619, "eval_steps_per_second": 1.386, "step": 250 }, { "dpo_loss": 0.47653335332870483, "epoch": 0.3667305848513902, "grad_norm": 12.396454934600511, "learning_rate": 3.9947624576437975e-06, "logits": -1.3619633913040161, "logps": -85.24054718017578, "loss": 0.5457, "objective": 0.47653335332870483, "ranking_simple": 0.6499999761581421, "step": 255 }, { "dpo_loss": 0.4943782687187195, "epoch": 0.37392138063279, "grad_norm": 12.89782329658142, "learning_rate": 3.943931758555669e-06, "logits": -1.4246817827224731, "logps": -85.02723693847656, "loss": 0.5225, "objective": 0.4943782687187195, "ranking_simple": 0.6458333134651184, "step": 260 }, { "dpo_loss": 0.6019367575645447, "epoch": 0.3811121764141898, "grad_norm": 14.075516870638713, "learning_rate": 3.89218904122047e-06, "logits": -1.4607974290847778, "logps": -84.44505310058594, "loss": 0.5847, "objective": 0.6019367575645447, "ranking_simple": 0.6166666746139526, "step": 265 }, { "dpo_loss": 0.5739615559577942, "epoch": 0.3883029721955896, "grad_norm": 13.302239992029932, "learning_rate": 3.839566987447492e-06, "logits": -1.4933629035949707, "logps": -85.0285873413086, "loss": 0.586, "objective": 0.5739615559577942, "ranking_simple": 0.6416666507720947, "step": 270 }, { "dpo_loss": 0.6036986708641052, "epoch": 0.39549376797698943, "grad_norm": 12.980951698600105, "learning_rate": 3.7860988344537664e-06, "logits": -1.536937952041626, "logps": -85.6275863647461, "loss": 0.5653, "objective": 0.6036986708641052, "ranking_simple": 0.6708333492279053, "step": 275 }, { "dpo_loss": 0.5608472228050232, "epoch": 0.40268456375838924, "grad_norm": 12.026354566513733, "learning_rate": 3.731818353870729e-06, "logits": -1.42515230178833, "logps": -83.92908477783203, "loss": 0.5729, "objective": 0.5608472228050232, "ranking_simple": 0.6208333373069763, "step": 280 }, { "dpo_loss": 0.5649358034133911, "epoch": 0.4098753595397891, "grad_norm": 12.506947473488001, "learning_rate": 3.6767598304133325e-06, "logits": -1.386040210723877, "logps": -83.64810180664062, "loss": 0.5379, "objective": 0.5649358034133911, "ranking_simple": 0.6458333134651184, "step": 285 }, { "dpo_loss": 0.5191709399223328, "epoch": 0.4170661553211889, "grad_norm": 11.83713099591173, "learning_rate": 3.6209580402250816e-06, "logits": -1.464784026145935, "logps": -82.18307495117188, "loss": 0.5179, "objective": 0.5191709399223328, "ranking_simple": 0.5958333611488342, "step": 290 }, { "dpo_loss": 0.5659650564193726, "epoch": 0.4242569511025887, "grad_norm": 13.171214716747091, "learning_rate": 3.564448228912682e-06, "logits": -1.456804633140564, "logps": -85.0264892578125, "loss": 0.56, "objective": 0.5659650564193726, "ranking_simple": 0.637499988079071, "step": 295 }, { "dpo_loss": 0.5784734487533569, "epoch": 0.4314477468839885, "grad_norm": 14.743575255965931, "learning_rate": 3.507266089284157e-06, "logits": -1.4471509456634521, "logps": -86.41505432128906, "loss": 0.5701, "objective": 0.5784734487533569, "ranking_simple": 0.6541666388511658, "step": 300 }, { "epoch": 0.4314477468839885, "eval_dpo_loss": 0.6661913990974426, "eval_logits": -1.478379487991333, "eval_logps": -88.78189086914062, "eval_loss": 0.6633257269859314, "eval_objective": 0.6661913990974426, "eval_ranking_simple": 0.5448648929595947, "eval_runtime": 671.0, "eval_samples_per_second": 16.526, "eval_steps_per_second": 1.379, "step": 300 }, { "dpo_loss": 0.6009036302566528, "epoch": 0.4386385426653883, "grad_norm": 13.785096580105144, "learning_rate": 3.4494477388045035e-06, "logits": -1.5999183654785156, "logps": -88.15947723388672, "loss": 0.5422, "objective": 0.6009036302566528, "ranking_simple": 0.625, "step": 305 }, { "dpo_loss": 0.4983634650707245, "epoch": 0.4458293384467881, "grad_norm": 12.441589802769128, "learning_rate": 3.391029696783127e-06, "logits": -1.5172799825668335, "logps": -85.33808135986328, "loss": 0.5392, "objective": 0.4983634650707245, "ranking_simple": 0.6708333492279053, "step": 310 }, { "dpo_loss": 0.48155930638313293, "epoch": 0.45302013422818793, "grad_norm": 13.232427609955911, "learning_rate": 3.332048861307467e-06, "logits": -1.5914006233215332, "logps": -86.23084259033203, "loss": 0.5352, "objective": 0.48155930638313293, "ranking_simple": 0.6666666865348816, "step": 315 }, { "dpo_loss": 0.5351694226264954, "epoch": 0.46021093000958774, "grad_norm": 12.016279548789747, "learning_rate": 3.272542485937369e-06, "logits": -1.5048084259033203, "logps": -86.4006118774414, "loss": 0.5458, "objective": 0.5351694226264954, "ranking_simple": 0.637499988079071, "step": 320 }, { "dpo_loss": 0.6042866110801697, "epoch": 0.46740172579098754, "grad_norm": 11.801563225543802, "learning_rate": 3.2125481561749406e-06, "logits": -1.5276148319244385, "logps": -88.05933380126953, "loss": 0.5391, "objective": 0.6042866110801697, "ranking_simple": 0.6166666746139526, "step": 325 }, { "dpo_loss": 0.5653389096260071, "epoch": 0.47459252157238735, "grad_norm": 13.17915800680032, "learning_rate": 3.152103765724743e-06, "logits": -1.6013100147247314, "logps": -87.7344741821289, "loss": 0.5488, "objective": 0.5653389096260071, "ranking_simple": 0.6583333611488342, "step": 330 }, { "dpo_loss": 0.5424811840057373, "epoch": 0.48178331735378715, "grad_norm": 11.944202243524137, "learning_rate": 3.0912474925593124e-06, "logits": -1.625429391860962, "logps": -87.23538208007812, "loss": 0.5222, "objective": 0.5424811840057373, "ranking_simple": 0.6041666865348816, "step": 335 }, { "dpo_loss": 0.5668550729751587, "epoch": 0.48897411313518696, "grad_norm": 13.39845062211121, "learning_rate": 3.0300177748051375e-06, "logits": -1.6717287302017212, "logps": -87.84339904785156, "loss": 0.5531, "objective": 0.5668550729751587, "ranking_simple": 0.6166666746139526, "step": 340 }, { "dpo_loss": 0.5499953627586365, "epoch": 0.49616490891658677, "grad_norm": 13.090607166084366, "learning_rate": 2.9684532864643123e-06, "logits": -1.593755841255188, "logps": -85.349365234375, "loss": 0.5647, "objective": 0.5499953627586365, "ranking_simple": 0.637499988079071, "step": 345 }, { "dpo_loss": 0.5135945081710815, "epoch": 0.5033557046979866, "grad_norm": 13.902934776591621, "learning_rate": 2.9065929129872097e-06, "logits": -1.5004137754440308, "logps": -86.49576568603516, "loss": 0.5296, "objective": 0.5135945081710815, "ranking_simple": 0.6583333611488342, "step": 350 }, { "epoch": 0.5033557046979866, "eval_dpo_loss": 0.66639244556427, "eval_logits": -1.5418288707733154, "eval_logps": -89.54682159423828, "eval_loss": 0.6642188429832458, "eval_objective": 0.66639244556427, "eval_ranking_simple": 0.5456756949424744, "eval_runtime": 667.0903, "eval_samples_per_second": 16.623, "eval_steps_per_second": 1.387, "step": 350 }, { "dpo_loss": 0.5924364328384399, "epoch": 0.5105465004793864, "grad_norm": 15.350144877247981, "learning_rate": 2.844475726711595e-06, "logits": -1.5987753868103027, "logps": -88.64606475830078, "loss": 0.5333, "objective": 0.5924364328384399, "ranking_simple": 0.6416666507720947, "step": 355 }, { "dpo_loss": 0.5756219029426575, "epoch": 0.5177372962607862, "grad_norm": 13.527406313681999, "learning_rate": 2.7821409621837042e-06, "logits": -1.641775369644165, "logps": -87.02297973632812, "loss": 0.5326, "objective": 0.5756219029426575, "ranking_simple": 0.6166666746139526, "step": 360 }, { "dpo_loss": 0.5566696524620056, "epoch": 0.524928092042186, "grad_norm": 12.992979137134387, "learning_rate": 2.7196279913768587e-06, "logits": -1.6056040525436401, "logps": -88.7591781616211, "loss": 0.5132, "objective": 0.5566696524620056, "ranking_simple": 0.612500011920929, "step": 365 }, { "dpo_loss": 0.5310615301132202, "epoch": 0.5321188878235859, "grad_norm": 12.368172686048874, "learning_rate": 2.6569762988232838e-06, "logits": -1.6386042833328247, "logps": -88.06958770751953, "loss": 0.5415, "objective": 0.5310615301132202, "ranking_simple": 0.675000011920929, "step": 370 }, { "dpo_loss": 0.49322208762168884, "epoch": 0.5393096836049857, "grad_norm": 14.332678107736337, "learning_rate": 2.594225456674837e-06, "logits": -1.642881989479065, "logps": -88.7568359375, "loss": 0.5239, "objective": 0.49322208762168884, "ranking_simple": 0.6333333253860474, "step": 375 }, { "dpo_loss": 0.5207617878913879, "epoch": 0.5465004793863855, "grad_norm": 12.666539980855852, "learning_rate": 2.531415099708382e-06, "logits": -1.6033991575241089, "logps": -87.1814956665039, "loss": 0.5323, "objective": 0.5207617878913879, "ranking_simple": 0.6416666507720947, "step": 380 }, { "dpo_loss": 0.5247195363044739, "epoch": 0.5536912751677853, "grad_norm": 12.467690022940683, "learning_rate": 2.4685849002916184e-06, "logits": -1.5490994453430176, "logps": -87.7108154296875, "loss": 0.5402, "objective": 0.5247195363044739, "ranking_simple": 0.6625000238418579, "step": 385 }, { "dpo_loss": 0.5364466309547424, "epoch": 0.5608820709491851, "grad_norm": 13.247530100964932, "learning_rate": 2.4057745433251637e-06, "logits": -1.5940817594528198, "logps": -83.6358871459961, "loss": 0.5439, "objective": 0.5364466309547424, "ranking_simple": 0.5791666507720947, "step": 390 }, { "dpo_loss": 0.5119186639785767, "epoch": 0.5680728667305849, "grad_norm": 13.353391264425635, "learning_rate": 2.3430237011767166e-06, "logits": -1.462990641593933, "logps": -88.32820892333984, "loss": 0.5228, "objective": 0.5119186639785767, "ranking_simple": 0.625, "step": 395 }, { "dpo_loss": 0.5097938776016235, "epoch": 0.5752636625119847, "grad_norm": 12.303363355643434, "learning_rate": 2.280372008623142e-06, "logits": -1.3568633794784546, "logps": -87.81967163085938, "loss": 0.5055, "objective": 0.5097938776016235, "ranking_simple": 0.5958333611488342, "step": 400 }, { "epoch": 0.5752636625119847, "eval_dpo_loss": 0.6593486666679382, "eval_logits": -1.4127254486083984, "eval_logps": -89.56134796142578, "eval_loss": 0.6579382419586182, "eval_objective": 0.6593486666679382, "eval_ranking_simple": 0.5535135269165039, "eval_runtime": 673.313, "eval_samples_per_second": 16.469, "eval_steps_per_second": 1.374, "step": 400 }, { "dpo_loss": 0.5508550405502319, "epoch": 0.5824544582933845, "grad_norm": 12.24633763991367, "learning_rate": 2.2178590378162957e-06, "logits": -1.4666920900344849, "logps": -84.19215393066406, "loss": 0.5126, "objective": 0.5508550405502319, "ranking_simple": 0.6166666746139526, "step": 405 }, { "dpo_loss": 0.45802783966064453, "epoch": 0.5896452540747843, "grad_norm": 12.200895787251042, "learning_rate": 2.155524273288405e-06, "logits": -1.4165197610855103, "logps": -86.24173736572266, "loss": 0.4984, "objective": 0.45802783966064453, "ranking_simple": 0.6625000238418579, "step": 410 }, { "dpo_loss": 0.5100553035736084, "epoch": 0.5968360498561841, "grad_norm": 11.346657198974036, "learning_rate": 2.093407087012791e-06, "logits": -1.4899728298187256, "logps": -85.83265686035156, "loss": 0.4831, "objective": 0.5100553035736084, "ranking_simple": 0.6166666746139526, "step": 415 }, { "dpo_loss": 0.5085461139678955, "epoch": 0.6040268456375839, "grad_norm": 11.635360179200692, "learning_rate": 2.031546713535688e-06, "logits": -1.4201585054397583, "logps": -86.8657455444336, "loss": 0.5288, "objective": 0.5085461139678955, "ranking_simple": 0.6625000238418579, "step": 420 }, { "dpo_loss": 0.47269171476364136, "epoch": 0.6112176414189837, "grad_norm": 12.428264913670654, "learning_rate": 1.969982225194864e-06, "logits": -1.5006626844406128, "logps": -86.10279083251953, "loss": 0.5131, "objective": 0.47269171476364136, "ranking_simple": 0.6666666865348816, "step": 425 }, { "dpo_loss": 0.5131818056106567, "epoch": 0.6184084372003835, "grad_norm": 13.248218878689803, "learning_rate": 1.908752507440689e-06, "logits": -1.387397289276123, "logps": -87.1451644897461, "loss": 0.4929, "objective": 0.5131818056106567, "ranking_simple": 0.6291666626930237, "step": 430 }, { "dpo_loss": 0.5004034042358398, "epoch": 0.6255992329817833, "grad_norm": 13.300116401195462, "learning_rate": 1.8478962342752584e-06, "logits": -1.5423215627670288, "logps": -88.39946746826172, "loss": 0.4938, "objective": 0.5004034042358398, "ranking_simple": 0.7166666388511658, "step": 435 }, { "dpo_loss": 0.5937944054603577, "epoch": 0.6327900287631831, "grad_norm": 11.725152089523224, "learning_rate": 1.7874518438250598e-06, "logits": -1.5435981750488281, "logps": -88.17759704589844, "loss": 0.5132, "objective": 0.5937944054603577, "ranking_simple": 0.5874999761581421, "step": 440 }, { "dpo_loss": 0.4983668625354767, "epoch": 0.6399808245445829, "grad_norm": 12.259686808708759, "learning_rate": 1.7274575140626318e-06, "logits": -1.5366530418395996, "logps": -89.67758178710938, "loss": 0.4849, "objective": 0.4983668625354767, "ranking_simple": 0.6333333253860474, "step": 445 }, { "dpo_loss": 0.5500684380531311, "epoch": 0.6471716203259827, "grad_norm": 14.756271253525957, "learning_rate": 1.6679511386925337e-06, "logits": -1.5554006099700928, "logps": -87.62480163574219, "loss": 0.5493, "objective": 0.5500684380531311, "ranking_simple": 0.6541666388511658, "step": 450 }, { "epoch": 0.6471716203259827, "eval_dpo_loss": 0.6628099083900452, "eval_logits": -1.5076098442077637, "eval_logps": -90.84712219238281, "eval_loss": 0.6599166393280029, "eval_objective": 0.6628099083900452, "eval_ranking_simple": 0.5556756854057312, "eval_runtime": 666.1599, "eval_samples_per_second": 16.646, "eval_steps_per_second": 1.389, "step": 450 }, { "dpo_loss": 0.46406909823417664, "epoch": 0.6543624161073825, "grad_norm": 15.01981163706973, "learning_rate": 1.6089703032168736e-06, "logits": -1.6114773750305176, "logps": -85.98390197753906, "loss": 0.4709, "objective": 0.46406909823417664, "ranking_simple": 0.6416666507720947, "step": 455 }, { "dpo_loss": 0.5671834945678711, "epoch": 0.6615532118887824, "grad_norm": 15.90483411128291, "learning_rate": 1.5505522611954977e-06, "logits": -1.611572265625, "logps": -86.73653411865234, "loss": 0.5229, "objective": 0.5671834945678711, "ranking_simple": 0.6499999761581421, "step": 460 }, { "dpo_loss": 0.49705228209495544, "epoch": 0.6687440076701822, "grad_norm": 12.71861951977229, "learning_rate": 1.4927339107158437e-06, "logits": -1.4986417293548584, "logps": -86.50923156738281, "loss": 0.5062, "objective": 0.49705228209495544, "ranking_simple": 0.6708333492279053, "step": 465 }, { "dpo_loss": 0.49713465571403503, "epoch": 0.675934803451582, "grad_norm": 13.38314630878278, "learning_rate": 1.4355517710873184e-06, "logits": -1.575962781906128, "logps": -87.91844940185547, "loss": 0.4998, "objective": 0.49713465571403503, "ranking_simple": 0.6625000238418579, "step": 470 }, { "dpo_loss": 0.4781602621078491, "epoch": 0.6831255992329818, "grad_norm": 14.056433998263255, "learning_rate": 1.3790419597749198e-06, "logits": -1.4974421262741089, "logps": -86.79618835449219, "loss": 0.5164, "objective": 0.4781602621078491, "ranking_simple": 0.6166666746139526, "step": 475 }, { "dpo_loss": 0.56535404920578, "epoch": 0.6903163950143816, "grad_norm": 14.229131759003886, "learning_rate": 1.3232401695866686e-06, "logits": -1.5553234815597534, "logps": -88.59098052978516, "loss": 0.5259, "objective": 0.56535404920578, "ranking_simple": 0.6541666388511658, "step": 480 }, { "dpo_loss": 0.5027016997337341, "epoch": 0.6975071907957814, "grad_norm": 13.871006060897372, "learning_rate": 1.2681816461292715e-06, "logits": -1.423956274986267, "logps": -89.58472442626953, "loss": 0.5072, "objective": 0.5027016997337341, "ranking_simple": 0.6791666746139526, "step": 485 }, { "dpo_loss": 0.50449138879776, "epoch": 0.7046979865771812, "grad_norm": 12.098287020865879, "learning_rate": 1.2139011655462338e-06, "logits": -1.48275887966156, "logps": -91.03202819824219, "loss": 0.4816, "objective": 0.50449138879776, "ranking_simple": 0.637499988079071, "step": 490 }, { "dpo_loss": 0.5269492268562317, "epoch": 0.711888782358581, "grad_norm": 12.367787564366719, "learning_rate": 1.160433012552508e-06, "logits": -1.6003237962722778, "logps": -88.4117202758789, "loss": 0.5101, "objective": 0.5269492268562317, "ranking_simple": 0.6291666626930237, "step": 495 }, { "dpo_loss": 0.48800936341285706, "epoch": 0.7190795781399808, "grad_norm": 14.023641814656244, "learning_rate": 1.1078109587795311e-06, "logits": -1.4825706481933594, "logps": -87.40599060058594, "loss": 0.5056, "objective": 0.48800936341285706, "ranking_simple": 0.5874999761581421, "step": 500 }, { "epoch": 0.7190795781399808, "eval_dpo_loss": 0.659065842628479, "eval_logits": -1.4782841205596924, "eval_logps": -91.5462417602539, "eval_loss": 0.656062662601471, "eval_objective": 0.659065842628479, "eval_ranking_simple": 0.5540540814399719, "eval_runtime": 671.8381, "eval_samples_per_second": 16.505, "eval_steps_per_second": 1.377, "step": 500 }, { "dpo_loss": 0.5191890001296997, "epoch": 0.7262703739213806, "grad_norm": 14.79908104404648, "learning_rate": 1.0560682414443315e-06, "logits": -1.7025607824325562, "logps": -88.41036224365234, "loss": 0.5285, "objective": 0.5191890001296997, "ranking_simple": 0.6291666626930237, "step": 505 }, { "dpo_loss": 0.5311907529830933, "epoch": 0.7334611697027804, "grad_norm": 13.890053175833895, "learning_rate": 1.0052375423562038e-06, "logits": -1.5510889291763306, "logps": -86.90328979492188, "loss": 0.5148, "objective": 0.5311907529830933, "ranking_simple": 0.6708333492279053, "step": 510 }, { "dpo_loss": 0.5146838426589966, "epoch": 0.7406519654841802, "grad_norm": 13.696722595764014, "learning_rate": 9.553509672741646e-07, "logits": -1.498627781867981, "logps": -89.00706481933594, "loss": 0.5023, "objective": 0.5146838426589966, "ranking_simple": 0.6666666865348816, "step": 515 }, { "dpo_loss": 0.4828968048095703, "epoch": 0.74784276126558, "grad_norm": 14.629592063481873, "learning_rate": 9.064400256282757e-07, "logits": -1.5899360179901123, "logps": -85.54163360595703, "loss": 0.4789, "objective": 0.4828968048095703, "ranking_simple": 0.6458333134651184, "step": 520 }, { "dpo_loss": 0.4821651577949524, "epoch": 0.7550335570469798, "grad_norm": 12.203692188404089, "learning_rate": 8.585356106176093e-07, "logits": -1.5359408855438232, "logps": -85.53140258789062, "loss": 0.4681, "objective": 0.4821651577949524, "ranking_simple": 0.6708333492279053, "step": 525 }, { "dpo_loss": 0.4843992292881012, "epoch": 0.7622243528283796, "grad_norm": 14.26789388947485, "learning_rate": 8.116679796974389e-07, "logits": -1.5956252813339233, "logps": -88.78003692626953, "loss": 0.4665, "objective": 0.4843992292881012, "ranking_simple": 0.6916666626930237, "step": 530 }, { "dpo_loss": 0.48365145921707153, "epoch": 0.7694151486097794, "grad_norm": 14.283269123514527, "learning_rate": 7.65866735467988e-07, "logits": -1.56830632686615, "logps": -87.40410614013672, "loss": 0.5123, "objective": 0.48365145921707153, "ranking_simple": 0.637499988079071, "step": 535 }, { "dpo_loss": 0.4533773958683014, "epoch": 0.7766059443911792, "grad_norm": 12.484364657489317, "learning_rate": 7.211608069767867e-07, "logits": -1.5612220764160156, "logps": -87.40184783935547, "loss": 0.4608, "objective": 0.4533773958683014, "ranking_simple": 0.6625000238418579, "step": 540 }, { "dpo_loss": 0.5006808042526245, "epoch": 0.783796740172579, "grad_norm": 16.213825472630578, "learning_rate": 6.775784314464717e-07, "logits": -1.6361111402511597, "logps": -86.7210464477539, "loss": 0.5143, "objective": 0.5006808042526245, "ranking_simple": 0.6625000238418579, "step": 545 }, { "dpo_loss": 0.4449290633201599, "epoch": 0.7909875359539789, "grad_norm": 14.026431238273847, "learning_rate": 6.351471364395448e-07, "logits": -1.5031849145889282, "logps": -86.18959045410156, "loss": 0.475, "objective": 0.4449290633201599, "ranking_simple": 0.6875, "step": 550 }, { "epoch": 0.7909875359539789, "eval_dpo_loss": 0.6598567962646484, "eval_logits": -1.5167720317840576, "eval_logps": -90.85450744628906, "eval_loss": 0.6573106646537781, "eval_objective": 0.6598567962646484, "eval_ranking_simple": 0.5581080913543701, "eval_runtime": 667.865, "eval_samples_per_second": 16.604, "eval_steps_per_second": 1.385, "step": 550 }, { "dpo_loss": 0.4697189927101135, "epoch": 0.7981783317353787, "grad_norm": 13.213685669112976, "learning_rate": 5.9389372247138e-07, "logits": -1.5569509267807007, "logps": -89.16913604736328, "loss": 0.4903, "objective": 0.4697189927101135, "ranking_simple": 0.675000011920929, "step": 555 }, { "dpo_loss": 0.4498293101787567, "epoch": 0.8053691275167785, "grad_norm": 13.815731473547668, "learning_rate": 5.538442460824417e-07, "logits": -1.6490610837936401, "logps": -89.35672760009766, "loss": 0.4906, "objective": 0.4498293101787567, "ranking_simple": 0.6666666865348816, "step": 560 }, { "dpo_loss": 0.47427651286125183, "epoch": 0.8125599232981783, "grad_norm": 13.523693416447607, "learning_rate": 5.150240033804116e-07, "logits": -1.617875337600708, "logps": -87.59880065917969, "loss": 0.4859, "objective": 0.47427651286125183, "ranking_simple": 0.6499999761581421, "step": 565 }, { "dpo_loss": 0.49262678623199463, "epoch": 0.8197507190795782, "grad_norm": 13.02579782169373, "learning_rate": 4.774575140626317e-07, "logits": -1.571083426475525, "logps": -86.48162078857422, "loss": 0.46, "objective": 0.49262678623199463, "ranking_simple": 0.6458333134651184, "step": 570 }, { "dpo_loss": 0.5198132991790771, "epoch": 0.826941514860978, "grad_norm": 11.87980002943589, "learning_rate": 4.411685059289314e-07, "logits": -1.5978747606277466, "logps": -88.71498107910156, "loss": 0.4784, "objective": 0.5198132991790771, "ranking_simple": 0.6458333134651184, "step": 575 }, { "dpo_loss": 0.46336984634399414, "epoch": 0.8341323106423778, "grad_norm": 15.96370979044969, "learning_rate": 4.061798998946459e-07, "logits": -1.5552499294281006, "logps": -85.58879852294922, "loss": 0.4752, "objective": 0.46336984634399414, "ranking_simple": 0.6916666626930237, "step": 580 }, { "dpo_loss": 0.41780319809913635, "epoch": 0.8413231064237776, "grad_norm": 13.134671346499106, "learning_rate": 3.725137955132707e-07, "logits": -1.6836026906967163, "logps": -88.24311065673828, "loss": 0.4566, "objective": 0.41780319809913635, "ranking_simple": 0.6708333492279053, "step": 585 }, { "dpo_loss": 0.4858356714248657, "epoch": 0.8485139022051774, "grad_norm": 13.954686906693901, "learning_rate": 3.4019145701791186e-07, "logits": -1.7384170293807983, "logps": -88.8193359375, "loss": 0.4763, "objective": 0.4858356714248657, "ranking_simple": 0.6708333492279053, "step": 590 }, { "dpo_loss": 0.48335954546928406, "epoch": 0.8557046979865772, "grad_norm": 13.802803355539663, "learning_rate": 3.092332998903416e-07, "logits": -1.6276822090148926, "logps": -89.26622772216797, "loss": 0.4727, "objective": 0.48335954546928406, "ranking_simple": 0.6583333611488342, "step": 595 }, { "dpo_loss": 0.522754430770874, "epoch": 0.862895493767977, "grad_norm": 13.432541739844822, "learning_rate": 2.796588779661388e-07, "logits": -1.5472941398620605, "logps": -87.67240142822266, "loss": 0.4881, "objective": 0.522754430770874, "ranking_simple": 0.6666666865348816, "step": 600 }, { "epoch": 0.862895493767977, "eval_dpo_loss": 0.6609825491905212, "eval_logits": -1.575743556022644, "eval_logps": -91.89826202392578, "eval_loss": 0.6581856608390808, "eval_objective": 0.6609825491905212, "eval_ranking_simple": 0.5564864873886108, "eval_runtime": 667.1404, "eval_samples_per_second": 16.622, "eval_steps_per_second": 1.387, "step": 600 }, { "dpo_loss": 0.4695630371570587, "epoch": 0.8700862895493768, "grad_norm": 14.156954962823502, "learning_rate": 2.514868710840723e-07, "logits": -1.5538970232009888, "logps": -86.31729888916016, "loss": 0.4836, "objective": 0.4695630371570587, "ranking_simple": 0.6416666507720947, "step": 605 }, { "dpo_loss": 0.49627622961997986, "epoch": 0.8772770853307766, "grad_norm": 13.015536935516824, "learning_rate": 2.2473507328751086e-07, "logits": -1.6039773225784302, "logps": -90.30487823486328, "loss": 0.4714, "objective": 0.49627622961997986, "ranking_simple": 0.675000011920929, "step": 610 }, { "dpo_loss": 0.5044869184494019, "epoch": 0.8844678811121764, "grad_norm": 14.874997035211502, "learning_rate": 1.9942038158532407e-07, "logits": -1.6170308589935303, "logps": -87.51586151123047, "loss": 0.4845, "objective": 0.5044869184494019, "ranking_simple": 0.7333333492279053, "step": 615 }, { "dpo_loss": 0.4692457616329193, "epoch": 0.8916586768935763, "grad_norm": 14.018445832991219, "learning_rate": 1.7555878527937164e-07, "logits": -1.6254560947418213, "logps": -85.93553924560547, "loss": 0.4605, "objective": 0.4692457616329193, "ranking_simple": 0.7083333134651184, "step": 620 }, { "dpo_loss": 0.5408317446708679, "epoch": 0.8988494726749761, "grad_norm": 13.907456738178524, "learning_rate": 1.5316535586531483e-07, "logits": -1.5320008993148804, "logps": -87.24546813964844, "loss": 0.508, "objective": 0.5408317446708679, "ranking_simple": 0.675000011920929, "step": 625 }, { "dpo_loss": 0.4765617549419403, "epoch": 0.9060402684563759, "grad_norm": 11.837344650453549, "learning_rate": 1.3225423751313942e-07, "logits": -1.5696455240249634, "logps": -88.36416625976562, "loss": 0.4901, "objective": 0.4765617549419403, "ranking_simple": 0.6583333611488342, "step": 630 }, { "dpo_loss": 0.5072826743125916, "epoch": 0.9132310642377757, "grad_norm": 14.491002179336329, "learning_rate": 1.1283863813339263e-07, "logits": -1.571786880493164, "logps": -87.18602752685547, "loss": 0.5107, "objective": 0.5072826743125916, "ranking_simple": 0.5958333611488342, "step": 635 }, { "dpo_loss": 0.4825662672519684, "epoch": 0.9204218600191755, "grad_norm": 14.33764106585909, "learning_rate": 9.493082103478519e-08, "logits": -1.6901806592941284, "logps": -89.96266174316406, "loss": 0.4713, "objective": 0.4825662672519684, "ranking_simple": 0.6208333373069763, "step": 640 }, { "dpo_loss": 0.4537600874900818, "epoch": 0.9276126558005753, "grad_norm": 14.477057200242452, "learning_rate": 7.854209717842231e-08, "logits": -1.5616987943649292, "logps": -89.00830841064453, "loss": 0.465, "objective": 0.4537600874900818, "ranking_simple": 0.637499988079071, "step": 645 }, { "dpo_loss": 0.45157289505004883, "epoch": 0.9348034515819751, "grad_norm": 11.922257325246159, "learning_rate": 6.368281803355692e-08, "logits": -1.5918368101119995, "logps": -87.64961242675781, "loss": 0.4496, "objective": 0.45157289505004883, "ranking_simple": 0.6958333253860474, "step": 650 }, { "epoch": 0.9348034515819751, "eval_dpo_loss": 0.6607991456985474, "eval_logits": -1.5710175037384033, "eval_logps": -91.90254211425781, "eval_loss": 0.6576409935951233, "eval_objective": 0.6607991456985474, "eval_ranking_simple": 0.5572972893714905, "eval_runtime": 667.4864, "eval_samples_per_second": 16.613, "eval_steps_per_second": 1.386, "step": 650 }, { "dpo_loss": 0.47816571593284607, "epoch": 0.9419942473633749, "grad_norm": 16.22501773533146, "learning_rate": 5.036236903938285e-08, "logits": -1.697513461112976, "logps": -89.019287109375, "loss": 0.4829, "objective": 0.47816571593284607, "ranking_simple": 0.6458333134651184, "step": 655 }, { "dpo_loss": 0.5012370347976685, "epoch": 0.9491850431447747, "grad_norm": 14.637112054873407, "learning_rate": 3.8589163676986674e-08, "logits": -1.6037873029708862, "logps": -88.02587890625, "loss": 0.4592, "objective": 0.5012370347976685, "ranking_simple": 0.6708333492279053, "step": 660 }, { "dpo_loss": 0.39999979734420776, "epoch": 0.9563758389261745, "grad_norm": 12.62525649619692, "learning_rate": 2.8370638155215125e-08, "logits": -1.6766146421432495, "logps": -89.1705093383789, "loss": 0.4545, "objective": 0.39999979734420776, "ranking_simple": 0.6625000238418579, "step": 665 }, { "dpo_loss": 0.40588125586509705, "epoch": 0.9635666347075743, "grad_norm": 13.937122960983983, "learning_rate": 1.9713246713805588e-08, "logits": -1.6513657569885254, "logps": -89.80951690673828, "loss": 0.4608, "objective": 0.40588125586509705, "ranking_simple": 0.675000011920929, "step": 670 }, { "dpo_loss": 0.5094369053840637, "epoch": 0.9707574304889741, "grad_norm": 14.410112405435674, "learning_rate": 1.2622457546749567e-08, "logits": -1.6709257364273071, "logps": -90.41423034667969, "loss": 0.4891, "objective": 0.5094369053840637, "ranking_simple": 0.6291666626930237, "step": 675 }, { "dpo_loss": 0.5134339928627014, "epoch": 0.9779482262703739, "grad_norm": 14.035267614600281, "learning_rate": 7.102749348465166e-09, "logits": -1.580245852470398, "logps": -87.9394760131836, "loss": 0.5082, "objective": 0.5134339928627014, "ranking_simple": 0.637499988079071, "step": 680 }, { "dpo_loss": 0.5040514469146729, "epoch": 0.9851390220517737, "grad_norm": 14.829160167874026, "learning_rate": 3.1576084849563315e-09, "logits": -1.5595978498458862, "logps": -88.81024932861328, "loss": 0.5014, "objective": 0.5040514469146729, "ranking_simple": 0.6458333134651184, "step": 685 }, { "dpo_loss": 0.5098596215248108, "epoch": 0.9923298178331735, "grad_norm": 15.252382359007699, "learning_rate": 7.895267917501503e-10, "logits": -1.6206508874893188, "logps": -88.91633605957031, "loss": 0.4697, "objective": 0.5098596215248108, "ranking_simple": 0.6583333611488342, "step": 690 }, { "dpo_loss": 0.47479790449142456, "epoch": 0.9995206136145733, "grad_norm": 14.700651928280335, "learning_rate": 0.0, "logits": -1.5682662725448608, "logps": -88.02741241455078, "loss": 0.4579, "objective": 0.47479790449142456, "ranking_simple": 0.6708333492279053, "step": 695 }, { "epoch": 0.9995206136145733, "step": 695, "total_flos": 0.0, "train_loss": 0.5500740305125285, "train_runtime": 23565.8136, "train_samples_per_second": 4.249, "train_steps_per_second": 0.029 } ], "logging_steps": 5, "max_steps": 695, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }