|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 604.3152841567993, |
|
"epoch": 0.032, |
|
"grad_norm": 0.14503227174282074, |
|
"kl": 0.00012428760528564452, |
|
"learning_rate": 9.375e-07, |
|
"loss": 0.0, |
|
"reward": 0.6548611164093018, |
|
"reward_std": 0.3155763540416956, |
|
"rewards/accuracy_reward": 0.6541666720062494, |
|
"rewards/format_reward": 0.0006944444496184588, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 643.951396560669, |
|
"epoch": 0.064, |
|
"grad_norm": 0.08788644522428513, |
|
"kl": 0.00025680065155029295, |
|
"learning_rate": 1.875e-06, |
|
"loss": 0.0, |
|
"reward": 0.6215277845971287, |
|
"reward_std": 0.30458943024277685, |
|
"rewards/accuracy_reward": 0.6208333398215473, |
|
"rewards/format_reward": 0.0006944444496184588, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 606.6493129730225, |
|
"epoch": 0.096, |
|
"grad_norm": 0.10090751200914383, |
|
"kl": 0.0006760239601135253, |
|
"learning_rate": 2.8125e-06, |
|
"loss": 0.0, |
|
"reward": 0.6791666748467833, |
|
"reward_std": 0.27497591376304625, |
|
"rewards/accuracy_reward": 0.6791666748467833, |
|
"rewards/format_reward": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 628.4958393096924, |
|
"epoch": 0.128, |
|
"grad_norm": 0.10319401323795319, |
|
"kl": 0.0019372463226318359, |
|
"learning_rate": 2.993961440992859e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7437500089406968, |
|
"reward_std": 0.23143401462584734, |
|
"rewards/accuracy_reward": 0.7437500089406968, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 637.5111164093017, |
|
"epoch": 0.16, |
|
"grad_norm": 0.0020248512737452984, |
|
"kl": 0.13670992851257324, |
|
"learning_rate": 2.9695130976348534e-06, |
|
"loss": 0.0055, |
|
"reward": 0.7256944484543055, |
|
"reward_std": 0.23655044846236706, |
|
"rewards/accuracy_reward": 0.7256944484543055, |
|
"rewards/format_reward": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 628.5451442718506, |
|
"epoch": 0.192, |
|
"grad_norm": 0.09042873978614807, |
|
"kl": 0.0024117946624755858, |
|
"learning_rate": 2.9265847744427307e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7555555619299412, |
|
"reward_std": 0.20260455049574375, |
|
"rewards/accuracy_reward": 0.7555555619299412, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 615.7291702270508, |
|
"epoch": 0.224, |
|
"grad_norm": 0.07207166403532028, |
|
"kl": 0.0032260894775390627, |
|
"learning_rate": 2.865716319988224e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7326388933230191, |
|
"reward_std": 0.21108066253364086, |
|
"rewards/accuracy_reward": 0.7326388933230191, |
|
"rewards/format_reward": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 626.9736179351806, |
|
"epoch": 0.256, |
|
"grad_norm": 0.054428163915872574, |
|
"kl": 0.0026816368103027345, |
|
"learning_rate": 2.7876731904027993e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7451388964429497, |
|
"reward_std": 0.2157622052356601, |
|
"rewards/accuracy_reward": 0.7451388964429497, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 594.1208389282226, |
|
"epoch": 0.288, |
|
"grad_norm": 0.08016934990882874, |
|
"kl": 0.002810811996459961, |
|
"learning_rate": 2.6934368233226715e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7694444504100829, |
|
"reward_std": 0.1868146162480116, |
|
"rewards/accuracy_reward": 0.7694444504100829, |
|
"rewards/format_reward": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 614.3645858764648, |
|
"epoch": 0.32, |
|
"grad_norm": 0.11283142864704132, |
|
"kl": 0.002644062042236328, |
|
"learning_rate": 2.584192295741087e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7666666714474559, |
|
"reward_std": 0.18280234448611737, |
|
"rewards/accuracy_reward": 0.7666666714474559, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 587.173616027832, |
|
"epoch": 0.352, |
|
"grad_norm": 0.052785150706768036, |
|
"kl": 0.003171491622924805, |
|
"learning_rate": 2.461313420977536e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7930555634200573, |
|
"reward_std": 0.17841423936188222, |
|
"rewards/accuracy_reward": 0.7930555634200573, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 593.7201442718506, |
|
"epoch": 0.384, |
|
"grad_norm": 0.1886148303747177, |
|
"kl": 0.004308557510375977, |
|
"learning_rate": 2.3263454721781537e-06, |
|
"loss": 0.0002, |
|
"reward": 0.777083340100944, |
|
"reward_std": 0.18415420949459077, |
|
"rewards/accuracy_reward": 0.777083340100944, |
|
"rewards/format_reward": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 600.0354248046875, |
|
"epoch": 0.416, |
|
"grad_norm": 0.019889166578650475, |
|
"kl": 0.0036341667175292967, |
|
"learning_rate": 2.18098574960932e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7277777815237642, |
|
"reward_std": 0.18065068628638983, |
|
"rewards/accuracy_reward": 0.7277777815237642, |
|
"rewards/format_reward": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 598.8555599212647, |
|
"epoch": 0.448, |
|
"grad_norm": 0.050428278744220734, |
|
"kl": 0.016699600219726562, |
|
"learning_rate": 2.027062236122014e-06, |
|
"loss": 0.0007, |
|
"reward": 0.733333338610828, |
|
"reward_std": 0.18704659640789031, |
|
"rewards/accuracy_reward": 0.733333338610828, |
|
"rewards/format_reward": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 589.2645866394043, |
|
"epoch": 0.48, |
|
"grad_norm": 0.5229154229164124, |
|
"kl": 0.004603099822998047, |
|
"learning_rate": 1.866510609206841e-06, |
|
"loss": 0.0002, |
|
"reward": 0.7680555649101735, |
|
"reward_std": 0.20577130634337665, |
|
"rewards/accuracy_reward": 0.7680555649101735, |
|
"rewards/format_reward": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 613.6562576293945, |
|
"epoch": 0.512, |
|
"grad_norm": 0.03132776543498039, |
|
"kl": 0.0037424564361572266, |
|
"learning_rate": 1.7013498987264833e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7611111178994179, |
|
"reward_std": 0.1536103853955865, |
|
"rewards/accuracy_reward": 0.7611111178994179, |
|
"rewards/format_reward": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 580.3000034332275, |
|
"epoch": 0.544, |
|
"grad_norm": 0.041480712592601776, |
|
"kl": 0.0032805442810058595, |
|
"learning_rate": 1.5336570964437077e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7500000051222742, |
|
"reward_std": 0.18460483234375716, |
|
"rewards/accuracy_reward": 0.7500000051222742, |
|
"rewards/format_reward": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 577.7583400726319, |
|
"epoch": 0.576, |
|
"grad_norm": 0.0553722009062767, |
|
"kl": 0.003495025634765625, |
|
"learning_rate": 1.3655410366448499e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7687500093132258, |
|
"reward_std": 0.17976610995829106, |
|
"rewards/accuracy_reward": 0.7687500093132258, |
|
"rewards/format_reward": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 585.31389503479, |
|
"epoch": 0.608, |
|
"grad_norm": 0.05203121528029442, |
|
"kl": 0.003870105743408203, |
|
"learning_rate": 1.199115876325091e-06, |
|
"loss": 0.0002, |
|
"reward": 0.7888888973742724, |
|
"reward_std": 0.15750880874693393, |
|
"rewards/accuracy_reward": 0.7888888973742724, |
|
"rewards/format_reward": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 565.354870223999, |
|
"epoch": 0.64, |
|
"grad_norm": 0.06253661960363388, |
|
"kl": 0.003356456756591797, |
|
"learning_rate": 1.036474508437579e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7930555619299412, |
|
"reward_std": 0.15678047761321068, |
|
"rewards/accuracy_reward": 0.7930555619299412, |
|
"rewards/format_reward": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_completion_length": 580.157574237716, |
|
"eval_kl": 0.007348269420822866, |
|
"eval_loss": 0.00029379926854744554, |
|
"eval_reward": 0.6814303872967167, |
|
"eval_reward_std": 0.20519427888108788, |
|
"eval_rewards/accuracy_reward": 0.6814303872967167, |
|
"eval_rewards/format_reward": 0.0, |
|
"eval_runtime": 15488.5186, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.108, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 574.4409788131713, |
|
"epoch": 0.672, |
|
"grad_norm": 0.034538425505161285, |
|
"kl": 0.003883838653564453, |
|
"learning_rate": 8.796622425502193e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7833333427086473, |
|
"reward_std": 0.1512267516925931, |
|
"rewards/accuracy_reward": 0.7833333427086473, |
|
"rewards/format_reward": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 611.8923690795898, |
|
"epoch": 0.704, |
|
"grad_norm": 0.004378489218652248, |
|
"kl": 0.0033061027526855467, |
|
"learning_rate": 7.30651083891141e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7430555634200573, |
|
"reward_std": 0.19789394550025463, |
|
"rewards/accuracy_reward": 0.7430555634200573, |
|
"rewards/format_reward": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 608.2687576293945, |
|
"epoch": 0.736, |
|
"grad_norm": 0.0850713923573494, |
|
"kl": 0.0032596588134765625, |
|
"learning_rate": 5.913149342387704e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7694444509223104, |
|
"reward_std": 0.1801286071538925, |
|
"rewards/accuracy_reward": 0.7694444509223104, |
|
"rewards/format_reward": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 572.259033203125, |
|
"epoch": 0.768, |
|
"grad_norm": 0.06911196559667587, |
|
"kl": 0.0034391403198242186, |
|
"learning_rate": 4.63406026519703e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7722222298849374, |
|
"reward_std": 0.15622506141662598, |
|
"rewards/accuracy_reward": 0.7722222298849374, |
|
"rewards/format_reward": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 604.2020915985107, |
|
"epoch": 0.8, |
|
"grad_norm": 0.09203875064849854, |
|
"kl": 0.003432178497314453, |
|
"learning_rate": 3.4853288946298335e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7388888964429497, |
|
"reward_std": 0.18809502944350243, |
|
"rewards/accuracy_reward": 0.7388888964429497, |
|
"rewards/format_reward": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 582.3465305328369, |
|
"epoch": 0.832, |
|
"grad_norm": 0.04850884899497032, |
|
"kl": 0.0036045074462890624, |
|
"learning_rate": 2.48140119418046e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7687500065192581, |
|
"reward_std": 0.15752214156091213, |
|
"rewards/accuracy_reward": 0.7687500065192581, |
|
"rewards/format_reward": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 599.1916728973389, |
|
"epoch": 0.864, |
|
"grad_norm": 0.03212558850646019, |
|
"kl": 0.0031602859497070314, |
|
"learning_rate": 1.634902137174483e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7312500088475644, |
|
"reward_std": 0.1761287273839116, |
|
"rewards/accuracy_reward": 0.7312500088475644, |
|
"rewards/format_reward": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 577.2263946533203, |
|
"epoch": 0.896, |
|
"grad_norm": 0.03874632343649864, |
|
"kl": 0.003940582275390625, |
|
"learning_rate": 9.564769404039419e-08, |
|
"loss": 0.0002, |
|
"reward": 0.7729166727513075, |
|
"reward_std": 0.189943253621459, |
|
"rewards/accuracy_reward": 0.7729166727513075, |
|
"rewards/format_reward": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 575.8805583953857, |
|
"epoch": 0.928, |
|
"grad_norm": 0.07863741368055344, |
|
"kl": 0.004055213928222656, |
|
"learning_rate": 4.546571943496969e-08, |
|
"loss": 0.0002, |
|
"reward": 0.7590277843177319, |
|
"reward_std": 0.19089928902685643, |
|
"rewards/accuracy_reward": 0.7590277843177319, |
|
"rewards/format_reward": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 585.861116027832, |
|
"epoch": 0.96, |
|
"grad_norm": 0.02937915176153183, |
|
"kl": 0.004036521911621094, |
|
"learning_rate": 1.357535734809795e-08, |
|
"loss": 0.0002, |
|
"reward": 0.7618055611848831, |
|
"reward_std": 0.169761879183352, |
|
"rewards/accuracy_reward": 0.7618055611848831, |
|
"rewards/format_reward": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 580.2944515228271, |
|
"epoch": 0.992, |
|
"grad_norm": 0.04815535247325897, |
|
"kl": 0.003546333312988281, |
|
"learning_rate": 3.77647586240204e-10, |
|
"loss": 0.0001, |
|
"reward": 0.7680555626749992, |
|
"reward_std": 0.18228026237338782, |
|
"rewards/accuracy_reward": 0.7680555626749992, |
|
"rewards/format_reward": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 629.1527805328369, |
|
"epoch": 0.9984, |
|
"kl": 0.002917766571044922, |
|
"reward": 0.7187500055879354, |
|
"reward_std": 0.1746763400733471, |
|
"rewards/accuracy_reward": 0.7187500055879354, |
|
"rewards/format_reward": 0.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0003127560741445333, |
|
"train_runtime": 45856.1745, |
|
"train_samples_per_second": 0.164, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|