{ "best_metric": 0.5695301294326782, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.7017543859649122, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007017543859649123, "grad_norm": 2.7762348651885986, "learning_rate": 1e-05, "loss": 2.3903, "step": 1 }, { "epoch": 0.007017543859649123, "eval_loss": 0.9683788418769836, "eval_runtime": 17.6946, "eval_samples_per_second": 13.563, "eval_steps_per_second": 3.391, "step": 1 }, { "epoch": 0.014035087719298246, "grad_norm": 2.8924126625061035, "learning_rate": 2e-05, "loss": 2.356, "step": 2 }, { "epoch": 0.021052631578947368, "grad_norm": 3.4745891094207764, "learning_rate": 3e-05, "loss": 2.5984, "step": 3 }, { "epoch": 0.028070175438596492, "grad_norm": 3.46642804145813, "learning_rate": 4e-05, "loss": 2.4721, "step": 4 }, { "epoch": 0.03508771929824561, "grad_norm": 2.7118122577667236, "learning_rate": 5e-05, "loss": 2.8837, "step": 5 }, { "epoch": 0.042105263157894736, "grad_norm": 1.8579764366149902, "learning_rate": 6e-05, "loss": 2.2862, "step": 6 }, { "epoch": 0.04912280701754386, "grad_norm": 1.3937697410583496, "learning_rate": 7e-05, "loss": 2.1112, "step": 7 }, { "epoch": 0.056140350877192984, "grad_norm": 1.0277411937713623, "learning_rate": 8e-05, "loss": 2.5434, "step": 8 }, { "epoch": 0.06315789473684211, "grad_norm": 1.0102975368499756, "learning_rate": 9e-05, "loss": 2.4365, "step": 9 }, { "epoch": 0.07017543859649122, "grad_norm": 1.0124120712280273, "learning_rate": 0.0001, "loss": 2.6585, "step": 10 }, { "epoch": 0.07719298245614035, "grad_norm": 1.0789661407470703, "learning_rate": 9.998605186060137e-05, "loss": 2.4538, "step": 11 }, { "epoch": 0.08421052631578947, "grad_norm": 1.1773967742919922, "learning_rate": 9.99442152244292e-05, "loss": 2.5744, "step": 12 }, { "epoch": 0.0912280701754386, "grad_norm": 1.0121546983718872, "learning_rate": 9.987451343321279e-05, "loss": 2.4692, "step": 13 }, { "epoch": 0.09824561403508772, "grad_norm": 1.3383002281188965, "learning_rate": 9.977698537536419e-05, "loss": 2.9323, "step": 14 }, { "epoch": 0.10526315789473684, "grad_norm": 1.177334189414978, "learning_rate": 9.965168546428121e-05, "loss": 2.2799, "step": 15 }, { "epoch": 0.11228070175438597, "grad_norm": 0.9295145869255066, "learning_rate": 9.949868360798893e-05, "loss": 2.3713, "step": 16 }, { "epoch": 0.11929824561403508, "grad_norm": 0.9561142921447754, "learning_rate": 9.931806517013612e-05, "loss": 2.615, "step": 17 }, { "epoch": 0.12631578947368421, "grad_norm": 0.9804189801216125, "learning_rate": 9.910993092236878e-05, "loss": 2.6586, "step": 18 }, { "epoch": 0.13333333333333333, "grad_norm": 1.0032782554626465, "learning_rate": 9.887439698810693e-05, "loss": 2.6251, "step": 19 }, { "epoch": 0.14035087719298245, "grad_norm": 1.1459952592849731, "learning_rate": 9.861159477775653e-05, "loss": 2.6155, "step": 20 }, { "epoch": 0.14736842105263157, "grad_norm": 0.9445633292198181, "learning_rate": 9.832167091539214e-05, "loss": 2.5463, "step": 21 }, { "epoch": 0.1543859649122807, "grad_norm": 0.9772726893424988, "learning_rate": 9.800478715695163e-05, "loss": 2.4464, "step": 22 }, { "epoch": 0.16140350877192983, "grad_norm": 0.8855580687522888, "learning_rate": 9.766112029998846e-05, "loss": 2.4683, "step": 23 }, { "epoch": 0.16842105263157894, "grad_norm": 1.0070710182189941, "learning_rate": 9.729086208503174e-05, "loss": 2.8086, "step": 24 }, { "epoch": 0.17543859649122806, "grad_norm": 1.0289078950881958, "learning_rate": 9.689421908860927e-05, "loss": 2.7138, "step": 25 }, { "epoch": 0.1824561403508772, "grad_norm": 1.0180659294128418, "learning_rate": 9.64714126079933e-05, "loss": 2.6071, "step": 26 }, { "epoch": 0.18947368421052632, "grad_norm": 0.9560624957084656, "learning_rate": 9.602267853773301e-05, "loss": 2.3464, "step": 27 }, { "epoch": 0.19649122807017544, "grad_norm": 1.3721864223480225, "learning_rate": 9.554826723804303e-05, "loss": 2.6763, "step": 28 }, { "epoch": 0.20350877192982456, "grad_norm": 1.3725249767303467, "learning_rate": 9.504844339512095e-05, "loss": 2.8626, "step": 29 }, { "epoch": 0.21052631578947367, "grad_norm": 1.265424132347107, "learning_rate": 9.452348587347224e-05, "loss": 2.502, "step": 30 }, { "epoch": 0.21754385964912282, "grad_norm": 1.2181262969970703, "learning_rate": 9.397368756032445e-05, "loss": 2.3714, "step": 31 }, { "epoch": 0.22456140350877193, "grad_norm": 1.321036458015442, "learning_rate": 9.339935520221816e-05, "loss": 2.2873, "step": 32 }, { "epoch": 0.23157894736842105, "grad_norm": 1.5463135242462158, "learning_rate": 9.280080923386501e-05, "loss": 2.3626, "step": 33 }, { "epoch": 0.23859649122807017, "grad_norm": 1.3238377571105957, "learning_rate": 9.217838359936914e-05, "loss": 2.2543, "step": 34 }, { "epoch": 0.24561403508771928, "grad_norm": 2.2101263999938965, "learning_rate": 9.153242556591116e-05, "loss": 1.8211, "step": 35 }, { "epoch": 0.25263157894736843, "grad_norm": 0.9670763611793518, "learning_rate": 9.086329552999891e-05, "loss": 2.0042, "step": 36 }, { "epoch": 0.2596491228070175, "grad_norm": 0.7848258018493652, "learning_rate": 9.017136681639307e-05, "loss": 1.7858, "step": 37 }, { "epoch": 0.26666666666666666, "grad_norm": 0.729593813419342, "learning_rate": 8.945702546981969e-05, "loss": 2.1785, "step": 38 }, { "epoch": 0.2736842105263158, "grad_norm": 0.6895172595977783, "learning_rate": 8.872067003958598e-05, "loss": 2.3695, "step": 39 }, { "epoch": 0.2807017543859649, "grad_norm": 0.8260446786880493, "learning_rate": 8.796271135721944e-05, "loss": 1.9042, "step": 40 }, { "epoch": 0.28771929824561404, "grad_norm": 1.0478273630142212, "learning_rate": 8.718357230725449e-05, "loss": 1.8239, "step": 41 }, { "epoch": 0.29473684210526313, "grad_norm": 0.6120851039886475, "learning_rate": 8.638368759129432e-05, "loss": 2.078, "step": 42 }, { "epoch": 0.3017543859649123, "grad_norm": 0.7053998708724976, "learning_rate": 8.556350348547977e-05, "loss": 2.2686, "step": 43 }, { "epoch": 0.3087719298245614, "grad_norm": 0.6118577718734741, "learning_rate": 8.472347759150043e-05, "loss": 1.9846, "step": 44 }, { "epoch": 0.3157894736842105, "grad_norm": 0.6109419465065002, "learning_rate": 8.386407858128706e-05, "loss": 1.9423, "step": 45 }, { "epoch": 0.32280701754385965, "grad_norm": 0.6774243712425232, "learning_rate": 8.298578593552737e-05, "loss": 2.6628, "step": 46 }, { "epoch": 0.3298245614035088, "grad_norm": 0.7050437331199646, "learning_rate": 8.208908967615159e-05, "loss": 2.2159, "step": 47 }, { "epoch": 0.3368421052631579, "grad_norm": 0.8880307674407959, "learning_rate": 8.117449009293668e-05, "loss": 2.0407, "step": 48 }, { "epoch": 0.34385964912280703, "grad_norm": 0.789176344871521, "learning_rate": 8.024249746438188e-05, "loss": 2.3735, "step": 49 }, { "epoch": 0.3508771929824561, "grad_norm": 1.0776917934417725, "learning_rate": 7.929363177301124e-05, "loss": 2.1694, "step": 50 }, { "epoch": 0.3508771929824561, "eval_loss": 0.5783611536026001, "eval_runtime": 17.9725, "eval_samples_per_second": 13.354, "eval_steps_per_second": 3.338, "step": 50 }, { "epoch": 0.35789473684210527, "grad_norm": 0.8510519862174988, "learning_rate": 7.832842241526212e-05, "loss": 2.4368, "step": 51 }, { "epoch": 0.3649122807017544, "grad_norm": 0.7064984440803528, "learning_rate": 7.734740790612136e-05, "loss": 2.2657, "step": 52 }, { "epoch": 0.3719298245614035, "grad_norm": 0.7655551433563232, "learning_rate": 7.635113557867395e-05, "loss": 2.2036, "step": 53 }, { "epoch": 0.37894736842105264, "grad_norm": 0.7803980708122253, "learning_rate": 7.534016127873201e-05, "loss": 2.4267, "step": 54 }, { "epoch": 0.38596491228070173, "grad_norm": 0.7970924377441406, "learning_rate": 7.431504905471408e-05, "loss": 2.5336, "step": 55 }, { "epoch": 0.3929824561403509, "grad_norm": 0.7999897003173828, "learning_rate": 7.327637084294817e-05, "loss": 2.6374, "step": 56 }, { "epoch": 0.4, "grad_norm": 0.8058353662490845, "learning_rate": 7.22247061485738e-05, "loss": 2.6301, "step": 57 }, { "epoch": 0.4070175438596491, "grad_norm": 0.8530853986740112, "learning_rate": 7.116064172222125e-05, "loss": 2.6473, "step": 58 }, { "epoch": 0.41403508771929826, "grad_norm": 0.9347732067108154, "learning_rate": 7.008477123264848e-05, "loss": 2.7603, "step": 59 }, { "epoch": 0.42105263157894735, "grad_norm": 0.8796826004981995, "learning_rate": 6.8997694935518e-05, "loss": 2.4875, "step": 60 }, { "epoch": 0.4280701754385965, "grad_norm": 0.9310798048973083, "learning_rate": 6.790001933849899e-05, "loss": 2.4794, "step": 61 }, { "epoch": 0.43508771929824563, "grad_norm": 0.9056044816970825, "learning_rate": 6.679235686288114e-05, "loss": 2.5706, "step": 62 }, { "epoch": 0.4421052631578947, "grad_norm": 0.9717352986335754, "learning_rate": 6.567532550188907e-05, "loss": 2.3916, "step": 63 }, { "epoch": 0.44912280701754387, "grad_norm": 1.0113260746002197, "learning_rate": 6.454954847588824e-05, "loss": 2.5943, "step": 64 }, { "epoch": 0.45614035087719296, "grad_norm": 1.1161454916000366, "learning_rate": 6.341565388467425e-05, "loss": 2.4713, "step": 65 }, { "epoch": 0.4631578947368421, "grad_norm": 1.0935138463974, "learning_rate": 6.227427435703997e-05, "loss": 2.4408, "step": 66 }, { "epoch": 0.47017543859649125, "grad_norm": 1.1171753406524658, "learning_rate": 6.112604669781572e-05, "loss": 2.594, "step": 67 }, { "epoch": 0.47719298245614034, "grad_norm": 1.2532185316085815, "learning_rate": 5.997161153257963e-05, "loss": 2.6315, "step": 68 }, { "epoch": 0.4842105263157895, "grad_norm": 1.1737571954727173, "learning_rate": 5.8811612950236096e-05, "loss": 2.3575, "step": 69 }, { "epoch": 0.49122807017543857, "grad_norm": 1.5828299522399902, "learning_rate": 5.76466981436623e-05, "loss": 2.3481, "step": 70 }, { "epoch": 0.4982456140350877, "grad_norm": 0.5896500945091248, "learning_rate": 5.647751704862263e-05, "loss": 1.786, "step": 71 }, { "epoch": 0.5052631578947369, "grad_norm": 0.6132327914237976, "learning_rate": 5.5304721981152906e-05, "loss": 1.6966, "step": 72 }, { "epoch": 0.512280701754386, "grad_norm": 0.6747874021530151, "learning_rate": 5.4128967273616625e-05, "loss": 2.312, "step": 73 }, { "epoch": 0.519298245614035, "grad_norm": 0.7108948826789856, "learning_rate": 5.2950908909636146e-05, "loss": 2.0016, "step": 74 }, { "epoch": 0.5263157894736842, "grad_norm": 0.6152532696723938, "learning_rate": 5.1771204158102705e-05, "loss": 1.712, "step": 75 }, { "epoch": 0.5333333333333333, "grad_norm": 0.6101934313774109, "learning_rate": 5.059051120646924e-05, "loss": 2.1353, "step": 76 }, { "epoch": 0.5403508771929825, "grad_norm": 0.5694242715835571, "learning_rate": 4.940948879353078e-05, "loss": 2.2486, "step": 77 }, { "epoch": 0.5473684210526316, "grad_norm": 0.5999569892883301, "learning_rate": 4.822879584189731e-05, "loss": 1.9464, "step": 78 }, { "epoch": 0.5543859649122806, "grad_norm": 0.5646923184394836, "learning_rate": 4.7049091090363865e-05, "loss": 2.1126, "step": 79 }, { "epoch": 0.5614035087719298, "grad_norm": 0.6812761425971985, "learning_rate": 4.5871032726383386e-05, "loss": 2.0419, "step": 80 }, { "epoch": 0.5684210526315789, "grad_norm": 0.6502499580383301, "learning_rate": 4.4695278018847105e-05, "loss": 2.0716, "step": 81 }, { "epoch": 0.5754385964912281, "grad_norm": 0.5896376967430115, "learning_rate": 4.352248295137739e-05, "loss": 1.9859, "step": 82 }, { "epoch": 0.5824561403508772, "grad_norm": 0.6512247323989868, "learning_rate": 4.23533018563377e-05, "loss": 2.3233, "step": 83 }, { "epoch": 0.5894736842105263, "grad_norm": 0.6478447318077087, "learning_rate": 4.118838704976392e-05, "loss": 2.0947, "step": 84 }, { "epoch": 0.5964912280701754, "grad_norm": 0.7491245865821838, "learning_rate": 4.0028388467420385e-05, "loss": 2.4953, "step": 85 }, { "epoch": 0.6035087719298246, "grad_norm": 0.7526668310165405, "learning_rate": 3.887395330218429e-05, "loss": 2.4461, "step": 86 }, { "epoch": 0.6105263157894737, "grad_norm": 0.6946207880973816, "learning_rate": 3.772572564296005e-05, "loss": 2.4925, "step": 87 }, { "epoch": 0.6175438596491228, "grad_norm": 0.7764760851860046, "learning_rate": 3.658434611532578e-05, "loss": 2.4452, "step": 88 }, { "epoch": 0.624561403508772, "grad_norm": 0.7967379093170166, "learning_rate": 3.5450451524111775e-05, "loss": 2.5828, "step": 89 }, { "epoch": 0.631578947368421, "grad_norm": 0.8338074684143066, "learning_rate": 3.4324674498110954e-05, "loss": 2.3247, "step": 90 }, { "epoch": 0.6385964912280702, "grad_norm": 0.7553719282150269, "learning_rate": 3.3207643137118874e-05, "loss": 2.4807, "step": 91 }, { "epoch": 0.6456140350877193, "grad_norm": 0.811700165271759, "learning_rate": 3.209998066150102e-05, "loss": 2.4316, "step": 92 }, { "epoch": 0.6526315789473685, "grad_norm": 0.8489969968795776, "learning_rate": 3.100230506448201e-05, "loss": 2.2822, "step": 93 }, { "epoch": 0.6596491228070176, "grad_norm": 0.8480799198150635, "learning_rate": 2.991522876735154e-05, "loss": 2.549, "step": 94 }, { "epoch": 0.6666666666666666, "grad_norm": 0.8709715604782104, "learning_rate": 2.8839358277778755e-05, "loss": 2.7395, "step": 95 }, { "epoch": 0.6736842105263158, "grad_norm": 0.892209529876709, "learning_rate": 2.7775293851426232e-05, "loss": 2.3252, "step": 96 }, { "epoch": 0.6807017543859649, "grad_norm": 0.8704090118408203, "learning_rate": 2.6723629157051844e-05, "loss": 2.5855, "step": 97 }, { "epoch": 0.6877192982456141, "grad_norm": 1.0494251251220703, "learning_rate": 2.5684950945285935e-05, "loss": 2.4473, "step": 98 }, { "epoch": 0.6947368421052632, "grad_norm": 0.9874880909919739, "learning_rate": 2.4659838721268004e-05, "loss": 2.5214, "step": 99 }, { "epoch": 0.7017543859649122, "grad_norm": 1.0498487949371338, "learning_rate": 2.364886442132606e-05, "loss": 2.4562, "step": 100 }, { "epoch": 0.7017543859649122, "eval_loss": 0.5695301294326782, "eval_runtime": 17.7268, "eval_samples_per_second": 13.539, "eval_steps_per_second": 3.385, "step": 100 } ], "logging_steps": 1, "max_steps": 143, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.761096106573824e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }