{ "best_metric": 0.53005450963974, "best_model_checkpoint": "miner_id_24/checkpoint-450", "epoch": 0.17061611374407584, "eval_steps": 150, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003791469194312796, "eval_loss": 1.7795168161392212, "eval_runtime": 209.8549, "eval_samples_per_second": 21.167, "eval_steps_per_second": 5.294, "step": 1 }, { "epoch": 0.0037914691943127963, "grad_norm": 2.3918707370758057, "learning_rate": 2e-05, "loss": 2.1878, "step": 10 }, { "epoch": 0.007582938388625593, "grad_norm": 1.775721788406372, "learning_rate": 4e-05, "loss": 1.257, "step": 20 }, { "epoch": 0.011374407582938388, "grad_norm": 1.3155328035354614, "learning_rate": 6e-05, "loss": 0.7683, "step": 30 }, { "epoch": 0.015165876777251185, "grad_norm": 1.3514695167541504, "learning_rate": 8e-05, "loss": 0.5549, "step": 40 }, { "epoch": 0.018957345971563982, "grad_norm": 1.1452535390853882, "learning_rate": 0.0001, "loss": 0.3957, "step": 50 }, { "epoch": 0.022748815165876776, "grad_norm": 1.4056144952774048, "learning_rate": 9.999631611658893e-05, "loss": 1.3166, "step": 60 }, { "epoch": 0.026540284360189573, "grad_norm": 1.0476895570755005, "learning_rate": 9.998526500919558e-05, "loss": 0.6856, "step": 70 }, { "epoch": 0.03033175355450237, "grad_norm": 1.057721495628357, "learning_rate": 9.996684830625961e-05, "loss": 0.5876, "step": 80 }, { "epoch": 0.034123222748815164, "grad_norm": 0.8883450627326965, "learning_rate": 9.99410687215805e-05, "loss": 0.5092, "step": 90 }, { "epoch": 0.037914691943127965, "grad_norm": 1.0792176723480225, "learning_rate": 9.990793005391757e-05, "loss": 0.3727, "step": 100 }, { "epoch": 0.04170616113744076, "grad_norm": 0.9899026155471802, "learning_rate": 9.986743718643037e-05, "loss": 1.0962, "step": 110 }, { "epoch": 0.04549763033175355, "grad_norm": 0.9852607250213623, "learning_rate": 9.981959608595904e-05, "loss": 0.6074, "step": 120 }, { "epoch": 0.04928909952606635, "grad_norm": 0.8935758471488953, "learning_rate": 9.976441380214499e-05, "loss": 0.5443, "step": 130 }, { "epoch": 0.05308056872037915, "grad_norm": 0.8601885437965393, "learning_rate": 9.970189846639224e-05, "loss": 0.4588, "step": 140 }, { "epoch": 0.05687203791469194, "grad_norm": 0.8278557658195496, "learning_rate": 9.963205929066912e-05, "loss": 0.3571, "step": 150 }, { "epoch": 0.05687203791469194, "eval_loss": 0.6163225173950195, "eval_runtime": 211.3771, "eval_samples_per_second": 21.015, "eval_steps_per_second": 5.256, "step": 150 }, { "epoch": 0.06066350710900474, "grad_norm": 0.9639042019844055, "learning_rate": 9.955490656615086e-05, "loss": 0.9907, "step": 160 }, { "epoch": 0.06445497630331753, "grad_norm": 1.0571763515472412, "learning_rate": 9.947045166170315e-05, "loss": 0.6059, "step": 170 }, { "epoch": 0.06824644549763033, "grad_norm": 0.8203420639038086, "learning_rate": 9.937870702220684e-05, "loss": 0.5407, "step": 180 }, { "epoch": 0.07203791469194312, "grad_norm": 0.7809204459190369, "learning_rate": 9.927968616672416e-05, "loss": 0.4592, "step": 190 }, { "epoch": 0.07582938388625593, "grad_norm": 0.6434981226921082, "learning_rate": 9.917340368650657e-05, "loss": 0.3295, "step": 200 }, { "epoch": 0.07962085308056872, "grad_norm": 0.8934921026229858, "learning_rate": 9.905987524284471e-05, "loss": 0.9521, "step": 210 }, { "epoch": 0.08341232227488152, "grad_norm": 0.8254252672195435, "learning_rate": 9.89391175647606e-05, "loss": 0.5738, "step": 220 }, { "epoch": 0.08720379146919431, "grad_norm": 0.840071976184845, "learning_rate": 9.881114844654249e-05, "loss": 0.5222, "step": 230 }, { "epoch": 0.0909952606635071, "grad_norm": 0.8142471313476562, "learning_rate": 9.867598674512288e-05, "loss": 0.4142, "step": 240 }, { "epoch": 0.0947867298578199, "grad_norm": 0.6625562310218811, "learning_rate": 9.853365237729976e-05, "loss": 0.3158, "step": 250 }, { "epoch": 0.0985781990521327, "grad_norm": 0.9738134145736694, "learning_rate": 9.838416631680176e-05, "loss": 0.9377, "step": 260 }, { "epoch": 0.1023696682464455, "grad_norm": 0.919395387172699, "learning_rate": 9.822755059119765e-05, "loss": 0.5472, "step": 270 }, { "epoch": 0.1061611374407583, "grad_norm": 0.9126551151275635, "learning_rate": 9.806382827865035e-05, "loss": 0.4959, "step": 280 }, { "epoch": 0.10995260663507109, "grad_norm": 0.7662134766578674, "learning_rate": 9.78930235045163e-05, "loss": 0.4471, "step": 290 }, { "epoch": 0.11374407582938388, "grad_norm": 0.6991143226623535, "learning_rate": 9.771516143779049e-05, "loss": 0.345, "step": 300 }, { "epoch": 0.11374407582938388, "eval_loss": 0.5615507960319519, "eval_runtime": 210.9515, "eval_samples_per_second": 21.057, "eval_steps_per_second": 5.267, "step": 300 }, { "epoch": 0.11753554502369669, "grad_norm": 0.8847883343696594, "learning_rate": 9.753026828739756e-05, "loss": 0.9371, "step": 310 }, { "epoch": 0.12132701421800948, "grad_norm": 0.840414822101593, "learning_rate": 9.733837129832993e-05, "loss": 0.5609, "step": 320 }, { "epoch": 0.12511848341232226, "grad_norm": 0.8552011251449585, "learning_rate": 9.713949874763296e-05, "loss": 0.4904, "step": 330 }, { "epoch": 0.12890995260663507, "grad_norm": 0.7504507899284363, "learning_rate": 9.693367994023828e-05, "loss": 0.4276, "step": 340 }, { "epoch": 0.13270142180094788, "grad_norm": 0.7728025317192078, "learning_rate": 9.672094520464552e-05, "loss": 0.3153, "step": 350 }, { "epoch": 0.13649289099526066, "grad_norm": 0.8927388191223145, "learning_rate": 9.650132588845318e-05, "loss": 0.8539, "step": 360 }, { "epoch": 0.14028436018957346, "grad_norm": 0.9280526638031006, "learning_rate": 9.627485435373948e-05, "loss": 0.5319, "step": 370 }, { "epoch": 0.14407582938388624, "grad_norm": 0.8443691730499268, "learning_rate": 9.604156397229367e-05, "loss": 0.4799, "step": 380 }, { "epoch": 0.14786729857819905, "grad_norm": 0.8244546055793762, "learning_rate": 9.580148912069836e-05, "loss": 0.4255, "step": 390 }, { "epoch": 0.15165876777251186, "grad_norm": 0.587851881980896, "learning_rate": 9.555466517526405e-05, "loss": 0.3149, "step": 400 }, { "epoch": 0.15545023696682464, "grad_norm": 0.9294399619102478, "learning_rate": 9.53011285068163e-05, "loss": 0.8398, "step": 410 }, { "epoch": 0.15924170616113745, "grad_norm": 0.8756105303764343, "learning_rate": 9.50409164753362e-05, "loss": 0.5178, "step": 420 }, { "epoch": 0.16303317535545023, "grad_norm": 0.7490562796592712, "learning_rate": 9.477406742445516e-05, "loss": 0.4677, "step": 430 }, { "epoch": 0.16682464454976303, "grad_norm": 0.8731195330619812, "learning_rate": 9.450062067580488e-05, "loss": 0.4073, "step": 440 }, { "epoch": 0.17061611374407584, "grad_norm": 0.7838053107261658, "learning_rate": 9.422061652322298e-05, "loss": 0.2986, "step": 450 }, { "epoch": 0.17061611374407584, "eval_loss": 0.53005450963974, "eval_runtime": 211.0482, "eval_samples_per_second": 21.047, "eval_steps_per_second": 5.264, "step": 450 } ], "logging_steps": 10, "max_steps": 2638, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 150, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.9862471318359245e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }