{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10084033613445378, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016806722689075631, "grad_norm": 0.08711077272891998, "learning_rate": 4e-05, "loss": 0.7892, "step": 1 }, { "epoch": 0.0033613445378151263, "grad_norm": 0.12688124179840088, "learning_rate": 8e-05, "loss": 0.9025, "step": 2 }, { "epoch": 0.005042016806722689, "grad_norm": 0.10088784247636795, "learning_rate": 0.00012, "loss": 0.7134, "step": 3 }, { "epoch": 0.0067226890756302525, "grad_norm": 0.09177786111831665, "learning_rate": 0.00016, "loss": 0.927, "step": 4 }, { "epoch": 0.008403361344537815, "grad_norm": 0.2383689433336258, "learning_rate": 0.0002, "loss": 0.8999, "step": 5 }, { "epoch": 0.010084033613445379, "grad_norm": 0.07735294103622437, "learning_rate": 0.00019636363636363636, "loss": 0.8429, "step": 6 }, { "epoch": 0.011764705882352941, "grad_norm": 0.07536331564188004, "learning_rate": 0.00019272727272727274, "loss": 0.7763, "step": 7 }, { "epoch": 0.013445378151260505, "grad_norm": 0.08141479641199112, "learning_rate": 0.0001890909090909091, "loss": 0.5322, "step": 8 }, { "epoch": 0.015126050420168067, "grad_norm": 0.0736273005604744, "learning_rate": 0.00018545454545454545, "loss": 0.6342, "step": 9 }, { "epoch": 0.01680672268907563, "grad_norm": 0.10946282744407654, "learning_rate": 0.00018181818181818183, "loss": 0.7587, "step": 10 }, { "epoch": 0.018487394957983194, "grad_norm": 0.07774144411087036, "learning_rate": 0.0001781818181818182, "loss": 0.6685, "step": 11 }, { "epoch": 0.020168067226890758, "grad_norm": 0.07681110501289368, "learning_rate": 0.00017454545454545454, "loss": 1.064, "step": 12 }, { "epoch": 0.021848739495798318, "grad_norm": 0.08137353509664536, "learning_rate": 0.0001709090909090909, "loss": 0.8278, "step": 13 }, { "epoch": 0.023529411764705882, "grad_norm": 0.15623211860656738, "learning_rate": 0.00016727272727272728, "loss": 0.6985, "step": 14 }, { "epoch": 0.025210084033613446, "grad_norm": 0.10167036205530167, "learning_rate": 0.00016363636363636366, "loss": 0.8868, "step": 15 }, { "epoch": 0.02689075630252101, "grad_norm": 0.1013818234205246, "learning_rate": 0.00016, "loss": 0.9126, "step": 16 }, { "epoch": 0.02857142857142857, "grad_norm": 0.09055998921394348, "learning_rate": 0.00015636363636363637, "loss": 0.7888, "step": 17 }, { "epoch": 0.030252100840336135, "grad_norm": 0.16094405949115753, "learning_rate": 0.00015272727272727275, "loss": 0.6225, "step": 18 }, { "epoch": 0.031932773109243695, "grad_norm": 0.21245548129081726, "learning_rate": 0.0001490909090909091, "loss": 0.6745, "step": 19 }, { "epoch": 0.03361344537815126, "grad_norm": 0.08682460337877274, "learning_rate": 0.00014545454545454546, "loss": 0.7252, "step": 20 }, { "epoch": 0.03529411764705882, "grad_norm": 0.08345898985862732, "learning_rate": 0.00014181818181818184, "loss": 0.638, "step": 21 }, { "epoch": 0.03697478991596639, "grad_norm": 0.08165189623832703, "learning_rate": 0.0001381818181818182, "loss": 0.9751, "step": 22 }, { "epoch": 0.03865546218487395, "grad_norm": 0.08062303811311722, "learning_rate": 0.00013454545454545455, "loss": 0.7559, "step": 23 }, { "epoch": 0.040336134453781515, "grad_norm": 0.07231509685516357, "learning_rate": 0.00013090909090909093, "loss": 0.71, "step": 24 }, { "epoch": 0.04201680672268908, "grad_norm": 0.07577697187662125, "learning_rate": 0.00012727272727272728, "loss": 0.7186, "step": 25 }, { "epoch": 0.043697478991596636, "grad_norm": 0.07562875002622604, "learning_rate": 0.00012363636363636364, "loss": 0.9232, "step": 26 }, { "epoch": 0.0453781512605042, "grad_norm": 0.07023375481367111, "learning_rate": 0.00012, "loss": 0.8472, "step": 27 }, { "epoch": 0.047058823529411764, "grad_norm": 0.07514394819736481, "learning_rate": 0.00011636363636363636, "loss": 0.7765, "step": 28 }, { "epoch": 0.04873949579831933, "grad_norm": 0.08253440260887146, "learning_rate": 0.00011272727272727272, "loss": 0.6163, "step": 29 }, { "epoch": 0.05042016806722689, "grad_norm": 0.09420851618051529, "learning_rate": 0.00010909090909090909, "loss": 0.6294, "step": 30 }, { "epoch": 0.052100840336134456, "grad_norm": 0.07622350007295609, "learning_rate": 0.00010545454545454545, "loss": 0.7741, "step": 31 }, { "epoch": 0.05378151260504202, "grad_norm": 0.08414219319820404, "learning_rate": 0.00010181818181818181, "loss": 0.7156, "step": 32 }, { "epoch": 0.05546218487394958, "grad_norm": 0.07866919785737991, "learning_rate": 9.818181818181818e-05, "loss": 0.7736, "step": 33 }, { "epoch": 0.05714285714285714, "grad_norm": 0.07867816835641861, "learning_rate": 9.454545454545455e-05, "loss": 0.7042, "step": 34 }, { "epoch": 0.058823529411764705, "grad_norm": 0.07410237193107605, "learning_rate": 9.090909090909092e-05, "loss": 0.8446, "step": 35 }, { "epoch": 0.06050420168067227, "grad_norm": 0.07834600657224655, "learning_rate": 8.727272727272727e-05, "loss": 0.8873, "step": 36 }, { "epoch": 0.06218487394957983, "grad_norm": 0.08074888586997986, "learning_rate": 8.363636363636364e-05, "loss": 0.6879, "step": 37 }, { "epoch": 0.06386554621848739, "grad_norm": 0.06010926514863968, "learning_rate": 8e-05, "loss": 0.6882, "step": 38 }, { "epoch": 0.06554621848739496, "grad_norm": 0.08766958117485046, "learning_rate": 7.636363636363637e-05, "loss": 0.6037, "step": 39 }, { "epoch": 0.06722689075630252, "grad_norm": 0.07735378295183182, "learning_rate": 7.272727272727273e-05, "loss": 0.6251, "step": 40 }, { "epoch": 0.06890756302521009, "grad_norm": 0.0956479161977768, "learning_rate": 6.90909090909091e-05, "loss": 1.0099, "step": 41 }, { "epoch": 0.07058823529411765, "grad_norm": 0.07726597785949707, "learning_rate": 6.545454545454546e-05, "loss": 0.5449, "step": 42 }, { "epoch": 0.07226890756302522, "grad_norm": 0.07963621616363525, "learning_rate": 6.181818181818182e-05, "loss": 0.6568, "step": 43 }, { "epoch": 0.07394957983193277, "grad_norm": 0.08890356868505478, "learning_rate": 5.818181818181818e-05, "loss": 0.7226, "step": 44 }, { "epoch": 0.07563025210084033, "grad_norm": 0.07577082514762878, "learning_rate": 5.4545454545454546e-05, "loss": 0.7658, "step": 45 }, { "epoch": 0.0773109243697479, "grad_norm": 0.07142199575901031, "learning_rate": 5.090909090909091e-05, "loss": 0.63, "step": 46 }, { "epoch": 0.07899159663865546, "grad_norm": 0.0648496150970459, "learning_rate": 4.7272727272727275e-05, "loss": 0.6635, "step": 47 }, { "epoch": 0.08067226890756303, "grad_norm": 0.11062151193618774, "learning_rate": 4.3636363636363636e-05, "loss": 0.5822, "step": 48 }, { "epoch": 0.08235294117647059, "grad_norm": 0.08142093569040298, "learning_rate": 4e-05, "loss": 0.9608, "step": 49 }, { "epoch": 0.08403361344537816, "grad_norm": 0.08923624455928802, "learning_rate": 3.6363636363636364e-05, "loss": 0.7617, "step": 50 }, { "epoch": 0.08571428571428572, "grad_norm": 0.07603711634874344, "learning_rate": 3.272727272727273e-05, "loss": 0.4793, "step": 51 }, { "epoch": 0.08739495798319327, "grad_norm": 0.08750972151756287, "learning_rate": 2.909090909090909e-05, "loss": 0.8154, "step": 52 }, { "epoch": 0.08907563025210084, "grad_norm": 0.07799336314201355, "learning_rate": 2.5454545454545454e-05, "loss": 0.7218, "step": 53 }, { "epoch": 0.0907563025210084, "grad_norm": 0.07668763399124146, "learning_rate": 2.1818181818181818e-05, "loss": 0.6844, "step": 54 }, { "epoch": 0.09243697478991597, "grad_norm": 0.07868485897779465, "learning_rate": 1.8181818181818182e-05, "loss": 0.7515, "step": 55 }, { "epoch": 0.09411764705882353, "grad_norm": 0.06420107930898666, "learning_rate": 1.4545454545454545e-05, "loss": 0.5804, "step": 56 }, { "epoch": 0.0957983193277311, "grad_norm": 0.07881864905357361, "learning_rate": 1.0909090909090909e-05, "loss": 0.6701, "step": 57 }, { "epoch": 0.09747899159663866, "grad_norm": 0.07414459437131882, "learning_rate": 7.272727272727272e-06, "loss": 0.6471, "step": 58 }, { "epoch": 0.09915966386554621, "grad_norm": 0.08740859478712082, "learning_rate": 3.636363636363636e-06, "loss": 0.699, "step": 59 }, { "epoch": 0.10084033613445378, "grad_norm": 0.0815616026520729, "learning_rate": 0.0, "loss": 0.8564, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.136603104667648e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }