|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.10084033613445378, |
|
"eval_steps": 500, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016806722689075631, |
|
"grad_norm": 0.08711077272891998, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7892, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0033613445378151263, |
|
"grad_norm": 0.12688124179840088, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9025, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005042016806722689, |
|
"grad_norm": 0.10088784247636795, |
|
"learning_rate": 0.00012, |
|
"loss": 0.7134, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0067226890756302525, |
|
"grad_norm": 0.09177786111831665, |
|
"learning_rate": 0.00016, |
|
"loss": 0.927, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.008403361344537815, |
|
"grad_norm": 0.2383689433336258, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8999, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010084033613445379, |
|
"grad_norm": 0.07735294103622437, |
|
"learning_rate": 0.00019636363636363636, |
|
"loss": 0.8429, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.011764705882352941, |
|
"grad_norm": 0.07536331564188004, |
|
"learning_rate": 0.00019272727272727274, |
|
"loss": 0.7763, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.013445378151260505, |
|
"grad_norm": 0.08141479641199112, |
|
"learning_rate": 0.0001890909090909091, |
|
"loss": 0.5322, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.015126050420168067, |
|
"grad_norm": 0.0736273005604744, |
|
"learning_rate": 0.00018545454545454545, |
|
"loss": 0.6342, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01680672268907563, |
|
"grad_norm": 0.10946282744407654, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 0.7587, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018487394957983194, |
|
"grad_norm": 0.07774144411087036, |
|
"learning_rate": 0.0001781818181818182, |
|
"loss": 0.6685, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.020168067226890758, |
|
"grad_norm": 0.07681110501289368, |
|
"learning_rate": 0.00017454545454545454, |
|
"loss": 1.064, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.021848739495798318, |
|
"grad_norm": 0.08137353509664536, |
|
"learning_rate": 0.0001709090909090909, |
|
"loss": 0.8278, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.023529411764705882, |
|
"grad_norm": 0.15623211860656738, |
|
"learning_rate": 0.00016727272727272728, |
|
"loss": 0.6985, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.025210084033613446, |
|
"grad_norm": 0.10167036205530167, |
|
"learning_rate": 0.00016363636363636366, |
|
"loss": 0.8868, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02689075630252101, |
|
"grad_norm": 0.1013818234205246, |
|
"learning_rate": 0.00016, |
|
"loss": 0.9126, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02857142857142857, |
|
"grad_norm": 0.09055998921394348, |
|
"learning_rate": 0.00015636363636363637, |
|
"loss": 0.7888, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.030252100840336135, |
|
"grad_norm": 0.16094405949115753, |
|
"learning_rate": 0.00015272727272727275, |
|
"loss": 0.6225, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.031932773109243695, |
|
"grad_norm": 0.21245548129081726, |
|
"learning_rate": 0.0001490909090909091, |
|
"loss": 0.6745, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03361344537815126, |
|
"grad_norm": 0.08682460337877274, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.7252, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03529411764705882, |
|
"grad_norm": 0.08345898985862732, |
|
"learning_rate": 0.00014181818181818184, |
|
"loss": 0.638, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03697478991596639, |
|
"grad_norm": 0.08165189623832703, |
|
"learning_rate": 0.0001381818181818182, |
|
"loss": 0.9751, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03865546218487395, |
|
"grad_norm": 0.08062303811311722, |
|
"learning_rate": 0.00013454545454545455, |
|
"loss": 0.7559, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.040336134453781515, |
|
"grad_norm": 0.07231509685516357, |
|
"learning_rate": 0.00013090909090909093, |
|
"loss": 0.71, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04201680672268908, |
|
"grad_norm": 0.07577697187662125, |
|
"learning_rate": 0.00012727272727272728, |
|
"loss": 0.7186, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.043697478991596636, |
|
"grad_norm": 0.07562875002622604, |
|
"learning_rate": 0.00012363636363636364, |
|
"loss": 0.9232, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0453781512605042, |
|
"grad_norm": 0.07023375481367111, |
|
"learning_rate": 0.00012, |
|
"loss": 0.8472, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.047058823529411764, |
|
"grad_norm": 0.07514394819736481, |
|
"learning_rate": 0.00011636363636363636, |
|
"loss": 0.7765, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04873949579831933, |
|
"grad_norm": 0.08253440260887146, |
|
"learning_rate": 0.00011272727272727272, |
|
"loss": 0.6163, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05042016806722689, |
|
"grad_norm": 0.09420851618051529, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.6294, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.052100840336134456, |
|
"grad_norm": 0.07622350007295609, |
|
"learning_rate": 0.00010545454545454545, |
|
"loss": 0.7741, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05378151260504202, |
|
"grad_norm": 0.08414219319820404, |
|
"learning_rate": 0.00010181818181818181, |
|
"loss": 0.7156, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05546218487394958, |
|
"grad_norm": 0.07866919785737991, |
|
"learning_rate": 9.818181818181818e-05, |
|
"loss": 0.7736, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05714285714285714, |
|
"grad_norm": 0.07867816835641861, |
|
"learning_rate": 9.454545454545455e-05, |
|
"loss": 0.7042, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 0.07410237193107605, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.8446, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06050420168067227, |
|
"grad_norm": 0.07834600657224655, |
|
"learning_rate": 8.727272727272727e-05, |
|
"loss": 0.8873, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06218487394957983, |
|
"grad_norm": 0.08074888586997986, |
|
"learning_rate": 8.363636363636364e-05, |
|
"loss": 0.6879, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06386554621848739, |
|
"grad_norm": 0.06010926514863968, |
|
"learning_rate": 8e-05, |
|
"loss": 0.6882, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06554621848739496, |
|
"grad_norm": 0.08766958117485046, |
|
"learning_rate": 7.636363636363637e-05, |
|
"loss": 0.6037, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06722689075630252, |
|
"grad_norm": 0.07735378295183182, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.6251, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06890756302521009, |
|
"grad_norm": 0.0956479161977768, |
|
"learning_rate": 6.90909090909091e-05, |
|
"loss": 1.0099, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.07058823529411765, |
|
"grad_norm": 0.07726597785949707, |
|
"learning_rate": 6.545454545454546e-05, |
|
"loss": 0.5449, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07226890756302522, |
|
"grad_norm": 0.07963621616363525, |
|
"learning_rate": 6.181818181818182e-05, |
|
"loss": 0.6568, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07394957983193277, |
|
"grad_norm": 0.08890356868505478, |
|
"learning_rate": 5.818181818181818e-05, |
|
"loss": 0.7226, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07563025210084033, |
|
"grad_norm": 0.07577082514762878, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.7658, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0773109243697479, |
|
"grad_norm": 0.07142199575901031, |
|
"learning_rate": 5.090909090909091e-05, |
|
"loss": 0.63, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07899159663865546, |
|
"grad_norm": 0.0648496150970459, |
|
"learning_rate": 4.7272727272727275e-05, |
|
"loss": 0.6635, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.08067226890756303, |
|
"grad_norm": 0.11062151193618774, |
|
"learning_rate": 4.3636363636363636e-05, |
|
"loss": 0.5822, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08235294117647059, |
|
"grad_norm": 0.08142093569040298, |
|
"learning_rate": 4e-05, |
|
"loss": 0.9608, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08403361344537816, |
|
"grad_norm": 0.08923624455928802, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.7617, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08571428571428572, |
|
"grad_norm": 0.07603711634874344, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 0.4793, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08739495798319327, |
|
"grad_norm": 0.08750972151756287, |
|
"learning_rate": 2.909090909090909e-05, |
|
"loss": 0.8154, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08907563025210084, |
|
"grad_norm": 0.07799336314201355, |
|
"learning_rate": 2.5454545454545454e-05, |
|
"loss": 0.7218, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0907563025210084, |
|
"grad_norm": 0.07668763399124146, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 0.6844, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.09243697478991597, |
|
"grad_norm": 0.07868485897779465, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.7515, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09411764705882353, |
|
"grad_norm": 0.06420107930898666, |
|
"learning_rate": 1.4545454545454545e-05, |
|
"loss": 0.5804, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0957983193277311, |
|
"grad_norm": 0.07881864905357361, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.6701, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.09747899159663866, |
|
"grad_norm": 0.07414459437131882, |
|
"learning_rate": 7.272727272727272e-06, |
|
"loss": 0.6471, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.09915966386554621, |
|
"grad_norm": 0.08740859478712082, |
|
"learning_rate": 3.636363636363636e-06, |
|
"loss": 0.699, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.10084033613445378, |
|
"grad_norm": 0.0815616026520729, |
|
"learning_rate": 0.0, |
|
"loss": 0.8564, |
|
"step": 60 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 60, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.136603104667648e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|