{ "best_metric": 81.07514905827573, "best_model_checkpoint": "distilbert-qasports/checkpoint-2000", "epoch": 0.6622516556291391, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.026490066225165563, "grad_norm": 10.109187126159668, "learning_rate": 9.994754966887419e-06, "loss": 0.8141, "step": 100 }, { "epoch": 0.052980132450331126, "grad_norm": 13.866031646728516, "learning_rate": 9.989456953642385e-06, "loss": 0.7347, "step": 200 }, { "epoch": 0.07947019867549669, "grad_norm": 7.728989124298096, "learning_rate": 9.984158940397352e-06, "loss": 0.6902, "step": 300 }, { "epoch": 0.10596026490066225, "grad_norm": 8.372330665588379, "learning_rate": 9.978860927152319e-06, "loss": 0.6645, "step": 400 }, { "epoch": 0.13245033112582782, "grad_norm": 10.106789588928223, "learning_rate": 9.973562913907286e-06, "loss": 0.6782, "step": 500 }, { "epoch": 0.13245033112582782, "eval_HasAns_exact": 74.40994614719766, "eval_HasAns_f1": 79.39755830823303, "eval_HasAns_total": 15041, "eval_best_exact": 74.40994614719766, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.39755830823303, "eval_best_f1_thresh": 0.0, "eval_exact": 74.40994614719766, "eval_f1": 79.39755830823303, "eval_loss": 0.6026614308357239, "eval_runtime": 62.2306, "eval_samples_per_second": 241.698, "eval_steps_per_second": 15.121, "eval_total": 15041, "step": 500 }, { "epoch": 0.15894039735099338, "grad_norm": 15.874868392944336, "learning_rate": 9.968264900662253e-06, "loss": 0.6139, "step": 600 }, { "epoch": 0.18543046357615894, "grad_norm": 12.287043571472168, "learning_rate": 9.963019867549669e-06, "loss": 0.6147, "step": 700 }, { "epoch": 0.2119205298013245, "grad_norm": 11.49232006072998, "learning_rate": 9.957721854304636e-06, "loss": 0.6033, "step": 800 }, { "epoch": 0.23841059602649006, "grad_norm": 8.300566673278809, "learning_rate": 9.952423841059603e-06, "loss": 0.6254, "step": 900 }, { "epoch": 0.26490066225165565, "grad_norm": 14.619403839111328, "learning_rate": 9.94712582781457e-06, "loss": 0.569, "step": 1000 }, { "epoch": 0.26490066225165565, "eval_HasAns_exact": 75.10803802938635, "eval_HasAns_f1": 80.10135733594169, "eval_HasAns_total": 15041, "eval_best_exact": 75.10803802938635, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.10135733594169, "eval_best_f1_thresh": 0.0, "eval_exact": 75.10803802938635, "eval_f1": 80.10135733594169, "eval_loss": 0.5508891344070435, "eval_runtime": 62.3805, "eval_samples_per_second": 241.117, "eval_steps_per_second": 15.085, "eval_total": 15041, "step": 1000 }, { "epoch": 0.2913907284768212, "grad_norm": 10.70427131652832, "learning_rate": 9.941827814569536e-06, "loss": 0.6449, "step": 1100 }, { "epoch": 0.31788079470198677, "grad_norm": 10.064720153808594, "learning_rate": 9.936529801324503e-06, "loss": 0.5613, "step": 1200 }, { "epoch": 0.3443708609271523, "grad_norm": 8.998592376708984, "learning_rate": 9.93123178807947e-06, "loss": 0.5896, "step": 1300 }, { "epoch": 0.3708609271523179, "grad_norm": 11.581778526306152, "learning_rate": 9.925933774834437e-06, "loss": 0.5767, "step": 1400 }, { "epoch": 0.3973509933774834, "grad_norm": 7.466866493225098, "learning_rate": 9.920635761589405e-06, "loss": 0.5821, "step": 1500 }, { "epoch": 0.3973509933774834, "eval_HasAns_exact": 75.55348713516389, "eval_HasAns_f1": 80.3558190310888, "eval_HasAns_total": 15041, "eval_best_exact": 75.55348713516389, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.3558190310888, "eval_best_f1_thresh": 0.0, "eval_exact": 75.55348713516389, "eval_f1": 80.3558190310888, "eval_loss": 0.5195178985595703, "eval_runtime": 62.2517, "eval_samples_per_second": 241.616, "eval_steps_per_second": 15.116, "eval_total": 15041, "step": 1500 }, { "epoch": 0.423841059602649, "grad_norm": 8.173700332641602, "learning_rate": 9.915337748344372e-06, "loss": 0.584, "step": 1600 }, { "epoch": 0.4503311258278146, "grad_norm": 6.5389723777771, "learning_rate": 9.910039735099339e-06, "loss": 0.5457, "step": 1700 }, { "epoch": 0.4768211920529801, "grad_norm": 9.859861373901367, "learning_rate": 9.904741721854306e-06, "loss": 0.5417, "step": 1800 }, { "epoch": 0.5033112582781457, "grad_norm": 13.78690242767334, "learning_rate": 9.899443708609273e-06, "loss": 0.5475, "step": 1900 }, { "epoch": 0.5298013245033113, "grad_norm": 8.801095008850098, "learning_rate": 9.89414569536424e-06, "loss": 0.5814, "step": 2000 }, { "epoch": 0.5298013245033113, "eval_HasAns_exact": 76.39784588790639, "eval_HasAns_f1": 81.07514905827573, "eval_HasAns_total": 15041, "eval_best_exact": 76.39784588790639, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.07514905827573, "eval_best_f1_thresh": 0.0, "eval_exact": 76.39784588790639, "eval_f1": 81.07514905827573, "eval_loss": 0.4890206754207611, "eval_runtime": 62.2295, "eval_samples_per_second": 241.702, "eval_steps_per_second": 15.121, "eval_total": 15041, "step": 2000 }, { "epoch": 0.5562913907284768, "grad_norm": 10.412147521972656, "learning_rate": 9.888847682119206e-06, "loss": 0.5275, "step": 2100 }, { "epoch": 0.5827814569536424, "grad_norm": 8.767007827758789, "learning_rate": 9.883549668874173e-06, "loss": 0.4934, "step": 2200 }, { "epoch": 0.609271523178808, "grad_norm": 11.056267738342285, "learning_rate": 9.87825165562914e-06, "loss": 0.5246, "step": 2300 }, { "epoch": 0.6357615894039735, "grad_norm": 9.738068580627441, "learning_rate": 9.872953642384107e-06, "loss": 0.5127, "step": 2400 }, { "epoch": 0.6622516556291391, "grad_norm": 15.755542755126953, "learning_rate": 9.867655629139074e-06, "loss": 0.5165, "step": 2500 }, { "epoch": 0.6622516556291391, "eval_HasAns_exact": 76.21168805265607, "eval_HasAns_f1": 80.96151211311172, "eval_HasAns_total": 15041, "eval_best_exact": 76.21168805265607, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.96151211311172, "eval_best_f1_thresh": 0.0, "eval_exact": 76.21168805265607, "eval_f1": 80.96151211311172, "eval_loss": 0.4729439914226532, "eval_runtime": 62.2287, "eval_samples_per_second": 241.705, "eval_steps_per_second": 15.122, "eval_total": 15041, "step": 2500 } ], "logging_steps": 100, "max_steps": 188750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.045224800256e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }