| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.74074074074074, | |
| "eval_steps": 500, | |
| "global_step": 1480, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 52.495697021484375, | |
| "learning_rate": 9.9464649786582e-06, | |
| "loss": 21.8993, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_classes": 0, | |
| "eval_loss": 12.443974494934082, | |
| "eval_map": 0.6744, | |
| "eval_map_50": 0.9348, | |
| "eval_map_75": 0.7871, | |
| "eval_map_large": 0.9595, | |
| "eval_map_medium": 0.7115, | |
| "eval_map_per_class": 0.6744, | |
| "eval_map_small": 0.4962, | |
| "eval_mar_1": 0.0394, | |
| "eval_mar_10": 0.3779, | |
| "eval_mar_100": 0.7627, | |
| "eval_mar_100_per_class": 0.7627, | |
| "eval_mar_large": 0.9741, | |
| "eval_mar_medium": 0.7977, | |
| "eval_mar_small": 0.5896, | |
| "eval_runtime": 301.7158, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 35.35026550292969, | |
| "learning_rate": 9.768230005822394e-06, | |
| "loss": 10.836, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_classes": 0, | |
| "eval_loss": 10.104583740234375, | |
| "eval_map": 0.7114, | |
| "eval_map_50": 0.9553, | |
| "eval_map_75": 0.8305, | |
| "eval_map_large": 0.9806, | |
| "eval_map_medium": 0.7495, | |
| "eval_map_per_class": 0.7114, | |
| "eval_map_small": 0.5421, | |
| "eval_mar_1": 0.0399, | |
| "eval_mar_10": 0.3853, | |
| "eval_mar_100": 0.7924, | |
| "eval_mar_100_per_class": 0.7924, | |
| "eval_mar_large": 0.9864, | |
| "eval_mar_medium": 0.8267, | |
| "eval_mar_small": 0.6239, | |
| "eval_runtime": 301.7621, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 77.25425720214844, | |
| "learning_rate": 9.469397461053838e-06, | |
| "loss": 9.422, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_classes": 0, | |
| "eval_loss": 9.384869575500488, | |
| "eval_map": 0.7277, | |
| "eval_map_50": 0.9599, | |
| "eval_map_75": 0.8501, | |
| "eval_map_large": 0.9829, | |
| "eval_map_medium": 0.7674, | |
| "eval_map_per_class": 0.7277, | |
| "eval_map_small": 0.5591, | |
| "eval_mar_1": 0.0398, | |
| "eval_mar_10": 0.3882, | |
| "eval_mar_100": 0.8057, | |
| "eval_mar_100_per_class": 0.8057, | |
| "eval_mar_large": 0.9852, | |
| "eval_mar_medium": 0.8385, | |
| "eval_mar_small": 0.6454, | |
| "eval_runtime": 301.8767, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 25.18535614013672, | |
| "learning_rate": 9.057525385652877e-06, | |
| "loss": 8.7626, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_classes": 0, | |
| "eval_loss": 8.737225532531738, | |
| "eval_map": 0.7337, | |
| "eval_map_50": 0.9632, | |
| "eval_map_75": 0.8564, | |
| "eval_map_large": 0.9825, | |
| "eval_map_medium": 0.7713, | |
| "eval_map_per_class": 0.7337, | |
| "eval_map_small": 0.5685, | |
| "eval_mar_1": 0.0401, | |
| "eval_mar_10": 0.3902, | |
| "eval_mar_100": 0.8122, | |
| "eval_mar_100_per_class": 0.8122, | |
| "eval_mar_large": 0.9889, | |
| "eval_mar_medium": 0.8441, | |
| "eval_mar_small": 0.6565, | |
| "eval_runtime": 301.9674, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 100.30033111572266, | |
| "learning_rate": 8.550511739408428e-06, | |
| "loss": 8.2403, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_classes": 0, | |
| "eval_loss": 8.39384651184082, | |
| "eval_map": 0.7416, | |
| "eval_map_50": 0.9673, | |
| "eval_map_75": 0.8706, | |
| "eval_map_large": 0.991, | |
| "eval_map_medium": 0.7812, | |
| "eval_map_per_class": 0.7416, | |
| "eval_map_small": 0.5659, | |
| "eval_mar_1": 0.0403, | |
| "eval_mar_10": 0.3914, | |
| "eval_mar_100": 0.8177, | |
| "eval_mar_100_per_class": 0.8177, | |
| "eval_mar_large": 0.9926, | |
| "eval_mar_medium": 0.8508, | |
| "eval_mar_small": 0.657, | |
| "eval_runtime": 301.5787, | |
| "eval_samples_per_second": 0.438, | |
| "eval_steps_per_second": 0.056, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 60.029884338378906, | |
| "learning_rate": 7.947506126566009e-06, | |
| "loss": 7.8836, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.927948951721191, | |
| "eval_map": 0.7504, | |
| "eval_map_50": 0.9692, | |
| "eval_map_75": 0.8794, | |
| "eval_map_large": 0.9901, | |
| "eval_map_medium": 0.7878, | |
| "eval_map_per_class": 0.7504, | |
| "eval_map_small": 0.5853, | |
| "eval_mar_1": 0.0412, | |
| "eval_mar_10": 0.3964, | |
| "eval_mar_100": 0.824, | |
| "eval_mar_100_per_class": 0.824, | |
| "eval_mar_large": 0.9914, | |
| "eval_mar_medium": 0.8561, | |
| "eval_mar_small": 0.6685, | |
| "eval_runtime": 302.1538, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 45.95404815673828, | |
| "learning_rate": 7.2794040652413374e-06, | |
| "loss": 7.6029, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.830236434936523, | |
| "eval_map": 0.7562, | |
| "eval_map_50": 0.9707, | |
| "eval_map_75": 0.8933, | |
| "eval_map_large": 0.9909, | |
| "eval_map_medium": 0.7925, | |
| "eval_map_per_class": 0.7562, | |
| "eval_map_small": 0.5965, | |
| "eval_mar_1": 0.0415, | |
| "eval_mar_10": 0.3969, | |
| "eval_mar_100": 0.8294, | |
| "eval_mar_100_per_class": 0.8294, | |
| "eval_mar_large": 0.9926, | |
| "eval_mar_medium": 0.8603, | |
| "eval_mar_small": 0.68, | |
| "eval_runtime": 302.0489, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 35.335750579833984, | |
| "learning_rate": 6.555175510922047e-06, | |
| "loss": 7.3823, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.547567844390869, | |
| "eval_map": 0.756, | |
| "eval_map_50": 0.9689, | |
| "eval_map_75": 0.8892, | |
| "eval_map_large": 0.9908, | |
| "eval_map_medium": 0.792, | |
| "eval_map_per_class": 0.756, | |
| "eval_map_small": 0.5973, | |
| "eval_mar_1": 0.0412, | |
| "eval_mar_10": 0.3982, | |
| "eval_mar_100": 0.8315, | |
| "eval_mar_100_per_class": 0.8315, | |
| "eval_mar_large": 0.9926, | |
| "eval_mar_medium": 0.8626, | |
| "eval_mar_small": 0.6809, | |
| "eval_runtime": 302.4595, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 16.641401290893555, | |
| "learning_rate": 5.782172325201155e-06, | |
| "loss": 7.2456, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.461709499359131, | |
| "eval_map": 0.7611, | |
| "eval_map_50": 0.969, | |
| "eval_map_75": 0.8979, | |
| "eval_map_large": 0.9902, | |
| "eval_map_medium": 0.795, | |
| "eval_map_per_class": 0.7611, | |
| "eval_map_small": 0.6055, | |
| "eval_mar_1": 0.0411, | |
| "eval_mar_10": 0.3995, | |
| "eval_mar_100": 0.8338, | |
| "eval_mar_100_per_class": 0.8338, | |
| "eval_mar_large": 0.9926, | |
| "eval_mar_medium": 0.8642, | |
| "eval_mar_small": 0.6865, | |
| "eval_runtime": 302.0761, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 18.994029998779297, | |
| "learning_rate": 4.989386519275895e-06, | |
| "loss": 7.068, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.323790550231934, | |
| "eval_map": 0.7627, | |
| "eval_map_50": 0.9688, | |
| "eval_map_75": 0.8988, | |
| "eval_map_large": 0.9901, | |
| "eval_map_medium": 0.7964, | |
| "eval_map_per_class": 0.7627, | |
| "eval_map_small": 0.6028, | |
| "eval_mar_1": 0.0413, | |
| "eval_mar_10": 0.3995, | |
| "eval_mar_100": 0.8347, | |
| "eval_mar_100_per_class": 0.8347, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8656, | |
| "eval_mar_small": 0.685, | |
| "eval_runtime": 304.0925, | |
| "eval_samples_per_second": 0.434, | |
| "eval_steps_per_second": 0.056, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 17.86629295349121, | |
| "learning_rate": 4.196869148389114e-06, | |
| "loss": 7.0105, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.247533798217773, | |
| "eval_map": 0.7655, | |
| "eval_map_50": 0.9703, | |
| "eval_map_75": 0.8992, | |
| "eval_map_large": 0.991, | |
| "eval_map_medium": 0.7982, | |
| "eval_map_per_class": 0.7655, | |
| "eval_map_small": 0.6099, | |
| "eval_mar_1": 0.0409, | |
| "eval_mar_10": 0.4007, | |
| "eval_mar_100": 0.8372, | |
| "eval_mar_100_per_class": 0.8372, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8679, | |
| "eval_mar_small": 0.6893, | |
| "eval_runtime": 302.7002, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 10.187027931213379, | |
| "learning_rate": 3.424664478552887e-06, | |
| "loss": 6.9651, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.234455108642578, | |
| "eval_map": 0.7679, | |
| "eval_map_50": 0.9709, | |
| "eval_map_75": 0.9045, | |
| "eval_map_large": 0.9899, | |
| "eval_map_medium": 0.8008, | |
| "eval_map_per_class": 0.7679, | |
| "eval_map_small": 0.6147, | |
| "eval_mar_1": 0.0412, | |
| "eval_mar_10": 0.3999, | |
| "eval_mar_100": 0.8393, | |
| "eval_mar_100_per_class": 0.8393, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8692, | |
| "eval_mar_small": 0.6944, | |
| "eval_runtime": 307.3877, | |
| "eval_samples_per_second": 0.429, | |
| "eval_steps_per_second": 0.055, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 24.03527069091797, | |
| "learning_rate": 2.6923030290801817e-06, | |
| "loss": 6.8849, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.041961669921875, | |
| "eval_map": 0.764, | |
| "eval_map_50": 0.9698, | |
| "eval_map_75": 0.8996, | |
| "eval_map_large": 0.9899, | |
| "eval_map_medium": 0.7977, | |
| "eval_map_per_class": 0.764, | |
| "eval_map_small": 0.6091, | |
| "eval_mar_1": 0.0405, | |
| "eval_mar_10": 0.3996, | |
| "eval_mar_100": 0.8377, | |
| "eval_mar_100_per_class": 0.8377, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8677, | |
| "eval_mar_small": 0.692, | |
| "eval_runtime": 302.0813, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 25.183696746826172, | |
| "learning_rate": 2.0183076087440044e-06, | |
| "loss": 6.8559, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.071371555328369, | |
| "eval_map": 0.7643, | |
| "eval_map_50": 0.9693, | |
| "eval_map_75": 0.8986, | |
| "eval_map_large": 0.9899, | |
| "eval_map_medium": 0.7991, | |
| "eval_map_per_class": 0.7643, | |
| "eval_map_small": 0.6084, | |
| "eval_mar_1": 0.0413, | |
| "eval_mar_10": 0.3994, | |
| "eval_mar_100": 0.8387, | |
| "eval_mar_100_per_class": 0.8387, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8693, | |
| "eval_mar_small": 0.6913, | |
| "eval_runtime": 302.1345, | |
| "eval_samples_per_second": 0.437, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 26.083616256713867, | |
| "learning_rate": 1.4197248388455693e-06, | |
| "loss": 6.7735, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_classes": 0, | |
| "eval_loss": 6.997360706329346, | |
| "eval_map": 0.7671, | |
| "eval_map_50": 0.9702, | |
| "eval_map_75": 0.9007, | |
| "eval_map_large": 0.9908, | |
| "eval_map_medium": 0.8014, | |
| "eval_map_per_class": 0.7671, | |
| "eval_map_small": 0.6119, | |
| "eval_mar_1": 0.0413, | |
| "eval_mar_10": 0.3998, | |
| "eval_mar_100": 0.8395, | |
| "eval_mar_100_per_class": 0.8395, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8706, | |
| "eval_mar_small": 0.6902, | |
| "eval_runtime": 302.6848, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 25.00279998779297, | |
| "learning_rate": 9.116940118607792e-07, | |
| "loss": 6.7596, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_classes": 0, | |
| "eval_loss": 6.9286627769470215, | |
| "eval_map": 0.7675, | |
| "eval_map_50": 0.9702, | |
| "eval_map_75": 0.9041, | |
| "eval_map_large": 0.991, | |
| "eval_map_medium": 0.8007, | |
| "eval_map_per_class": 0.7675, | |
| "eval_map_small": 0.614, | |
| "eval_mar_1": 0.0406, | |
| "eval_mar_10": 0.399, | |
| "eval_mar_100": 0.8393, | |
| "eval_mar_100_per_class": 0.8393, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8696, | |
| "eval_mar_small": 0.6931, | |
| "eval_runtime": 302.6539, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 29.690765380859375, | |
| "learning_rate": 5.070641900468149e-07, | |
| "loss": 6.7191, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_classes": 0, | |
| "eval_loss": 7.00290584564209, | |
| "eval_map": 0.7678, | |
| "eval_map_50": 0.9701, | |
| "eval_map_75": 0.9039, | |
| "eval_map_large": 0.991, | |
| "eval_map_medium": 0.8016, | |
| "eval_map_per_class": 0.7678, | |
| "eval_map_small": 0.61, | |
| "eval_mar_1": 0.0407, | |
| "eval_mar_10": 0.3991, | |
| "eval_mar_100": 0.8407, | |
| "eval_mar_100_per_class": 0.8407, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8711, | |
| "eval_mar_small": 0.6943, | |
| "eval_runtime": 302.683, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 10.844990730285645, | |
| "learning_rate": 2.1606922831058198e-07, | |
| "loss": 6.6811, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_classes": 0, | |
| "eval_loss": 6.940825462341309, | |
| "eval_map": 0.7678, | |
| "eval_map_50": 0.97, | |
| "eval_map_75": 0.901, | |
| "eval_map_large": 0.991, | |
| "eval_map_medium": 0.8014, | |
| "eval_map_per_class": 0.7678, | |
| "eval_map_small": 0.6121, | |
| "eval_mar_1": 0.041, | |
| "eval_mar_10": 0.4008, | |
| "eval_mar_100": 0.84, | |
| "eval_mar_100_per_class": 0.84, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8705, | |
| "eval_mar_small": 0.6928, | |
| "eval_runtime": 302.7462, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 35.2485237121582, | |
| "learning_rate": 4.60689406264897e-08, | |
| "loss": 6.6732, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_classes": 0, | |
| "eval_loss": 6.960495471954346, | |
| "eval_map": 0.7679, | |
| "eval_map_50": 0.9696, | |
| "eval_map_75": 0.9012, | |
| "eval_map_large": 0.9911, | |
| "eval_map_medium": 0.801, | |
| "eval_map_per_class": 0.7679, | |
| "eval_map_small": 0.6142, | |
| "eval_mar_1": 0.0413, | |
| "eval_mar_10": 0.4005, | |
| "eval_mar_100": 0.8405, | |
| "eval_mar_100_per_class": 0.8405, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.871, | |
| "eval_mar_small": 0.6935, | |
| "eval_runtime": 302.8692, | |
| "eval_samples_per_second": 0.436, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 19.74074074074074, | |
| "grad_norm": 524.6016845703125, | |
| "learning_rate": 9.124060015425206e-10, | |
| "loss": 6.7621, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 19.74074074074074, | |
| "eval_classes": 0, | |
| "eval_loss": 6.952253341674805, | |
| "eval_map": 0.7683, | |
| "eval_map_50": 0.9698, | |
| "eval_map_75": 0.9012, | |
| "eval_map_large": 0.9911, | |
| "eval_map_medium": 0.8012, | |
| "eval_map_per_class": 0.7683, | |
| "eval_map_small": 0.6166, | |
| "eval_mar_1": 0.0414, | |
| "eval_mar_10": 0.3998, | |
| "eval_mar_100": 0.8409, | |
| "eval_mar_100_per_class": 0.8409, | |
| "eval_mar_large": 0.9938, | |
| "eval_mar_medium": 0.8711, | |
| "eval_mar_small": 0.6954, | |
| "eval_runtime": 304.7146, | |
| "eval_samples_per_second": 0.433, | |
| "eval_steps_per_second": 0.056, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 19.74074074074074, | |
| "step": 1480, | |
| "total_flos": 5.235456347568341e+18, | |
| "train_loss": 8.241098208040805, | |
| "train_runtime": 50547.6535, | |
| "train_samples_per_second": 0.47, | |
| "train_steps_per_second": 0.029 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1480, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.235456347568341e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |