{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.484536082474227,
  "global_step": 460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "loss": 0.7393, "step": 2 },
    { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "loss": 0.7516, "step": 4 },
    { "epoch": 0.12, "learning_rate": 6.25e-07, "loss": 0.7506, "step": 6 },
    { "epoch": 0.16, "learning_rate": 8.333333333333333e-07, "loss": 0.7494, "step": 8 },
    { "epoch": 0.21, "learning_rate": 1.0416666666666667e-06, "loss": 0.7602, "step": 10 },
    { "epoch": 0.21, "eval_accuracy": 0.6200000047683716, "eval_average_score": 0.6218893527984619, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7480455040931702, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 10 },
    { "epoch": 0.25, "learning_rate": 1.25e-06, "loss": 0.7501, "step": 12 },
    { "epoch": 0.29, "learning_rate": 1.4583333333333335e-06, "loss": 0.743, "step": 14 },
    { "epoch": 0.33, "learning_rate": 1.6666666666666667e-06, "loss": 0.7412, "step": 16 },
    { "epoch": 0.37, "learning_rate": 1.8750000000000003e-06, "loss": 0.7553, "step": 18 },
    { "epoch": 0.41, "learning_rate": 2.0833333333333334e-06, "loss": 0.7355, "step": 20 },
    { "epoch": 0.41, "eval_accuracy": 0.5400000214576721, "eval_average_score": 0.5707897543907166, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7415069341659546, "eval_runtime": 3.405, "eval_samples_per_second": 14.684, "eval_steps_per_second": 3.818, "step": 20 },
    { "epoch": 0.45, "learning_rate": 2.2916666666666666e-06, "loss": 0.7402, "step": 22 },
    { "epoch": 0.49, "learning_rate": 2.5e-06, "loss": 0.7427, "step": 24 },
    { "epoch": 0.54, "learning_rate": 2.7083333333333334e-06, "loss": 0.7286, "step": 26 },
    { "epoch": 0.58, "learning_rate": 2.916666666666667e-06, "loss": 0.727, "step": 28 },
    { "epoch": 0.62, "learning_rate": 3.125e-06, "loss": 0.7305, "step": 30 },
    { "epoch": 0.62, "eval_accuracy": 0.6000000238418579, "eval_average_score": 0.4747978150844574, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7272794842720032, "eval_runtime": 3.41, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 30 },
    { "epoch": 0.66, "learning_rate": 3.3333333333333333e-06, "loss": 0.7186, "step": 32 },
    { "epoch": 0.7, "learning_rate": 3.5416666666666673e-06, "loss": 0.7398, "step": 34 },
    { "epoch": 0.74, "learning_rate": 3.7500000000000005e-06, "loss": 0.7166, "step": 36 },
    { "epoch": 0.78, "learning_rate": 3.958333333333333e-06, "loss": 0.7148, "step": 38 },
    { "epoch": 0.82, "learning_rate": 4.166666666666667e-06, "loss": 0.7116, "step": 40 },
    { "epoch": 0.82, "eval_accuracy": 0.6000000238418579, "eval_average_score": 0.3107444643974304, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7109410166740417, "eval_runtime": 3.4072, "eval_samples_per_second": 14.675, "eval_steps_per_second": 3.815, "step": 40 },
    { "epoch": 0.87, "learning_rate": 4.3750000000000005e-06, "loss": 0.7221, "step": 42 },
    { "epoch": 0.91, "learning_rate": 4.583333333333333e-06, "loss": 0.7042, "step": 44 },
    { "epoch": 0.95, "learning_rate": 4.791666666666668e-06, "loss": 0.7011, "step": 46 },
    { "epoch": 0.99, "learning_rate": 5e-06, "loss": 0.6987, "step": 48 },
    { "epoch": 1.03, "learning_rate": 5.208333333333334e-06, "loss": 0.6926, "step": 50 },
    { "epoch": 1.03, "eval_accuracy": 0.6800000071525574, "eval_average_score": 0.07106868177652359, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6947080492973328, "eval_runtime": 3.4106, "eval_samples_per_second": 14.66, "eval_steps_per_second": 3.812, "step": 50 },
    { "epoch": 1.07, "learning_rate": 5.416666666666667e-06, "loss": 0.695, "step": 52 },
    { "epoch": 1.11, "learning_rate": 5.625e-06, "loss": 0.6917, "step": 54 },
    { "epoch": 1.15, "learning_rate": 5.833333333333334e-06, "loss": 0.7115, "step": 56 },
    { "epoch": 1.2, "learning_rate": 6.041666666666667e-06, "loss": 0.6858, "step": 58 },
    { "epoch": 1.24, "learning_rate": 6.25e-06, "loss": 0.6867, "step": 60 },
    { "epoch": 1.24, "eval_accuracy": 0.6000000238418579, "eval_average_score": -0.15788692235946655, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6935771703720093, "eval_runtime": 3.4133, "eval_samples_per_second": 14.648, "eval_steps_per_second": 3.809, "step": 60 },
    { "epoch": 1.28, "learning_rate": 6.458333333333334e-06, "loss": 0.6868, "step": 62 },
    { "epoch": 1.32, "learning_rate": 6.666666666666667e-06, "loss": 0.6837, "step": 64 },
    { "epoch": 1.36, "learning_rate": 6.875e-06, "loss": 0.6883, "step": 66 },
    { "epoch": 1.4, "learning_rate": 7.083333333333335e-06, "loss": 0.689, "step": 68 },
    { "epoch": 1.44, "learning_rate": 7.291666666666667e-06, "loss": 0.6761, "step": 70 },
    { "epoch": 1.44, "eval_accuracy": 0.7799999713897705, "eval_average_score": -0.07916055619716644, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6847817897796631, "eval_runtime": 3.4111, "eval_samples_per_second": 14.658, "eval_steps_per_second": 3.811, "step": 70 },
    { "epoch": 1.48, "learning_rate": 7.500000000000001e-06, "loss": 0.6863, "step": 72 },
    { "epoch": 1.53, "learning_rate": 7.708333333333334e-06, "loss": 0.6841, "step": 74 },
    { "epoch": 1.57, "learning_rate": 7.916666666666667e-06, "loss": 0.6719, "step": 76 },
    { "epoch": 1.61, "learning_rate": 8.125000000000001e-06, "loss": 0.6802, "step": 78 },
    { "epoch": 1.65, "learning_rate": 8.333333333333334e-06, "loss": 0.672, "step": 80 },
    { "epoch": 1.65, "eval_accuracy": 0.8199999928474426, "eval_average_score": -0.042581118643283844, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6735163331031799, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 80 },
    { "epoch": 1.69, "learning_rate": 8.541666666666666e-06, "loss": 0.6675, "step": 82 },
    { "epoch": 1.73, "learning_rate": 8.750000000000001e-06, "loss": 0.6649, "step": 84 },
    { "epoch": 1.77, "learning_rate": 8.958333333333334e-06, "loss": 0.6804, "step": 86 },
    { "epoch": 1.81, "learning_rate": 9.166666666666666e-06, "loss": 0.6692, "step": 88 },
    { "epoch": 1.86, "learning_rate": 9.375000000000001e-06, "loss": 0.654, "step": 90 },
    { "epoch": 1.86, "eval_accuracy": 0.8600000143051147, "eval_average_score": -0.11977884918451309, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.652625560760498, "eval_runtime": 3.4105, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 90 },
    { "epoch": 1.9, "learning_rate": 9.583333333333335e-06, "loss": 0.6495, "step": 92 },
    { "epoch": 1.94, "learning_rate": 9.791666666666666e-06, "loss": 0.6472, "step": 94 },
    { "epoch": 1.98, "learning_rate": 1e-05, "loss": 0.6499, "step": 96 },
    { "epoch": 2.02, "learning_rate": 1e-05, "loss": 0.6247, "step": 98 },
    { "epoch": 2.06, "learning_rate": 1e-05, "loss": 0.6422, "step": 100 },
    { "epoch": 2.06, "eval_accuracy": 0.8799999952316284, "eval_average_score": -0.13170765340328217, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.608478307723999, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 100 },
    { "epoch": 2.1, "learning_rate": 1e-05, "loss": 0.6252, "step": 102 },
    { "epoch": 2.14, "learning_rate": 1e-05, "loss": 0.6065, "step": 104 },
    { "epoch": 2.19, "learning_rate": 1e-05, "loss": 0.5829, "step": 106 },
    { "epoch": 2.23, "learning_rate": 1e-05, "loss": 0.5936, "step": 108 },
    { "epoch": 2.27, "learning_rate": 1e-05, "loss": 0.5528, "step": 110 },
    { "epoch": 2.27, "eval_accuracy": 0.8600000143051147, "eval_average_score": -0.0412043035030365, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.5438380241394043, "eval_runtime": 3.4096, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 110 },
    { "epoch": 2.31, "learning_rate": 1e-05, "loss": 0.5529, "step": 112 },
    { "epoch": 2.35, "learning_rate": 1e-05, "loss": 0.5658, "step": 114 },
    { "epoch": 2.39, "learning_rate": 1e-05, "loss": 0.5421, "step": 116 },
    { "epoch": 2.43, "learning_rate": 1e-05, "loss": 0.5503, "step": 118 },
    { "epoch": 2.47, "learning_rate": 1e-05, "loss": 0.4854, "step": 120 },
    { "epoch": 2.47, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.40412548184394836, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.46706727147102356, "eval_runtime": 3.4091, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.813, "step": 120 },
    { "epoch": 2.52, "learning_rate": 1e-05, "loss": 0.5268, "step": 122 },
    { "epoch": 2.56, "learning_rate": 1e-05, "loss": 0.5251, "step": 124 },
    { "epoch": 2.6, "learning_rate": 1e-05, "loss": 0.4857, "step": 126 },
    { "epoch": 2.64, "learning_rate": 1e-05, "loss": 0.5307, "step": 128 },
    { "epoch": 2.68, "learning_rate": 1e-05, "loss": 0.4672, "step": 130 },
    { "epoch": 2.68, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.14681239426136017, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.391941636800766, "eval_runtime": 3.4094, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 130 },
    { "epoch": 2.72, "learning_rate": 1e-05, "loss": 0.4077, "step": 132 },
    { "epoch": 2.76, "learning_rate": 1e-05, "loss": 0.3849, "step": 134 },
    { "epoch": 2.8, "learning_rate": 1e-05, "loss": 0.4358, "step": 136 },
    { "epoch": 2.85, "learning_rate": 1e-05, "loss": 0.3896, "step": 138 },
    { "epoch": 2.89, "learning_rate": 1e-05, "loss": 0.4903, "step": 140 },
    { "epoch": 2.89, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.11637383699417114, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3501974940299988, "eval_runtime": 3.4098, "eval_samples_per_second": 14.664, "eval_steps_per_second": 3.813, "step": 140 },
    { "epoch": 2.93, "learning_rate": 1e-05, "loss": 0.3395, "step": 142 },
    { "epoch": 2.97, "learning_rate": 1e-05, "loss": 0.3976, "step": 144 },
    { "epoch": 3.01, "learning_rate": 1e-05, "loss": 0.3826, "step": 146 },
    { "epoch": 3.05, "learning_rate": 1e-05, "loss": 0.4534, "step": 148 },
    { "epoch": 3.09, "learning_rate": 1e-05, "loss": 0.3473, "step": 150 },
    { "epoch": 3.09, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.3039152920246124, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3362675607204437, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 150 },
    { "epoch": 3.13, "learning_rate": 1e-05, "loss": 0.368, "step": 152 },
    { "epoch": 3.18, "learning_rate": 1e-05, "loss": 0.4096, "step": 154 },
    { "epoch": 3.22, "learning_rate": 1e-05, "loss": 0.3829, "step": 156 },
    { "epoch": 3.26, "learning_rate": 1e-05, "loss": 0.389, "step": 158 },
    { "epoch": 3.3, "learning_rate": 1e-05, "loss": 0.4191, "step": 160 },
    { "epoch": 3.3, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.41642922163009644, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.31328877806663513, "eval_runtime": 3.415, "eval_samples_per_second": 14.641, "eval_steps_per_second": 3.807, "step": 160 },
    { "epoch": 3.34, "learning_rate": 1e-05, "loss": 0.2698, "step": 162 },
    { "epoch": 3.38, "learning_rate": 1e-05, "loss": 0.4149, "step": 164 },
    { "epoch": 3.42, "learning_rate": 1e-05, "loss": 0.3844, "step": 166 },
    { "epoch": 3.46, "learning_rate": 1e-05, "loss": 0.3549, "step": 168 },
    { "epoch": 3.51, "learning_rate": 1e-05, "loss": 0.3571, "step": 170 },
    { "epoch": 3.51, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.14143863320350647, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3141113221645355, "eval_runtime": 3.4148, "eval_samples_per_second": 14.642, "eval_steps_per_second": 3.807, "step": 170 },
    { "epoch": 3.55, "learning_rate": 1e-05, "loss": 0.2812, "step": 172 },
    { "epoch": 3.59, "learning_rate": 1e-05, "loss": 0.4222, "step": 174 },
    { "epoch": 3.63, "learning_rate": 1e-05, "loss": 0.3538, "step": 176 },
    { "epoch": 3.67, "learning_rate": 1e-05, "loss": 0.2896, "step": 178 },
    { "epoch": 3.71, "learning_rate": 1e-05, "loss": 0.3285, "step": 180 },
    { "epoch": 3.71, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.04474750533699989, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.30486491322517395, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 180 },
    { "epoch": 3.75, "learning_rate": 1e-05, "loss": 0.3278, "step": 182 },
    { "epoch": 3.79, "learning_rate": 1e-05, "loss": 0.3715, "step": 184 },
    { "epoch": 3.84, "learning_rate": 1e-05, "loss": 0.3234, "step": 186 },
    { "epoch": 3.88, "learning_rate": 1e-05, "loss": 0.2756, "step": 188 },
    { "epoch": 3.92, "learning_rate": 1e-05, "loss": 0.3535, "step": 190 },
    { "epoch": 3.92, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.3523316979408264, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3148648738861084, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 190 },
    { "epoch": 3.96, "learning_rate": 1e-05, "loss": 0.3098, "step": 192 },
    { "epoch": 4.0, "learning_rate": 1e-05, "loss": 0.2694, "step": 194 },
    { "epoch": 4.04, "learning_rate": 1e-05, "loss": 0.2731, "step": 196 },
    { "epoch": 4.08, "learning_rate": 1e-05, "loss": 0.309, "step": 198 },
    { "epoch": 4.12, "learning_rate": 1e-05, "loss": 0.3961, "step": 200 },
    { "epoch": 4.12, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.40198975801467896, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2921418249607086, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 200 },
    { "epoch": 4.16, "learning_rate": 1e-05, "loss": 0.3488, "step": 202 },
    { "epoch": 4.21, "learning_rate": 1e-05, "loss": 0.399, "step": 204 },
    { "epoch": 4.25, "learning_rate": 1e-05, "loss": 0.348, "step": 206 },
    { "epoch": 4.29, "learning_rate": 1e-05, "loss": 0.2844, "step": 208 },
    { "epoch": 4.33, "learning_rate": 1e-05, "loss": 0.3491, "step": 210 },
    { "epoch": 4.33, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.008343610912561417, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2992297410964966, "eval_runtime": 3.4089, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.814, "step": 210 },
    { "epoch": 4.37, "learning_rate": 1e-05, "loss": 0.2381, "step": 212 },
    { "epoch": 4.41, "learning_rate": 1e-05, "loss": 0.3823, "step": 214 },
    { "epoch": 4.45, "learning_rate": 1e-05, "loss": 0.2863, "step": 216 },
    { "epoch": 4.49, "learning_rate": 1e-05, "loss": 0.2731, "step": 218 },
    { "epoch": 4.54, "learning_rate": 1e-05, "loss": 0.2552, "step": 220 },
    { "epoch": 4.54, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.21405518054962158, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28597450256347656, "eval_runtime": 3.4094, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 220 },
    { "epoch": 4.58, "learning_rate": 1e-05, "loss": 0.3438, "step": 222 },
    { "epoch": 4.62, "learning_rate": 1e-05, "loss": 0.3672, "step": 224 },
    { "epoch": 4.66, "learning_rate": 1e-05, "loss": 0.2783, "step": 226 },
    { "epoch": 4.7, "learning_rate": 1e-05, "loss": 0.2705, "step": 228 },
    { "epoch": 4.74, "learning_rate": 1e-05, "loss": 0.1839, "step": 230 },
    { "epoch": 4.74, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.29983624815940857, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2997009754180908, "eval_runtime": 3.409, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.813, "step": 230 },
    { "epoch": 4.78, "learning_rate": 1e-05, "loss": 0.2776, "step": 232 },
    { "epoch": 4.82, "learning_rate": 1e-05, "loss": 0.284, "step": 234 },
    { "epoch": 4.87, "learning_rate": 1e-05, "loss": 0.3313, "step": 236 },
    { "epoch": 4.91, "learning_rate": 1e-05, "loss": 0.2521, "step": 238 },
    { "epoch": 4.95, "learning_rate": 1e-05, "loss": 0.2896, "step": 240 },
    { "epoch": 4.95, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.08488038927316666, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2867457866668701, "eval_runtime": 3.4098, "eval_samples_per_second": 14.664, "eval_steps_per_second": 3.813, "step": 240 },
    { "epoch": 4.99, "learning_rate": 1e-05, "loss": 0.3284, "step": 242 },
    { "epoch": 5.03, "learning_rate": 1e-05, "loss": 0.3234, "step": 244 },
    { "epoch": 5.07, "learning_rate": 1e-05, "loss": 0.2818, "step": 246 },
    { "epoch": 5.11, "learning_rate": 1e-05, "loss": 0.3356, "step": 248 },
    { "epoch": 5.15, "learning_rate": 1e-05, "loss": 0.3125, "step": 250 },
    { "epoch": 5.15, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.0647800862789154, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28697526454925537, "eval_runtime": 3.4101, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 250 },
    { "epoch": 5.2, "learning_rate": 1e-05, "loss": 0.2619, "step": 252 },
    { "epoch": 5.24, "learning_rate": 1e-05, "loss": 0.2484, "step": 254 },
    { "epoch": 5.28, "learning_rate": 1e-05, "loss": 0.3222, "step": 256 },
    { "epoch": 5.32, "learning_rate": 1e-05, "loss": 0.1985, "step": 258 },
    { "epoch": 5.36, "learning_rate": 1e-05, "loss": 0.3923, "step": 260 },
    { "epoch": 5.36, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.1731201559305191, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2790464162826538, "eval_runtime": 3.4092, "eval_samples_per_second": 14.666, "eval_steps_per_second": 3.813, "step": 260 },
    { "epoch": 5.4, "learning_rate": 1e-05, "loss": 0.3787, "step": 262 },
    { "epoch": 5.44, "learning_rate": 1e-05, "loss": 0.2585, "step": 264 },
    { "epoch": 5.48, "learning_rate": 1e-05, "loss": 0.2569, "step": 266 },
    { "epoch": 5.53, "learning_rate": 1e-05, "loss": 0.2574, "step": 268 },
    { "epoch": 5.57, "learning_rate": 1e-05, "loss": 0.3044, "step": 270 },
    { "epoch": 5.57, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.16195712983608246, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28128278255462646, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 270 },
    { "epoch": 5.61, "learning_rate": 1e-05, "loss": 0.2866, "step": 272 },
    { "epoch": 5.65, "learning_rate": 1e-05, "loss": 0.2532, "step": 274 },
    { "epoch": 5.69, "learning_rate": 1e-05, "loss": 0.2748, "step": 276 },
    { "epoch": 5.73, "learning_rate": 1e-05, "loss": 0.3158, "step": 278 },
    { "epoch": 5.77, "learning_rate": 1e-05, "loss": 0.3294, "step": 280 },
    { "epoch": 5.77, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.017967911437153816, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.280609667301178, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 280 },
    { "epoch": 5.81, "learning_rate": 1e-05, "loss": 0.2155, "step": 282 },
    { "epoch": 5.86, "learning_rate": 1e-05, "loss": 0.2301, "step": 284 },
    { "epoch": 5.9, "learning_rate": 1e-05, "loss": 0.3384, "step": 286 },
    { "epoch": 5.94, "learning_rate": 1e-05, "loss": 0.2534, "step": 288 },
    { "epoch": 5.98, "learning_rate": 1e-05, "loss": 0.2902, "step": 290 },
    { "epoch": 5.98, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.3276750147342682, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2765599191188812, "eval_runtime": 3.4103, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 290 },
    { "epoch": 6.02, "learning_rate": 1e-05, "loss": 0.3011, "step": 292 },
    { "epoch": 6.06, "learning_rate": 1e-05, "loss": 0.3248, "step": 294 },
    { "epoch": 6.1, "learning_rate": 1e-05, "loss": 0.37, "step": 296 },
    { "epoch": 6.14, "learning_rate": 1e-05, "loss": 0.278, "step": 298 },
    { "epoch": 6.19, "learning_rate": 1e-05, "loss": 0.275, "step": 300 },
    { "epoch": 6.19, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.1893548220396042, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2832108438014984, "eval_runtime": 3.4099, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 300 },
    { "epoch": 6.23, "learning_rate": 1e-05, "loss": 0.262, "step": 302 },
    { "epoch": 6.27, "learning_rate": 1e-05, "loss": 0.2765, "step": 304 },
    { "epoch": 6.31, "learning_rate": 1e-05, "loss": 0.2069, "step": 306 },
    { "epoch": 6.35, "learning_rate": 1e-05, "loss": 0.2567, "step": 308 },
    { "epoch": 6.39, "learning_rate": 1e-05, "loss": 0.2939, "step": 310 },
    { "epoch": 6.39, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.16238823533058167, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2794664204120636, "eval_runtime": 3.407, "eval_samples_per_second": 14.676, "eval_steps_per_second": 3.816, "step": 310 },
    { "epoch": 6.43, "learning_rate": 1e-05, "loss": 0.3302, "step": 312 },
    { "epoch": 6.47, "learning_rate": 1e-05, "loss": 0.2244, "step": 314 },
    { "epoch": 6.52, "learning_rate": 1e-05, "loss": 0.2158, "step": 316 },
    { "epoch": 6.56, "learning_rate": 1e-05, "loss": 0.2536, "step": 318 },
    { "epoch": 6.6, "learning_rate": 1e-05, "loss": 0.1905, "step": 320 },
    { "epoch": 6.6, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.26926523447036743, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26797807216644287, "eval_runtime": 3.4064, "eval_samples_per_second": 14.678, "eval_steps_per_second": 3.816, "step": 320 },
    { "epoch": 6.64, "learning_rate": 1e-05, "loss": 0.2607, "step": 322 },
    { "epoch": 6.68, "learning_rate": 1e-05, "loss": 0.2542, "step": 324 },
    { "epoch": 6.72, "learning_rate": 1e-05, "loss": 0.265, "step": 326 },
    { "epoch": 6.76, "learning_rate": 1e-05, "loss": 0.2763, "step": 328 },
    { "epoch": 6.8, "learning_rate": 1e-05, "loss": 0.3119, "step": 330 },
    { "epoch": 6.8, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.12742717564105988, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2834363877773285, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 330 },
    { "epoch": 6.85, "learning_rate": 1e-05, "loss": 0.1726, "step": 332 },
    { "epoch": 6.89, "learning_rate": 1e-05, "loss": 0.2636, "step": 334 },
    { "epoch": 6.93, "learning_rate": 1e-05, "loss": 0.3579, "step": 336 },
    { "epoch": 6.97, "learning_rate": 1e-05, "loss": 0.2635, "step": 338 },
    { "epoch": 7.01, "learning_rate": 1e-05, "loss": 0.2602, "step": 340 },
    { "epoch": 7.01, "eval_accuracy": 0.9399999976158142, "eval_average_score": -0.0478409007191658, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.27865225076675415, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 340 },
    { "epoch": 7.05, "learning_rate": 1e-05, "loss": 0.2111, "step": 342 },
    { "epoch": 7.09, "learning_rate": 1e-05, "loss": 0.2853, "step": 344 },
    { "epoch": 7.13, "learning_rate": 1e-05, "loss": 0.2853, "step": 346 },
    { "epoch": 7.18, "learning_rate": 1e-05, "loss": 0.3026, "step": 348 },
    { "epoch": 7.22, "learning_rate": 1e-05, "loss": 0.2446, "step": 350 },
    { "epoch": 7.22, "eval_accuracy": 0.9399999976158142, "eval_average_score": -0.1700994372367859, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2747596800327301, "eval_runtime": 3.4061, "eval_samples_per_second": 14.679, "eval_steps_per_second": 3.817, "step": 350 },
    { "epoch": 7.26, "learning_rate": 1e-05, "loss": 0.2469, "step": 352 },
    { "epoch": 7.3, "learning_rate": 1e-05, "loss": 0.2372, "step": 354 },
    { "epoch": 7.34, "learning_rate": 1e-05, "loss": 0.2655, "step": 356 },
    { "epoch": 7.38, "learning_rate": 1e-05, "loss": 0.2661, "step": 358 },
    { "epoch": 7.42, "learning_rate": 1e-05, "loss": 0.2731, "step": 360 },
    { "epoch": 7.42, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.01858169585466385, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2665027379989624, "eval_runtime": 3.4061, "eval_samples_per_second": 14.68, "eval_steps_per_second": 3.817, "step": 360 },
    { "epoch": 7.46, "learning_rate": 1e-05, "loss": 0.1845, "step": 362 },
    { "epoch": 7.51, "learning_rate": 1e-05, "loss": 0.2881, "step": 364 },
    { "epoch": 7.55, "learning_rate": 1e-05, "loss": 0.2038, "step": 366 },
    { "epoch": 7.59, "learning_rate": 1e-05, "loss": 0.2111, "step": 368 },
    { "epoch": 7.63, "learning_rate": 1e-05, "loss": 0.2992, "step": 370 },
    { "epoch": 7.63, "eval_accuracy": 0.9399999976158142, "eval_average_score": 0.4874734878540039, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2591661810874939, "eval_runtime": 3.4058, "eval_samples_per_second": 14.681, "eval_steps_per_second": 3.817, "step": 370 },
    { "epoch": 7.67, "learning_rate": 1e-05, "loss": 0.1714, "step": 372 },
    { "epoch": 7.71, "learning_rate": 1e-05, "loss": 0.3, "step": 374 },
    { "epoch": 7.75, "learning_rate": 1e-05, "loss": 0.2276, "step": 376 },
    { "epoch": 7.79, "learning_rate": 1e-05, "loss": 0.3777, "step": 378 },
    { "epoch": 7.84, "learning_rate": 1e-05, "loss": 0.2173, "step": 380 },
    { "epoch": 7.84, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.07412362843751907, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.274651437997818, "eval_runtime": 3.4072, "eval_samples_per_second": 14.675, "eval_steps_per_second": 3.815, "step": 380 },
    { "epoch": 7.88, "learning_rate": 1e-05, "loss": 0.2491, "step": 382 },
    { "epoch": 7.92, "learning_rate": 1e-05, "loss": 0.2425, "step": 384 },
    { "epoch": 7.96, "learning_rate": 1e-05, "loss": 0.2925, "step": 386 },
    { "epoch": 8.0, "learning_rate": 1e-05, "loss": 0.2723, "step": 388 },
    { "epoch": 8.04, "learning_rate": 1e-05, "loss": 0.3516, "step": 390 },
    { "epoch": 8.04, "eval_accuracy": 0.9399999976158142, "eval_average_score": 0.005836525000631809, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26507002115249634, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 390 },
    { "epoch": 8.08, "learning_rate": 1e-05, "loss": 0.2519, "step": 392 },
    { "epoch": 8.12, "learning_rate": 1e-05, "loss": 0.2991, "step": 394 },
    { "epoch": 8.16, "learning_rate": 1e-05, "loss": 0.2234, "step": 396 },
    { "epoch": 8.21, "learning_rate": 1e-05, "loss": 0.2711, "step": 398 },
    { "epoch": 8.25, "learning_rate": 1e-05, "loss": 0.3236, "step": 400 },
    { "epoch": 8.25, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.04313167557120323, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.25931546092033386, "eval_runtime": 3.4059, "eval_samples_per_second": 14.681, "eval_steps_per_second": 3.817, "step": 400 },
    { "epoch": 8.29, "learning_rate": 1e-05, "loss": 0.1869, "step": 402 },
    { "epoch": 8.33, "learning_rate": 1e-05, "loss": 0.2314, "step": 404 },
    { "epoch": 8.37, "learning_rate": 1e-05, "loss": 0.2405, "step": 406 },
    { "epoch": 8.41, "learning_rate": 1e-05, "loss": 0.1779, "step": 408 },
    { "epoch": 8.45, "learning_rate": 1e-05, "loss": 0.2458, "step": 410 },
    { "epoch": 8.45, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.21540480852127075, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2680150270462036, "eval_runtime": 3.4085, "eval_samples_per_second": 14.669, "eval_steps_per_second": 3.814, "step": 410 },
    { "epoch": 8.49, "learning_rate": 1e-05, "loss": 0.1723, "step": 412 },
    { "epoch": 8.54, "learning_rate": 1e-05, "loss": 0.3219, "step": 414 },
    { "epoch": 8.58, "learning_rate": 1e-05, "loss": 0.243, "step": 416 },
    { "epoch": 8.62, "learning_rate": 1e-05, "loss": 0.2095, "step": 418 },
    { "epoch": 8.66, "learning_rate": 1e-05, "loss": 0.1976, "step": 420 },
    { "epoch": 8.66, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.17054100334644318, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2620932459831238, "eval_runtime": 3.4133, "eval_samples_per_second": 14.648, "eval_steps_per_second": 3.809, "step": 420 },
    { "epoch": 8.7, "learning_rate": 1e-05, "loss": 0.1717, "step": 422 },
    { "epoch": 8.74, "learning_rate": 1e-05, "loss": 0.2759, "step": 424 },
    { "epoch": 8.78, "learning_rate": 1e-05, "loss": 0.3572, "step": 426 },
    { "epoch": 8.82, "learning_rate": 1e-05, "loss": 0.1688, "step": 428 },
    { "epoch": 8.87, "learning_rate": 1e-05, "loss": 0.2358, "step": 430 },
    { "epoch": 8.87, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.2345839887857437, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2618144154548645, "eval_runtime": 3.406, "eval_samples_per_second": 14.68, "eval_steps_per_second": 3.817, "step": 430 },
    { "epoch": 8.91, "learning_rate": 1e-05, "loss": 0.1975, "step": 432 },
    { "epoch": 8.95, "learning_rate": 1e-05, "loss": 0.3316, "step": 434 },
    { "epoch": 8.99, "learning_rate": 1e-05, "loss": 0.1964, "step": 436 },
    { "epoch": 9.03, "learning_rate": 1e-05, "loss": 0.2148, "step": 438 },
    { "epoch": 9.07, "learning_rate": 1e-05, "loss": 0.1855, "step": 440 },
    { "epoch": 9.07, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.04021953418850899, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2684229612350464, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 440 },
    { "epoch": 9.11, "learning_rate": 1e-05, "loss": 0.2186, "step": 442 },
    { "epoch": 9.15, "learning_rate": 1e-05, "loss": 0.1604, "step": 444 },
    { "epoch": 9.2, "learning_rate": 1e-05, "loss": 0.1649, "step": 446 },
    { "epoch": 9.24, "learning_rate": 1e-05, "loss": 0.2496, "step": 448 },
    { "epoch": 9.28, "learning_rate": 1e-05, "loss": 0.3004, "step": 450 },
    { "epoch": 9.28, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.08485334366559982, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26888754963874817, "eval_runtime": 3.4093, "eval_samples_per_second": 14.666, "eval_steps_per_second": 3.813, "step": 450 },
    { "epoch": 9.32, "learning_rate": 1e-05, "loss": 0.2056, "step": 452 },
    { "epoch": 9.36, "learning_rate": 1e-05, "loss": 0.2284, "step": 454 },
    { "epoch": 9.4, "learning_rate": 1e-05, "loss": 0.2529, "step": 456 },
    { "epoch": 9.44, "learning_rate": 1e-05, "loss": 0.2847, "step": 458 },
    { "epoch": 9.48, "learning_rate": 1e-05, "loss": 0.2804, "step": 460 },
    { "epoch": 9.48, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.19331374764442444, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2731383442878723, "eval_runtime": 3.4089, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 460 }
  ],
  "max_steps": 480,
  "num_train_epochs": 10,
  "total_flos": 8.790703793881743e+17,
  "trial_name": null,
  "trial_params": null
}