{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.484536082474227, "global_step": 460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "loss": 0.7393, "step": 2 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "loss": 0.7516, "step": 4 }, { "epoch": 0.12, "learning_rate": 6.25e-07, "loss": 0.7506, "step": 6 }, { "epoch": 0.16, "learning_rate": 8.333333333333333e-07, "loss": 0.7494, "step": 8 }, { "epoch": 0.21, "learning_rate": 1.0416666666666667e-06, "loss": 0.7602, "step": 10 }, { "epoch": 0.21, "eval_accuracy": 0.6200000047683716, "eval_average_score": 0.6218893527984619, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7480455040931702, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 10 }, { "epoch": 0.25, "learning_rate": 1.25e-06, "loss": 0.7501, "step": 12 }, { "epoch": 0.29, "learning_rate": 1.4583333333333335e-06, "loss": 0.743, "step": 14 }, { "epoch": 0.33, "learning_rate": 1.6666666666666667e-06, "loss": 0.7412, "step": 16 }, { "epoch": 0.37, "learning_rate": 1.8750000000000003e-06, "loss": 0.7553, "step": 18 }, { "epoch": 0.41, "learning_rate": 2.0833333333333334e-06, "loss": 0.7355, "step": 20 }, { "epoch": 0.41, "eval_accuracy": 0.5400000214576721, "eval_average_score": 0.5707897543907166, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7415069341659546, "eval_runtime": 3.405, "eval_samples_per_second": 14.684, "eval_steps_per_second": 3.818, "step": 20 }, { "epoch": 0.45, "learning_rate": 2.2916666666666666e-06, "loss": 0.7402, "step": 22 }, { "epoch": 0.49, "learning_rate": 2.5e-06, "loss": 0.7427, "step": 24 }, { "epoch": 0.54, "learning_rate": 2.7083333333333334e-06, "loss": 0.7286, "step": 26 }, { "epoch": 0.58, "learning_rate": 2.916666666666667e-06, "loss": 0.727, "step": 28 }, { "epoch": 0.62, "learning_rate": 3.125e-06, "loss": 0.7305, "step": 30 }, { "epoch": 0.62, "eval_accuracy": 0.6000000238418579, "eval_average_score": 0.4747978150844574, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7272794842720032, "eval_runtime": 3.41, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 30 }, { "epoch": 0.66, "learning_rate": 3.3333333333333333e-06, "loss": 0.7186, "step": 32 }, { "epoch": 0.7, "learning_rate": 3.5416666666666673e-06, "loss": 0.7398, "step": 34 }, { "epoch": 0.74, "learning_rate": 3.7500000000000005e-06, "loss": 0.7166, "step": 36 }, { "epoch": 0.78, "learning_rate": 3.958333333333333e-06, "loss": 0.7148, "step": 38 }, { "epoch": 0.82, "learning_rate": 4.166666666666667e-06, "loss": 0.7116, "step": 40 }, { "epoch": 0.82, "eval_accuracy": 0.6000000238418579, "eval_average_score": 0.3107444643974304, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.7109410166740417, "eval_runtime": 3.4072, "eval_samples_per_second": 14.675, "eval_steps_per_second": 3.815, "step": 40 }, { "epoch": 0.87, "learning_rate": 4.3750000000000005e-06, "loss": 0.7221, "step": 42 }, { "epoch": 0.91, "learning_rate": 4.583333333333333e-06, "loss": 0.7042, "step": 44 }, { "epoch": 0.95, "learning_rate": 4.791666666666668e-06, "loss": 0.7011, "step": 46 }, { "epoch": 0.99, "learning_rate": 5e-06, "loss": 0.6987, "step": 48 }, { "epoch": 1.03, "learning_rate": 5.208333333333334e-06, "loss": 0.6926, "step": 50 }, { "epoch": 1.03, "eval_accuracy": 0.6800000071525574, "eval_average_score": 0.07106868177652359, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6947080492973328, "eval_runtime": 3.4106, "eval_samples_per_second": 14.66, "eval_steps_per_second": 3.812, "step": 50 }, { "epoch": 1.07, "learning_rate": 5.416666666666667e-06, "loss": 0.695, "step": 52 }, { "epoch": 1.11, "learning_rate": 5.625e-06, "loss": 0.6917, "step": 54 }, { "epoch": 1.15, "learning_rate": 5.833333333333334e-06, "loss": 0.7115, "step": 56 }, { "epoch": 1.2, "learning_rate": 6.041666666666667e-06, "loss": 0.6858, "step": 58 }, { "epoch": 1.24, "learning_rate": 6.25e-06, "loss": 0.6867, "step": 60 }, { "epoch": 1.24, "eval_accuracy": 0.6000000238418579, "eval_average_score": -0.15788692235946655, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6935771703720093, "eval_runtime": 3.4133, "eval_samples_per_second": 14.648, "eval_steps_per_second": 3.809, "step": 60 }, { "epoch": 1.28, "learning_rate": 6.458333333333334e-06, "loss": 0.6868, "step": 62 }, { "epoch": 1.32, "learning_rate": 6.666666666666667e-06, "loss": 0.6837, "step": 64 }, { "epoch": 1.36, "learning_rate": 6.875e-06, "loss": 0.6883, "step": 66 }, { "epoch": 1.4, "learning_rate": 7.083333333333335e-06, "loss": 0.689, "step": 68 }, { "epoch": 1.44, "learning_rate": 7.291666666666667e-06, "loss": 0.6761, "step": 70 }, { "epoch": 1.44, "eval_accuracy": 0.7799999713897705, "eval_average_score": -0.07916055619716644, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6847817897796631, "eval_runtime": 3.4111, "eval_samples_per_second": 14.658, "eval_steps_per_second": 3.811, "step": 70 }, { "epoch": 1.48, "learning_rate": 7.500000000000001e-06, "loss": 0.6863, "step": 72 }, { "epoch": 1.53, "learning_rate": 7.708333333333334e-06, "loss": 0.6841, "step": 74 }, { "epoch": 1.57, "learning_rate": 7.916666666666667e-06, "loss": 0.6719, "step": 76 }, { "epoch": 1.61, "learning_rate": 8.125000000000001e-06, "loss": 0.6802, "step": 78 }, { "epoch": 1.65, "learning_rate": 8.333333333333334e-06, "loss": 0.672, "step": 80 }, { "epoch": 1.65, "eval_accuracy": 0.8199999928474426, "eval_average_score": -0.042581118643283844, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.6735163331031799, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 80 }, { "epoch": 1.69, "learning_rate": 8.541666666666666e-06, "loss": 0.6675, "step": 82 }, { "epoch": 1.73, "learning_rate": 8.750000000000001e-06, "loss": 0.6649, "step": 84 }, { "epoch": 1.77, "learning_rate": 8.958333333333334e-06, "loss": 0.6804, "step": 86 }, { "epoch": 1.81, "learning_rate": 9.166666666666666e-06, "loss": 0.6692, "step": 88 }, { "epoch": 1.86, "learning_rate": 9.375000000000001e-06, "loss": 0.654, "step": 90 }, { "epoch": 1.86, "eval_accuracy": 0.8600000143051147, "eval_average_score": -0.11977884918451309, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.652625560760498, "eval_runtime": 3.4105, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 90 }, { "epoch": 1.9, "learning_rate": 9.583333333333335e-06, "loss": 0.6495, "step": 92 }, { "epoch": 1.94, "learning_rate": 9.791666666666666e-06, "loss": 0.6472, "step": 94 }, { "epoch": 1.98, "learning_rate": 1e-05, "loss": 0.6499, "step": 96 }, { "epoch": 2.02, "learning_rate": 1e-05, "loss": 0.6247, "step": 98 }, { "epoch": 2.06, "learning_rate": 1e-05, "loss": 0.6422, "step": 100 }, { "epoch": 2.06, "eval_accuracy": 0.8799999952316284, "eval_average_score": -0.13170765340328217, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.608478307723999, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 100 }, { "epoch": 2.1, "learning_rate": 1e-05, "loss": 0.6252, "step": 102 }, { "epoch": 2.14, "learning_rate": 1e-05, "loss": 0.6065, "step": 104 }, { "epoch": 2.19, "learning_rate": 1e-05, "loss": 0.5829, "step": 106 }, { "epoch": 2.23, "learning_rate": 1e-05, "loss": 0.5936, "step": 108 }, { "epoch": 2.27, "learning_rate": 1e-05, "loss": 0.5528, "step": 110 }, { "epoch": 2.27, "eval_accuracy": 0.8600000143051147, "eval_average_score": -0.0412043035030365, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.5438380241394043, "eval_runtime": 3.4096, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 110 }, { "epoch": 2.31, "learning_rate": 1e-05, "loss": 0.5529, "step": 112 }, { "epoch": 2.35, "learning_rate": 1e-05, "loss": 0.5658, "step": 114 }, { "epoch": 2.39, "learning_rate": 1e-05, "loss": 0.5421, "step": 116 }, { "epoch": 2.43, "learning_rate": 1e-05, "loss": 0.5503, "step": 118 }, { "epoch": 2.47, "learning_rate": 1e-05, "loss": 0.4854, "step": 120 }, { "epoch": 2.47, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.40412548184394836, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.46706727147102356, "eval_runtime": 3.4091, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.813, "step": 120 }, { "epoch": 2.52, "learning_rate": 1e-05, "loss": 0.5268, "step": 122 }, { "epoch": 2.56, "learning_rate": 1e-05, "loss": 0.5251, "step": 124 }, { "epoch": 2.6, "learning_rate": 1e-05, "loss": 0.4857, "step": 126 }, { "epoch": 2.64, "learning_rate": 1e-05, "loss": 0.5307, "step": 128 }, { "epoch": 2.68, "learning_rate": 1e-05, "loss": 0.4672, "step": 130 }, { "epoch": 2.68, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.14681239426136017, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.391941636800766, "eval_runtime": 3.4094, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 130 }, { "epoch": 2.72, "learning_rate": 1e-05, "loss": 0.4077, "step": 132 }, { "epoch": 2.76, "learning_rate": 1e-05, "loss": 0.3849, "step": 134 }, { "epoch": 2.8, "learning_rate": 1e-05, "loss": 0.4358, "step": 136 }, { "epoch": 2.85, "learning_rate": 1e-05, "loss": 0.3896, "step": 138 }, { "epoch": 2.89, "learning_rate": 1e-05, "loss": 0.4903, "step": 140 }, { "epoch": 2.89, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.11637383699417114, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3501974940299988, "eval_runtime": 3.4098, "eval_samples_per_second": 14.664, "eval_steps_per_second": 3.813, "step": 140 }, { "epoch": 2.93, "learning_rate": 1e-05, "loss": 0.3395, "step": 142 }, { "epoch": 2.97, "learning_rate": 1e-05, "loss": 0.3976, "step": 144 }, { "epoch": 3.01, "learning_rate": 1e-05, "loss": 0.3826, "step": 146 }, { "epoch": 3.05, "learning_rate": 1e-05, "loss": 0.4534, "step": 148 }, { "epoch": 3.09, "learning_rate": 1e-05, "loss": 0.3473, "step": 150 }, { "epoch": 3.09, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.3039152920246124, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3362675607204437, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 150 }, { "epoch": 3.13, "learning_rate": 1e-05, "loss": 0.368, "step": 152 }, { "epoch": 3.18, "learning_rate": 1e-05, "loss": 0.4096, "step": 154 }, { "epoch": 3.22, "learning_rate": 1e-05, "loss": 0.3829, "step": 156 }, { "epoch": 3.26, "learning_rate": 1e-05, "loss": 0.389, "step": 158 }, { "epoch": 3.3, "learning_rate": 1e-05, "loss": 0.4191, "step": 160 }, { "epoch": 3.3, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.41642922163009644, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.31328877806663513, "eval_runtime": 3.415, "eval_samples_per_second": 14.641, "eval_steps_per_second": 3.807, "step": 160 }, { "epoch": 3.34, "learning_rate": 1e-05, "loss": 0.2698, "step": 162 }, { "epoch": 3.38, "learning_rate": 1e-05, "loss": 0.4149, "step": 164 }, { "epoch": 3.42, "learning_rate": 1e-05, "loss": 0.3844, "step": 166 }, { "epoch": 3.46, "learning_rate": 1e-05, "loss": 0.3549, "step": 168 }, { "epoch": 3.51, "learning_rate": 1e-05, "loss": 0.3571, "step": 170 }, { "epoch": 3.51, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.14143863320350647, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3141113221645355, "eval_runtime": 3.4148, "eval_samples_per_second": 14.642, "eval_steps_per_second": 3.807, "step": 170 }, { "epoch": 3.55, "learning_rate": 1e-05, "loss": 0.2812, "step": 172 }, { "epoch": 3.59, "learning_rate": 1e-05, "loss": 0.4222, "step": 174 }, { "epoch": 3.63, "learning_rate": 1e-05, "loss": 0.3538, "step": 176 }, { "epoch": 3.67, "learning_rate": 1e-05, "loss": 0.2896, "step": 178 }, { "epoch": 3.71, "learning_rate": 1e-05, "loss": 0.3285, "step": 180 }, { "epoch": 3.71, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.04474750533699989, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.30486491322517395, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 180 }, { "epoch": 3.75, "learning_rate": 1e-05, "loss": 0.3278, "step": 182 }, { "epoch": 3.79, "learning_rate": 1e-05, "loss": 0.3715, "step": 184 }, { "epoch": 3.84, "learning_rate": 1e-05, "loss": 0.3234, "step": 186 }, { "epoch": 3.88, "learning_rate": 1e-05, "loss": 0.2756, "step": 188 }, { "epoch": 3.92, "learning_rate": 1e-05, "loss": 0.3535, "step": 190 }, { "epoch": 3.92, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.3523316979408264, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.3148648738861084, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 190 }, { "epoch": 3.96, "learning_rate": 1e-05, "loss": 0.3098, "step": 192 }, { "epoch": 4.0, "learning_rate": 1e-05, "loss": 0.2694, "step": 194 }, { "epoch": 4.04, "learning_rate": 1e-05, "loss": 0.2731, "step": 196 }, { "epoch": 4.08, "learning_rate": 1e-05, "loss": 0.309, "step": 198 }, { "epoch": 4.12, "learning_rate": 1e-05, "loss": 0.3961, "step": 200 }, { "epoch": 4.12, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.40198975801467896, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2921418249607086, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 200 }, { "epoch": 4.16, "learning_rate": 1e-05, "loss": 0.3488, "step": 202 }, { "epoch": 4.21, "learning_rate": 1e-05, "loss": 0.399, "step": 204 }, { "epoch": 4.25, "learning_rate": 1e-05, "loss": 0.348, "step": 206 }, { "epoch": 4.29, "learning_rate": 1e-05, "loss": 0.2844, "step": 208 }, { "epoch": 4.33, "learning_rate": 1e-05, "loss": 0.3491, "step": 210 }, { "epoch": 4.33, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.008343610912561417, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2992297410964966, "eval_runtime": 3.4089, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.814, "step": 210 }, { "epoch": 4.37, "learning_rate": 1e-05, "loss": 0.2381, "step": 212 }, { "epoch": 4.41, "learning_rate": 1e-05, "loss": 0.3823, "step": 214 }, { "epoch": 4.45, "learning_rate": 1e-05, "loss": 0.2863, "step": 216 }, { "epoch": 4.49, "learning_rate": 1e-05, "loss": 0.2731, "step": 218 }, { "epoch": 4.54, "learning_rate": 1e-05, "loss": 0.2552, "step": 220 }, { "epoch": 4.54, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.21405518054962158, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28597450256347656, "eval_runtime": 3.4094, "eval_samples_per_second": 14.665, "eval_steps_per_second": 3.813, "step": 220 }, { "epoch": 4.58, "learning_rate": 1e-05, "loss": 0.3438, "step": 222 }, { "epoch": 4.62, "learning_rate": 1e-05, "loss": 0.3672, "step": 224 }, { "epoch": 4.66, "learning_rate": 1e-05, "loss": 0.2783, "step": 226 }, { "epoch": 4.7, "learning_rate": 1e-05, "loss": 0.2705, "step": 228 }, { "epoch": 4.74, "learning_rate": 1e-05, "loss": 0.1839, "step": 230 }, { "epoch": 4.74, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.29983624815940857, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2997009754180908, "eval_runtime": 3.409, "eval_samples_per_second": 14.667, "eval_steps_per_second": 3.813, "step": 230 }, { "epoch": 4.78, "learning_rate": 1e-05, "loss": 0.2776, "step": 232 }, { "epoch": 4.82, "learning_rate": 1e-05, "loss": 0.284, "step": 234 }, { "epoch": 4.87, "learning_rate": 1e-05, "loss": 0.3313, "step": 236 }, { "epoch": 4.91, "learning_rate": 1e-05, "loss": 0.2521, "step": 238 }, { "epoch": 4.95, "learning_rate": 1e-05, "loss": 0.2896, "step": 240 }, { "epoch": 4.95, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.08488038927316666, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2867457866668701, "eval_runtime": 3.4098, "eval_samples_per_second": 14.664, "eval_steps_per_second": 3.813, "step": 240 }, { "epoch": 4.99, "learning_rate": 1e-05, "loss": 0.3284, "step": 242 }, { "epoch": 5.03, "learning_rate": 1e-05, "loss": 0.3234, "step": 244 }, { "epoch": 5.07, "learning_rate": 1e-05, "loss": 0.2818, "step": 246 }, { "epoch": 5.11, "learning_rate": 1e-05, "loss": 0.3356, "step": 248 }, { "epoch": 5.15, "learning_rate": 1e-05, "loss": 0.3125, "step": 250 }, { "epoch": 5.15, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.0647800862789154, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28697526454925537, "eval_runtime": 3.4101, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 250 }, { "epoch": 5.2, "learning_rate": 1e-05, "loss": 0.2619, "step": 252 }, { "epoch": 5.24, "learning_rate": 1e-05, "loss": 0.2484, "step": 254 }, { "epoch": 5.28, "learning_rate": 1e-05, "loss": 0.3222, "step": 256 }, { "epoch": 5.32, "learning_rate": 1e-05, "loss": 0.1985, "step": 258 }, { "epoch": 5.36, "learning_rate": 1e-05, "loss": 0.3923, "step": 260 }, { "epoch": 5.36, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.1731201559305191, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2790464162826538, "eval_runtime": 3.4092, "eval_samples_per_second": 14.666, "eval_steps_per_second": 3.813, "step": 260 }, { "epoch": 5.4, "learning_rate": 1e-05, "loss": 0.3787, "step": 262 }, { "epoch": 5.44, "learning_rate": 1e-05, "loss": 0.2585, "step": 264 }, { "epoch": 5.48, "learning_rate": 1e-05, "loss": 0.2569, "step": 266 }, { "epoch": 5.53, "learning_rate": 1e-05, "loss": 0.2574, "step": 268 }, { "epoch": 5.57, "learning_rate": 1e-05, "loss": 0.3044, "step": 270 }, { "epoch": 5.57, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.16195712983608246, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.28128278255462646, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 270 }, { "epoch": 5.61, "learning_rate": 1e-05, "loss": 0.2866, "step": 272 }, { "epoch": 5.65, "learning_rate": 1e-05, "loss": 0.2532, "step": 274 }, { "epoch": 5.69, "learning_rate": 1e-05, "loss": 0.2748, "step": 276 }, { "epoch": 5.73, "learning_rate": 1e-05, "loss": 0.3158, "step": 278 }, { "epoch": 5.77, "learning_rate": 1e-05, "loss": 0.3294, "step": 280 }, { "epoch": 5.77, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.017967911437153816, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.280609667301178, "eval_runtime": 3.4082, "eval_samples_per_second": 14.67, "eval_steps_per_second": 3.814, "step": 280 }, { "epoch": 5.81, "learning_rate": 1e-05, "loss": 0.2155, "step": 282 }, { "epoch": 5.86, "learning_rate": 1e-05, "loss": 0.2301, "step": 284 }, { "epoch": 5.9, "learning_rate": 1e-05, "loss": 0.3384, "step": 286 }, { "epoch": 5.94, "learning_rate": 1e-05, "loss": 0.2534, "step": 288 }, { "epoch": 5.98, "learning_rate": 1e-05, "loss": 0.2902, "step": 290 }, { "epoch": 5.98, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.3276750147342682, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2765599191188812, "eval_runtime": 3.4103, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 290 }, { "epoch": 6.02, "learning_rate": 1e-05, "loss": 0.3011, "step": 292 }, { "epoch": 6.06, "learning_rate": 1e-05, "loss": 0.3248, "step": 294 }, { "epoch": 6.1, "learning_rate": 1e-05, "loss": 0.37, "step": 296 }, { "epoch": 6.14, "learning_rate": 1e-05, "loss": 0.278, "step": 298 }, { "epoch": 6.19, "learning_rate": 1e-05, "loss": 0.275, "step": 300 }, { "epoch": 6.19, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.1893548220396042, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2832108438014984, "eval_runtime": 3.4099, "eval_samples_per_second": 14.663, "eval_steps_per_second": 3.812, "step": 300 }, { "epoch": 6.23, "learning_rate": 1e-05, "loss": 0.262, "step": 302 }, { "epoch": 6.27, "learning_rate": 1e-05, "loss": 0.2765, "step": 304 }, { "epoch": 6.31, "learning_rate": 1e-05, "loss": 0.2069, "step": 306 }, { "epoch": 6.35, "learning_rate": 1e-05, "loss": 0.2567, "step": 308 }, { "epoch": 6.39, "learning_rate": 1e-05, "loss": 0.2939, "step": 310 }, { "epoch": 6.39, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.16238823533058167, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2794664204120636, "eval_runtime": 3.407, "eval_samples_per_second": 14.676, "eval_steps_per_second": 3.816, "step": 310 }, { "epoch": 6.43, "learning_rate": 1e-05, "loss": 0.3302, "step": 312 }, { "epoch": 6.47, "learning_rate": 1e-05, "loss": 0.2244, "step": 314 }, { "epoch": 6.52, "learning_rate": 1e-05, "loss": 0.2158, "step": 316 }, { "epoch": 6.56, "learning_rate": 1e-05, "loss": 0.2536, "step": 318 }, { "epoch": 6.6, "learning_rate": 1e-05, "loss": 0.1905, "step": 320 }, { "epoch": 6.6, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.26926523447036743, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26797807216644287, "eval_runtime": 3.4064, "eval_samples_per_second": 14.678, "eval_steps_per_second": 3.816, "step": 320 }, { "epoch": 6.64, "learning_rate": 1e-05, "loss": 0.2607, "step": 322 }, { "epoch": 6.68, "learning_rate": 1e-05, "loss": 0.2542, "step": 324 }, { "epoch": 6.72, "learning_rate": 1e-05, "loss": 0.265, "step": 326 }, { "epoch": 6.76, "learning_rate": 1e-05, "loss": 0.2763, "step": 328 }, { "epoch": 6.8, "learning_rate": 1e-05, "loss": 0.3119, "step": 330 }, { "epoch": 6.8, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.12742717564105988, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2834363877773285, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 330 }, { "epoch": 6.85, "learning_rate": 1e-05, "loss": 0.1726, "step": 332 }, { "epoch": 6.89, "learning_rate": 1e-05, "loss": 0.2636, "step": 334 }, { "epoch": 6.93, "learning_rate": 1e-05, "loss": 0.3579, "step": 336 }, { "epoch": 6.97, "learning_rate": 1e-05, "loss": 0.2635, "step": 338 }, { "epoch": 7.01, "learning_rate": 1e-05, "loss": 0.2602, "step": 340 }, { "epoch": 7.01, "eval_accuracy": 0.9399999976158142, "eval_average_score": -0.0478409007191658, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.27865225076675415, "eval_runtime": 3.4081, "eval_samples_per_second": 14.671, "eval_steps_per_second": 3.814, "step": 340 }, { "epoch": 7.05, "learning_rate": 1e-05, "loss": 0.2111, "step": 342 }, { "epoch": 7.09, "learning_rate": 1e-05, "loss": 0.2853, "step": 344 }, { "epoch": 7.13, "learning_rate": 1e-05, "loss": 0.2853, "step": 346 }, { "epoch": 7.18, "learning_rate": 1e-05, "loss": 0.3026, "step": 348 }, { "epoch": 7.22, "learning_rate": 1e-05, "loss": 0.2446, "step": 350 }, { "epoch": 7.22, "eval_accuracy": 0.9399999976158142, "eval_average_score": -0.1700994372367859, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2747596800327301, "eval_runtime": 3.4061, "eval_samples_per_second": 14.679, "eval_steps_per_second": 3.817, "step": 350 }, { "epoch": 7.26, "learning_rate": 1e-05, "loss": 0.2469, "step": 352 }, { "epoch": 7.3, "learning_rate": 1e-05, "loss": 0.2372, "step": 354 }, { "epoch": 7.34, "learning_rate": 1e-05, "loss": 0.2655, "step": 356 }, { "epoch": 7.38, "learning_rate": 1e-05, "loss": 0.2661, "step": 358 }, { "epoch": 7.42, "learning_rate": 1e-05, "loss": 0.2731, "step": 360 }, { "epoch": 7.42, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.01858169585466385, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2665027379989624, "eval_runtime": 3.4061, "eval_samples_per_second": 14.68, "eval_steps_per_second": 3.817, "step": 360 }, { "epoch": 7.46, "learning_rate": 1e-05, "loss": 0.1845, "step": 362 }, { "epoch": 7.51, "learning_rate": 1e-05, "loss": 0.2881, "step": 364 }, { "epoch": 7.55, "learning_rate": 1e-05, "loss": 0.2038, "step": 366 }, { "epoch": 7.59, "learning_rate": 1e-05, "loss": 0.2111, "step": 368 }, { "epoch": 7.63, "learning_rate": 1e-05, "loss": 0.2992, "step": 370 }, { "epoch": 7.63, "eval_accuracy": 0.9399999976158142, "eval_average_score": 0.4874734878540039, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2591661810874939, "eval_runtime": 3.4058, "eval_samples_per_second": 14.681, "eval_steps_per_second": 3.817, "step": 370 }, { "epoch": 7.67, "learning_rate": 1e-05, "loss": 0.1714, "step": 372 }, { "epoch": 7.71, "learning_rate": 1e-05, "loss": 0.3, "step": 374 }, { "epoch": 7.75, "learning_rate": 1e-05, "loss": 0.2276, "step": 376 }, { "epoch": 7.79, "learning_rate": 1e-05, "loss": 0.3777, "step": 378 }, { "epoch": 7.84, "learning_rate": 1e-05, "loss": 0.2173, "step": 380 }, { "epoch": 7.84, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.07412362843751907, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.274651437997818, "eval_runtime": 3.4072, "eval_samples_per_second": 14.675, "eval_steps_per_second": 3.815, "step": 380 }, { "epoch": 7.88, "learning_rate": 1e-05, "loss": 0.2491, "step": 382 }, { "epoch": 7.92, "learning_rate": 1e-05, "loss": 0.2425, "step": 384 }, { "epoch": 7.96, "learning_rate": 1e-05, "loss": 0.2925, "step": 386 }, { "epoch": 8.0, "learning_rate": 1e-05, "loss": 0.2723, "step": 388 }, { "epoch": 8.04, "learning_rate": 1e-05, "loss": 0.3516, "step": 390 }, { "epoch": 8.04, "eval_accuracy": 0.9399999976158142, "eval_average_score": 0.005836525000631809, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26507002115249634, "eval_runtime": 3.4088, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 390 }, { "epoch": 8.08, "learning_rate": 1e-05, "loss": 0.2519, "step": 392 }, { "epoch": 8.12, "learning_rate": 1e-05, "loss": 0.2991, "step": 394 }, { "epoch": 8.16, "learning_rate": 1e-05, "loss": 0.2234, "step": 396 }, { "epoch": 8.21, "learning_rate": 1e-05, "loss": 0.2711, "step": 398 }, { "epoch": 8.25, "learning_rate": 1e-05, "loss": 0.3236, "step": 400 }, { "epoch": 8.25, "eval_accuracy": 0.9200000166893005, "eval_average_score": 0.04313167557120323, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.25931546092033386, "eval_runtime": 3.4059, "eval_samples_per_second": 14.681, "eval_steps_per_second": 3.817, "step": 400 }, { "epoch": 8.29, "learning_rate": 1e-05, "loss": 0.1869, "step": 402 }, { "epoch": 8.33, "learning_rate": 1e-05, "loss": 0.2314, "step": 404 }, { "epoch": 8.37, "learning_rate": 1e-05, "loss": 0.2405, "step": 406 }, { "epoch": 8.41, "learning_rate": 1e-05, "loss": 0.1779, "step": 408 }, { "epoch": 8.45, "learning_rate": 1e-05, "loss": 0.2458, "step": 410 }, { "epoch": 8.45, "eval_accuracy": 0.8999999761581421, "eval_average_score": -0.21540480852127075, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2680150270462036, "eval_runtime": 3.4085, "eval_samples_per_second": 14.669, "eval_steps_per_second": 3.814, "step": 410 }, { "epoch": 8.49, "learning_rate": 1e-05, "loss": 0.1723, "step": 412 }, { "epoch": 8.54, "learning_rate": 1e-05, "loss": 0.3219, "step": 414 }, { "epoch": 8.58, "learning_rate": 1e-05, "loss": 0.243, "step": 416 }, { "epoch": 8.62, "learning_rate": 1e-05, "loss": 0.2095, "step": 418 }, { "epoch": 8.66, "learning_rate": 1e-05, "loss": 0.1976, "step": 420 }, { "epoch": 8.66, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.17054100334644318, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2620932459831238, "eval_runtime": 3.4133, "eval_samples_per_second": 14.648, "eval_steps_per_second": 3.809, "step": 420 }, { "epoch": 8.7, "learning_rate": 1e-05, "loss": 0.1717, "step": 422 }, { "epoch": 8.74, "learning_rate": 1e-05, "loss": 0.2759, "step": 424 }, { "epoch": 8.78, "learning_rate": 1e-05, "loss": 0.3572, "step": 426 }, { "epoch": 8.82, "learning_rate": 1e-05, "loss": 0.1688, "step": 428 }, { "epoch": 8.87, "learning_rate": 1e-05, "loss": 0.2358, "step": 430 }, { "epoch": 8.87, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.2345839887857437, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2618144154548645, "eval_runtime": 3.406, "eval_samples_per_second": 14.68, "eval_steps_per_second": 3.817, "step": 430 }, { "epoch": 8.91, "learning_rate": 1e-05, "loss": 0.1975, "step": 432 }, { "epoch": 8.95, "learning_rate": 1e-05, "loss": 0.3316, "step": 434 }, { "epoch": 8.99, "learning_rate": 1e-05, "loss": 0.1964, "step": 436 }, { "epoch": 9.03, "learning_rate": 1e-05, "loss": 0.2148, "step": 438 }, { "epoch": 9.07, "learning_rate": 1e-05, "loss": 0.1855, "step": 440 }, { "epoch": 9.07, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.04021953418850899, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2684229612350464, "eval_runtime": 3.4104, "eval_samples_per_second": 14.661, "eval_steps_per_second": 3.812, "step": 440 }, { "epoch": 9.11, "learning_rate": 1e-05, "loss": 0.2186, "step": 442 }, { "epoch": 9.15, "learning_rate": 1e-05, "loss": 0.1604, "step": 444 }, { "epoch": 9.2, "learning_rate": 1e-05, "loss": 0.1649, "step": 446 }, { "epoch": 9.24, "learning_rate": 1e-05, "loss": 0.2496, "step": 448 }, { "epoch": 9.28, "learning_rate": 1e-05, "loss": 0.3004, "step": 450 }, { "epoch": 9.28, "eval_accuracy": 0.8999999761581421, "eval_average_score": 0.08485334366559982, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.26888754963874817, "eval_runtime": 3.4093, "eval_samples_per_second": 14.666, "eval_steps_per_second": 3.813, "step": 450 }, { "epoch": 9.32, "learning_rate": 1e-05, "loss": 0.2056, "step": 452 }, { "epoch": 9.36, "learning_rate": 1e-05, "loss": 0.2284, "step": 454 }, { "epoch": 9.4, "learning_rate": 1e-05, "loss": 0.2529, "step": 456 }, { "epoch": 9.44, "learning_rate": 1e-05, "loss": 0.2847, "step": 458 }, { "epoch": 9.48, "learning_rate": 1e-05, "loss": 0.2804, "step": 460 }, { "epoch": 9.48, "eval_accuracy": 0.9200000166893005, "eval_average_score": -0.19331374764442444, "eval_label_positive_rate": 0.47999998927116394, "eval_loss": 0.2731383442878723, "eval_runtime": 3.4089, "eval_samples_per_second": 14.668, "eval_steps_per_second": 3.814, "step": 460 } ], "max_steps": 480, "num_train_epochs": 10, "total_flos": 8.790703793881743e+17, "trial_name": null, "trial_params": null }