{ "best_metric": null, "best_model_checkpoint": null, "epoch": 65.01950585175553, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33, "learning_rate": 9.999383162408304e-05, "loss": 1.7573, "step": 500 }, { "epoch": 0.65, "learning_rate": 9.997532801828658e-05, "loss": 1.7515, "step": 1000 }, { "epoch": 0.98, "learning_rate": 9.994449374809851e-05, "loss": 1.7622, "step": 1500 }, { "epoch": 1.3, "learning_rate": 9.990133642141359e-05, "loss": 1.7166, "step": 2000 }, { "epoch": 1.63, "learning_rate": 9.98458666866564e-05, "loss": 1.7249, "step": 2500 }, { "epoch": 1.95, "learning_rate": 9.977809823015401e-05, "loss": 1.7108, "step": 3000 }, { "epoch": 2.28, "learning_rate": 9.9698047772759e-05, "loss": 1.6752, "step": 3500 }, { "epoch": 2.6, "learning_rate": 9.96057350657239e-05, "loss": 1.6668, "step": 4000 }, { "epoch": 2.93, "learning_rate": 9.950118288582788e-05, "loss": 1.6827, "step": 4500 }, { "epoch": 3.25, "learning_rate": 9.938441702975689e-05, "loss": 1.641, "step": 5000 }, { "epoch": 3.58, "learning_rate": 9.92554663077387e-05, "loss": 1.6448, "step": 5500 }, { "epoch": 3.9, "learning_rate": 9.911436253643445e-05, "loss": 1.6305, "step": 6000 }, { "epoch": 4.23, "learning_rate": 9.896114053108829e-05, "loss": 1.6062, "step": 6500 }, { "epoch": 4.55, "learning_rate": 9.879583809693738e-05, "loss": 1.5972, "step": 7000 }, { "epoch": 4.88, "learning_rate": 9.861849601988383e-05, "loss": 1.6113, "step": 7500 }, { "epoch": 5.2, "learning_rate": 9.842915805643155e-05, "loss": 1.5867, "step": 8000 }, { "epoch": 5.53, "learning_rate": 9.822787092288991e-05, "loss": 1.5802, "step": 8500 }, { "epoch": 5.85, "learning_rate": 9.801468428384716e-05, "loss": 1.5769, "step": 9000 }, { "epoch": 6.18, "learning_rate": 9.778965073991651e-05, "loss": 1.5605, "step": 9500 }, { "epoch": 6.5, "learning_rate": 9.755282581475769e-05, "loss": 1.5464, "step": 10000 }, { "epoch": 6.5, "eval_loss": 1.9329419136047363, "eval_runtime": 3.0765, "eval_samples_per_second": 499.912, "eval_steps_per_second": 31.529, "step": 10000 }, { "epoch": 6.83, "learning_rate": 9.730426794137727e-05, "loss": 1.527, "step": 10500 }, { "epoch": 7.15, "learning_rate": 9.704403844771128e-05, "loss": 1.5289, "step": 11000 }, { "epoch": 7.48, "learning_rate": 9.677220154149337e-05, "loss": 1.5033, "step": 11500 }, { "epoch": 7.8, "learning_rate": 9.648882429441257e-05, "loss": 1.5205, "step": 12000 }, { "epoch": 8.13, "learning_rate": 9.619397662556435e-05, "loss": 1.4979, "step": 12500 }, { "epoch": 8.45, "learning_rate": 9.588773128419906e-05, "loss": 1.4869, "step": 13000 }, { "epoch": 8.78, "learning_rate": 9.557016383177227e-05, "loss": 1.499, "step": 13500 }, { "epoch": 9.1, "learning_rate": 9.524135262330098e-05, "loss": 1.4814, "step": 14000 }, { "epoch": 9.43, "learning_rate": 9.490137878803079e-05, "loss": 1.4447, "step": 14500 }, { "epoch": 9.75, "learning_rate": 9.45503262094184e-05, "loss": 1.4686, "step": 15000 }, { "epoch": 10.08, "learning_rate": 9.418828150443469e-05, "loss": 1.4544, "step": 15500 }, { "epoch": 10.4, "learning_rate": 9.381533400219318e-05, "loss": 1.4202, "step": 16000 }, { "epoch": 10.73, "learning_rate": 9.343157572190957e-05, "loss": 1.4411, "step": 16500 }, { "epoch": 11.05, "learning_rate": 9.30371013501972e-05, "loss": 1.4283, "step": 17000 }, { "epoch": 11.38, "learning_rate": 9.263200821770461e-05, "loss": 1.4019, "step": 17500 }, { "epoch": 11.7, "learning_rate": 9.221639627510076e-05, "loss": 1.3973, "step": 18000 }, { "epoch": 12.03, "learning_rate": 9.179036806841353e-05, "loss": 1.4206, "step": 18500 }, { "epoch": 12.35, "learning_rate": 9.135402871372808e-05, "loss": 1.3621, "step": 19000 }, { "epoch": 12.68, "learning_rate": 9.090748587125118e-05, "loss": 1.3871, "step": 19500 }, { "epoch": 13.0, "learning_rate": 9.045084971874738e-05, "loss": 1.3845, "step": 20000 }, { "epoch": 13.0, "eval_loss": 2.0226035118103027, "eval_runtime": 2.951, "eval_samples_per_second": 521.185, "eval_steps_per_second": 32.871, "step": 20000 }, { "epoch": 13.33, "learning_rate": 8.998423292435454e-05, "loss": 1.3537, "step": 20500 }, { "epoch": 13.65, "learning_rate": 8.950775061878453e-05, "loss": 1.3523, "step": 21000 }, { "epoch": 13.98, "learning_rate": 8.90215203669165e-05, "loss": 1.3479, "step": 21500 }, { "epoch": 14.3, "learning_rate": 8.852566213878947e-05, "loss": 1.3374, "step": 22000 }, { "epoch": 14.63, "learning_rate": 8.802029828000156e-05, "loss": 1.333, "step": 22500 }, { "epoch": 14.95, "learning_rate": 8.750555348152298e-05, "loss": 1.3351, "step": 23000 }, { "epoch": 15.28, "learning_rate": 8.69815547489305e-05, "loss": 1.3062, "step": 23500 }, { "epoch": 15.6, "learning_rate": 8.644843137107059e-05, "loss": 1.3155, "step": 24000 }, { "epoch": 15.93, "learning_rate": 8.590631488815944e-05, "loss": 1.3185, "step": 24500 }, { "epoch": 16.25, "learning_rate": 8.535533905932738e-05, "loss": 1.2986, "step": 25000 }, { "epoch": 16.58, "learning_rate": 8.479563982961571e-05, "loss": 1.2964, "step": 25500 }, { "epoch": 16.91, "learning_rate": 8.422735529643444e-05, "loss": 1.2848, "step": 26000 }, { "epoch": 17.23, "learning_rate": 8.365062567548867e-05, "loss": 1.2745, "step": 26500 }, { "epoch": 17.56, "learning_rate": 8.306559326618259e-05, "loss": 1.2786, "step": 27000 }, { "epoch": 17.88, "learning_rate": 8.247240241650918e-05, "loss": 1.2679, "step": 27500 }, { "epoch": 18.21, "learning_rate": 8.18711994874345e-05, "loss": 1.2481, "step": 28000 }, { "epoch": 18.53, "learning_rate": 8.126213281678528e-05, "loss": 1.228, "step": 28500 }, { "epoch": 18.86, "learning_rate": 8.064535268264883e-05, "loss": 1.2452, "step": 29000 }, { "epoch": 19.18, "learning_rate": 8.002101126629421e-05, "loss": 1.2361, "step": 29500 }, { "epoch": 19.51, "learning_rate": 7.938926261462366e-05, "loss": 1.2254, "step": 30000 }, { "epoch": 19.51, "eval_loss": 2.1522765159606934, "eval_runtime": 2.9695, "eval_samples_per_second": 517.928, "eval_steps_per_second": 32.665, "step": 30000 }, { "epoch": 19.83, "learning_rate": 7.875026260216393e-05, "loss": 1.2298, "step": 30500 }, { "epoch": 20.16, "learning_rate": 7.810416889260653e-05, "loss": 1.2179, "step": 31000 }, { "epoch": 20.48, "learning_rate": 7.74511408999066e-05, "loss": 1.2067, "step": 31500 }, { "epoch": 20.81, "learning_rate": 7.679133974894983e-05, "loss": 1.2117, "step": 32000 }, { "epoch": 21.13, "learning_rate": 7.612492823579745e-05, "loss": 1.186, "step": 32500 }, { "epoch": 21.46, "learning_rate": 7.545207078751857e-05, "loss": 1.177, "step": 33000 }, { "epoch": 21.78, "learning_rate": 7.477293342162039e-05, "loss": 1.1876, "step": 33500 }, { "epoch": 22.11, "learning_rate": 7.408768370508576e-05, "loss": 1.1808, "step": 34000 }, { "epoch": 22.43, "learning_rate": 7.339649071302867e-05, "loss": 1.1701, "step": 34500 }, { "epoch": 22.76, "learning_rate": 7.269952498697734e-05, "loss": 1.1714, "step": 35000 }, { "epoch": 23.08, "learning_rate": 7.199695849279576e-05, "loss": 1.1605, "step": 35500 }, { "epoch": 23.41, "learning_rate": 7.128896457825364e-05, "loss": 1.155, "step": 36000 }, { "epoch": 23.73, "learning_rate": 7.057571793025544e-05, "loss": 1.148, "step": 36500 }, { "epoch": 24.06, "learning_rate": 6.985739453173903e-05, "loss": 1.142, "step": 37000 }, { "epoch": 24.38, "learning_rate": 6.91341716182545e-05, "loss": 1.1411, "step": 37500 }, { "epoch": 24.71, "learning_rate": 6.840622763423391e-05, "loss": 1.1299, "step": 38000 }, { "epoch": 25.03, "learning_rate": 6.767374218896286e-05, "loss": 1.1411, "step": 38500 }, { "epoch": 25.36, "learning_rate": 6.693689601226458e-05, "loss": 1.1077, "step": 39000 }, { "epoch": 25.68, "learning_rate": 6.619587090990748e-05, "loss": 1.12, "step": 39500 }, { "epoch": 26.01, "learning_rate": 6.545084971874738e-05, "loss": 1.1234, "step": 40000 }, { "epoch": 26.01, "eval_loss": 2.1873600482940674, "eval_runtime": 2.9334, "eval_samples_per_second": 524.314, "eval_steps_per_second": 33.068, "step": 40000 }, { "epoch": 26.33, "learning_rate": 6.47020162616152e-05, "loss": 1.0914, "step": 40500 }, { "epoch": 26.66, "learning_rate": 6.394955530196147e-05, "loss": 1.0948, "step": 41000 }, { "epoch": 26.98, "learning_rate": 6.319365249826865e-05, "loss": 1.104, "step": 41500 }, { "epoch": 27.31, "learning_rate": 6.243449435824276e-05, "loss": 1.0838, "step": 42000 }, { "epoch": 27.63, "learning_rate": 6.167226819279528e-05, "loss": 1.0952, "step": 42500 }, { "epoch": 27.96, "learning_rate": 6.090716206982714e-05, "loss": 1.0876, "step": 43000 }, { "epoch": 28.28, "learning_rate": 6.0139364767825626e-05, "loss": 1.0641, "step": 43500 }, { "epoch": 28.61, "learning_rate": 5.9369065729286245e-05, "loss": 1.0735, "step": 44000 }, { "epoch": 28.93, "learning_rate": 5.859645501397048e-05, "loss": 1.066, "step": 44500 }, { "epoch": 29.26, "learning_rate": 5.782172325201155e-05, "loss": 1.0598, "step": 45000 }, { "epoch": 29.58, "learning_rate": 5.7045061596879134e-05, "loss": 1.0457, "step": 45500 }, { "epoch": 29.91, "learning_rate": 5.6266661678215216e-05, "loss": 1.0597, "step": 46000 }, { "epoch": 30.23, "learning_rate": 5.548671555455226e-05, "loss": 1.041, "step": 46500 }, { "epoch": 30.56, "learning_rate": 5.470541566592573e-05, "loss": 1.0387, "step": 47000 }, { "epoch": 30.88, "learning_rate": 5.392295478639225e-05, "loss": 1.0336, "step": 47500 }, { "epoch": 31.21, "learning_rate": 5.313952597646568e-05, "loss": 1.0336, "step": 48000 }, { "epoch": 31.53, "learning_rate": 5.235532253548213e-05, "loss": 1.0209, "step": 48500 }, { "epoch": 31.86, "learning_rate": 5.157053795390642e-05, "loss": 1.0265, "step": 49000 }, { "epoch": 32.18, "learning_rate": 5.078536586559104e-05, "loss": 1.0256, "step": 49500 }, { "epoch": 32.51, "learning_rate": 5e-05, "loss": 1.0087, "step": 50000 }, { "epoch": 32.51, "eval_loss": 2.3058323860168457, "eval_runtime": 3.027, "eval_samples_per_second": 508.087, "eval_steps_per_second": 32.045, "step": 50000 }, { "epoch": 32.83, "learning_rate": 4.921463413440898e-05, "loss": 1.0137, "step": 50500 }, { "epoch": 33.16, "learning_rate": 4.8429462046093585e-05, "loss": 1.0106, "step": 51000 }, { "epoch": 33.49, "learning_rate": 4.7644677464517874e-05, "loss": 1.008, "step": 51500 }, { "epoch": 33.81, "learning_rate": 4.6860474023534335e-05, "loss": 1.0014, "step": 52000 }, { "epoch": 34.14, "learning_rate": 4.607704521360776e-05, "loss": 0.9956, "step": 52500 }, { "epoch": 34.46, "learning_rate": 4.529458433407429e-05, "loss": 0.9926, "step": 53000 }, { "epoch": 34.79, "learning_rate": 4.451328444544774e-05, "loss": 0.9865, "step": 53500 }, { "epoch": 35.11, "learning_rate": 4.373333832178478e-05, "loss": 0.9809, "step": 54000 }, { "epoch": 35.44, "learning_rate": 4.295493840312087e-05, "loss": 0.9809, "step": 54500 }, { "epoch": 35.76, "learning_rate": 4.2178276747988446e-05, "loss": 0.9785, "step": 55000 }, { "epoch": 36.09, "learning_rate": 4.140354498602952e-05, "loss": 0.9749, "step": 55500 }, { "epoch": 36.41, "learning_rate": 4.063093427071376e-05, "loss": 0.9737, "step": 56000 }, { "epoch": 36.74, "learning_rate": 3.986063523217439e-05, "loss": 0.9697, "step": 56500 }, { "epoch": 37.06, "learning_rate": 3.9092837930172884e-05, "loss": 0.9528, "step": 57000 }, { "epoch": 37.39, "learning_rate": 3.832773180720475e-05, "loss": 0.9622, "step": 57500 }, { "epoch": 37.71, "learning_rate": 3.756550564175727e-05, "loss": 0.9613, "step": 58000 }, { "epoch": 38.04, "learning_rate": 3.680634750173137e-05, "loss": 0.954, "step": 58500 }, { "epoch": 38.36, "learning_rate": 3.605044469803854e-05, "loss": 0.9494, "step": 59000 }, { "epoch": 38.69, "learning_rate": 3.529798373838481e-05, "loss": 0.9451, "step": 59500 }, { "epoch": 39.01, "learning_rate": 3.4549150281252636e-05, "loss": 0.946, "step": 60000 }, { "epoch": 39.01, "eval_loss": 2.416886568069458, "eval_runtime": 2.9777, "eval_samples_per_second": 516.506, "eval_steps_per_second": 32.575, "step": 60000 }, { "epoch": 39.34, "learning_rate": 3.380412909009254e-05, "loss": 0.939, "step": 60500 }, { "epoch": 39.66, "learning_rate": 3.3063103987735433e-05, "loss": 0.9388, "step": 61000 }, { "epoch": 39.99, "learning_rate": 3.2326257811037155e-05, "loss": 0.9315, "step": 61500 }, { "epoch": 40.31, "learning_rate": 3.1593772365766105e-05, "loss": 0.9318, "step": 62000 }, { "epoch": 40.64, "learning_rate": 3.086582838174551e-05, "loss": 0.9243, "step": 62500 }, { "epoch": 40.96, "learning_rate": 3.0142605468260978e-05, "loss": 0.9259, "step": 63000 }, { "epoch": 41.29, "learning_rate": 2.9424282069744564e-05, "loss": 0.9274, "step": 63500 }, { "epoch": 41.61, "learning_rate": 2.8711035421746367e-05, "loss": 0.9215, "step": 64000 }, { "epoch": 41.94, "learning_rate": 2.8003041507204242e-05, "loss": 0.9226, "step": 64500 }, { "epoch": 42.26, "learning_rate": 2.7300475013022663e-05, "loss": 0.9104, "step": 65000 }, { "epoch": 42.59, "learning_rate": 2.660350928697134e-05, "loss": 0.91, "step": 65500 }, { "epoch": 42.91, "learning_rate": 2.591231629491423e-05, "loss": 0.9108, "step": 66000 }, { "epoch": 43.24, "learning_rate": 2.522706657837962e-05, "loss": 0.9068, "step": 66500 }, { "epoch": 43.56, "learning_rate": 2.4547929212481435e-05, "loss": 0.9092, "step": 67000 }, { "epoch": 43.89, "learning_rate": 2.3875071764202563e-05, "loss": 0.9059, "step": 67500 }, { "epoch": 44.21, "learning_rate": 2.3208660251050158e-05, "loss": 0.8956, "step": 68000 }, { "epoch": 44.54, "learning_rate": 2.2548859100093407e-05, "loss": 0.9111, "step": 68500 }, { "epoch": 44.86, "learning_rate": 2.1895831107393484e-05, "loss": 0.9063, "step": 69000 }, { "epoch": 45.19, "learning_rate": 2.124973739783609e-05, "loss": 0.897, "step": 69500 }, { "epoch": 45.51, "learning_rate": 2.061073738537635e-05, "loss": 0.8985, "step": 70000 }, { "epoch": 45.51, "eval_loss": 2.46510648727417, "eval_runtime": 3.0226, "eval_samples_per_second": 508.83, "eval_steps_per_second": 32.091, "step": 70000 }, { "epoch": 45.84, "learning_rate": 1.9978988733705807e-05, "loss": 0.8948, "step": 70500 }, { "epoch": 46.16, "learning_rate": 1.9354647317351188e-05, "loss": 0.8855, "step": 71000 }, { "epoch": 46.49, "learning_rate": 1.8737867183214757e-05, "loss": 0.8908, "step": 71500 }, { "epoch": 46.81, "learning_rate": 1.8128800512565513e-05, "loss": 0.8945, "step": 72000 }, { "epoch": 47.14, "learning_rate": 1.7527597583490822e-05, "loss": 0.8896, "step": 72500 }, { "epoch": 47.46, "learning_rate": 1.6934406733817414e-05, "loss": 0.8817, "step": 73000 }, { "epoch": 47.79, "learning_rate": 1.6349374324511345e-05, "loss": 0.8842, "step": 73500 }, { "epoch": 48.11, "learning_rate": 1.5772644703565565e-05, "loss": 0.8852, "step": 74000 }, { "epoch": 48.44, "learning_rate": 1.5204360170384286e-05, "loss": 0.8803, "step": 74500 }, { "epoch": 48.76, "learning_rate": 1.4644660940672627e-05, "loss": 0.8763, "step": 75000 }, { "epoch": 49.09, "learning_rate": 1.4093685111840566e-05, "loss": 0.8765, "step": 75500 }, { "epoch": 49.41, "learning_rate": 1.3551568628929434e-05, "loss": 0.8757, "step": 76000 }, { "epoch": 49.74, "learning_rate": 1.301844525106951e-05, "loss": 0.8738, "step": 76500 }, { "epoch": 50.07, "learning_rate": 1.2494446518477022e-05, "loss": 0.8683, "step": 77000 }, { "epoch": 50.39, "learning_rate": 1.1979701719998453e-05, "loss": 0.8791, "step": 77500 }, { "epoch": 50.72, "learning_rate": 1.1474337861210543e-05, "loss": 0.867, "step": 78000 }, { "epoch": 51.04, "learning_rate": 1.097847963308351e-05, "loss": 0.8751, "step": 78500 }, { "epoch": 51.37, "learning_rate": 1.049224938121548e-05, "loss": 0.868, "step": 79000 }, { "epoch": 51.69, "learning_rate": 1.0015767075645471e-05, "loss": 0.8604, "step": 79500 }, { "epoch": 52.02, "learning_rate": 9.549150281252633e-06, "loss": 0.8658, "step": 80000 }, { "epoch": 52.02, "eval_loss": 2.4832496643066406, "eval_runtime": 2.9713, "eval_samples_per_second": 517.619, "eval_steps_per_second": 32.646, "step": 80000 }, { "epoch": 52.34, "learning_rate": 9.09251412874882e-06, "loss": 0.866, "step": 80500 }, { "epoch": 52.67, "learning_rate": 8.645971286271904e-06, "loss": 0.8648, "step": 81000 }, { "epoch": 52.99, "learning_rate": 8.209631931586498e-06, "loss": 0.8655, "step": 81500 }, { "epoch": 53.32, "learning_rate": 7.783603724899257e-06, "loss": 0.8679, "step": 82000 }, { "epoch": 53.64, "learning_rate": 7.367991782295391e-06, "loss": 0.8555, "step": 82500 }, { "epoch": 53.97, "learning_rate": 6.962898649802823e-06, "loss": 0.8593, "step": 83000 }, { "epoch": 54.29, "learning_rate": 6.568424278090446e-06, "loss": 0.863, "step": 83500 }, { "epoch": 54.62, "learning_rate": 6.184665997806832e-06, "loss": 0.8577, "step": 84000 }, { "epoch": 54.94, "learning_rate": 5.811718495565327e-06, "loss": 0.8646, "step": 84500 }, { "epoch": 55.27, "learning_rate": 5.449673790581611e-06, "loss": 0.8512, "step": 85000 }, { "epoch": 55.59, "learning_rate": 5.098621211969223e-06, "loss": 0.8623, "step": 85500 }, { "epoch": 55.92, "learning_rate": 4.758647376699032e-06, "loss": 0.8535, "step": 86000 }, { "epoch": 56.24, "learning_rate": 4.429836168227735e-06, "loss": 0.8414, "step": 86500 }, { "epoch": 56.57, "learning_rate": 4.112268715800943e-06, "loss": 0.8644, "step": 87000 }, { "epoch": 56.89, "learning_rate": 3.8060233744356633e-06, "loss": 0.8567, "step": 87500 }, { "epoch": 57.22, "learning_rate": 3.511175705587433e-06, "loss": 0.8577, "step": 88000 }, { "epoch": 57.54, "learning_rate": 3.2277984585066366e-06, "loss": 0.855, "step": 88500 }, { "epoch": 57.87, "learning_rate": 2.9559615522887273e-06, "loss": 0.8384, "step": 89000 }, { "epoch": 58.19, "learning_rate": 2.695732058622735e-06, "loss": 0.8549, "step": 89500 }, { "epoch": 58.52, "learning_rate": 2.4471741852423237e-06, "loss": 0.8479, "step": 90000 }, { "epoch": 58.52, "eval_loss": 2.5272536277770996, "eval_runtime": 2.9755, "eval_samples_per_second": 516.881, "eval_steps_per_second": 32.599, "step": 90000 }, { "epoch": 58.84, "learning_rate": 2.210349260083494e-06, "loss": 0.8509, "step": 90500 }, { "epoch": 59.17, "learning_rate": 1.985315716152847e-06, "loss": 0.8535, "step": 91000 }, { "epoch": 59.49, "learning_rate": 1.7721290771100961e-06, "loss": 0.8562, "step": 91500 }, { "epoch": 59.82, "learning_rate": 1.5708419435684462e-06, "loss": 0.8448, "step": 92000 }, { "epoch": 60.14, "learning_rate": 1.3815039801161721e-06, "loss": 0.8514, "step": 92500 }, { "epoch": 60.47, "learning_rate": 1.2041619030626284e-06, "loss": 0.8516, "step": 93000 }, { "epoch": 60.79, "learning_rate": 1.0388594689117071e-06, "loss": 0.8464, "step": 93500 }, { "epoch": 61.12, "learning_rate": 8.856374635655695e-07, "loss": 0.8573, "step": 94000 }, { "epoch": 61.44, "learning_rate": 7.445336922613067e-07, "loss": 0.8526, "step": 94500 }, { "epoch": 61.77, "learning_rate": 6.15582970243117e-07, "loss": 0.8404, "step": 95000 }, { "epoch": 62.09, "learning_rate": 4.988171141721232e-07, "loss": 0.8532, "step": 95500 }, { "epoch": 62.42, "learning_rate": 3.9426493427611177e-07, "loss": 0.852, "step": 96000 }, { "epoch": 62.74, "learning_rate": 3.019522272410202e-07, "loss": 0.8508, "step": 96500 }, { "epoch": 63.07, "learning_rate": 2.219017698460002e-07, "loss": 0.8475, "step": 97000 }, { "epoch": 63.39, "learning_rate": 1.5413331334360182e-07, "loss": 0.8491, "step": 97500 }, { "epoch": 63.72, "learning_rate": 9.866357858642205e-08, "loss": 0.8448, "step": 98000 }, { "epoch": 64.04, "learning_rate": 5.550625190150483e-08, "loss": 0.8596, "step": 98500 }, { "epoch": 64.37, "learning_rate": 2.467198171342e-08, "loss": 0.8478, "step": 99000 }, { "epoch": 64.69, "learning_rate": 6.1683759169706146e-09, "loss": 0.8531, "step": 99500 }, { "epoch": 65.02, "learning_rate": 0.0, "loss": 0.8508, "step": 100000 }, { "epoch": 65.02, "eval_loss": 2.5495502948760986, "eval_runtime": 3.0032, "eval_samples_per_second": 512.121, "eval_steps_per_second": 32.299, "step": 100000 } ], "max_steps": 100000, "num_train_epochs": 66, "total_flos": 1.258672428813312e+17, "trial_name": null, "trial_params": null }