{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9141551206970433, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 2.2499999999999998e-05, "loss": 5.1881, "step": 50 }, { "epoch": 0.06, "eval_loss": 5.518892288208008, "eval_runtime": 281.7614, "eval_samples_per_second": 16.539, "eval_steps_per_second": 16.539, "step": 50 }, { "epoch": 0.11, "learning_rate": 4.75e-05, "loss": 3.8191, "step": 100 }, { "epoch": 0.11, "eval_loss": 2.910094738006592, "eval_runtime": 282.3915, "eval_samples_per_second": 16.502, "eval_steps_per_second": 16.502, "step": 100 }, { "epoch": 0.17, "learning_rate": 7.25e-05, "loss": 2.5718, "step": 150 }, { "epoch": 0.17, "eval_loss": 2.240684986114502, "eval_runtime": 283.3137, "eval_samples_per_second": 16.448, "eval_steps_per_second": 16.448, "step": 150 }, { "epoch": 0.23, "learning_rate": 9.750000000000001e-05, "loss": 2.3477, "step": 200 }, { "epoch": 0.23, "eval_loss": 2.1633121967315674, "eval_runtime": 282.161, "eval_samples_per_second": 16.515, "eval_steps_per_second": 16.515, "step": 200 }, { "epoch": 0.29, "learning_rate": 0.0001225, "loss": 2.3472, "step": 250 }, { "epoch": 0.29, "eval_loss": 2.118901252746582, "eval_runtime": 281.7032, "eval_samples_per_second": 16.542, "eval_steps_per_second": 16.542, "step": 250 }, { "epoch": 0.34, "learning_rate": 0.0001475, "loss": 2.2188, "step": 300 }, { "epoch": 0.34, "eval_loss": 2.1062021255493164, "eval_runtime": 282.3706, "eval_samples_per_second": 16.503, "eval_steps_per_second": 16.503, "step": 300 }, { "epoch": 0.4, "learning_rate": 0.0001725, "loss": 2.2085, "step": 350 }, { "epoch": 0.4, "eval_loss": 2.0873425006866455, "eval_runtime": 282.0216, "eval_samples_per_second": 16.524, "eval_steps_per_second": 16.524, "step": 350 }, { "epoch": 0.46, "learning_rate": 0.0001975, "loss": 2.1271, "step": 400 }, { "epoch": 0.46, "eval_loss": 2.0874459743499756, "eval_runtime": 283.0992, "eval_samples_per_second": 16.461, "eval_steps_per_second": 16.461, "step": 400 }, { "epoch": 0.51, "learning_rate": 0.00022250000000000001, "loss": 2.1834, "step": 450 }, { "epoch": 0.51, "eval_loss": 2.0584352016448975, "eval_runtime": 281.7518, "eval_samples_per_second": 16.539, "eval_steps_per_second": 16.539, "step": 450 }, { "epoch": 0.57, "learning_rate": 0.0002475, "loss": 2.1927, "step": 500 }, { "epoch": 0.57, "eval_loss": 2.050870656967163, "eval_runtime": 281.7194, "eval_samples_per_second": 16.541, "eval_steps_per_second": 16.541, "step": 500 }, { "epoch": 0.63, "learning_rate": 0.0002725, "loss": 2.1816, "step": 550 }, { "epoch": 0.63, "eval_loss": 2.039118766784668, "eval_runtime": 282.0613, "eval_samples_per_second": 16.521, "eval_steps_per_second": 16.521, "step": 550 }, { "epoch": 0.69, "learning_rate": 0.00029749999999999997, "loss": 2.2131, "step": 600 }, { "epoch": 0.69, "eval_loss": 2.0333147048950195, "eval_runtime": 282.4675, "eval_samples_per_second": 16.497, "eval_steps_per_second": 16.497, "step": 600 }, { "epoch": 0.74, "learning_rate": 0.00032250000000000003, "loss": 2.2322, "step": 650 }, { "epoch": 0.74, "eval_loss": 2.0386910438537598, "eval_runtime": 282.3106, "eval_samples_per_second": 16.507, "eval_steps_per_second": 16.507, "step": 650 }, { "epoch": 0.8, "learning_rate": 0.0003475, "loss": 2.2614, "step": 700 }, { "epoch": 0.8, "eval_loss": 2.025144577026367, "eval_runtime": 285.8974, "eval_samples_per_second": 16.3, "eval_steps_per_second": 16.3, "step": 700 }, { "epoch": 0.86, "learning_rate": 0.0003725, "loss": 2.186, "step": 750 }, { "epoch": 0.86, "eval_loss": 2.0244803428649902, "eval_runtime": 284.3127, "eval_samples_per_second": 16.39, "eval_steps_per_second": 16.39, "step": 750 }, { "epoch": 0.91, "learning_rate": 0.0003975, "loss": 2.1677, "step": 800 }, { "epoch": 0.91, "eval_loss": 2.0197227001190186, "eval_runtime": 284.0212, "eval_samples_per_second": 16.407, "eval_steps_per_second": 16.407, "step": 800 } ], "max_steps": 875, "num_train_epochs": 1, "total_flos": 4.324339205829427e+16, "trial_name": null, "trial_params": null }