|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9141551206970433, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 5.1881, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 5.518892288208008, |
|
"eval_runtime": 281.7614, |
|
"eval_samples_per_second": 16.539, |
|
"eval_steps_per_second": 16.539, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.75e-05, |
|
"loss": 3.8191, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.910094738006592, |
|
"eval_runtime": 282.3915, |
|
"eval_samples_per_second": 16.502, |
|
"eval_steps_per_second": 16.502, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.25e-05, |
|
"loss": 2.5718, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.240684986114502, |
|
"eval_runtime": 283.3137, |
|
"eval_samples_per_second": 16.448, |
|
"eval_steps_per_second": 16.448, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.750000000000001e-05, |
|
"loss": 2.3477, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.1633121967315674, |
|
"eval_runtime": 282.161, |
|
"eval_samples_per_second": 16.515, |
|
"eval_steps_per_second": 16.515, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001225, |
|
"loss": 2.3472, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.118901252746582, |
|
"eval_runtime": 281.7032, |
|
"eval_samples_per_second": 16.542, |
|
"eval_steps_per_second": 16.542, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001475, |
|
"loss": 2.2188, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.1062021255493164, |
|
"eval_runtime": 282.3706, |
|
"eval_samples_per_second": 16.503, |
|
"eval_steps_per_second": 16.503, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001725, |
|
"loss": 2.2085, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.0873425006866455, |
|
"eval_runtime": 282.0216, |
|
"eval_samples_per_second": 16.524, |
|
"eval_steps_per_second": 16.524, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001975, |
|
"loss": 2.1271, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.0874459743499756, |
|
"eval_runtime": 283.0992, |
|
"eval_samples_per_second": 16.461, |
|
"eval_steps_per_second": 16.461, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00022250000000000001, |
|
"loss": 2.1834, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.0584352016448975, |
|
"eval_runtime": 281.7518, |
|
"eval_samples_per_second": 16.539, |
|
"eval_steps_per_second": 16.539, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002475, |
|
"loss": 2.1927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.050870656967163, |
|
"eval_runtime": 281.7194, |
|
"eval_samples_per_second": 16.541, |
|
"eval_steps_per_second": 16.541, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002725, |
|
"loss": 2.1816, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.039118766784668, |
|
"eval_runtime": 282.0613, |
|
"eval_samples_per_second": 16.521, |
|
"eval_steps_per_second": 16.521, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00029749999999999997, |
|
"loss": 2.2131, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 2.0333147048950195, |
|
"eval_runtime": 282.4675, |
|
"eval_samples_per_second": 16.497, |
|
"eval_steps_per_second": 16.497, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00032250000000000003, |
|
"loss": 2.2322, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.0386910438537598, |
|
"eval_runtime": 282.3106, |
|
"eval_samples_per_second": 16.507, |
|
"eval_steps_per_second": 16.507, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0003475, |
|
"loss": 2.2614, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.025144577026367, |
|
"eval_runtime": 285.8974, |
|
"eval_samples_per_second": 16.3, |
|
"eval_steps_per_second": 16.3, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0003725, |
|
"loss": 2.186, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 2.0244803428649902, |
|
"eval_runtime": 284.3127, |
|
"eval_samples_per_second": 16.39, |
|
"eval_steps_per_second": 16.39, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0003975, |
|
"loss": 2.1677, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.0197227001190186, |
|
"eval_runtime": 284.0212, |
|
"eval_samples_per_second": 16.407, |
|
"eval_steps_per_second": 16.407, |
|
"step": 800 |
|
} |
|
], |
|
"max_steps": 875, |
|
"num_train_epochs": 1, |
|
"total_flos": 4.324339205829427e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|