|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 3810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.0167, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0651702880859375, |
|
"eval_runtime": 37.0111, |
|
"eval_samples_per_second": 20.588, |
|
"eval_steps_per_second": 1.297, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.343832020997376e-05, |
|
"loss": 1.5288, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.0402, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.933931291103363, |
|
"eval_runtime": 37.1713, |
|
"eval_samples_per_second": 20.5, |
|
"eval_steps_per_second": 1.291, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.6876640419947505e-05, |
|
"loss": 1.063, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.0311, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8651323318481445, |
|
"eval_runtime": 37.9241, |
|
"eval_samples_per_second": 20.093, |
|
"eval_steps_per_second": 1.266, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.0314960629921263e-05, |
|
"loss": 0.9558, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.1018, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8271389603614807, |
|
"eval_runtime": 37.7015, |
|
"eval_samples_per_second": 20.211, |
|
"eval_steps_per_second": 1.273, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.0744, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8043217062950134, |
|
"eval_runtime": 38.014, |
|
"eval_samples_per_second": 20.045, |
|
"eval_steps_per_second": 1.263, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 2.3753280839895015e-05, |
|
"loss": 0.8979, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.0786, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7830905318260193, |
|
"eval_runtime": 37.5069, |
|
"eval_samples_per_second": 20.316, |
|
"eval_steps_per_second": 1.28, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.7191601049868766e-05, |
|
"loss": 0.8598, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.086, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7698926329612732, |
|
"eval_runtime": 37.7633, |
|
"eval_samples_per_second": 20.178, |
|
"eval_steps_per_second": 1.271, |
|
"step": 2667 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 1.062992125984252e-05, |
|
"loss": 0.8346, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.0803, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7630091309547424, |
|
"eval_runtime": 37.5503, |
|
"eval_samples_per_second": 20.293, |
|
"eval_steps_per_second": 1.278, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.1179, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7571505904197693, |
|
"eval_runtime": 37.5775, |
|
"eval_samples_per_second": 20.278, |
|
"eval_steps_per_second": 1.277, |
|
"step": 3429 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.068241469816273e-06, |
|
"loss": 0.8194, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.1133, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7551639676094055, |
|
"eval_runtime": 37.5909, |
|
"eval_samples_per_second": 20.271, |
|
"eval_steps_per_second": 1.277, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3810, |
|
"total_flos": 2.167800665997312e+16, |
|
"train_loss": 0.9795461539521305, |
|
"train_runtime": 2020.5453, |
|
"train_samples_per_second": 30.17, |
|
"train_steps_per_second": 1.886 |
|
} |
|
], |
|
"max_steps": 3810, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.167800665997312e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|