byt5-base-es_guc / trainer_state.json
mekjr1's picture
End of training
a60f308
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 3810,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.0167,
"eval_gen_len": 19.0,
"eval_loss": 1.0651702880859375,
"eval_runtime": 37.0111,
"eval_samples_per_second": 20.588,
"eval_steps_per_second": 1.297,
"step": 381
},
{
"epoch": 1.31,
"learning_rate": 4.343832020997376e-05,
"loss": 1.5288,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.0402,
"eval_gen_len": 19.0,
"eval_loss": 0.933931291103363,
"eval_runtime": 37.1713,
"eval_samples_per_second": 20.5,
"eval_steps_per_second": 1.291,
"step": 762
},
{
"epoch": 2.62,
"learning_rate": 3.6876640419947505e-05,
"loss": 1.063,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.0311,
"eval_gen_len": 19.0,
"eval_loss": 0.8651323318481445,
"eval_runtime": 37.9241,
"eval_samples_per_second": 20.093,
"eval_steps_per_second": 1.266,
"step": 1143
},
{
"epoch": 3.94,
"learning_rate": 3.0314960629921263e-05,
"loss": 0.9558,
"step": 1500
},
{
"epoch": 4.0,
"eval_bleu": 0.1018,
"eval_gen_len": 19.0,
"eval_loss": 0.8271389603614807,
"eval_runtime": 37.7015,
"eval_samples_per_second": 20.211,
"eval_steps_per_second": 1.273,
"step": 1524
},
{
"epoch": 5.0,
"eval_bleu": 0.0744,
"eval_gen_len": 19.0,
"eval_loss": 0.8043217062950134,
"eval_runtime": 38.014,
"eval_samples_per_second": 20.045,
"eval_steps_per_second": 1.263,
"step": 1905
},
{
"epoch": 5.25,
"learning_rate": 2.3753280839895015e-05,
"loss": 0.8979,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 0.0786,
"eval_gen_len": 19.0,
"eval_loss": 0.7830905318260193,
"eval_runtime": 37.5069,
"eval_samples_per_second": 20.316,
"eval_steps_per_second": 1.28,
"step": 2286
},
{
"epoch": 6.56,
"learning_rate": 1.7191601049868766e-05,
"loss": 0.8598,
"step": 2500
},
{
"epoch": 7.0,
"eval_bleu": 0.086,
"eval_gen_len": 19.0,
"eval_loss": 0.7698926329612732,
"eval_runtime": 37.7633,
"eval_samples_per_second": 20.178,
"eval_steps_per_second": 1.271,
"step": 2667
},
{
"epoch": 7.87,
"learning_rate": 1.062992125984252e-05,
"loss": 0.8346,
"step": 3000
},
{
"epoch": 8.0,
"eval_bleu": 0.0803,
"eval_gen_len": 19.0,
"eval_loss": 0.7630091309547424,
"eval_runtime": 37.5503,
"eval_samples_per_second": 20.293,
"eval_steps_per_second": 1.278,
"step": 3048
},
{
"epoch": 9.0,
"eval_bleu": 0.1179,
"eval_gen_len": 19.0,
"eval_loss": 0.7571505904197693,
"eval_runtime": 37.5775,
"eval_samples_per_second": 20.278,
"eval_steps_per_second": 1.277,
"step": 3429
},
{
"epoch": 9.19,
"learning_rate": 4.068241469816273e-06,
"loss": 0.8194,
"step": 3500
},
{
"epoch": 10.0,
"eval_bleu": 0.1133,
"eval_gen_len": 19.0,
"eval_loss": 0.7551639676094055,
"eval_runtime": 37.5909,
"eval_samples_per_second": 20.271,
"eval_steps_per_second": 1.277,
"step": 3810
},
{
"epoch": 10.0,
"step": 3810,
"total_flos": 2.167800665997312e+16,
"train_loss": 0.9795461539521305,
"train_runtime": 2020.5453,
"train_samples_per_second": 30.17,
"train_steps_per_second": 1.886
}
],
"max_steps": 3810,
"num_train_epochs": 10,
"total_flos": 2.167800665997312e+16,
"trial_name": null,
"trial_params": null
}