squad_llama_finetuned / trainer_state.json
shuheng's picture
End of training
fc7edab verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 8304,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18063583815028902,
"grad_norm": 7.173945903778076,
"learning_rate": 9.397880539499037e-06,
"loss": 6.2596,
"step": 500
},
{
"epoch": 0.36127167630057805,
"grad_norm": 7.090274333953857,
"learning_rate": 8.795761078998074e-06,
"loss": 6.1274,
"step": 1000
},
{
"epoch": 0.541907514450867,
"grad_norm": 6.882017612457275,
"learning_rate": 8.19364161849711e-06,
"loss": 6.0157,
"step": 1500
},
{
"epoch": 0.7225433526011561,
"grad_norm": 6.622304916381836,
"learning_rate": 7.591522157996147e-06,
"loss": 5.9661,
"step": 2000
},
{
"epoch": 0.903179190751445,
"grad_norm": 6.847517967224121,
"learning_rate": 6.989402697495183e-06,
"loss": 5.9061,
"step": 2500
},
{
"epoch": 1.083815028901734,
"grad_norm": 6.756404399871826,
"learning_rate": 6.387283236994221e-06,
"loss": 5.8773,
"step": 3000
},
{
"epoch": 1.2644508670520231,
"grad_norm": 6.702535629272461,
"learning_rate": 5.785163776493257e-06,
"loss": 5.8402,
"step": 3500
},
{
"epoch": 1.4450867052023122,
"grad_norm": 6.539181709289551,
"learning_rate": 5.1830443159922924e-06,
"loss": 5.8124,
"step": 4000
},
{
"epoch": 1.6257225433526012,
"grad_norm": 6.716819763183594,
"learning_rate": 4.58092485549133e-06,
"loss": 5.7907,
"step": 4500
},
{
"epoch": 1.80635838150289,
"grad_norm": 6.560629367828369,
"learning_rate": 3.978805394990367e-06,
"loss": 5.769,
"step": 5000
},
{
"epoch": 1.9869942196531793,
"grad_norm": 6.585288047790527,
"learning_rate": 3.376685934489403e-06,
"loss": 5.746,
"step": 5500
},
{
"epoch": 2.167630057803468,
"grad_norm": 6.649946689605713,
"learning_rate": 2.774566473988439e-06,
"loss": 5.7399,
"step": 6000
},
{
"epoch": 2.348265895953757,
"grad_norm": 6.707363605499268,
"learning_rate": 2.1724470134874763e-06,
"loss": 5.7306,
"step": 6500
},
{
"epoch": 2.5289017341040463,
"grad_norm": 6.509890556335449,
"learning_rate": 1.5703275529865126e-06,
"loss": 5.7217,
"step": 7000
},
{
"epoch": 2.709537572254335,
"grad_norm": 6.645078659057617,
"learning_rate": 9.682080924855492e-07,
"loss": 5.7249,
"step": 7500
},
{
"epoch": 2.8901734104046244,
"grad_norm": 6.534315586090088,
"learning_rate": 3.660886319845858e-07,
"loss": 5.7126,
"step": 8000
},
{
"epoch": 3.0,
"step": 8304,
"total_flos": 1.7254148744798807e+18,
"train_loss": 5.854142925872509,
"train_runtime": 17646.7524,
"train_samples_per_second": 15.055,
"train_steps_per_second": 0.471
}
],
"logging_steps": 500,
"max_steps": 8304,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7254148744798807e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}