hindi-devanagari-math / trainer_state.json
gitanshgarg's picture
Upload folder using huggingface_hub
149c33c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.006396929473852551,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006396929473852551,
"grad_norm": 0.8444250226020813,
"learning_rate": 3.6e-05,
"loss": 0.8966,
"step": 10
},
{
"epoch": 0.0012793858947705101,
"grad_norm": 0.6855414509773254,
"learning_rate": 7.6e-05,
"loss": 0.7299,
"step": 20
},
{
"epoch": 0.0019190788421557653,
"grad_norm": 0.7116178870201111,
"learning_rate": 0.000116,
"loss": 0.5625,
"step": 30
},
{
"epoch": 0.0025587717895410203,
"grad_norm": 0.5287570953369141,
"learning_rate": 0.00015600000000000002,
"loss": 0.481,
"step": 40
},
{
"epoch": 0.0031984647369262755,
"grad_norm": 0.4259079694747925,
"learning_rate": 0.000196,
"loss": 0.4765,
"step": 50
},
{
"epoch": 0.0038381576843115306,
"grad_norm": 0.5958250761032104,
"learning_rate": 0.000236,
"loss": 0.44,
"step": 60
},
{
"epoch": 0.004477850631696785,
"grad_norm": 0.4068315923213959,
"learning_rate": 0.000276,
"loss": 0.4348,
"step": 70
},
{
"epoch": 0.005117543579082041,
"grad_norm": 0.6012944579124451,
"learning_rate": 0.00031600000000000004,
"loss": 0.4497,
"step": 80
},
{
"epoch": 0.005757236526467296,
"grad_norm": 0.39029160141944885,
"learning_rate": 0.00035600000000000003,
"loss": 0.4214,
"step": 90
},
{
"epoch": 0.006396929473852551,
"grad_norm": 0.3756840229034424,
"learning_rate": 0.00039600000000000003,
"loss": 0.4643,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.502292152831181e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}