{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 198,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010129787907565685,
      "grad_norm": 5.634124615502245,
      "learning_rate": 0.0001,
      "loss": 0.3589,
      "mean_token_accuracy": 0.9127573072910309,
      "num_tokens": 586686.0,
      "step": 1
    },
    {
      "epoch": 0.10129787907565685,
      "grad_norm": 0.34120253622850233,
      "learning_rate": 0.0001,
      "loss": 0.2653,
      "mean_token_accuracy": 0.9179378294696411,
      "num_tokens": 5928997.0,
      "step": 10
    },
    {
      "epoch": 0.2025957581513137,
      "grad_norm": 0.15880751179799044,
      "learning_rate": 0.0001,
      "loss": 0.1607,
      "mean_token_accuracy": 0.9390914304181933,
      "num_tokens": 11864766.0,
      "step": 20
    },
    {
      "epoch": 0.30389363722697055,
      "grad_norm": 0.10858768982732472,
      "learning_rate": 0.0001,
      "loss": 0.1389,
      "mean_token_accuracy": 0.9470763374119997,
      "num_tokens": 17788976.0,
      "step": 30
    },
    {
      "epoch": 0.4051915163026274,
      "grad_norm": 0.07843910135917725,
      "learning_rate": 0.0001,
      "loss": 0.1254,
      "mean_token_accuracy": 0.9517961731180549,
      "num_tokens": 23762431.0,
      "step": 40
    },
    {
      "epoch": 0.5064893953782843,
      "grad_norm": 0.09149076132992681,
      "learning_rate": 0.0001,
      "loss": 0.1173,
      "mean_token_accuracy": 0.9551124922931195,
      "num_tokens": 29691182.0,
      "step": 50
    },
    {
      "epoch": 0.6077872744539411,
      "grad_norm": 0.0988060674250123,
      "learning_rate": 0.0001,
      "loss": 0.1126,
      "mean_token_accuracy": 0.9569881336763502,
      "num_tokens": 35615925.0,
      "step": 60
    },
    {
      "epoch": 0.709085153529598,
      "grad_norm": 0.0747939087236935,
      "learning_rate": 0.0001,
      "loss": 0.1078,
      "mean_token_accuracy": 0.9588578680530191,
      "num_tokens": 41567014.0,
      "step": 70
    },
    {
      "epoch": 0.8103830326052548,
      "grad_norm": 0.09381859172196332,
      "learning_rate": 0.0001,
      "loss": 0.107,
      "mean_token_accuracy": 0.9592851245775819,
      "num_tokens": 47465256.0,
      "step": 80
    },
    {
      "epoch": 0.9116809116809117,
      "grad_norm": 0.07959394726227396,
      "learning_rate": 0.0001,
      "loss": 0.1045,
      "mean_token_accuracy": 0.9601302666589617,
      "num_tokens": 53394634.0,
      "step": 90
    },
    {
      "epoch": 1.0101297879075657,
      "grad_norm": 0.08937759002808963,
      "learning_rate": 0.0001,
      "loss": 0.1018,
      "mean_token_accuracy": 0.9612304681367047,
      "num_tokens": 59195734.0,
      "step": 100
    },
    {
      "epoch": 1.1114276669832226,
      "grad_norm": 0.07683886672466819,
      "learning_rate": 0.0001,
      "loss": 0.0887,
      "mean_token_accuracy": 0.9659375650808215,
      "num_tokens": 65134645.0,
      "step": 110
    },
    {
      "epoch": 1.2127255460588793,
      "grad_norm": 0.08142019069177424,
      "learning_rate": 0.0001,
      "loss": 0.0885,
      "mean_token_accuracy": 0.965754240937531,
      "num_tokens": 71072294.0,
      "step": 120
    },
    {
      "epoch": 1.3140234251345362,
      "grad_norm": 0.07420029680099594,
      "learning_rate": 0.0001,
      "loss": 0.0892,
      "mean_token_accuracy": 0.9654547093436122,
      "num_tokens": 77036690.0,
      "step": 130
    },
    {
      "epoch": 1.415321304210193,
      "grad_norm": 0.082199072710958,
      "learning_rate": 0.0001,
      "loss": 0.0908,
      "mean_token_accuracy": 0.9649530470371246,
      "num_tokens": 82943314.0,
      "step": 140
    },
    {
      "epoch": 1.51661918328585,
      "grad_norm": 0.07212494518614486,
      "learning_rate": 0.0001,
      "loss": 0.0906,
      "mean_token_accuracy": 0.9651403101161122,
      "num_tokens": 88875871.0,
      "step": 150
    },
    {
      "epoch": 1.617917062361507,
      "grad_norm": 0.10352344172757606,
      "learning_rate": 0.0001,
      "loss": 0.0909,
      "mean_token_accuracy": 0.9648977383971215,
      "num_tokens": 94820130.0,
      "step": 160
    },
    {
      "epoch": 1.7192149414371638,
      "grad_norm": 0.07698503957302266,
      "learning_rate": 0.0001,
      "loss": 0.0915,
      "mean_token_accuracy": 0.9645912747830152,
      "num_tokens": 100733124.0,
      "step": 170
    },
    {
      "epoch": 1.8205128205128205,
      "grad_norm": 0.08462828819382234,
      "learning_rate": 0.0001,
      "loss": 0.0911,
      "mean_token_accuracy": 0.9649000752717256,
      "num_tokens": 106690216.0,
      "step": 180
    },
    {
      "epoch": 1.9218106995884774,
      "grad_norm": 0.06837628398080642,
      "learning_rate": 0.0001,
      "loss": 0.0915,
      "mean_token_accuracy": 0.9646391872316599,
      "num_tokens": 112618930.0,
      "step": 190
    }
  ],
  "logging_steps": 10,
  "max_steps": 392,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 309419476910080.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}