{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.30140908748398765,
"eval_steps": 400,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006028181749679753,
"grad_norm": 0.38845974438826136,
"learning_rate": 1.0040160642570282e-06,
"loss": 1.6618,
"step": 10
},
{
"epoch": 0.012056363499359506,
"grad_norm": 0.255603405930248,
"learning_rate": 2.0080321285140564e-06,
"loss": 1.6698,
"step": 20
},
{
"epoch": 0.01808454524903926,
"grad_norm": 0.20116505722657768,
"learning_rate": 3.0120481927710846e-06,
"loss": 1.6264,
"step": 30
},
{
"epoch": 0.02411272699871901,
"grad_norm": 0.1795881732266397,
"learning_rate": 4.016064257028113e-06,
"loss": 1.6025,
"step": 40
},
{
"epoch": 0.030140908748398764,
"grad_norm": 0.14600421495766222,
"learning_rate": 5.020080321285141e-06,
"loss": 1.5584,
"step": 50
},
{
"epoch": 0.03616909049807852,
"grad_norm": 0.1170942466306718,
"learning_rate": 6.024096385542169e-06,
"loss": 1.5604,
"step": 60
},
{
"epoch": 0.04219727224775827,
"grad_norm": 0.2445974669666656,
"learning_rate": 7.028112449799197e-06,
"loss": 1.5057,
"step": 70
},
{
"epoch": 0.04822545399743802,
"grad_norm": 0.11238025351136724,
"learning_rate": 8.032128514056226e-06,
"loss": 1.4724,
"step": 80
},
{
"epoch": 0.05425363574711778,
"grad_norm": 0.13639577534070754,
"learning_rate": 9.036144578313253e-06,
"loss": 1.4962,
"step": 90
},
{
"epoch": 0.06028181749679753,
"grad_norm": 0.17113178431310286,
"learning_rate": 1.0040160642570281e-05,
"loss": 1.4765,
"step": 100
},
{
"epoch": 0.06630999924647728,
"grad_norm": 0.4795401637258333,
"learning_rate": 1.104417670682731e-05,
"loss": 1.4468,
"step": 110
},
{
"epoch": 0.07233818099615703,
"grad_norm": 0.13385762516900662,
"learning_rate": 1.2048192771084338e-05,
"loss": 1.4134,
"step": 120
},
{
"epoch": 0.07836636274583679,
"grad_norm": 0.12303707813666019,
"learning_rate": 1.3052208835341367e-05,
"loss": 1.4191,
"step": 130
},
{
"epoch": 0.08439454449551655,
"grad_norm": 0.10822073133399364,
"learning_rate": 1.4056224899598394e-05,
"loss": 1.397,
"step": 140
},
{
"epoch": 0.09042272624519629,
"grad_norm": 0.1109270134990499,
"learning_rate": 1.5060240963855424e-05,
"loss": 1.3818,
"step": 150
},
{
"epoch": 0.09645090799487605,
"grad_norm": 0.14622173134033867,
"learning_rate": 1.606425702811245e-05,
"loss": 1.4002,
"step": 160
},
{
"epoch": 0.1024790897445558,
"grad_norm": 0.10587626114414271,
"learning_rate": 1.706827309236948e-05,
"loss": 1.4123,
"step": 170
},
{
"epoch": 0.10850727149423556,
"grad_norm": 0.10302196814593138,
"learning_rate": 1.8072289156626505e-05,
"loss": 1.4016,
"step": 180
},
{
"epoch": 0.1145354532439153,
"grad_norm": 0.1359849724314843,
"learning_rate": 1.9076305220883535e-05,
"loss": 1.404,
"step": 190
},
{
"epoch": 0.12056363499359506,
"grad_norm": 0.10587622358885339,
"learning_rate": 2.0080321285140562e-05,
"loss": 1.4019,
"step": 200
},
{
"epoch": 0.1265918167432748,
"grad_norm": 0.15017595066321648,
"learning_rate": 2.1084337349397593e-05,
"loss": 1.393,
"step": 210
},
{
"epoch": 0.13261999849295456,
"grad_norm": 0.19475575142022897,
"learning_rate": 2.208835341365462e-05,
"loss": 1.3876,
"step": 220
},
{
"epoch": 0.1386481802426343,
"grad_norm": 0.12084095277263424,
"learning_rate": 2.309236947791165e-05,
"loss": 1.3916,
"step": 230
},
{
"epoch": 0.14467636199231407,
"grad_norm": 0.11857482977859173,
"learning_rate": 2.4096385542168677e-05,
"loss": 1.4056,
"step": 240
},
{
"epoch": 0.15070454374199382,
"grad_norm": 0.1403959719635503,
"learning_rate": 2.5100401606425704e-05,
"loss": 1.3935,
"step": 250
},
{
"epoch": 0.15673272549167358,
"grad_norm": 0.10800155257965392,
"learning_rate": 2.6104417670682734e-05,
"loss": 1.3826,
"step": 260
},
{
"epoch": 0.16276090724135334,
"grad_norm": 0.10598439909830581,
"learning_rate": 2.7108433734939758e-05,
"loss": 1.3999,
"step": 270
},
{
"epoch": 0.1687890889910331,
"grad_norm": 0.10753449693494475,
"learning_rate": 2.8112449799196788e-05,
"loss": 1.4047,
"step": 280
},
{
"epoch": 0.17481727074071282,
"grad_norm": 0.36718328659037996,
"learning_rate": 2.911646586345382e-05,
"loss": 1.3935,
"step": 290
},
{
"epoch": 0.18084545249039258,
"grad_norm": 0.10611900000479042,
"learning_rate": 3.012048192771085e-05,
"loss": 1.3736,
"step": 300
},
{
"epoch": 0.18687363424007233,
"grad_norm": 0.11901555220652378,
"learning_rate": 3.112449799196787e-05,
"loss": 1.3927,
"step": 310
},
{
"epoch": 0.1929018159897521,
"grad_norm": 0.118935148513695,
"learning_rate": 3.21285140562249e-05,
"loss": 1.3636,
"step": 320
},
{
"epoch": 0.19892999773943185,
"grad_norm": 0.1974545721831922,
"learning_rate": 3.313253012048193e-05,
"loss": 1.3892,
"step": 330
},
{
"epoch": 0.2049581794891116,
"grad_norm": 0.13145409772199562,
"learning_rate": 3.413654618473896e-05,
"loss": 1.3756,
"step": 340
},
{
"epoch": 0.21098636123879136,
"grad_norm": 0.11064380941915805,
"learning_rate": 3.5140562248995983e-05,
"loss": 1.3935,
"step": 350
},
{
"epoch": 0.21701454298847112,
"grad_norm": 0.12160423827639648,
"learning_rate": 3.614457831325301e-05,
"loss": 1.3698,
"step": 360
},
{
"epoch": 0.22304272473815084,
"grad_norm": 0.10349641889173723,
"learning_rate": 3.7148594377510044e-05,
"loss": 1.3771,
"step": 370
},
{
"epoch": 0.2290709064878306,
"grad_norm": 0.10682144059511894,
"learning_rate": 3.815261044176707e-05,
"loss": 1.3768,
"step": 380
},
{
"epoch": 0.23509908823751036,
"grad_norm": 0.11625245619819907,
"learning_rate": 3.91566265060241e-05,
"loss": 1.3795,
"step": 390
},
{
"epoch": 0.2411272699871901,
"grad_norm": 0.10327726962763091,
"learning_rate": 4.0160642570281125e-05,
"loss": 1.3987,
"step": 400
},
{
"epoch": 0.2411272699871901,
"eval_loss": 1.3548544645309448,
"eval_runtime": 148.2269,
"eval_samples_per_second": 7.239,
"eval_steps_per_second": 0.911,
"step": 400
},
{
"epoch": 0.24715545173686987,
"grad_norm": 0.10660530950921367,
"learning_rate": 4.116465863453816e-05,
"loss": 1.3886,
"step": 410
},
{
"epoch": 0.2531836334865496,
"grad_norm": 0.10405582985373843,
"learning_rate": 4.2168674698795186e-05,
"loss": 1.3645,
"step": 420
},
{
"epoch": 0.2592118152362294,
"grad_norm": 0.3318479326670041,
"learning_rate": 4.317269076305221e-05,
"loss": 1.3591,
"step": 430
},
{
"epoch": 0.2652399969859091,
"grad_norm": 0.10840544026201794,
"learning_rate": 4.417670682730924e-05,
"loss": 1.3805,
"step": 440
},
{
"epoch": 0.2712681787355889,
"grad_norm": 0.10730056620740543,
"learning_rate": 4.5180722891566266e-05,
"loss": 1.3888,
"step": 450
},
{
"epoch": 0.2772963604852686,
"grad_norm": 0.10699620793474768,
"learning_rate": 4.61847389558233e-05,
"loss": 1.3935,
"step": 460
},
{
"epoch": 0.2833245422349484,
"grad_norm": 0.10595493402596641,
"learning_rate": 4.718875502008032e-05,
"loss": 1.3659,
"step": 470
},
{
"epoch": 0.28935272398462814,
"grad_norm": 0.14234040947748414,
"learning_rate": 4.8192771084337354e-05,
"loss": 1.371,
"step": 480
},
{
"epoch": 0.29538090573430786,
"grad_norm": 0.1095349792774781,
"learning_rate": 4.919678714859438e-05,
"loss": 1.3647,
"step": 490
},
{
"epoch": 0.30140908748398765,
"grad_norm": 0.10655792946130023,
"learning_rate": 4.999997536857586e-05,
"loss": 1.3606,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 4974,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 840529663229952.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}