Training in progress, step 626, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +896 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b1dc78f2811fda88b6512f88c78676cb2fe7d46b216090d585f2fddf17cf2e5
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:03f7fe686ad7c64bd02d4fd094260bb621073341f3d4940d8aa14be3b59e6d64
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4993232b70f7933ea0542f10f85ddae5170cc7f565380cdf9563cd04075734fc
 size 102864868

 version https://git-lfs.github.com/spec/v1
+oid sha256:31f576c838d24ff45665b08ea5486b4a27d0c8d600c5d252c3502ef97ab980e0
 size 102864868

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5c3744dee5ce5fe12a3912f55428a52f75277710f850c4f7eecbb8b78b9ac1a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d48d59ca308d922824d340dfcf616c74acd85da32e75e9dc52670721e9935a1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a69697be86f6228636dd6aabb85e071a7891a09986a73f06d5f0057c339b6544
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:46d020344d4cdb625fa3ccdabe828dfdae277b492bb5282dbb9f16edd7c73161
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.384363055229187,
-  "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 1.5987210231814548,
   "eval_steps": 100,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3555,6 +3555,896 @@
       "eval_samples_per_second": 23.99,
       "eval_steps_per_second": 6.009,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3578,12 +4468,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.405657490625659e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.379648208618164,
+  "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 2.0015987210231816,
   "eval_steps": 100,
+  "global_step": 626,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.99,
       "eval_steps_per_second": 6.009,
       "step": 500
+    },
+    {
+      "epoch": 1.6019184652278178,
+      "grad_norm": 0.5501524209976196,
+      "learning_rate": 1.9641287434001355e-05,
+      "loss": 1.3995,
+      "step": 501
+    },
+    {
+      "epoch": 1.6051159072741807,
+      "grad_norm": 0.457994282245636,
+      "learning_rate": 1.9338789076247e-05,
+      "loss": 1.2441,
+      "step": 502
+    },
+    {
+      "epoch": 1.6083133493205435,
+      "grad_norm": 0.4338493347167969,
+      "learning_rate": 1.9038388702229403e-05,
+      "loss": 1.1424,
+      "step": 503
+    },
+    {
+      "epoch": 1.6115107913669064,
+      "grad_norm": 0.5547595620155334,
+      "learning_rate": 1.874009412530877e-05,
+      "loss": 1.524,
+      "step": 504
+    },
+    {
+      "epoch": 1.6147082334132694,
+      "grad_norm": 0.4631533920764923,
+      "learning_rate": 1.8443913104073983e-05,
+      "loss": 1.1378,
+      "step": 505
+    },
+    {
+      "epoch": 1.6179056754596322,
+      "grad_norm": 0.4832412898540497,
+      "learning_rate": 1.8149853342140645e-05,
+      "loss": 1.3494,
+      "step": 506
+    },
+    {
+      "epoch": 1.6211031175059953,
+      "grad_norm": 0.48691773414611816,
+      "learning_rate": 1.7857922487950874e-05,
+      "loss": 1.2168,
+      "step": 507
+    },
+    {
+      "epoch": 1.624300559552358,
+      "grad_norm": 0.5417378544807434,
+      "learning_rate": 1.7568128134574113e-05,
+      "loss": 1.3404,
+      "step": 508
+    },
+    {
+      "epoch": 1.627498001598721,
+      "grad_norm": 0.4586952328681946,
+      "learning_rate": 1.728047781950999e-05,
+      "loss": 1.2989,
+      "step": 509
+    },
+    {
+      "epoch": 1.630695443645084,
+      "grad_norm": 0.4418376386165619,
+      "learning_rate": 1.6994979024491942e-05,
+      "loss": 1.1664,
+      "step": 510
+    },
+    {
+      "epoch": 1.6338928856914468,
+      "grad_norm": 0.4903421700000763,
+      "learning_rate": 1.671163917529285e-05,
+      "loss": 1.2322,
+      "step": 511
+    },
+    {
+      "epoch": 1.6370903277378097,
+      "grad_norm": 0.44814610481262207,
+      "learning_rate": 1.64304656415317e-05,
+      "loss": 1.1332,
+      "step": 512
+    },
+    {
+      "epoch": 1.6402877697841727,
+      "grad_norm": 0.5581910610198975,
+      "learning_rate": 1.6151465736482107e-05,
+      "loss": 1.2723,
+      "step": 513
+    },
+    {
+      "epoch": 1.6434852118305354,
+      "grad_norm": 0.5089916586875916,
+      "learning_rate": 1.587464671688187e-05,
+      "loss": 1.359,
+      "step": 514
+    },
+    {
+      "epoch": 1.6466826538768986,
+      "grad_norm": 0.4594471752643585,
+      "learning_rate": 1.5600015782744492e-05,
+      "loss": 1.0479,
+      "step": 515
+    },
+    {
+      "epoch": 1.6498800959232613,
+      "grad_norm": 0.48605117201805115,
+      "learning_rate": 1.5327580077171587e-05,
+      "loss": 1.1852,
+      "step": 516
+    },
+    {
+      "epoch": 1.6530775379696243,
+      "grad_norm": 0.4928354322910309,
+      "learning_rate": 1.5057346686167428e-05,
+      "loss": 1.4087,
+      "step": 517
+    },
+    {
+      "epoch": 1.6562749800159873,
+      "grad_norm": 0.5630521774291992,
+      "learning_rate": 1.4789322638454351e-05,
+      "loss": 1.5852,
+      "step": 518
+    },
+    {
+      "epoch": 1.65947242206235,
+      "grad_norm": 0.46705761551856995,
+      "learning_rate": 1.452351490529017e-05,
+      "loss": 1.0012,
+      "step": 519
+    },
+    {
+      "epoch": 1.662669864108713,
+      "grad_norm": 0.4570344388484955,
+      "learning_rate": 1.4259930400286669e-05,
+      "loss": 1.266,
+      "step": 520
+    },
+    {
+      "epoch": 1.665867306155076,
+      "grad_norm": 0.4869745671749115,
+      "learning_rate": 1.3998575979229944e-05,
+      "loss": 1.4107,
+      "step": 521
+    },
+    {
+      "epoch": 1.6690647482014387,
+      "grad_norm": 0.46870559453964233,
+      "learning_rate": 1.373945843990192e-05,
+      "loss": 1.0377,
+      "step": 522
+    },
+    {
+      "epoch": 1.6722621902478019,
+      "grad_norm": 0.46983346343040466,
+      "learning_rate": 1.3482584521903718e-05,
+      "loss": 1.3612,
+      "step": 523
+    },
+    {
+      "epoch": 1.6754596322941646,
+      "grad_norm": 0.5902780294418335,
+      "learning_rate": 1.322796090648013e-05,
+      "loss": 1.1495,
+      "step": 524
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.5421798229217529,
+      "learning_rate": 1.2975594216346144e-05,
+      "loss": 1.4595,
+      "step": 525
+    },
+    {
+      "epoch": 1.6818545163868905,
+      "grad_norm": 0.47739914059638977,
+      "learning_rate": 1.272549101551438e-05,
+      "loss": 1.296,
+      "step": 526
+    },
+    {
+      "epoch": 1.6850519584332533,
+      "grad_norm": 0.46318313479423523,
+      "learning_rate": 1.2477657809124631e-05,
+      "loss": 1.2161,
+      "step": 527
+    },
+    {
+      "epoch": 1.6882494004796165,
+      "grad_norm": 0.5958060026168823,
+      "learning_rate": 1.2232101043274436e-05,
+      "loss": 1.2663,
+      "step": 528
+    },
+    {
+      "epoch": 1.6914468425259792,
+      "grad_norm": 0.44811907410621643,
+      "learning_rate": 1.1988827104851574e-05,
+      "loss": 1.1838,
+      "step": 529
+    },
+    {
+      "epoch": 1.6946442845723422,
+      "grad_norm": 0.45057952404022217,
+      "learning_rate": 1.1747842321367886e-05,
+      "loss": 1.3221,
+      "step": 530
+    },
+    {
+      "epoch": 1.6978417266187051,
+      "grad_norm": 0.4354062080383301,
+      "learning_rate": 1.1509152960794666e-05,
+      "loss": 1.2421,
+      "step": 531
+    },
+    {
+      "epoch": 1.7010391686650679,
+      "grad_norm": 0.47361254692077637,
+      "learning_rate": 1.1272765231399685e-05,
+      "loss": 1.4439,
+      "step": 532
+    },
+    {
+      "epoch": 1.7042366107114308,
+      "grad_norm": 0.4950743019580841,
+      "learning_rate": 1.1038685281585736e-05,
+      "loss": 1.3103,
+      "step": 533
+    },
+    {
+      "epoch": 1.7074340527577938,
+      "grad_norm": 0.5186318159103394,
+      "learning_rate": 1.0806919199730615e-05,
+      "loss": 1.3485,
+      "step": 534
+    },
+    {
+      "epoch": 1.7106314948041565,
+      "grad_norm": 0.38383203744888306,
+      "learning_rate": 1.057747301402887e-05,
+      "loss": 1.0194,
+      "step": 535
+    },
+    {
+      "epoch": 1.7138289368505197,
+      "grad_norm": 0.4821476638317108,
+      "learning_rate": 1.035035269233493e-05,
+      "loss": 1.3887,
+      "step": 536
+    },
+    {
+      "epoch": 1.7170263788968825,
+      "grad_norm": 0.5094884634017944,
+      "learning_rate": 1.0125564142007948e-05,
+      "loss": 1.2771,
+      "step": 537
+    },
+    {
+      "epoch": 1.7202238209432454,
+      "grad_norm": 0.6006470918655396,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 1.4993,
+      "step": 538
+    },
+    {
+      "epoch": 1.7234212629896084,
+      "grad_norm": 0.4611685276031494,
+      "learning_rate": 9.683005681494506e-06,
+      "loss": 1.1291,
+      "step": 539
+    },
+    {
+      "epoch": 1.7266187050359711,
+      "grad_norm": 0.4802876114845276,
+      "learning_rate": 9.465247282174805e-06,
+      "loss": 1.3996,
+      "step": 540
+    },
+    {
+      "epoch": 1.729816147082334,
+      "grad_norm": 0.4477510154247284,
+      "learning_rate": 9.249843675656212e-06,
+      "loss": 1.2913,
+      "step": 541
+    },
+    {
+      "epoch": 1.733013589128697,
+      "grad_norm": 0.403728187084198,
+      "learning_rate": 9.036800464548157e-06,
+      "loss": 1.1401,
+      "step": 542
+    },
+    {
+      "epoch": 1.7362110311750598,
+      "grad_norm": 0.553962230682373,
+      "learning_rate": 8.826123190066671e-06,
+      "loss": 1.4372,
+      "step": 543
+    },
+    {
+      "epoch": 1.739408473221423,
+      "grad_norm": 0.5109530091285706,
+      "learning_rate": 8.617817331890154e-06,
+      "loss": 1.2767,
+      "step": 544
+    },
+    {
+      "epoch": 1.7426059152677857,
+      "grad_norm": 0.4755638837814331,
+      "learning_rate": 8.411888308016847e-06,
+      "loss": 1.3341,
+      "step": 545
+    },
+    {
+      "epoch": 1.7458033573141487,
+      "grad_norm": 0.4327421188354492,
+      "learning_rate": 8.208341474624071e-06,
+      "loss": 1.0796,
+      "step": 546
+    },
+    {
+      "epoch": 1.7490007993605117,
+      "grad_norm": 0.465468168258667,
+      "learning_rate": 8.00718212592868e-06,
+      "loss": 1.3402,
+      "step": 547
+    },
+    {
+      "epoch": 1.7521982414068744,
+      "grad_norm": 0.5804145932197571,
+      "learning_rate": 7.808415494049514e-06,
+      "loss": 1.5293,
+      "step": 548
+    },
+    {
+      "epoch": 1.7553956834532374,
+      "grad_norm": 0.44623619318008423,
+      "learning_rate": 7.612046748871327e-06,
+      "loss": 1.0433,
+      "step": 549
+    },
+    {
+      "epoch": 1.7585931254996003,
+      "grad_norm": 0.48140379786491394,
+      "learning_rate": 7.4180809979102036e-06,
+      "loss": 1.2669,
+      "step": 550
+    },
+    {
+      "epoch": 1.761790567545963,
+      "grad_norm": 0.4790396988391876,
+      "learning_rate": 7.226523286180776e-06,
+      "loss": 1.2912,
+      "step": 551
+    },
+    {
+      "epoch": 1.7649880095923263,
+      "grad_norm": 0.5187086462974548,
+      "learning_rate": 7.0373785960650475e-06,
+      "loss": 1.0535,
+      "step": 552
+    },
+    {
+      "epoch": 1.768185451638689,
+      "grad_norm": 0.539508581161499,
+      "learning_rate": 6.850651847182743e-06,
+      "loss": 1.486,
+      "step": 553
+    },
+    {
+      "epoch": 1.771382893685052,
+      "grad_norm": 0.5147770643234253,
+      "learning_rate": 6.666347896263325e-06,
+      "loss": 1.2978,
+      "step": 554
+    },
+    {
+      "epoch": 1.774580335731415,
+      "grad_norm": 0.5139119625091553,
+      "learning_rate": 6.4844715370197874e-06,
+      "loss": 1.2245,
+      "step": 555
+    },
+    {
+      "epoch": 1.7777777777777777,
+      "grad_norm": 0.4947463572025299,
+      "learning_rate": 6.3050275000238414e-06,
+      "loss": 1.1726,
+      "step": 556
+    },
+    {
+      "epoch": 1.7809752198241406,
+      "grad_norm": 0.5504704713821411,
+      "learning_rate": 6.128020452582917e-06,
+      "loss": 1.4667,
+      "step": 557
+    },
+    {
+      "epoch": 1.7841726618705036,
+      "grad_norm": 0.46442416310310364,
+      "learning_rate": 5.953454998618857e-06,
+      "loss": 1.2782,
+      "step": 558
+    },
+    {
+      "epoch": 1.7873701039168663,
+      "grad_norm": 0.6431064009666443,
+      "learning_rate": 5.781335678547995e-06,
+      "loss": 1.4561,
+      "step": 559
+    },
+    {
+      "epoch": 1.7905675459632295,
+      "grad_norm": 0.5108866095542908,
+      "learning_rate": 5.611666969163243e-06,
+      "loss": 1.3854,
+      "step": 560
+    },
+    {
+      "epoch": 1.7937649880095923,
+      "grad_norm": 0.5375909209251404,
+      "learning_rate": 5.4444532835175144e-06,
+      "loss": 1.4556,
+      "step": 561
+    },
+    {
+      "epoch": 1.7969624300559552,
+      "grad_norm": 0.4582259953022003,
+      "learning_rate": 5.27969897080901e-06,
+      "loss": 1.1938,
+      "step": 562
+    },
+    {
+      "epoch": 1.8001598721023182,
+      "grad_norm": 0.4605944752693176,
+      "learning_rate": 5.1174083162680465e-06,
+      "loss": 1.3145,
+      "step": 563
+    },
+    {
+      "epoch": 1.803357314148681,
+      "grad_norm": 0.4929840862751007,
+      "learning_rate": 4.957585541045684e-06,
+      "loss": 1.146,
+      "step": 564
+    },
+    {
+      "epoch": 1.8065547561950441,
+      "grad_norm": 0.5457170009613037,
+      "learning_rate": 4.800234802103842e-06,
+      "loss": 1.4803,
+      "step": 565
+    },
+    {
+      "epoch": 1.8097521982414069,
+      "grad_norm": 0.4208320379257202,
+      "learning_rate": 4.6453601921072395e-06,
+      "loss": 1.0077,
+      "step": 566
+    },
+    {
+      "epoch": 1.8129496402877698,
+      "grad_norm": 0.4881625175476074,
+      "learning_rate": 4.492965739316901e-06,
+      "loss": 1.2129,
+      "step": 567
+    },
+    {
+      "epoch": 1.8161470823341328,
+      "grad_norm": 0.4958392381668091,
+      "learning_rate": 4.34305540748543e-06,
+      "loss": 1.3164,
+      "step": 568
+    },
+    {
+      "epoch": 1.8193445243804955,
+      "grad_norm": 0.5175157785415649,
+      "learning_rate": 4.195633095753859e-06,
+      "loss": 1.5108,
+      "step": 569
+    },
+    {
+      "epoch": 1.8225419664268585,
+      "grad_norm": 0.4751617908477783,
+      "learning_rate": 4.050702638550275e-06,
+      "loss": 1.2642,
+      "step": 570
+    },
+    {
+      "epoch": 1.8257394084732215,
+      "grad_norm": 0.500234842300415,
+      "learning_rate": 3.908267805490051e-06,
+      "loss": 1.1798,
+      "step": 571
+    },
+    {
+      "epoch": 1.8289368505195842,
+      "grad_norm": 0.4904455542564392,
+      "learning_rate": 3.768332301277866e-06,
+      "loss": 1.504,
+      "step": 572
+    },
+    {
+      "epoch": 1.8321342925659474,
+      "grad_norm": 0.46577635407447815,
+      "learning_rate": 3.630899765611251e-06,
+      "loss": 1.2053,
+      "step": 573
+    },
+    {
+      "epoch": 1.8353317346123101,
+      "grad_norm": 0.4044407308101654,
+      "learning_rate": 3.495973773086014e-06,
+      "loss": 0.9835,
+      "step": 574
+    },
+    {
+      "epoch": 1.838529176658673,
+      "grad_norm": 0.5665880441665649,
+      "learning_rate": 3.3635578331031814e-06,
+      "loss": 1.6426,
+      "step": 575
+    },
+    {
+      "epoch": 1.841726618705036,
+      "grad_norm": 0.5140945911407471,
+      "learning_rate": 3.233655389777801e-06,
+      "loss": 1.1846,
+      "step": 576
+    },
+    {
+      "epoch": 1.8449240607513988,
+      "grad_norm": 0.4654732346534729,
+      "learning_rate": 3.1062698218492724e-06,
+      "loss": 1.4804,
+      "step": 577
+    },
+    {
+      "epoch": 1.8481215027977618,
+      "grad_norm": 0.5238311290740967,
+      "learning_rate": 2.9814044425935606e-06,
+      "loss": 1.4453,
+      "step": 578
+    },
+    {
+      "epoch": 1.8513189448441247,
+      "grad_norm": 0.47380512952804565,
+      "learning_rate": 2.859062499736931e-06,
+      "loss": 1.1711,
+      "step": 579
+    },
+    {
+      "epoch": 1.8545163868904875,
+      "grad_norm": 0.4145112633705139,
+      "learning_rate": 2.739247175371562e-06,
+      "loss": 1.0746,
+      "step": 580
+    },
+    {
+      "epoch": 1.8577138289368507,
+      "grad_norm": 0.4292459785938263,
+      "learning_rate": 2.62196158587269e-06,
+      "loss": 1.1786,
+      "step": 581
+    },
+    {
+      "epoch": 1.8609112709832134,
+      "grad_norm": 0.45340195298194885,
+      "learning_rate": 2.5072087818176382e-06,
+      "loss": 1.2485,
+      "step": 582
+    },
+    {
+      "epoch": 1.8641087130295764,
+      "grad_norm": 0.5513341426849365,
+      "learning_rate": 2.3949917479063945e-06,
+      "loss": 1.4376,
+      "step": 583
+    },
+    {
+      "epoch": 1.8673061550759393,
+      "grad_norm": 0.4209807515144348,
+      "learning_rate": 2.2853134028840594e-06,
+      "loss": 1.1981,
+      "step": 584
+    },
+    {
+      "epoch": 1.870503597122302,
+      "grad_norm": 0.47939029335975647,
+      "learning_rate": 2.178176599464821e-06,
+      "loss": 1.2066,
+      "step": 585
+    },
+    {
+      "epoch": 1.873701039168665,
+      "grad_norm": 0.5361736416816711,
+      "learning_rate": 2.073584124257899e-06,
+      "loss": 1.4421,
+      "step": 586
+    },
+    {
+      "epoch": 1.876898481215028,
+      "grad_norm": 0.4201466143131256,
+      "learning_rate": 1.971538697694919e-06,
+      "loss": 1.1246,
+      "step": 587
+    },
+    {
+      "epoch": 1.8800959232613907,
+      "grad_norm": 0.5004400014877319,
+      "learning_rate": 1.8720429739592982e-06,
+      "loss": 1.3158,
+      "step": 588
+    },
+    {
+      "epoch": 1.883293365307754,
+      "grad_norm": 0.4206116497516632,
+      "learning_rate": 1.77509954091708e-06,
+      "loss": 1.1231,
+      "step": 589
+    },
+    {
+      "epoch": 1.8864908073541167,
+      "grad_norm": 0.5120659470558167,
+      "learning_rate": 1.6807109200496995e-06,
+      "loss": 1.3357,
+      "step": 590
+    },
+    {
+      "epoch": 1.8896882494004796,
+      "grad_norm": 0.5396211743354797,
+      "learning_rate": 1.5888795663883904e-06,
+      "loss": 1.3456,
+      "step": 591
+    },
+    {
+      "epoch": 1.8928856914468426,
+      "grad_norm": 1.0130178928375244,
+      "learning_rate": 1.4996078684503144e-06,
+      "loss": 1.0338,
+      "step": 592
+    },
+    {
+      "epoch": 1.8960831334932053,
+      "grad_norm": 0.5105600953102112,
+      "learning_rate": 1.4128981481764115e-06,
+      "loss": 1.3242,
+      "step": 593
+    },
+    {
+      "epoch": 1.8992805755395683,
+      "grad_norm": 0.5030739903450012,
+      "learning_rate": 1.3287526608711131e-06,
+      "loss": 1.2315,
+      "step": 594
+    },
+    {
+      "epoch": 1.9024780175859313,
+      "grad_norm": 0.5108282566070557,
+      "learning_rate": 1.247173595143536e-06,
+      "loss": 1.3224,
+      "step": 595
+    },
+    {
+      "epoch": 1.905675459632294,
+      "grad_norm": 0.45918774604797363,
+      "learning_rate": 1.1681630728506699e-06,
+      "loss": 1.181,
+      "step": 596
+    },
+    {
+      "epoch": 1.9088729016786572,
+      "grad_norm": 0.49473392963409424,
+      "learning_rate": 1.0917231490421232e-06,
+      "loss": 1.4179,
+      "step": 597
+    },
+    {
+      "epoch": 1.91207034372502,
+      "grad_norm": 0.4828788638114929,
+      "learning_rate": 1.0178558119067315e-06,
+      "loss": 1.3524,
+      "step": 598
+    },
+    {
+      "epoch": 1.915267785771383,
+      "grad_norm": 0.9607488512992859,
+      "learning_rate": 9.465629827207445e-07,
+      "loss": 1.328,
+      "step": 599
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.5251384377479553,
+      "learning_rate": 8.778465157979976e-07,
+      "loss": 1.3309,
+      "step": 600
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "eval_loss": 1.379648208618164,
+      "eval_runtime": 21.9766,
+      "eval_samples_per_second": 23.98,
+      "eval_steps_per_second": 6.006,
+      "step": 600
+    },
+    {
+      "epoch": 1.9216626698641086,
+      "grad_norm": 0.4664270281791687,
+      "learning_rate": 8.117081984415298e-07,
+      "loss": 1.2665,
+      "step": 601
+    },
+    {
+      "epoch": 1.9248601119104716,
+      "grad_norm": 0.5185204148292542,
+      "learning_rate": 7.481497508972312e-07,
+      "loss": 1.1728,
+      "step": 602
+    },
+    {
+      "epoch": 1.9280575539568345,
+      "grad_norm": 0.5853208303451538,
+      "learning_rate": 6.871728263089794e-07,
+      "loss": 1.4523,
+      "step": 603
+    },
+    {
+      "epoch": 1.9312549960031973,
+      "grad_norm": 0.497002512216568,
+      "learning_rate": 6.287790106757396e-07,
+      "loss": 1.3046,
+      "step": 604
+    },
+    {
+      "epoch": 1.9344524380495605,
+      "grad_norm": 0.6803504824638367,
+      "learning_rate": 5.729698228102653e-07,
+      "loss": 1.2796,
+      "step": 605
+    },
+    {
+      "epoch": 1.9376498800959232,
+      "grad_norm": 0.4755241870880127,
+      "learning_rate": 5.19746714299596e-07,
+      "loss": 1.2355,
+      "step": 606
+    },
+    {
+      "epoch": 1.9408473221422862,
+      "grad_norm": 0.4982335567474365,
+      "learning_rate": 4.691110694673095e-07,
+      "loss": 1.367,
+      "step": 607
+    },
+    {
+      "epoch": 1.9440447641886491,
+      "grad_norm": 0.42912325263023376,
+      "learning_rate": 4.210642053375069e-07,
+      "loss": 1.2386,
+      "step": 608
+    },
+    {
+      "epoch": 1.9472422062350119,
+      "grad_norm": 0.47803112864494324,
+      "learning_rate": 3.756073716005837e-07,
+      "loss": 1.2978,
+      "step": 609
+    },
+    {
+      "epoch": 1.950439648281375,
+      "grad_norm": 0.49415600299835205,
+      "learning_rate": 3.3274175058067846e-07,
+      "loss": 1.162,
+      "step": 610
+    },
+    {
+      "epoch": 1.9536370903277378,
+      "grad_norm": 0.4752744436264038,
+      "learning_rate": 2.9246845720496407e-07,
+      "loss": 1.2916,
+      "step": 611
+    },
+    {
+      "epoch": 1.9568345323741008,
+      "grad_norm": 0.4329509735107422,
+      "learning_rate": 2.547885389746485e-07,
+      "loss": 1.2092,
+      "step": 612
+    },
+    {
+      "epoch": 1.9600319744204637,
+      "grad_norm": 0.48668840527534485,
+      "learning_rate": 2.1970297593767453e-07,
+      "loss": 1.4974,
+      "step": 613
+    },
+    {
+      "epoch": 1.9632294164668265,
+      "grad_norm": 0.4879298806190491,
+      "learning_rate": 1.8721268066330676e-07,
+      "loss": 1.2918,
+      "step": 614
+    },
+    {
+      "epoch": 1.9664268585131894,
+      "grad_norm": 0.4559149146080017,
+      "learning_rate": 1.5731849821833954e-07,
+      "loss": 1.1624,
+      "step": 615
+    },
+    {
+      "epoch": 1.9696243005595524,
+      "grad_norm": 0.541537880897522,
+      "learning_rate": 1.300212061451367e-07,
+      "loss": 1.3551,
+      "step": 616
+    },
+    {
+      "epoch": 1.9728217426059151,
+      "grad_norm": 0.5082891583442688,
+      "learning_rate": 1.0532151444140326e-07,
+      "loss": 1.2856,
+      "step": 617
+    },
+    {
+      "epoch": 1.9760191846522783,
+      "grad_norm": 0.4802534580230713,
+      "learning_rate": 8.322006554171146e-08,
+      "loss": 1.2623,
+      "step": 618
+    },
+    {
+      "epoch": 1.979216626698641,
+      "grad_norm": 0.4262090027332306,
+      "learning_rate": 6.37174343008251e-08,
+      "loss": 0.9914,
+      "step": 619
+    },
+    {
+      "epoch": 1.982414068745004,
+      "grad_norm": 0.5872986912727356,
+      "learning_rate": 4.6814127978722644e-08,
+      "loss": 1.3731,
+      "step": 620
+    },
+    {
+      "epoch": 1.985611510791367,
+      "grad_norm": 0.5640340447425842,
+      "learning_rate": 3.251058622737446e-08,
+      "loss": 1.5329,
+      "step": 621
+    },
+    {
+      "epoch": 1.9888089528377297,
+      "grad_norm": 0.5046076774597168,
+      "learning_rate": 2.080718107935198e-08,
+      "loss": 1.4028,
+      "step": 622
+    },
+    {
+      "epoch": 1.9920063948840927,
+      "grad_norm": 0.4681106209754944,
+      "learning_rate": 1.1704216938146496e-08,
+      "loss": 1.0768,
+      "step": 623
+    },
+    {
+      "epoch": 1.9952038369304557,
+      "grad_norm": 0.5022634863853455,
+      "learning_rate": 5.201930570242208e-09,
+      "loss": 1.255,
+      "step": 624
+    },
+    {
+      "epoch": 1.9984012789768184,
+      "grad_norm": 0.4953170716762543,
+      "learning_rate": 1.3004910989433682e-09,
+      "loss": 1.4848,
+      "step": 625
+    },
+    {
+      "epoch": 2.0015987210231816,
+      "grad_norm": 0.49646100401878357,
+      "learning_rate": 0.0,
+      "loss": 1.2531,
+      "step": 626
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.0168043503589786e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null