{ "best_metric": 10.313650131225586, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.6872852233676976, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003436426116838488, "grad_norm": 0.14621073007583618, "learning_rate": 7e-06, "loss": 10.3796, "step": 1 }, { "epoch": 0.003436426116838488, "eval_loss": 10.382977485656738, "eval_runtime": 1.2679, "eval_samples_per_second": 386.456, "eval_steps_per_second": 97.008, "step": 1 }, { "epoch": 0.006872852233676976, "grad_norm": 0.13013671338558197, "learning_rate": 1.4e-05, "loss": 10.381, "step": 2 }, { "epoch": 0.010309278350515464, "grad_norm": 0.13039547204971313, "learning_rate": 2.1e-05, "loss": 10.3796, "step": 3 }, { "epoch": 0.013745704467353952, "grad_norm": 0.13283351063728333, "learning_rate": 2.8e-05, "loss": 10.3808, "step": 4 }, { "epoch": 0.01718213058419244, "grad_norm": 0.13187821209430695, "learning_rate": 3.5e-05, "loss": 10.378, "step": 5 }, { "epoch": 0.020618556701030927, "grad_norm": 0.12483494728803635, "learning_rate": 4.2e-05, "loss": 10.3771, "step": 6 }, { "epoch": 0.024054982817869417, "grad_norm": 0.12284944206476212, "learning_rate": 4.899999999999999e-05, "loss": 10.3777, "step": 7 }, { "epoch": 0.027491408934707903, "grad_norm": 0.1272677779197693, "learning_rate": 5.6e-05, "loss": 10.3777, "step": 8 }, { "epoch": 0.030927835051546393, "grad_norm": 0.13827842473983765, "learning_rate": 6.3e-05, "loss": 10.379, "step": 9 }, { "epoch": 0.03436426116838488, "grad_norm": 0.13934029638767242, "learning_rate": 7e-05, "loss": 10.378, "step": 10 }, { "epoch": 0.037800687285223365, "grad_norm": 0.1622723788022995, "learning_rate": 6.999521567473641e-05, "loss": 10.3774, "step": 11 }, { "epoch": 0.041237113402061855, "grad_norm": 0.18605606257915497, "learning_rate": 6.998086400693241e-05, "loss": 10.3758, "step": 12 }, { "epoch": 0.044673539518900345, "grad_norm": 0.19694095849990845, "learning_rate": 6.995694892019065e-05, "loss": 10.378, "step": 13 }, { "epoch": 0.048109965635738834, "grad_norm": 0.17464105784893036, "learning_rate": 6.99234769526571e-05, "loss": 10.376, "step": 14 }, { "epoch": 0.05154639175257732, "grad_norm": 0.18610422313213348, "learning_rate": 6.988045725523343e-05, "loss": 10.3742, "step": 15 }, { "epoch": 0.054982817869415807, "grad_norm": 0.19709458947181702, "learning_rate": 6.982790158907539e-05, "loss": 10.3739, "step": 16 }, { "epoch": 0.058419243986254296, "grad_norm": 0.1989479809999466, "learning_rate": 6.976582432237733e-05, "loss": 10.3725, "step": 17 }, { "epoch": 0.061855670103092786, "grad_norm": 0.2221895009279251, "learning_rate": 6.969424242644413e-05, "loss": 10.3696, "step": 18 }, { "epoch": 0.06529209621993128, "grad_norm": 0.21855509281158447, "learning_rate": 6.961317547105138e-05, "loss": 10.3701, "step": 19 }, { "epoch": 0.06872852233676977, "grad_norm": 0.22505971789360046, "learning_rate": 6.952264561909527e-05, "loss": 10.3691, "step": 20 }, { "epoch": 0.07216494845360824, "grad_norm": 0.24064525961875916, "learning_rate": 6.942267762053337e-05, "loss": 10.3686, "step": 21 }, { "epoch": 0.07560137457044673, "grad_norm": 0.22770003974437714, "learning_rate": 6.931329880561832e-05, "loss": 10.3673, "step": 22 }, { "epoch": 0.07903780068728522, "grad_norm": 0.26311901211738586, "learning_rate": 6.919453907742597e-05, "loss": 10.3671, "step": 23 }, { "epoch": 0.08247422680412371, "grad_norm": 0.294527530670166, "learning_rate": 6.90664309036802e-05, "loss": 10.3605, "step": 24 }, { "epoch": 0.0859106529209622, "grad_norm": 0.28976741433143616, "learning_rate": 6.892900930787656e-05, "loss": 10.3618, "step": 25 }, { "epoch": 0.08934707903780069, "grad_norm": 0.31070640683174133, "learning_rate": 6.87823118597072e-05, "loss": 10.36, "step": 26 }, { "epoch": 0.09278350515463918, "grad_norm": 0.30455639958381653, "learning_rate": 6.862637866478969e-05, "loss": 10.3603, "step": 27 }, { "epoch": 0.09621993127147767, "grad_norm": 0.28142404556274414, "learning_rate": 6.846125235370252e-05, "loss": 10.3583, "step": 28 }, { "epoch": 0.09965635738831616, "grad_norm": 0.2736535966396332, "learning_rate": 6.828697807033038e-05, "loss": 10.3567, "step": 29 }, { "epoch": 0.10309278350515463, "grad_norm": 0.3131939470767975, "learning_rate": 6.81036034595222e-05, "loss": 10.3549, "step": 30 }, { "epoch": 0.10652920962199312, "grad_norm": 0.35810837149620056, "learning_rate": 6.791117865406564e-05, "loss": 10.3476, "step": 31 }, { "epoch": 0.10996563573883161, "grad_norm": 0.34098097681999207, "learning_rate": 6.770975626098112e-05, "loss": 10.3456, "step": 32 }, { "epoch": 0.1134020618556701, "grad_norm": 0.3132035732269287, "learning_rate": 6.749939134713974e-05, "loss": 10.3525, "step": 33 }, { "epoch": 0.11683848797250859, "grad_norm": 0.3359844386577606, "learning_rate": 6.728014142420846e-05, "loss": 10.3495, "step": 34 }, { "epoch": 0.12027491408934708, "grad_norm": 0.31782594323158264, "learning_rate": 6.7052066432927e-05, "loss": 10.3469, "step": 35 }, { "epoch": 0.12371134020618557, "grad_norm": 0.3528274893760681, "learning_rate": 6.681522872672069e-05, "loss": 10.3455, "step": 36 }, { "epoch": 0.12714776632302405, "grad_norm": 0.3631799519062042, "learning_rate": 6.656969305465356e-05, "loss": 10.3362, "step": 37 }, { "epoch": 0.13058419243986255, "grad_norm": 0.32367148995399475, "learning_rate": 6.631552654372672e-05, "loss": 10.3388, "step": 38 }, { "epoch": 0.13402061855670103, "grad_norm": 0.32164278626441956, "learning_rate": 6.60527986805264e-05, "loss": 10.3376, "step": 39 }, { "epoch": 0.13745704467353953, "grad_norm": 0.30449166893959045, "learning_rate": 6.578158129222711e-05, "loss": 10.3315, "step": 40 }, { "epoch": 0.140893470790378, "grad_norm": 0.29294437170028687, "learning_rate": 6.550194852695469e-05, "loss": 10.3386, "step": 41 }, { "epoch": 0.14432989690721648, "grad_norm": 0.2926699221134186, "learning_rate": 6.521397683351509e-05, "loss": 10.3273, "step": 42 }, { "epoch": 0.14776632302405499, "grad_norm": 0.30104655027389526, "learning_rate": 6.491774494049386e-05, "loss": 10.3345, "step": 43 }, { "epoch": 0.15120274914089346, "grad_norm": 0.32206588983535767, "learning_rate": 6.461333383473272e-05, "loss": 10.3283, "step": 44 }, { "epoch": 0.15463917525773196, "grad_norm": 0.29377982020378113, "learning_rate": 6.430082673918849e-05, "loss": 10.328, "step": 45 }, { "epoch": 0.15807560137457044, "grad_norm": 0.2791949212551117, "learning_rate": 6.398030909018069e-05, "loss": 10.3234, "step": 46 }, { "epoch": 0.16151202749140894, "grad_norm": 0.3035615384578705, "learning_rate": 6.365186851403423e-05, "loss": 10.3232, "step": 47 }, { "epoch": 0.16494845360824742, "grad_norm": 0.28784212470054626, "learning_rate": 6.331559480312315e-05, "loss": 10.3256, "step": 48 }, { "epoch": 0.16838487972508592, "grad_norm": 0.28169599175453186, "learning_rate": 6.297157989132236e-05, "loss": 10.3245, "step": 49 }, { "epoch": 0.1718213058419244, "grad_norm": 0.3047144114971161, "learning_rate": 6.261991782887377e-05, "loss": 10.3321, "step": 50 }, { "epoch": 0.1718213058419244, "eval_loss": 10.326597213745117, "eval_runtime": 1.2221, "eval_samples_per_second": 400.953, "eval_steps_per_second": 100.648, "step": 50 }, { "epoch": 0.17525773195876287, "grad_norm": 0.17344312369823456, "learning_rate": 6.226070475667393e-05, "loss": 10.344, "step": 51 }, { "epoch": 0.17869415807560138, "grad_norm": 0.1896529644727707, "learning_rate": 6.189403887999006e-05, "loss": 10.337, "step": 52 }, { "epoch": 0.18213058419243985, "grad_norm": 0.1752912849187851, "learning_rate": 6.152002044161171e-05, "loss": 10.3336, "step": 53 }, { "epoch": 0.18556701030927836, "grad_norm": 0.16864024102687836, "learning_rate": 6.113875169444539e-05, "loss": 10.3394, "step": 54 }, { "epoch": 0.18900343642611683, "grad_norm": 0.1622706800699234, "learning_rate": 6.0750336873559605e-05, "loss": 10.3406, "step": 55 }, { "epoch": 0.19243986254295534, "grad_norm": 0.1765105128288269, "learning_rate": 6.035488216768811e-05, "loss": 10.3372, "step": 56 }, { "epoch": 0.1958762886597938, "grad_norm": 0.15661264955997467, "learning_rate": 5.9952495690198894e-05, "loss": 10.3363, "step": 57 }, { "epoch": 0.19931271477663232, "grad_norm": 0.170566126704216, "learning_rate": 5.954328744953709e-05, "loss": 10.3346, "step": 58 }, { "epoch": 0.2027491408934708, "grad_norm": 0.1481970250606537, "learning_rate": 5.91273693191498e-05, "loss": 10.3371, "step": 59 }, { "epoch": 0.20618556701030927, "grad_norm": 0.14849814772605896, "learning_rate": 5.870485500690094e-05, "loss": 10.3411, "step": 60 }, { "epoch": 0.20962199312714777, "grad_norm": 0.14094178378582, "learning_rate": 5.827586002398468e-05, "loss": 10.3391, "step": 61 }, { "epoch": 0.21305841924398625, "grad_norm": 0.13221575319766998, "learning_rate": 5.784050165334589e-05, "loss": 10.3233, "step": 62 }, { "epoch": 0.21649484536082475, "grad_norm": 0.12167951464653015, "learning_rate": 5.739889891761608e-05, "loss": 10.3296, "step": 63 }, { "epoch": 0.21993127147766323, "grad_norm": 0.11556979268789291, "learning_rate": 5.6951172546573794e-05, "loss": 10.3221, "step": 64 }, { "epoch": 0.22336769759450173, "grad_norm": 0.1198616772890091, "learning_rate": 5.6497444944138376e-05, "loss": 10.3265, "step": 65 }, { "epoch": 0.2268041237113402, "grad_norm": 0.1239163875579834, "learning_rate": 5.603784015490587e-05, "loss": 10.3234, "step": 66 }, { "epoch": 0.23024054982817868, "grad_norm": 0.10672570765018463, "learning_rate": 5.557248383023655e-05, "loss": 10.325, "step": 67 }, { "epoch": 0.23367697594501718, "grad_norm": 0.09688537567853928, "learning_rate": 5.510150319390302e-05, "loss": 10.3251, "step": 68 }, { "epoch": 0.23711340206185566, "grad_norm": 0.10790200531482697, "learning_rate": 5.4625027007308546e-05, "loss": 10.3222, "step": 69 }, { "epoch": 0.24054982817869416, "grad_norm": 0.10650131851434708, "learning_rate": 5.414318553428494e-05, "loss": 10.3246, "step": 70 }, { "epoch": 0.24398625429553264, "grad_norm": 0.09294351935386658, "learning_rate": 5.3656110505479776e-05, "loss": 10.3206, "step": 71 }, { "epoch": 0.24742268041237114, "grad_norm": 0.07881864905357361, "learning_rate": 5.316393508234253e-05, "loss": 10.3213, "step": 72 }, { "epoch": 0.2508591065292096, "grad_norm": 0.07995471358299255, "learning_rate": 5.266679382071953e-05, "loss": 10.3194, "step": 73 }, { "epoch": 0.2542955326460481, "grad_norm": 0.10525557398796082, "learning_rate": 5.216482263406778e-05, "loss": 10.3193, "step": 74 }, { "epoch": 0.25773195876288657, "grad_norm": 0.12144356966018677, "learning_rate": 5.1658158756297576e-05, "loss": 10.3155, "step": 75 }, { "epoch": 0.2611683848797251, "grad_norm": 0.11480545252561569, "learning_rate": 5.114694070425407e-05, "loss": 10.3142, "step": 76 }, { "epoch": 0.2646048109965636, "grad_norm": 0.08777925372123718, "learning_rate": 5.063130823984823e-05, "loss": 10.3183, "step": 77 }, { "epoch": 0.26804123711340205, "grad_norm": 0.08677059412002563, "learning_rate": 5.011140233184724e-05, "loss": 10.3156, "step": 78 }, { "epoch": 0.27147766323024053, "grad_norm": 0.10238943248987198, "learning_rate": 4.958736511733516e-05, "loss": 10.3091, "step": 79 }, { "epoch": 0.27491408934707906, "grad_norm": 0.12061156332492828, "learning_rate": 4.905933986285393e-05, "loss": 10.3181, "step": 80 }, { "epoch": 0.27835051546391754, "grad_norm": 0.1294454038143158, "learning_rate": 4.8527470925235824e-05, "loss": 10.3156, "step": 81 }, { "epoch": 0.281786941580756, "grad_norm": 0.09726651757955551, "learning_rate": 4.799190371213772e-05, "loss": 10.3096, "step": 82 }, { "epoch": 0.2852233676975945, "grad_norm": 0.10515337437391281, "learning_rate": 4.745278464228808e-05, "loss": 10.315, "step": 83 }, { "epoch": 0.28865979381443296, "grad_norm": 0.10330478101968765, "learning_rate": 4.69102611054575e-05, "loss": 10.3112, "step": 84 }, { "epoch": 0.2920962199312715, "grad_norm": 0.11965789645910263, "learning_rate": 4.6364481422163926e-05, "loss": 10.3146, "step": 85 }, { "epoch": 0.29553264604810997, "grad_norm": 0.11173126846551895, "learning_rate": 4.581559480312316e-05, "loss": 10.309, "step": 86 }, { "epoch": 0.29896907216494845, "grad_norm": 0.12359092384576797, "learning_rate": 4.526375130845627e-05, "loss": 10.3085, "step": 87 }, { "epoch": 0.3024054982817869, "grad_norm": 0.10708631575107574, "learning_rate": 4.4709101806664554e-05, "loss": 10.3095, "step": 88 }, { "epoch": 0.30584192439862545, "grad_norm": 0.13176876306533813, "learning_rate": 4.4151797933383685e-05, "loss": 10.309, "step": 89 }, { "epoch": 0.30927835051546393, "grad_norm": 0.1190120279788971, "learning_rate": 4.359199204992797e-05, "loss": 10.3115, "step": 90 }, { "epoch": 0.3127147766323024, "grad_norm": 0.16441021859645844, "learning_rate": 4.30298372016363e-05, "loss": 10.3109, "step": 91 }, { "epoch": 0.3161512027491409, "grad_norm": 0.14107558131217957, "learning_rate": 4.246548707603114e-05, "loss": 10.3065, "step": 92 }, { "epoch": 0.31958762886597936, "grad_norm": 0.11827161908149719, "learning_rate": 4.1899095960801805e-05, "loss": 10.3054, "step": 93 }, { "epoch": 0.3230240549828179, "grad_norm": 0.13922590017318726, "learning_rate": 4.133081870162385e-05, "loss": 10.3067, "step": 94 }, { "epoch": 0.32646048109965636, "grad_norm": 0.16643556952476501, "learning_rate": 4.076081065982569e-05, "loss": 10.3099, "step": 95 }, { "epoch": 0.32989690721649484, "grad_norm": 0.198710635304451, "learning_rate": 4.018922766991447e-05, "loss": 10.3113, "step": 96 }, { "epoch": 0.3333333333333333, "grad_norm": 0.1632414162158966, "learning_rate": 3.961622599697241e-05, "loss": 10.313, "step": 97 }, { "epoch": 0.33676975945017185, "grad_norm": 0.23238177597522736, "learning_rate": 3.9041962293935516e-05, "loss": 10.3027, "step": 98 }, { "epoch": 0.3402061855670103, "grad_norm": 0.23765434324741364, "learning_rate": 3.84665935587662e-05, "loss": 10.3145, "step": 99 }, { "epoch": 0.3436426116838488, "grad_norm": 0.24826158583164215, "learning_rate": 3.7890277091531636e-05, "loss": 10.33, "step": 100 }, { "epoch": 0.3436426116838488, "eval_loss": 10.316010475158691, "eval_runtime": 1.2121, "eval_samples_per_second": 404.267, "eval_steps_per_second": 101.479, "step": 100 }, { "epoch": 0.3470790378006873, "grad_norm": 0.10495775938034058, "learning_rate": 3.7313170451399475e-05, "loss": 10.3287, "step": 101 }, { "epoch": 0.35051546391752575, "grad_norm": 0.135188028216362, "learning_rate": 3.673543141356278e-05, "loss": 10.3303, "step": 102 }, { "epoch": 0.3539518900343643, "grad_norm": 0.12455492466688156, "learning_rate": 3.6157217926105783e-05, "loss": 10.3329, "step": 103 }, { "epoch": 0.35738831615120276, "grad_norm": 0.10499635338783264, "learning_rate": 3.557868806682255e-05, "loss": 10.3307, "step": 104 }, { "epoch": 0.36082474226804123, "grad_norm": 0.10193989425897598, "learning_rate": 3.5e-05, "loss": 10.3303, "step": 105 }, { "epoch": 0.3642611683848797, "grad_norm": 0.11833474785089493, "learning_rate": 3.442131193317745e-05, "loss": 10.3314, "step": 106 }, { "epoch": 0.36769759450171824, "grad_norm": 0.1317451298236847, "learning_rate": 3.384278207389421e-05, "loss": 10.3295, "step": 107 }, { "epoch": 0.3711340206185567, "grad_norm": 0.1392153650522232, "learning_rate": 3.3264568586437216e-05, "loss": 10.3321, "step": 108 }, { "epoch": 0.3745704467353952, "grad_norm": 0.11478523164987564, "learning_rate": 3.268682954860052e-05, "loss": 10.3325, "step": 109 }, { "epoch": 0.37800687285223367, "grad_norm": 0.12629568576812744, "learning_rate": 3.210972290846837e-05, "loss": 10.3336, "step": 110 }, { "epoch": 0.38144329896907214, "grad_norm": 0.10715722292661667, "learning_rate": 3.15334064412338e-05, "loss": 10.3338, "step": 111 }, { "epoch": 0.3848797250859107, "grad_norm": 0.06600625813007355, "learning_rate": 3.0958037706064485e-05, "loss": 10.3184, "step": 112 }, { "epoch": 0.38831615120274915, "grad_norm": 0.06942149996757507, "learning_rate": 3.038377400302758e-05, "loss": 10.3184, "step": 113 }, { "epoch": 0.3917525773195876, "grad_norm": 0.07464867830276489, "learning_rate": 2.9810772330085524e-05, "loss": 10.3213, "step": 114 }, { "epoch": 0.3951890034364261, "grad_norm": 0.09142441302537918, "learning_rate": 2.9239189340174306e-05, "loss": 10.3182, "step": 115 }, { "epoch": 0.39862542955326463, "grad_norm": 0.06075633689761162, "learning_rate": 2.8669181298376163e-05, "loss": 10.3187, "step": 116 }, { "epoch": 0.4020618556701031, "grad_norm": 0.08275322616100311, "learning_rate": 2.8100904039198193e-05, "loss": 10.3168, "step": 117 }, { "epoch": 0.4054982817869416, "grad_norm": 0.07392256706953049, "learning_rate": 2.7534512923968863e-05, "loss": 10.317, "step": 118 }, { "epoch": 0.40893470790378006, "grad_norm": 0.0642940104007721, "learning_rate": 2.6970162798363695e-05, "loss": 10.3149, "step": 119 }, { "epoch": 0.41237113402061853, "grad_norm": 0.05324407294392586, "learning_rate": 2.640800795007203e-05, "loss": 10.3161, "step": 120 }, { "epoch": 0.41580756013745707, "grad_norm": 0.060321275144815445, "learning_rate": 2.5848202066616305e-05, "loss": 10.3181, "step": 121 }, { "epoch": 0.41924398625429554, "grad_norm": 0.05816487967967987, "learning_rate": 2.5290898193335446e-05, "loss": 10.3192, "step": 122 }, { "epoch": 0.422680412371134, "grad_norm": 0.061207115650177, "learning_rate": 2.4736248691543736e-05, "loss": 10.314, "step": 123 }, { "epoch": 0.4261168384879725, "grad_norm": 0.05672181025147438, "learning_rate": 2.4184405196876842e-05, "loss": 10.3127, "step": 124 }, { "epoch": 0.42955326460481097, "grad_norm": 0.06459632515907288, "learning_rate": 2.363551857783608e-05, "loss": 10.3139, "step": 125 }, { "epoch": 0.4329896907216495, "grad_norm": 0.061975572258234024, "learning_rate": 2.308973889454249e-05, "loss": 10.3089, "step": 126 }, { "epoch": 0.436426116838488, "grad_norm": 0.05950874462723732, "learning_rate": 2.2547215357711918e-05, "loss": 10.3154, "step": 127 }, { "epoch": 0.43986254295532645, "grad_norm": 0.07566502690315247, "learning_rate": 2.2008096287862266e-05, "loss": 10.3081, "step": 128 }, { "epoch": 0.44329896907216493, "grad_norm": 0.08625920116901398, "learning_rate": 2.1472529074764177e-05, "loss": 10.3082, "step": 129 }, { "epoch": 0.44673539518900346, "grad_norm": 0.06160920113325119, "learning_rate": 2.0940660137146074e-05, "loss": 10.3107, "step": 130 }, { "epoch": 0.45017182130584193, "grad_norm": 0.0664173811674118, "learning_rate": 2.041263488266484e-05, "loss": 10.3091, "step": 131 }, { "epoch": 0.4536082474226804, "grad_norm": 0.059261005371809006, "learning_rate": 1.988859766815275e-05, "loss": 10.311, "step": 132 }, { "epoch": 0.4570446735395189, "grad_norm": 0.10069288313388824, "learning_rate": 1.9368691760151773e-05, "loss": 10.3081, "step": 133 }, { "epoch": 0.46048109965635736, "grad_norm": 0.1071212887763977, "learning_rate": 1.885305929574593e-05, "loss": 10.3057, "step": 134 }, { "epoch": 0.4639175257731959, "grad_norm": 0.09773365408182144, "learning_rate": 1.8341841243702424e-05, "loss": 10.3078, "step": 135 }, { "epoch": 0.46735395189003437, "grad_norm": 0.08326241374015808, "learning_rate": 1.7835177365932225e-05, "loss": 10.3074, "step": 136 }, { "epoch": 0.47079037800687284, "grad_norm": 0.08722704648971558, "learning_rate": 1.7333206179280478e-05, "loss": 10.3082, "step": 137 }, { "epoch": 0.4742268041237113, "grad_norm": 0.11179947853088379, "learning_rate": 1.6836064917657478e-05, "loss": 10.3017, "step": 138 }, { "epoch": 0.47766323024054985, "grad_norm": 0.0975564494729042, "learning_rate": 1.6343889494520224e-05, "loss": 10.3016, "step": 139 }, { "epoch": 0.48109965635738833, "grad_norm": 0.1403576284646988, "learning_rate": 1.5856814465715064e-05, "loss": 10.2993, "step": 140 }, { "epoch": 0.4845360824742268, "grad_norm": 0.1420505940914154, "learning_rate": 1.5374972992691458e-05, "loss": 10.3045, "step": 141 }, { "epoch": 0.4879725085910653, "grad_norm": 0.10898818075656891, "learning_rate": 1.4898496806096974e-05, "loss": 10.3004, "step": 142 }, { "epoch": 0.49140893470790376, "grad_norm": 0.17305278778076172, "learning_rate": 1.4427516169763444e-05, "loss": 10.293, "step": 143 }, { "epoch": 0.4948453608247423, "grad_norm": 0.11871832609176636, "learning_rate": 1.396215984509412e-05, "loss": 10.3052, "step": 144 }, { "epoch": 0.49828178694158076, "grad_norm": 0.17952845990657806, "learning_rate": 1.3502555055861625e-05, "loss": 10.3035, "step": 145 }, { "epoch": 0.5017182130584192, "grad_norm": 0.13097617030143738, "learning_rate": 1.3048827453426203e-05, "loss": 10.3123, "step": 146 }, { "epoch": 0.5051546391752577, "grad_norm": 0.15512241423130035, "learning_rate": 1.2601101082383917e-05, "loss": 10.302, "step": 147 }, { "epoch": 0.5085910652920962, "grad_norm": 0.18273089826107025, "learning_rate": 1.2159498346654094e-05, "loss": 10.3085, "step": 148 }, { "epoch": 0.5120274914089347, "grad_norm": 0.22535182535648346, "learning_rate": 1.1724139976015306e-05, "loss": 10.3045, "step": 149 }, { "epoch": 0.5154639175257731, "grad_norm": 0.21504375338554382, "learning_rate": 1.1295144993099068e-05, "loss": 10.3068, "step": 150 }, { "epoch": 0.5154639175257731, "eval_loss": 10.313986778259277, "eval_runtime": 1.2091, "eval_samples_per_second": 405.25, "eval_steps_per_second": 101.726, "step": 150 }, { "epoch": 0.5189003436426117, "grad_norm": 0.10286428779363632, "learning_rate": 1.0872630680850196e-05, "loss": 10.3191, "step": 151 }, { "epoch": 0.5223367697594502, "grad_norm": 0.1072426438331604, "learning_rate": 1.0456712550462898e-05, "loss": 10.3293, "step": 152 }, { "epoch": 0.5257731958762887, "grad_norm": 0.1187005415558815, "learning_rate": 1.0047504309801104e-05, "loss": 10.3297, "step": 153 }, { "epoch": 0.5292096219931272, "grad_norm": 0.13239331543445587, "learning_rate": 9.645117832311886e-06, "loss": 10.3274, "step": 154 }, { "epoch": 0.5326460481099656, "grad_norm": 0.1178116425871849, "learning_rate": 9.249663126440394e-06, "loss": 10.3342, "step": 155 }, { "epoch": 0.5360824742268041, "grad_norm": 0.11966834217309952, "learning_rate": 8.861248305554624e-06, "loss": 10.3277, "step": 156 }, { "epoch": 0.5395189003436426, "grad_norm": 0.1354375034570694, "learning_rate": 8.47997955838829e-06, "loss": 10.3306, "step": 157 }, { "epoch": 0.5429553264604811, "grad_norm": 0.09789423644542694, "learning_rate": 8.10596112000994e-06, "loss": 10.3306, "step": 158 }, { "epoch": 0.5463917525773195, "grad_norm": 0.11997918039560318, "learning_rate": 7.739295243326067e-06, "loss": 10.3291, "step": 159 }, { "epoch": 0.5498281786941581, "grad_norm": 0.13282926380634308, "learning_rate": 7.380082171126228e-06, "loss": 10.3291, "step": 160 }, { "epoch": 0.5532646048109966, "grad_norm": 0.1250826120376587, "learning_rate": 7.028420108677635e-06, "loss": 10.3297, "step": 161 }, { "epoch": 0.5567010309278351, "grad_norm": 0.08764857798814774, "learning_rate": 6.684405196876842e-06, "loss": 10.3315, "step": 162 }, { "epoch": 0.5601374570446735, "grad_norm": 0.08268949389457703, "learning_rate": 6.3481314859657675e-06, "loss": 10.3211, "step": 163 }, { "epoch": 0.563573883161512, "grad_norm": 0.07326449453830719, "learning_rate": 6.019690909819298e-06, "loss": 10.322, "step": 164 }, { "epoch": 0.5670103092783505, "grad_norm": 0.06199254095554352, "learning_rate": 5.6991732608115e-06, "loss": 10.3173, "step": 165 }, { "epoch": 0.570446735395189, "grad_norm": 0.06376270204782486, "learning_rate": 5.386666165267256e-06, "loss": 10.3183, "step": 166 }, { "epoch": 0.5738831615120275, "grad_norm": 0.05299001559615135, "learning_rate": 5.08225505950613e-06, "loss": 10.3187, "step": 167 }, { "epoch": 0.5773195876288659, "grad_norm": 0.06819414347410202, "learning_rate": 4.786023166484913e-06, "loss": 10.3203, "step": 168 }, { "epoch": 0.5807560137457045, "grad_norm": 0.060465797781944275, "learning_rate": 4.498051473045291e-06, "loss": 10.3163, "step": 169 }, { "epoch": 0.584192439862543, "grad_norm": 0.05379629135131836, "learning_rate": 4.218418707772886e-06, "loss": 10.3172, "step": 170 }, { "epoch": 0.5876288659793815, "grad_norm": 0.051934510469436646, "learning_rate": 3.947201319473587e-06, "loss": 10.315, "step": 171 }, { "epoch": 0.5910652920962199, "grad_norm": 0.06327671557664871, "learning_rate": 3.684473456273278e-06, "loss": 10.3175, "step": 172 }, { "epoch": 0.5945017182130584, "grad_norm": 0.0464518778026104, "learning_rate": 3.4303069453464383e-06, "loss": 10.3134, "step": 173 }, { "epoch": 0.5979381443298969, "grad_norm": 0.08796032518148422, "learning_rate": 3.184771273279312e-06, "loss": 10.3112, "step": 174 }, { "epoch": 0.6013745704467354, "grad_norm": 0.053006719797849655, "learning_rate": 2.947933567072987e-06, "loss": 10.3147, "step": 175 }, { "epoch": 0.6048109965635738, "grad_norm": 0.07130801677703857, "learning_rate": 2.719858575791534e-06, "loss": 10.3101, "step": 176 }, { "epoch": 0.6082474226804123, "grad_norm": 0.0520925372838974, "learning_rate": 2.500608652860256e-06, "loss": 10.3158, "step": 177 }, { "epoch": 0.6116838487972509, "grad_norm": 0.07356636226177216, "learning_rate": 2.2902437390188737e-06, "loss": 10.309, "step": 178 }, { "epoch": 0.6151202749140894, "grad_norm": 0.06691022962331772, "learning_rate": 2.0888213459343587e-06, "loss": 10.3082, "step": 179 }, { "epoch": 0.6185567010309279, "grad_norm": 0.08219817280769348, "learning_rate": 1.8963965404777875e-06, "loss": 10.3076, "step": 180 }, { "epoch": 0.6219931271477663, "grad_norm": 0.07329320162534714, "learning_rate": 1.7130219296696263e-06, "loss": 10.3096, "step": 181 }, { "epoch": 0.6254295532646048, "grad_norm": 0.11828658729791641, "learning_rate": 1.5387476462974824e-06, "loss": 10.303, "step": 182 }, { "epoch": 0.6288659793814433, "grad_norm": 0.06761389970779419, "learning_rate": 1.3736213352103147e-06, "loss": 10.3093, "step": 183 }, { "epoch": 0.6323024054982818, "grad_norm": 0.0602264404296875, "learning_rate": 1.2176881402928002e-06, "loss": 10.3025, "step": 184 }, { "epoch": 0.6357388316151202, "grad_norm": 0.09645050019025803, "learning_rate": 1.0709906921234367e-06, "loss": 10.3069, "step": 185 }, { "epoch": 0.6391752577319587, "grad_norm": 0.08812431246042252, "learning_rate": 9.33569096319799e-07, "loss": 10.3071, "step": 186 }, { "epoch": 0.6426116838487973, "grad_norm": 0.08224661648273468, "learning_rate": 8.054609225740255e-07, "loss": 10.3073, "step": 187 }, { "epoch": 0.6460481099656358, "grad_norm": 0.10281522572040558, "learning_rate": 6.867011943816724e-07, "loss": 10.2995, "step": 188 }, { "epoch": 0.6494845360824743, "grad_norm": 0.10730140656232834, "learning_rate": 5.77322379466617e-07, "loss": 10.3019, "step": 189 }, { "epoch": 0.6529209621993127, "grad_norm": 0.09621263295412064, "learning_rate": 4.773543809047186e-07, "loss": 10.3072, "step": 190 }, { "epoch": 0.6563573883161512, "grad_norm": 0.09780553728342056, "learning_rate": 3.868245289486027e-07, "loss": 10.2942, "step": 191 }, { "epoch": 0.6597938144329897, "grad_norm": 0.09069101512432098, "learning_rate": 3.0575757355586817e-07, "loss": 10.2994, "step": 192 }, { "epoch": 0.6632302405498282, "grad_norm": 0.09934303909540176, "learning_rate": 2.3417567762266497e-07, "loss": 10.2954, "step": 193 }, { "epoch": 0.6666666666666666, "grad_norm": 0.11044549196958542, "learning_rate": 1.7209841092460043e-07, "loss": 10.303, "step": 194 }, { "epoch": 0.6701030927835051, "grad_norm": 0.11844594776630402, "learning_rate": 1.1954274476655534e-07, "loss": 10.3086, "step": 195 }, { "epoch": 0.6735395189003437, "grad_norm": 0.1371518224477768, "learning_rate": 7.652304734289127e-08, "loss": 10.2992, "step": 196 }, { "epoch": 0.6769759450171822, "grad_norm": 0.1752624660730362, "learning_rate": 4.30510798093342e-08, "loss": 10.3042, "step": 197 }, { "epoch": 0.6804123711340206, "grad_norm": 0.20586471259593964, "learning_rate": 1.9135993067588284e-08, "loss": 10.2985, "step": 198 }, { "epoch": 0.6838487972508591, "grad_norm": 0.21869732439517975, "learning_rate": 4.784325263584854e-09, "loss": 10.299, "step": 199 }, { "epoch": 0.6872852233676976, "grad_norm": 0.2391654998064041, "learning_rate": 0.0, "loss": 10.3077, "step": 200 }, { "epoch": 0.6872852233676976, "eval_loss": 10.313650131225586, "eval_runtime": 1.2572, "eval_samples_per_second": 389.751, "eval_steps_per_second": 97.835, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 52854480961536.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }