{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999838628991915, "eval_steps": 500, "global_step": 15492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 9.779763221740723, "learning_rate": 2.1505376344086022e-08, "loss": 1.1562, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.056145191192627, "learning_rate": 4.3010752688172045e-08, "loss": 1.1506, "step": 2 }, { "epoch": 0.0, "grad_norm": 7.74028205871582, "learning_rate": 6.451612903225807e-08, "loss": 1.1414, "step": 3 }, { "epoch": 0.0, "grad_norm": 8.438684463500977, "learning_rate": 8.602150537634409e-08, "loss": 1.2422, "step": 4 }, { "epoch": 0.0, "grad_norm": 7.811471939086914, "learning_rate": 1.0752688172043012e-07, "loss": 1.1386, "step": 5 }, { "epoch": 0.0, "grad_norm": 8.381765365600586, "learning_rate": 1.2903225806451614e-07, "loss": 1.1748, "step": 6 }, { "epoch": 0.0, "grad_norm": 7.129976749420166, "learning_rate": 1.5053763440860217e-07, "loss": 1.1047, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.7725300788879395, "learning_rate": 1.7204301075268818e-07, "loss": 1.1502, "step": 8 }, { "epoch": 0.0, "grad_norm": 6.109231948852539, "learning_rate": 1.935483870967742e-07, "loss": 1.1074, "step": 9 }, { "epoch": 0.0, "grad_norm": 7.641543388366699, "learning_rate": 2.1505376344086024e-07, "loss": 1.151, "step": 10 }, { "epoch": 0.0, "grad_norm": 7.298976898193359, "learning_rate": 2.3655913978494625e-07, "loss": 1.176, "step": 11 }, { "epoch": 0.0, "grad_norm": 6.175539970397949, "learning_rate": 2.580645161290323e-07, "loss": 1.133, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.808745861053467, "learning_rate": 2.795698924731183e-07, "loss": 1.1438, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.53105640411377, "learning_rate": 3.0107526881720434e-07, "loss": 1.1382, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.016663551330566, "learning_rate": 3.2258064516129035e-07, "loss": 1.1772, "step": 15 }, { "epoch": 0.0, "grad_norm": 6.884775638580322, "learning_rate": 3.4408602150537636e-07, "loss": 1.1375, "step": 16 }, { "epoch": 0.0, "grad_norm": 8.930130958557129, "learning_rate": 3.655913978494624e-07, "loss": 1.1963, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.91475248336792, "learning_rate": 3.870967741935484e-07, "loss": 1.1097, "step": 18 }, { "epoch": 0.0, "grad_norm": 6.964858055114746, "learning_rate": 4.086021505376345e-07, "loss": 1.1307, "step": 19 }, { "epoch": 0.0, "grad_norm": 8.291003227233887, "learning_rate": 4.301075268817205e-07, "loss": 1.1258, "step": 20 }, { "epoch": 0.0, "grad_norm": 6.160224914550781, "learning_rate": 4.5161290322580644e-07, "loss": 1.0827, "step": 21 }, { "epoch": 0.0, "grad_norm": 6.527561664581299, "learning_rate": 4.731182795698925e-07, "loss": 1.0722, "step": 22 }, { "epoch": 0.0, "grad_norm": 6.242156505584717, "learning_rate": 4.946236559139786e-07, "loss": 1.0346, "step": 23 }, { "epoch": 0.0, "grad_norm": 5.871310234069824, "learning_rate": 5.161290322580646e-07, "loss": 1.0797, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.9141716957092285, "learning_rate": 5.376344086021506e-07, "loss": 1.0784, "step": 25 }, { "epoch": 0.0, "grad_norm": 5.260191440582275, "learning_rate": 5.591397849462366e-07, "loss": 1.0509, "step": 26 }, { "epoch": 0.0, "grad_norm": 5.377659320831299, "learning_rate": 5.806451612903227e-07, "loss": 1.0645, "step": 27 }, { "epoch": 0.0, "grad_norm": 5.446381092071533, "learning_rate": 6.021505376344087e-07, "loss": 1.0806, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.747959613800049, "learning_rate": 6.236559139784947e-07, "loss": 0.9726, "step": 29 }, { "epoch": 0.0, "grad_norm": 3.957566499710083, "learning_rate": 6.451612903225807e-07, "loss": 0.9665, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.233704090118408, "learning_rate": 6.666666666666667e-07, "loss": 0.9761, "step": 31 }, { "epoch": 0.0, "grad_norm": 4.070135593414307, "learning_rate": 6.881720430107527e-07, "loss": 0.9533, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.709791421890259, "learning_rate": 7.096774193548388e-07, "loss": 0.9762, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.7802231311798096, "learning_rate": 7.311827956989248e-07, "loss": 0.9639, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.533092737197876, "learning_rate": 7.526881720430108e-07, "loss": 0.9346, "step": 35 }, { "epoch": 0.0, "grad_norm": 3.57824969291687, "learning_rate": 7.741935483870968e-07, "loss": 0.9414, "step": 36 }, { "epoch": 0.0, "grad_norm": 3.4498841762542725, "learning_rate": 7.95698924731183e-07, "loss": 0.9666, "step": 37 }, { "epoch": 0.0, "grad_norm": 3.7886860370635986, "learning_rate": 8.17204301075269e-07, "loss": 0.9449, "step": 38 }, { "epoch": 0.0, "grad_norm": 3.0164389610290527, "learning_rate": 8.38709677419355e-07, "loss": 0.9183, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.6029374599456787, "learning_rate": 8.60215053763441e-07, "loss": 0.8925, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.417372703552246, "learning_rate": 8.817204301075269e-07, "loss": 0.8725, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.54435133934021, "learning_rate": 9.032258064516129e-07, "loss": 0.8836, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.3039801120758057, "learning_rate": 9.24731182795699e-07, "loss": 0.8681, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.314213991165161, "learning_rate": 9.46236559139785e-07, "loss": 0.88, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.0663201808929443, "learning_rate": 9.67741935483871e-07, "loss": 0.8555, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.177731990814209, "learning_rate": 9.892473118279571e-07, "loss": 0.9477, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.26688814163208, "learning_rate": 1.010752688172043e-06, "loss": 0.8098, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.212724208831787, "learning_rate": 1.0322580645161291e-06, "loss": 0.8165, "step": 48 }, { "epoch": 0.0, "grad_norm": 1.9739530086517334, "learning_rate": 1.0537634408602152e-06, "loss": 0.8132, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.9489492177963257, "learning_rate": 1.0752688172043011e-06, "loss": 0.8189, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.31488299369812, "learning_rate": 1.0967741935483872e-06, "loss": 0.8279, "step": 51 }, { "epoch": 0.0, "grad_norm": 1.9821362495422363, "learning_rate": 1.1182795698924731e-06, "loss": 0.8432, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.226656913757324, "learning_rate": 1.1397849462365593e-06, "loss": 0.8712, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.9179885387420654, "learning_rate": 1.1612903225806454e-06, "loss": 0.8491, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.8012481927871704, "learning_rate": 1.1827956989247313e-06, "loss": 0.7886, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.863537073135376, "learning_rate": 1.2043010752688174e-06, "loss": 0.8363, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.964224934577942, "learning_rate": 1.2258064516129033e-06, "loss": 0.8546, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.041654348373413, "learning_rate": 1.2473118279569894e-06, "loss": 0.783, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.8591960668563843, "learning_rate": 1.2688172043010755e-06, "loss": 0.6981, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.7832316160202026, "learning_rate": 1.2903225806451614e-06, "loss": 0.7783, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.7388675212860107, "learning_rate": 1.3118279569892475e-06, "loss": 0.7882, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.8810282945632935, "learning_rate": 1.3333333333333334e-06, "loss": 0.7522, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.8771487474441528, "learning_rate": 1.3548387096774195e-06, "loss": 0.8439, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.6894810199737549, "learning_rate": 1.3763440860215054e-06, "loss": 0.7739, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.8120399713516235, "learning_rate": 1.3978494623655913e-06, "loss": 0.7816, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.8787275552749634, "learning_rate": 1.4193548387096776e-06, "loss": 0.8567, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.5519928932189941, "learning_rate": 1.4408602150537635e-06, "loss": 0.7372, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.6324131488800049, "learning_rate": 1.4623655913978497e-06, "loss": 0.8208, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.6370766162872314, "learning_rate": 1.4838709677419356e-06, "loss": 0.7789, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.7306194305419922, "learning_rate": 1.5053763440860217e-06, "loss": 0.7692, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.4681227207183838, "learning_rate": 1.5268817204301076e-06, "loss": 0.6737, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.5740220546722412, "learning_rate": 1.5483870967741937e-06, "loss": 0.7382, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.5179052352905273, "learning_rate": 1.5698924731182796e-06, "loss": 0.7283, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.4919624328613281, "learning_rate": 1.591397849462366e-06, "loss": 0.7364, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.636202096939087, "learning_rate": 1.6129032258064516e-06, "loss": 0.7353, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.5289045572280884, "learning_rate": 1.634408602150538e-06, "loss": 0.745, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.4717429876327515, "learning_rate": 1.6559139784946238e-06, "loss": 0.7126, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.6405115127563477, "learning_rate": 1.67741935483871e-06, "loss": 0.7345, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.536274790763855, "learning_rate": 1.6989247311827958e-06, "loss": 0.6935, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.439832091331482, "learning_rate": 1.720430107526882e-06, "loss": 0.6937, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.5801339149475098, "learning_rate": 1.7419354838709678e-06, "loss": 0.7354, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.617895245552063, "learning_rate": 1.7634408602150537e-06, "loss": 0.7754, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.6002197265625, "learning_rate": 1.7849462365591399e-06, "loss": 0.7033, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.5745460987091064, "learning_rate": 1.8064516129032258e-06, "loss": 0.7415, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.5314617156982422, "learning_rate": 1.827956989247312e-06, "loss": 0.7793, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.682547926902771, "learning_rate": 1.849462365591398e-06, "loss": 0.7479, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.4831479787826538, "learning_rate": 1.870967741935484e-06, "loss": 0.7761, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.5317834615707397, "learning_rate": 1.89247311827957e-06, "loss": 0.7798, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.6041256189346313, "learning_rate": 1.9139784946236563e-06, "loss": 0.7434, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.4544087648391724, "learning_rate": 1.935483870967742e-06, "loss": 0.6918, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.4118844270706177, "learning_rate": 1.956989247311828e-06, "loss": 0.7577, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.493300437927246, "learning_rate": 1.9784946236559142e-06, "loss": 0.727, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.5533000230789185, "learning_rate": 2.0000000000000003e-06, "loss": 0.7305, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.4120842218399048, "learning_rate": 2.021505376344086e-06, "loss": 0.7335, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.515145182609558, "learning_rate": 2.043010752688172e-06, "loss": 0.7321, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.3799384832382202, "learning_rate": 2.0645161290322582e-06, "loss": 0.7323, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.7641617059707642, "learning_rate": 2.086021505376344e-06, "loss": 0.7697, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.4051079750061035, "learning_rate": 2.1075268817204305e-06, "loss": 0.6658, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.5083285570144653, "learning_rate": 2.129032258064516e-06, "loss": 0.7757, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.5822933912277222, "learning_rate": 2.1505376344086023e-06, "loss": 0.7456, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.5780296325683594, "learning_rate": 2.1720430107526884e-06, "loss": 0.6747, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.4694095849990845, "learning_rate": 2.1935483870967745e-06, "loss": 0.6984, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.5026099681854248, "learning_rate": 2.21505376344086e-06, "loss": 0.7368, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.5145339965820312, "learning_rate": 2.2365591397849463e-06, "loss": 0.7108, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.5217875242233276, "learning_rate": 2.2580645161290324e-06, "loss": 0.6985, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.4532506465911865, "learning_rate": 2.2795698924731185e-06, "loss": 0.668, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.494356632232666, "learning_rate": 2.3010752688172046e-06, "loss": 0.7506, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.3570446968078613, "learning_rate": 2.3225806451612907e-06, "loss": 0.6852, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.446302056312561, "learning_rate": 2.3440860215053764e-06, "loss": 0.6646, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.3760210275650024, "learning_rate": 2.3655913978494625e-06, "loss": 0.7418, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.4000678062438965, "learning_rate": 2.3870967741935486e-06, "loss": 0.6615, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.5761970281600952, "learning_rate": 2.4086021505376348e-06, "loss": 0.662, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.4585932493209839, "learning_rate": 2.4301075268817204e-06, "loss": 0.6507, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.4953062534332275, "learning_rate": 2.4516129032258066e-06, "loss": 0.7453, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.4963825941085815, "learning_rate": 2.4731182795698927e-06, "loss": 0.7007, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.6004172563552856, "learning_rate": 2.4946236559139788e-06, "loss": 0.7326, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.3959921598434448, "learning_rate": 2.5161290322580645e-06, "loss": 0.6623, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.4449049234390259, "learning_rate": 2.537634408602151e-06, "loss": 0.654, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.3591331243515015, "learning_rate": 2.5591397849462367e-06, "loss": 0.6298, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.4383726119995117, "learning_rate": 2.580645161290323e-06, "loss": 0.6909, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.4664223194122314, "learning_rate": 2.6021505376344085e-06, "loss": 0.6293, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.4478882551193237, "learning_rate": 2.623655913978495e-06, "loss": 0.752, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.625137209892273, "learning_rate": 2.645161290322581e-06, "loss": 0.7494, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.332025170326233, "learning_rate": 2.666666666666667e-06, "loss": 0.6703, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.5015639066696167, "learning_rate": 2.688172043010753e-06, "loss": 0.705, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.42239248752594, "learning_rate": 2.709677419354839e-06, "loss": 0.7388, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.4475207328796387, "learning_rate": 2.731182795698925e-06, "loss": 0.6957, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.5709055662155151, "learning_rate": 2.752688172043011e-06, "loss": 0.7216, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.5183013677597046, "learning_rate": 2.774193548387097e-06, "loss": 0.6629, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.5590118169784546, "learning_rate": 2.7956989247311827e-06, "loss": 0.7238, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.4664820432662964, "learning_rate": 2.817204301075269e-06, "loss": 0.7336, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.5131858587265015, "learning_rate": 2.8387096774193553e-06, "loss": 0.715, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.420447587966919, "learning_rate": 2.860215053763441e-06, "loss": 0.6798, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.4075288772583008, "learning_rate": 2.881720430107527e-06, "loss": 0.6282, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.3591053485870361, "learning_rate": 2.903225806451613e-06, "loss": 0.6363, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.6163558959960938, "learning_rate": 2.9247311827956993e-06, "loss": 0.7008, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.374337077140808, "learning_rate": 2.946236559139785e-06, "loss": 0.6402, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.5306495428085327, "learning_rate": 2.967741935483871e-06, "loss": 0.6989, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.5252563953399658, "learning_rate": 2.9892473118279577e-06, "loss": 0.7048, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.4661043882369995, "learning_rate": 3.0107526881720433e-06, "loss": 0.7244, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.3755998611450195, "learning_rate": 3.0322580645161295e-06, "loss": 0.6592, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.50421142578125, "learning_rate": 3.053763440860215e-06, "loss": 0.7069, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.5445609092712402, "learning_rate": 3.0752688172043017e-06, "loss": 0.6942, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.4062697887420654, "learning_rate": 3.0967741935483874e-06, "loss": 0.6818, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.5162020921707153, "learning_rate": 3.1182795698924735e-06, "loss": 0.686, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.476649284362793, "learning_rate": 3.139784946236559e-06, "loss": 0.664, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.3435170650482178, "learning_rate": 3.1612903225806453e-06, "loss": 0.651, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.483250617980957, "learning_rate": 3.182795698924732e-06, "loss": 0.6569, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.3496644496917725, "learning_rate": 3.2043010752688175e-06, "loss": 0.6488, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.5881503820419312, "learning_rate": 3.225806451612903e-06, "loss": 0.7177, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.4342613220214844, "learning_rate": 3.2473118279569893e-06, "loss": 0.7013, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.4460082054138184, "learning_rate": 3.268817204301076e-06, "loss": 0.7057, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.4755594730377197, "learning_rate": 3.2903225806451615e-06, "loss": 0.6786, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.361755609512329, "learning_rate": 3.3118279569892476e-06, "loss": 0.6506, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.588915228843689, "learning_rate": 3.3333333333333333e-06, "loss": 0.7174, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.445926308631897, "learning_rate": 3.35483870967742e-06, "loss": 0.6273, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.4859739542007446, "learning_rate": 3.376344086021506e-06, "loss": 0.6817, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.3606878519058228, "learning_rate": 3.3978494623655917e-06, "loss": 0.6523, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.3789238929748535, "learning_rate": 3.4193548387096773e-06, "loss": 0.7365, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.459825038909912, "learning_rate": 3.440860215053764e-06, "loss": 0.6441, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.4620033502578735, "learning_rate": 3.46236559139785e-06, "loss": 0.6718, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.2860753536224365, "learning_rate": 3.4838709677419357e-06, "loss": 0.6496, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.4331198930740356, "learning_rate": 3.505376344086022e-06, "loss": 0.5708, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.3713304996490479, "learning_rate": 3.5268817204301075e-06, "loss": 0.5925, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.4113457202911377, "learning_rate": 3.548387096774194e-06, "loss": 0.626, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.5618762969970703, "learning_rate": 3.5698924731182797e-06, "loss": 0.6727, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.5016919374465942, "learning_rate": 3.591397849462366e-06, "loss": 0.6412, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.422415018081665, "learning_rate": 3.6129032258064515e-06, "loss": 0.7061, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.367357850074768, "learning_rate": 3.634408602150538e-06, "loss": 0.6456, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.5864694118499756, "learning_rate": 3.655913978494624e-06, "loss": 0.7216, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.4777114391326904, "learning_rate": 3.67741935483871e-06, "loss": 0.6288, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.3818461894989014, "learning_rate": 3.698924731182796e-06, "loss": 0.6792, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.5298271179199219, "learning_rate": 3.720430107526882e-06, "loss": 0.6121, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.3923414945602417, "learning_rate": 3.741935483870968e-06, "loss": 0.6468, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.362740159034729, "learning_rate": 3.763440860215054e-06, "loss": 0.6033, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.5465327501296997, "learning_rate": 3.78494623655914e-06, "loss": 0.6414, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.352845311164856, "learning_rate": 3.8064516129032257e-06, "loss": 0.6351, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.4600354433059692, "learning_rate": 3.827956989247313e-06, "loss": 0.6456, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.4185914993286133, "learning_rate": 3.849462365591398e-06, "loss": 0.6446, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.4454548358917236, "learning_rate": 3.870967741935484e-06, "loss": 0.6287, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.4779889583587646, "learning_rate": 3.89247311827957e-06, "loss": 0.6125, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.5546371936798096, "learning_rate": 3.913978494623656e-06, "loss": 0.6485, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.4161559343338013, "learning_rate": 3.935483870967742e-06, "loss": 0.654, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.4823075532913208, "learning_rate": 3.9569892473118284e-06, "loss": 0.675, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.5997334718704224, "learning_rate": 3.978494623655914e-06, "loss": 0.7027, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.4537665843963623, "learning_rate": 4.000000000000001e-06, "loss": 0.707, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.4136734008789062, "learning_rate": 4.021505376344086e-06, "loss": 0.6604, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.420630931854248, "learning_rate": 4.043010752688172e-06, "loss": 0.68, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.370713233947754, "learning_rate": 4.064516129032259e-06, "loss": 0.5979, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.3468844890594482, "learning_rate": 4.086021505376344e-06, "loss": 0.6676, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.3542845249176025, "learning_rate": 4.107526881720431e-06, "loss": 0.6564, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.3464215993881226, "learning_rate": 4.1290322580645165e-06, "loss": 0.6297, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.5844863653182983, "learning_rate": 4.150537634408602e-06, "loss": 0.6908, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.3974964618682861, "learning_rate": 4.172043010752688e-06, "loss": 0.6681, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.4985181093215942, "learning_rate": 4.193548387096774e-06, "loss": 0.6385, "step": 195 }, { "epoch": 0.01, "grad_norm": 1.3571962118148804, "learning_rate": 4.215053763440861e-06, "loss": 0.6571, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.448898434638977, "learning_rate": 4.236559139784947e-06, "loss": 0.6589, "step": 197 }, { "epoch": 0.01, "grad_norm": 1.4113610982894897, "learning_rate": 4.258064516129032e-06, "loss": 0.631, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.2628109455108643, "learning_rate": 4.279569892473119e-06, "loss": 0.5911, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.3514397144317627, "learning_rate": 4.3010752688172045e-06, "loss": 0.6545, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.4449620246887207, "learning_rate": 4.32258064516129e-06, "loss": 0.6454, "step": 201 }, { "epoch": 0.01, "grad_norm": 1.6572059392929077, "learning_rate": 4.344086021505377e-06, "loss": 0.7222, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.3483619689941406, "learning_rate": 4.365591397849463e-06, "loss": 0.6626, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.4800024032592773, "learning_rate": 4.387096774193549e-06, "loss": 0.6243, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.4003472328186035, "learning_rate": 4.408602150537635e-06, "loss": 0.6304, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.4581936597824097, "learning_rate": 4.43010752688172e-06, "loss": 0.6991, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.4806365966796875, "learning_rate": 4.451612903225807e-06, "loss": 0.6611, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.5777963399887085, "learning_rate": 4.473118279569893e-06, "loss": 0.6944, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.359972357749939, "learning_rate": 4.494623655913979e-06, "loss": 0.6352, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.519012689590454, "learning_rate": 4.516129032258065e-06, "loss": 0.6687, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.5149580240249634, "learning_rate": 4.5376344086021505e-06, "loss": 0.6845, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.4824870824813843, "learning_rate": 4.559139784946237e-06, "loss": 0.6758, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.4147361516952515, "learning_rate": 4.580645161290323e-06, "loss": 0.651, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.397584080696106, "learning_rate": 4.602150537634409e-06, "loss": 0.7291, "step": 214 }, { "epoch": 0.01, "grad_norm": 1.4491121768951416, "learning_rate": 4.623655913978495e-06, "loss": 0.6824, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.3864790201187134, "learning_rate": 4.6451612903225815e-06, "loss": 0.6511, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.4799574613571167, "learning_rate": 4.666666666666667e-06, "loss": 0.677, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.4617304801940918, "learning_rate": 4.688172043010753e-06, "loss": 0.6641, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.3787248134613037, "learning_rate": 4.7096774193548385e-06, "loss": 0.6086, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.366694688796997, "learning_rate": 4.731182795698925e-06, "loss": 0.6376, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.381858468055725, "learning_rate": 4.752688172043012e-06, "loss": 0.6499, "step": 221 }, { "epoch": 0.01, "grad_norm": 1.5026929378509521, "learning_rate": 4.774193548387097e-06, "loss": 0.6893, "step": 222 }, { "epoch": 0.01, "grad_norm": 1.485606074333191, "learning_rate": 4.795698924731183e-06, "loss": 0.7644, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.4079500436782837, "learning_rate": 4.8172043010752695e-06, "loss": 0.6276, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.3345839977264404, "learning_rate": 4.838709677419355e-06, "loss": 0.6196, "step": 225 }, { "epoch": 0.01, "grad_norm": 1.456888198852539, "learning_rate": 4.860215053763441e-06, "loss": 0.6281, "step": 226 }, { "epoch": 0.01, "grad_norm": 1.4627163410186768, "learning_rate": 4.8817204301075274e-06, "loss": 0.6232, "step": 227 }, { "epoch": 0.01, "grad_norm": 1.3238873481750488, "learning_rate": 4.903225806451613e-06, "loss": 0.6448, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.340203046798706, "learning_rate": 4.9247311827957e-06, "loss": 0.65, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.5663219690322876, "learning_rate": 4.946236559139785e-06, "loss": 0.6528, "step": 230 }, { "epoch": 0.01, "grad_norm": 1.540247917175293, "learning_rate": 4.967741935483871e-06, "loss": 0.6998, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.470038652420044, "learning_rate": 4.9892473118279576e-06, "loss": 0.6023, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.5302600860595703, "learning_rate": 5.010752688172043e-06, "loss": 0.6646, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.3384464979171753, "learning_rate": 5.032258064516129e-06, "loss": 0.6627, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.5624356269836426, "learning_rate": 5.0537634408602155e-06, "loss": 0.6348, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.4840127229690552, "learning_rate": 5.075268817204302e-06, "loss": 0.6875, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.3700177669525146, "learning_rate": 5.096774193548387e-06, "loss": 0.6746, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.460861325263977, "learning_rate": 5.118279569892473e-06, "loss": 0.6486, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.4349925518035889, "learning_rate": 5.13978494623656e-06, "loss": 0.6654, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.5404717922210693, "learning_rate": 5.161290322580646e-06, "loss": 0.631, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.403262734413147, "learning_rate": 5.182795698924732e-06, "loss": 0.6049, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.562657117843628, "learning_rate": 5.204301075268817e-06, "loss": 0.6531, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.5281354188919067, "learning_rate": 5.2258064516129035e-06, "loss": 0.6413, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.4710280895233154, "learning_rate": 5.24731182795699e-06, "loss": 0.6473, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.3256354331970215, "learning_rate": 5.268817204301076e-06, "loss": 0.6863, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.4925252199172974, "learning_rate": 5.290322580645162e-06, "loss": 0.6633, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.5581611394882202, "learning_rate": 5.311827956989247e-06, "loss": 0.6491, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.5785328149795532, "learning_rate": 5.333333333333334e-06, "loss": 0.6925, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.7980413436889648, "learning_rate": 5.35483870967742e-06, "loss": 0.6674, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.314960241317749, "learning_rate": 5.376344086021506e-06, "loss": 0.5851, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.2901309728622437, "learning_rate": 5.3978494623655916e-06, "loss": 0.6119, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.3908600807189941, "learning_rate": 5.419354838709678e-06, "loss": 0.6533, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.5262831449508667, "learning_rate": 5.440860215053764e-06, "loss": 0.6516, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.5047768354415894, "learning_rate": 5.46236559139785e-06, "loss": 0.7023, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.5169037580490112, "learning_rate": 5.483870967741935e-06, "loss": 0.6316, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.7102844715118408, "learning_rate": 5.505376344086022e-06, "loss": 0.6967, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.4694008827209473, "learning_rate": 5.526881720430108e-06, "loss": 0.6138, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.3127514123916626, "learning_rate": 5.548387096774194e-06, "loss": 0.6415, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.4299575090408325, "learning_rate": 5.5698924731182805e-06, "loss": 0.6341, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.4140585660934448, "learning_rate": 5.591397849462365e-06, "loss": 0.6135, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.4923820495605469, "learning_rate": 5.612903225806452e-06, "loss": 0.6505, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.5570168495178223, "learning_rate": 5.634408602150538e-06, "loss": 0.5985, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.477238655090332, "learning_rate": 5.655913978494624e-06, "loss": 0.6176, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.4586527347564697, "learning_rate": 5.677419354838711e-06, "loss": 0.6679, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.4303314685821533, "learning_rate": 5.698924731182796e-06, "loss": 0.6443, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.4830927848815918, "learning_rate": 5.720430107526882e-06, "loss": 0.6999, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.4471492767333984, "learning_rate": 5.7419354838709685e-06, "loss": 0.6401, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.5368587970733643, "learning_rate": 5.763440860215054e-06, "loss": 0.7283, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.4728994369506836, "learning_rate": 5.78494623655914e-06, "loss": 0.5753, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.4920686483383179, "learning_rate": 5.806451612903226e-06, "loss": 0.648, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.489173412322998, "learning_rate": 5.827956989247312e-06, "loss": 0.7034, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.432425856590271, "learning_rate": 5.849462365591399e-06, "loss": 0.6409, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.5326480865478516, "learning_rate": 5.8709677419354835e-06, "loss": 0.5976, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.2451320886611938, "learning_rate": 5.89247311827957e-06, "loss": 0.58, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.4113458395004272, "learning_rate": 5.9139784946236566e-06, "loss": 0.664, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.4853435754776, "learning_rate": 5.935483870967742e-06, "loss": 0.6463, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.4112480878829956, "learning_rate": 5.956989247311829e-06, "loss": 0.6761, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.4136167764663696, "learning_rate": 5.978494623655915e-06, "loss": 0.5818, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.3428866863250732, "learning_rate": 6e-06, "loss": 0.6676, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.3388895988464355, "learning_rate": 6.021505376344087e-06, "loss": 0.5859, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.4309245347976685, "learning_rate": 6.043010752688172e-06, "loss": 0.6026, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.4981694221496582, "learning_rate": 6.064516129032259e-06, "loss": 0.6535, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.4574475288391113, "learning_rate": 6.086021505376345e-06, "loss": 0.6572, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.3734815120697021, "learning_rate": 6.10752688172043e-06, "loss": 0.6083, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.3968099355697632, "learning_rate": 6.129032258064517e-06, "loss": 0.6419, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.364245057106018, "learning_rate": 6.150537634408603e-06, "loss": 0.6011, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.3838967084884644, "learning_rate": 6.172043010752688e-06, "loss": 0.644, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.5137556791305542, "learning_rate": 6.193548387096775e-06, "loss": 0.7008, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.3506392240524292, "learning_rate": 6.21505376344086e-06, "loss": 0.6509, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.3271832466125488, "learning_rate": 6.236559139784947e-06, "loss": 0.5993, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.403112769126892, "learning_rate": 6.2580645161290335e-06, "loss": 0.5949, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.3226643800735474, "learning_rate": 6.279569892473118e-06, "loss": 0.6265, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.460944652557373, "learning_rate": 6.301075268817205e-06, "loss": 0.6186, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.4437795877456665, "learning_rate": 6.3225806451612906e-06, "loss": 0.6155, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.372902512550354, "learning_rate": 6.344086021505377e-06, "loss": 0.6744, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.5733133554458618, "learning_rate": 6.365591397849464e-06, "loss": 0.7174, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.3875151872634888, "learning_rate": 6.3870967741935485e-06, "loss": 0.6477, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.4557615518569946, "learning_rate": 6.408602150537635e-06, "loss": 0.6046, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.3458600044250488, "learning_rate": 6.4301075268817215e-06, "loss": 0.6031, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.4376322031021118, "learning_rate": 6.451612903225806e-06, "loss": 0.5812, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.3428351879119873, "learning_rate": 6.473118279569893e-06, "loss": 0.6554, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.4042518138885498, "learning_rate": 6.494623655913979e-06, "loss": 0.6696, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.3414688110351562, "learning_rate": 6.516129032258065e-06, "loss": 0.6668, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.557831048965454, "learning_rate": 6.537634408602152e-06, "loss": 0.7132, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.3986623287200928, "learning_rate": 6.5591397849462365e-06, "loss": 0.6563, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.3947936296463013, "learning_rate": 6.580645161290323e-06, "loss": 0.6242, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.2314656972885132, "learning_rate": 6.602150537634409e-06, "loss": 0.6483, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.3264108896255493, "learning_rate": 6.623655913978495e-06, "loss": 0.5159, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.3433727025985718, "learning_rate": 6.645161290322582e-06, "loss": 0.6422, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.4343948364257812, "learning_rate": 6.666666666666667e-06, "loss": 0.6129, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.4233579635620117, "learning_rate": 6.688172043010753e-06, "loss": 0.6266, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.497401237487793, "learning_rate": 6.70967741935484e-06, "loss": 0.6763, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.3524738550186157, "learning_rate": 6.731182795698925e-06, "loss": 0.6172, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.3482792377471924, "learning_rate": 6.752688172043012e-06, "loss": 0.67, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.3911526203155518, "learning_rate": 6.774193548387097e-06, "loss": 0.593, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.3784382343292236, "learning_rate": 6.795698924731183e-06, "loss": 0.6262, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.301755666732788, "learning_rate": 6.81720430107527e-06, "loss": 0.606, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.4112850427627563, "learning_rate": 6.838709677419355e-06, "loss": 0.6471, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.3583770990371704, "learning_rate": 6.860215053763441e-06, "loss": 0.6047, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.4293373823165894, "learning_rate": 6.881720430107528e-06, "loss": 0.5994, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.5244840383529663, "learning_rate": 6.9032258064516135e-06, "loss": 0.6577, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.414840817451477, "learning_rate": 6.9247311827957e-06, "loss": 0.6056, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.5029287338256836, "learning_rate": 6.946236559139785e-06, "loss": 0.6343, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.4087852239608765, "learning_rate": 6.967741935483871e-06, "loss": 0.7045, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.4259071350097656, "learning_rate": 6.989247311827958e-06, "loss": 0.62, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.4089728593826294, "learning_rate": 7.010752688172044e-06, "loss": 0.6535, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.4867100715637207, "learning_rate": 7.03225806451613e-06, "loss": 0.6694, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.4505767822265625, "learning_rate": 7.053763440860215e-06, "loss": 0.6616, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.460151195526123, "learning_rate": 7.0752688172043015e-06, "loss": 0.6329, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.3247636556625366, "learning_rate": 7.096774193548388e-06, "loss": 0.5923, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.355065941810608, "learning_rate": 7.118279569892474e-06, "loss": 0.6027, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.550376534461975, "learning_rate": 7.139784946236559e-06, "loss": 0.6598, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.3131173849105835, "learning_rate": 7.161290322580646e-06, "loss": 0.5927, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.4033622741699219, "learning_rate": 7.182795698924732e-06, "loss": 0.638, "step": 334 }, { "epoch": 0.02, "grad_norm": 3.383760929107666, "learning_rate": 7.204301075268818e-06, "loss": 0.6761, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.4576994180679321, "learning_rate": 7.225806451612903e-06, "loss": 0.6386, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.532431721687317, "learning_rate": 7.2473118279569895e-06, "loss": 0.6561, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.3121355772018433, "learning_rate": 7.268817204301076e-06, "loss": 0.636, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.323399305343628, "learning_rate": 7.290322580645162e-06, "loss": 0.6028, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.3759629726409912, "learning_rate": 7.311827956989248e-06, "loss": 0.6222, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.4417436122894287, "learning_rate": 7.333333333333333e-06, "loss": 0.6003, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.450297474861145, "learning_rate": 7.35483870967742e-06, "loss": 0.6463, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.4627034664154053, "learning_rate": 7.376344086021506e-06, "loss": 0.6653, "step": 343 }, { "epoch": 0.02, "grad_norm": 1.4003043174743652, "learning_rate": 7.397849462365592e-06, "loss": 0.5789, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.330725073814392, "learning_rate": 7.4193548387096784e-06, "loss": 0.6392, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.3740488290786743, "learning_rate": 7.440860215053764e-06, "loss": 0.603, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.4228111505508423, "learning_rate": 7.46236559139785e-06, "loss": 0.6197, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.2890325784683228, "learning_rate": 7.483870967741936e-06, "loss": 0.5706, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.3660738468170166, "learning_rate": 7.505376344086022e-06, "loss": 0.6362, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.4797449111938477, "learning_rate": 7.526881720430108e-06, "loss": 0.696, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.3609850406646729, "learning_rate": 7.548387096774194e-06, "loss": 0.6464, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.5972503423690796, "learning_rate": 7.56989247311828e-06, "loss": 0.6999, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.4545588493347168, "learning_rate": 7.5913978494623665e-06, "loss": 0.6235, "step": 353 }, { "epoch": 0.02, "grad_norm": 1.4551633596420288, "learning_rate": 7.612903225806451e-06, "loss": 0.6687, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.363119125366211, "learning_rate": 7.634408602150538e-06, "loss": 0.5688, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.3650763034820557, "learning_rate": 7.655913978494625e-06, "loss": 0.605, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.3820191621780396, "learning_rate": 7.67741935483871e-06, "loss": 0.6119, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.5312913656234741, "learning_rate": 7.698924731182797e-06, "loss": 0.6456, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.7233582735061646, "learning_rate": 7.720430107526882e-06, "loss": 0.6648, "step": 359 }, { "epoch": 0.02, "grad_norm": 1.4153186082839966, "learning_rate": 7.741935483870968e-06, "loss": 0.6685, "step": 360 }, { "epoch": 0.02, "grad_norm": 1.3374271392822266, "learning_rate": 7.763440860215055e-06, "loss": 0.6036, "step": 361 }, { "epoch": 0.02, "grad_norm": 1.364261507987976, "learning_rate": 7.78494623655914e-06, "loss": 0.5734, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.3479622602462769, "learning_rate": 7.806451612903227e-06, "loss": 0.6457, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.3547790050506592, "learning_rate": 7.827956989247312e-06, "loss": 0.6959, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.466463327407837, "learning_rate": 7.849462365591398e-06, "loss": 0.6245, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.3732305765151978, "learning_rate": 7.870967741935484e-06, "loss": 0.6513, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.3370320796966553, "learning_rate": 7.892473118279571e-06, "loss": 0.6822, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.6328262090682983, "learning_rate": 7.913978494623657e-06, "loss": 0.6566, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.4983960390090942, "learning_rate": 7.935483870967743e-06, "loss": 0.6333, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.3956624269485474, "learning_rate": 7.956989247311828e-06, "loss": 0.5919, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.3953498601913452, "learning_rate": 7.978494623655914e-06, "loss": 0.5982, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.4050501585006714, "learning_rate": 8.000000000000001e-06, "loss": 0.6492, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.3902778625488281, "learning_rate": 8.021505376344087e-06, "loss": 0.6619, "step": 373 }, { "epoch": 0.02, "grad_norm": 1.393054485321045, "learning_rate": 8.043010752688173e-06, "loss": 0.6881, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.4529262781143188, "learning_rate": 8.064516129032258e-06, "loss": 0.6461, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.3594259023666382, "learning_rate": 8.086021505376344e-06, "loss": 0.6159, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.3977912664413452, "learning_rate": 8.107526881720431e-06, "loss": 0.6171, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.412697196006775, "learning_rate": 8.129032258064517e-06, "loss": 0.641, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.328952431678772, "learning_rate": 8.150537634408603e-06, "loss": 0.6344, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.3605684041976929, "learning_rate": 8.172043010752689e-06, "loss": 0.5942, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.5911377668380737, "learning_rate": 8.193548387096774e-06, "loss": 0.6342, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.3547784090042114, "learning_rate": 8.215053763440862e-06, "loss": 0.6234, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.5001435279846191, "learning_rate": 8.236559139784947e-06, "loss": 0.6768, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.4094359874725342, "learning_rate": 8.258064516129033e-06, "loss": 0.6435, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.4108891487121582, "learning_rate": 8.279569892473119e-06, "loss": 0.6489, "step": 385 }, { "epoch": 0.02, "grad_norm": 1.3410402536392212, "learning_rate": 8.301075268817204e-06, "loss": 0.6529, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.3629075288772583, "learning_rate": 8.322580645161292e-06, "loss": 0.6037, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.305983066558838, "learning_rate": 8.344086021505376e-06, "loss": 0.6134, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.3505597114562988, "learning_rate": 8.365591397849463e-06, "loss": 0.6175, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.3664557933807373, "learning_rate": 8.387096774193549e-06, "loss": 0.5971, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.4941099882125854, "learning_rate": 8.408602150537634e-06, "loss": 0.6846, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.4789901971817017, "learning_rate": 8.430107526881722e-06, "loss": 0.6495, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.3570103645324707, "learning_rate": 8.451612903225808e-06, "loss": 0.6455, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.3536481857299805, "learning_rate": 8.473118279569893e-06, "loss": 0.649, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.4208588600158691, "learning_rate": 8.494623655913979e-06, "loss": 0.6448, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.3573576211929321, "learning_rate": 8.516129032258065e-06, "loss": 0.6482, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.322425127029419, "learning_rate": 8.537634408602152e-06, "loss": 0.6553, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.5230833292007446, "learning_rate": 8.559139784946238e-06, "loss": 0.6032, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.3428488969802856, "learning_rate": 8.580645161290323e-06, "loss": 0.601, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.5842676162719727, "learning_rate": 8.602150537634409e-06, "loss": 0.6058, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.5223866701126099, "learning_rate": 8.623655913978495e-06, "loss": 0.7053, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.4061591625213623, "learning_rate": 8.64516129032258e-06, "loss": 0.6705, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.2860901355743408, "learning_rate": 8.666666666666668e-06, "loss": 0.6522, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.4867624044418335, "learning_rate": 8.688172043010754e-06, "loss": 0.6434, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.5345767736434937, "learning_rate": 8.70967741935484e-06, "loss": 0.6308, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.5417383909225464, "learning_rate": 8.731182795698927e-06, "loss": 0.6722, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.2449352741241455, "learning_rate": 8.75268817204301e-06, "loss": 0.5685, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.4412016868591309, "learning_rate": 8.774193548387098e-06, "loss": 0.5672, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.4722940921783447, "learning_rate": 8.795698924731184e-06, "loss": 0.5837, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.5083857774734497, "learning_rate": 8.81720430107527e-06, "loss": 0.693, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.4543815851211548, "learning_rate": 8.838709677419357e-06, "loss": 0.6103, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.277834177017212, "learning_rate": 8.86021505376344e-06, "loss": 0.6156, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.2799676656723022, "learning_rate": 8.881720430107528e-06, "loss": 0.6487, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.4370607137680054, "learning_rate": 8.903225806451614e-06, "loss": 0.6173, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.5443744659423828, "learning_rate": 8.9247311827957e-06, "loss": 0.6474, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.5752028226852417, "learning_rate": 8.946236559139785e-06, "loss": 0.6657, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.6973398923873901, "learning_rate": 8.967741935483871e-06, "loss": 0.6227, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.3309566974639893, "learning_rate": 8.989247311827958e-06, "loss": 0.5969, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.2705786228179932, "learning_rate": 9.010752688172044e-06, "loss": 0.6393, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.3991286754608154, "learning_rate": 9.03225806451613e-06, "loss": 0.6321, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.388646125793457, "learning_rate": 9.053763440860215e-06, "loss": 0.6418, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.5069942474365234, "learning_rate": 9.075268817204301e-06, "loss": 0.6255, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.4923782348632812, "learning_rate": 9.096774193548388e-06, "loss": 0.6865, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.5693137645721436, "learning_rate": 9.118279569892474e-06, "loss": 0.713, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.3926331996917725, "learning_rate": 9.13978494623656e-06, "loss": 0.5932, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.3405671119689941, "learning_rate": 9.161290322580645e-06, "loss": 0.6404, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.38405179977417, "learning_rate": 9.182795698924733e-06, "loss": 0.6527, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.4522117376327515, "learning_rate": 9.204301075268819e-06, "loss": 0.5592, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.4289207458496094, "learning_rate": 9.225806451612904e-06, "loss": 0.6307, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.355312466621399, "learning_rate": 9.24731182795699e-06, "loss": 0.5883, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.4518020153045654, "learning_rate": 9.268817204301076e-06, "loss": 0.6711, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.3813982009887695, "learning_rate": 9.290322580645163e-06, "loss": 0.6443, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.235568881034851, "learning_rate": 9.311827956989249e-06, "loss": 0.646, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.3711236715316772, "learning_rate": 9.333333333333334e-06, "loss": 0.6435, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.435325026512146, "learning_rate": 9.35483870967742e-06, "loss": 0.6705, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.3130897283554077, "learning_rate": 9.376344086021506e-06, "loss": 0.5808, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.3166669607162476, "learning_rate": 9.397849462365593e-06, "loss": 0.6557, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.3044676780700684, "learning_rate": 9.419354838709677e-06, "loss": 0.6128, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.4075138568878174, "learning_rate": 9.440860215053764e-06, "loss": 0.6915, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.3613035678863525, "learning_rate": 9.46236559139785e-06, "loss": 0.6579, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.3176119327545166, "learning_rate": 9.483870967741936e-06, "loss": 0.6079, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.2872364521026611, "learning_rate": 9.505376344086023e-06, "loss": 0.579, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.2854247093200684, "learning_rate": 9.526881720430107e-06, "loss": 0.576, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.261618971824646, "learning_rate": 9.548387096774195e-06, "loss": 0.6406, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.376854419708252, "learning_rate": 9.56989247311828e-06, "loss": 0.6271, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.3311340808868408, "learning_rate": 9.591397849462366e-06, "loss": 0.6309, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.2978858947753906, "learning_rate": 9.612903225806453e-06, "loss": 0.6312, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.4054745435714722, "learning_rate": 9.634408602150539e-06, "loss": 0.6355, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.1630349159240723, "learning_rate": 9.655913978494625e-06, "loss": 0.5532, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.3240445852279663, "learning_rate": 9.67741935483871e-06, "loss": 0.5802, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.3332797288894653, "learning_rate": 9.698924731182796e-06, "loss": 0.6415, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.185860276222229, "learning_rate": 9.720430107526882e-06, "loss": 0.5287, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.5209908485412598, "learning_rate": 9.74193548387097e-06, "loss": 0.6501, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.392098307609558, "learning_rate": 9.763440860215055e-06, "loss": 0.6595, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.276890516281128, "learning_rate": 9.78494623655914e-06, "loss": 0.5785, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.5221747159957886, "learning_rate": 9.806451612903226e-06, "loss": 0.5945, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.3471137285232544, "learning_rate": 9.827956989247312e-06, "loss": 0.5924, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.1319068670272827, "learning_rate": 9.8494623655914e-06, "loss": 0.5568, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.3586151599884033, "learning_rate": 9.870967741935485e-06, "loss": 0.5697, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.406134009361267, "learning_rate": 9.89247311827957e-06, "loss": 0.6265, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.4299999475479126, "learning_rate": 9.913978494623658e-06, "loss": 0.6462, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.3570669889450073, "learning_rate": 9.935483870967742e-06, "loss": 0.5992, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.287385106086731, "learning_rate": 9.95698924731183e-06, "loss": 0.5774, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.2230979204177856, "learning_rate": 9.978494623655915e-06, "loss": 0.6027, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.3355122804641724, "learning_rate": 1e-05, "loss": 0.6502, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.4757527112960815, "learning_rate": 9.999999890731451e-06, "loss": 0.6733, "step": 466 }, { "epoch": 0.03, "grad_norm": 1.2944368124008179, "learning_rate": 9.999999562925806e-06, "loss": 0.6205, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.2971357107162476, "learning_rate": 9.99999901658308e-06, "loss": 0.5876, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.3300307989120483, "learning_rate": 9.999998251703298e-06, "loss": 0.6528, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.3819242715835571, "learning_rate": 9.999997268286493e-06, "loss": 0.6512, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.3781582117080688, "learning_rate": 9.999996066332705e-06, "loss": 0.6196, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.5223759412765503, "learning_rate": 9.999994645841991e-06, "loss": 0.6639, "step": 472 }, { "epoch": 0.03, "grad_norm": 1.5605597496032715, "learning_rate": 9.99999300681441e-06, "loss": 0.6244, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.400432825088501, "learning_rate": 9.999991149250037e-06, "loss": 0.6656, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.3611760139465332, "learning_rate": 9.99998907314895e-06, "loss": 0.6416, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.4041498899459839, "learning_rate": 9.999986778511239e-06, "loss": 0.6397, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.3800691366195679, "learning_rate": 9.999984265337008e-06, "loss": 0.5853, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.5269255638122559, "learning_rate": 9.999981533626365e-06, "loss": 0.6667, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.3410454988479614, "learning_rate": 9.999978583379428e-06, "loss": 0.5767, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.3209458589553833, "learning_rate": 9.999975414596328e-06, "loss": 0.6456, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.4876658916473389, "learning_rate": 9.999972027277203e-06, "loss": 0.5429, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.3828930854797363, "learning_rate": 9.999968421422202e-06, "loss": 0.6761, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.3698159456253052, "learning_rate": 9.99996459703148e-06, "loss": 0.6281, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.2629188299179077, "learning_rate": 9.999960554105203e-06, "loss": 0.6314, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.3644013404846191, "learning_rate": 9.999956292643553e-06, "loss": 0.6634, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.5174560546875, "learning_rate": 9.999951812646715e-06, "loss": 0.6658, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.305527925491333, "learning_rate": 9.99994711411488e-06, "loss": 0.6134, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.3298759460449219, "learning_rate": 9.999942197048258e-06, "loss": 0.5759, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.4020183086395264, "learning_rate": 9.999937061447063e-06, "loss": 0.6384, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.3056825399398804, "learning_rate": 9.999931707311519e-06, "loss": 0.6086, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.5247503519058228, "learning_rate": 9.99992613464186e-06, "loss": 0.6463, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.4376918077468872, "learning_rate": 9.99992034343833e-06, "loss": 0.6421, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.2398453950881958, "learning_rate": 9.999914333701181e-06, "loss": 0.6036, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.2932826280593872, "learning_rate": 9.999908105430676e-06, "loss": 0.6812, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.4609074592590332, "learning_rate": 9.999901658627089e-06, "loss": 0.663, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.502657413482666, "learning_rate": 9.9998949932907e-06, "loss": 0.6921, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.3374508619308472, "learning_rate": 9.999888109421801e-06, "loss": 0.6512, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.2538045644760132, "learning_rate": 9.99988100702069e-06, "loss": 0.5872, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.3569806814193726, "learning_rate": 9.999873686087685e-06, "loss": 0.6564, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.3760786056518555, "learning_rate": 9.999866146623096e-06, "loss": 0.5953, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.2224057912826538, "learning_rate": 9.999858388627262e-06, "loss": 0.5766, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.3444105386734009, "learning_rate": 9.999850412100513e-06, "loss": 0.6899, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.3441311120986938, "learning_rate": 9.999842217043206e-06, "loss": 0.5821, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.3449276685714722, "learning_rate": 9.999833803455695e-06, "loss": 0.5629, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.3225734233856201, "learning_rate": 9.999825171338347e-06, "loss": 0.6239, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.229529619216919, "learning_rate": 9.99981632069154e-06, "loss": 0.6192, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.2435340881347656, "learning_rate": 9.999807251515662e-06, "loss": 0.5966, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.1903313398361206, "learning_rate": 9.999797963811108e-06, "loss": 0.6038, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.4838323593139648, "learning_rate": 9.999788457578287e-06, "loss": 0.6634, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.2415093183517456, "learning_rate": 9.99977873281761e-06, "loss": 0.5942, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.3115867376327515, "learning_rate": 9.999768789529505e-06, "loss": 0.644, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.2966361045837402, "learning_rate": 9.999758627714407e-06, "loss": 0.5995, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.378242015838623, "learning_rate": 9.999748247372755e-06, "loss": 0.5909, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.3480839729309082, "learning_rate": 9.99973764850501e-06, "loss": 0.6294, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.3282829523086548, "learning_rate": 9.999726831111631e-06, "loss": 0.6239, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.224538803100586, "learning_rate": 9.999715795193092e-06, "loss": 0.5908, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.1748985052108765, "learning_rate": 9.999704540749875e-06, "loss": 0.5513, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.3855990171432495, "learning_rate": 9.999693067782473e-06, "loss": 0.628, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.338421106338501, "learning_rate": 9.999681376291383e-06, "loss": 0.6017, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.348456621170044, "learning_rate": 9.999669466277121e-06, "loss": 0.573, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.3201391696929932, "learning_rate": 9.999657337740208e-06, "loss": 0.6494, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.3455854654312134, "learning_rate": 9.99964499068117e-06, "loss": 0.6898, "step": 522 }, { "epoch": 0.03, "grad_norm": 1.4575845003128052, "learning_rate": 9.99963242510055e-06, "loss": 0.6058, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.3367388248443604, "learning_rate": 9.999619640998895e-06, "loss": 0.6841, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.485576868057251, "learning_rate": 9.999606638376765e-06, "loss": 0.6207, "step": 525 }, { "epoch": 0.03, "grad_norm": 1.3323869705200195, "learning_rate": 9.999593417234727e-06, "loss": 0.6676, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.381944179534912, "learning_rate": 9.999579977573361e-06, "loss": 0.6541, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.4131807088851929, "learning_rate": 9.999566319393253e-06, "loss": 0.6031, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.3178260326385498, "learning_rate": 9.999552442694999e-06, "loss": 0.5831, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.6524200439453125, "learning_rate": 9.999538347479209e-06, "loss": 0.6461, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.6775611639022827, "learning_rate": 9.999524033746496e-06, "loss": 0.64, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.3287729024887085, "learning_rate": 9.999509501497486e-06, "loss": 0.573, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.1799744367599487, "learning_rate": 9.999494750732814e-06, "loss": 0.6332, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.4676812887191772, "learning_rate": 9.999479781453128e-06, "loss": 0.6601, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.3672637939453125, "learning_rate": 9.999464593659077e-06, "loss": 0.6601, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.3543058633804321, "learning_rate": 9.999449187351328e-06, "loss": 0.6317, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.508683443069458, "learning_rate": 9.999433562530553e-06, "loss": 0.5825, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.3828397989273071, "learning_rate": 9.999417719197437e-06, "loss": 0.6306, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.275328278541565, "learning_rate": 9.99940165735267e-06, "loss": 0.6179, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.4599757194519043, "learning_rate": 9.999385376996957e-06, "loss": 0.5908, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.4210025072097778, "learning_rate": 9.999368878131007e-06, "loss": 0.636, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.3707892894744873, "learning_rate": 9.99935216075554e-06, "loss": 0.6747, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.3234251737594604, "learning_rate": 9.999335224871291e-06, "loss": 0.5971, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.2856186628341675, "learning_rate": 9.999318070478996e-06, "loss": 0.6039, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.4024916887283325, "learning_rate": 9.999300697579409e-06, "loss": 0.6453, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.5657267570495605, "learning_rate": 9.999283106173284e-06, "loss": 0.6354, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.231017827987671, "learning_rate": 9.999265296261393e-06, "loss": 0.5821, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.4070674180984497, "learning_rate": 9.999247267844516e-06, "loss": 0.6493, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.3544397354125977, "learning_rate": 9.999229020923438e-06, "loss": 0.6828, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.4174363613128662, "learning_rate": 9.99921055549896e-06, "loss": 0.6189, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.3297157287597656, "learning_rate": 9.999191871571883e-06, "loss": 0.6465, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.3302680253982544, "learning_rate": 9.999172969143029e-06, "loss": 0.6325, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.416316032409668, "learning_rate": 9.999153848213224e-06, "loss": 0.6493, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.3371374607086182, "learning_rate": 9.9991345087833e-06, "loss": 0.6683, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.6822746992111206, "learning_rate": 9.999114950854105e-06, "loss": 0.5948, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.2920632362365723, "learning_rate": 9.999095174426495e-06, "loss": 0.6092, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.3790969848632812, "learning_rate": 9.999075179501332e-06, "loss": 0.6167, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.2978794574737549, "learning_rate": 9.99905496607949e-06, "loss": 0.6521, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.5805668830871582, "learning_rate": 9.999034534161853e-06, "loss": 0.5858, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.3197576999664307, "learning_rate": 9.999013883749316e-06, "loss": 0.6365, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.542375087738037, "learning_rate": 9.998993014842776e-06, "loss": 0.6349, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.3597489595413208, "learning_rate": 9.998971927443153e-06, "loss": 0.591, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.4243230819702148, "learning_rate": 9.998950621551362e-06, "loss": 0.5896, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.3951241970062256, "learning_rate": 9.99892909716834e-06, "loss": 0.6392, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.274986743927002, "learning_rate": 9.998907354295023e-06, "loss": 0.6064, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.4108690023422241, "learning_rate": 9.998885392932361e-06, "loss": 0.631, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.3731757402420044, "learning_rate": 9.998863213081316e-06, "loss": 0.6167, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.284859538078308, "learning_rate": 9.998840814742858e-06, "loss": 0.6138, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.3897593021392822, "learning_rate": 9.998818197917965e-06, "loss": 0.6117, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.2721205949783325, "learning_rate": 9.998795362607626e-06, "loss": 0.6207, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.2395820617675781, "learning_rate": 9.99877230881284e-06, "loss": 0.6319, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.336958646774292, "learning_rate": 9.998749036534612e-06, "loss": 0.5971, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.2798365354537964, "learning_rate": 9.998725545773961e-06, "loss": 0.6481, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.2847399711608887, "learning_rate": 9.998701836531913e-06, "loss": 0.6164, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.4229387044906616, "learning_rate": 9.998677908809505e-06, "loss": 0.6266, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.4259870052337646, "learning_rate": 9.998653762607782e-06, "loss": 0.6593, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.3164218664169312, "learning_rate": 9.998629397927798e-06, "loss": 0.6094, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.2816392183303833, "learning_rate": 9.998604814770623e-06, "loss": 0.5917, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.3385627269744873, "learning_rate": 9.998580013137327e-06, "loss": 0.6261, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.2864853143692017, "learning_rate": 9.998554993028995e-06, "loss": 0.5835, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.3402376174926758, "learning_rate": 9.99852975444672e-06, "loss": 0.6378, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.3464585542678833, "learning_rate": 9.998504297391606e-06, "loss": 0.6274, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.3946614265441895, "learning_rate": 9.998478621864765e-06, "loss": 0.6421, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.3102638721466064, "learning_rate": 9.99845272786732e-06, "loss": 0.5926, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.2628836631774902, "learning_rate": 9.998426615400402e-06, "loss": 0.5686, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.4661788940429688, "learning_rate": 9.998400284465155e-06, "loss": 0.6429, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.4061461687088013, "learning_rate": 9.998373735062725e-06, "loss": 0.642, "step": 587 }, { "epoch": 0.04, "grad_norm": 1.4899805784225464, "learning_rate": 9.998346967194277e-06, "loss": 0.6313, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.4284398555755615, "learning_rate": 9.998319980860977e-06, "loss": 0.6523, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.4457521438598633, "learning_rate": 9.998292776064009e-06, "loss": 0.7068, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.3531560897827148, "learning_rate": 9.998265352804557e-06, "loss": 0.6362, "step": 591 }, { "epoch": 0.04, "grad_norm": 1.29694664478302, "learning_rate": 9.998237711083825e-06, "loss": 0.6524, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.2487621307373047, "learning_rate": 9.998209850903015e-06, "loss": 0.6414, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.2403746843338013, "learning_rate": 9.99818177226335e-06, "loss": 0.6643, "step": 594 }, { "epoch": 0.04, "grad_norm": 1.266990303993225, "learning_rate": 9.998153475166054e-06, "loss": 0.582, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.2384122610092163, "learning_rate": 9.998124959612366e-06, "loss": 0.6334, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.332375168800354, "learning_rate": 9.998096225603532e-06, "loss": 0.5811, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.317677617073059, "learning_rate": 9.998067273140805e-06, "loss": 0.6092, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.2698678970336914, "learning_rate": 9.998038102225454e-06, "loss": 0.6193, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.2599191665649414, "learning_rate": 9.998008712858753e-06, "loss": 0.596, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.3329696655273438, "learning_rate": 9.997979105041986e-06, "loss": 0.5752, "step": 601 }, { "epoch": 0.04, "grad_norm": 1.3046659231185913, "learning_rate": 9.997949278776446e-06, "loss": 0.5962, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.4190504550933838, "learning_rate": 9.99791923406344e-06, "loss": 0.6456, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.386319637298584, "learning_rate": 9.997888970904279e-06, "loss": 0.6739, "step": 604 }, { "epoch": 0.04, "grad_norm": 1.3471676111221313, "learning_rate": 9.997858489300284e-06, "loss": 0.6306, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.207558274269104, "learning_rate": 9.99782778925279e-06, "loss": 0.6104, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.28770911693573, "learning_rate": 9.997796870763138e-06, "loss": 0.6505, "step": 607 }, { "epoch": 0.04, "grad_norm": 1.3224656581878662, "learning_rate": 9.997765733832678e-06, "loss": 0.6171, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.2593183517456055, "learning_rate": 9.997734378462773e-06, "loss": 0.6058, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.3443915843963623, "learning_rate": 9.997702804654794e-06, "loss": 0.6262, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.207453727722168, "learning_rate": 9.99767101241012e-06, "loss": 0.5959, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.312470555305481, "learning_rate": 9.997639001730137e-06, "loss": 0.6418, "step": 612 }, { "epoch": 0.04, "grad_norm": 1.381596326828003, "learning_rate": 9.99760677261625e-06, "loss": 0.6192, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.3433594703674316, "learning_rate": 9.997574325069864e-06, "loss": 0.6467, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.5092898607254028, "learning_rate": 9.997541659092399e-06, "loss": 0.62, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.211731195449829, "learning_rate": 9.99750877468528e-06, "loss": 0.5716, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.359011173248291, "learning_rate": 9.997475671849948e-06, "loss": 0.6183, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.2276490926742554, "learning_rate": 9.997442350587847e-06, "loss": 0.6208, "step": 618 }, { "epoch": 0.04, "grad_norm": 1.2783503532409668, "learning_rate": 9.997408810900435e-06, "loss": 0.5672, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.2918111085891724, "learning_rate": 9.997375052789177e-06, "loss": 0.6021, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.3448309898376465, "learning_rate": 9.99734107625555e-06, "loss": 0.6118, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.4664503335952759, "learning_rate": 9.997306881301037e-06, "loss": 0.6446, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.4792218208312988, "learning_rate": 9.997272467927133e-06, "loss": 0.6913, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.3366453647613525, "learning_rate": 9.997237836135343e-06, "loss": 0.6528, "step": 624 }, { "epoch": 0.04, "grad_norm": 1.3094818592071533, "learning_rate": 9.997202985927179e-06, "loss": 0.6343, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.345465898513794, "learning_rate": 9.997167917304167e-06, "loss": 0.5684, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.2416417598724365, "learning_rate": 9.997132630267838e-06, "loss": 0.5972, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.3582313060760498, "learning_rate": 9.997097124819735e-06, "loss": 0.6174, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.289483666419983, "learning_rate": 9.997061400961407e-06, "loss": 0.6578, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.371659278869629, "learning_rate": 9.99702545869442e-06, "loss": 0.5938, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.3025915622711182, "learning_rate": 9.996989298020342e-06, "loss": 0.6114, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.284164547920227, "learning_rate": 9.996952918940754e-06, "loss": 0.6409, "step": 632 }, { "epoch": 0.04, "grad_norm": 1.195328712463379, "learning_rate": 9.996916321457248e-06, "loss": 0.6186, "step": 633 }, { "epoch": 0.04, "grad_norm": 1.2779732942581177, "learning_rate": 9.99687950557142e-06, "loss": 0.6642, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.3149299621582031, "learning_rate": 9.99684247128488e-06, "loss": 0.6558, "step": 635 }, { "epoch": 0.04, "grad_norm": 1.2343803644180298, "learning_rate": 9.996805218599249e-06, "loss": 0.5961, "step": 636 }, { "epoch": 0.04, "grad_norm": 1.222564697265625, "learning_rate": 9.996767747516155e-06, "loss": 0.5975, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.211363434791565, "learning_rate": 9.996730058037231e-06, "loss": 0.6038, "step": 638 }, { "epoch": 0.04, "grad_norm": 1.5018917322158813, "learning_rate": 9.99669215016413e-06, "loss": 0.6867, "step": 639 }, { "epoch": 0.04, "grad_norm": 1.3805820941925049, "learning_rate": 9.996654023898509e-06, "loss": 0.5954, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.2078715562820435, "learning_rate": 9.99661567924203e-06, "loss": 0.633, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.2406684160232544, "learning_rate": 9.996577116196372e-06, "loss": 0.6207, "step": 642 }, { "epoch": 0.04, "grad_norm": 1.4157217741012573, "learning_rate": 9.996538334763217e-06, "loss": 0.6545, "step": 643 }, { "epoch": 0.04, "grad_norm": 1.2953068017959595, "learning_rate": 9.996499334944265e-06, "loss": 0.6508, "step": 644 }, { "epoch": 0.04, "grad_norm": 1.2780482769012451, "learning_rate": 9.996460116741217e-06, "loss": 0.6581, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.3972628116607666, "learning_rate": 9.996420680155789e-06, "loss": 0.5941, "step": 646 }, { "epoch": 0.04, "grad_norm": 1.3138420581817627, "learning_rate": 9.996381025189704e-06, "loss": 0.5947, "step": 647 }, { "epoch": 0.04, "grad_norm": 1.31589937210083, "learning_rate": 9.996341151844694e-06, "loss": 0.5906, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.240477442741394, "learning_rate": 9.996301060122506e-06, "loss": 0.5599, "step": 649 }, { "epoch": 0.04, "grad_norm": 1.2585747241973877, "learning_rate": 9.996260750024886e-06, "loss": 0.6169, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.2373098134994507, "learning_rate": 9.996220221553603e-06, "loss": 0.5933, "step": 651 }, { "epoch": 0.04, "grad_norm": 1.3539001941680908, "learning_rate": 9.996179474710422e-06, "loss": 0.6232, "step": 652 }, { "epoch": 0.04, "grad_norm": 1.2914259433746338, "learning_rate": 9.996138509497126e-06, "loss": 0.6256, "step": 653 }, { "epoch": 0.04, "grad_norm": 1.4551327228546143, "learning_rate": 9.996097325915506e-06, "loss": 0.6501, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.297116994857788, "learning_rate": 9.996055923967363e-06, "loss": 0.6251, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.236744999885559, "learning_rate": 9.996014303654504e-06, "loss": 0.5985, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.3472373485565186, "learning_rate": 9.995972464978752e-06, "loss": 0.6582, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.2879011631011963, "learning_rate": 9.995930407941932e-06, "loss": 0.5313, "step": 658 }, { "epoch": 0.04, "grad_norm": 1.6424403190612793, "learning_rate": 9.995888132545883e-06, "loss": 0.6094, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.3824845552444458, "learning_rate": 9.995845638792453e-06, "loss": 0.6251, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.3312753438949585, "learning_rate": 9.995802926683503e-06, "loss": 0.5945, "step": 661 }, { "epoch": 0.04, "grad_norm": 1.278936505317688, "learning_rate": 9.995759996220894e-06, "loss": 0.6934, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.275549054145813, "learning_rate": 9.995716847406504e-06, "loss": 0.6343, "step": 663 }, { "epoch": 0.04, "grad_norm": 1.370938777923584, "learning_rate": 9.99567348024222e-06, "loss": 0.6576, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.4579659700393677, "learning_rate": 9.995629894729937e-06, "loss": 0.5786, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.4113339185714722, "learning_rate": 9.995586090871561e-06, "loss": 0.6315, "step": 666 }, { "epoch": 0.04, "grad_norm": 1.3470135927200317, "learning_rate": 9.995542068669006e-06, "loss": 0.7192, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.2419312000274658, "learning_rate": 9.995497828124195e-06, "loss": 0.6285, "step": 668 }, { "epoch": 0.04, "grad_norm": 1.3299139738082886, "learning_rate": 9.995453369239062e-06, "loss": 0.6408, "step": 669 }, { "epoch": 0.04, "grad_norm": 1.220598816871643, "learning_rate": 9.995408692015553e-06, "loss": 0.6214, "step": 670 }, { "epoch": 0.04, "grad_norm": 1.4219657182693481, "learning_rate": 9.995363796455617e-06, "loss": 0.5881, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.305813193321228, "learning_rate": 9.995318682561217e-06, "loss": 0.6083, "step": 672 }, { "epoch": 0.04, "grad_norm": 1.1777911186218262, "learning_rate": 9.995273350334326e-06, "loss": 0.601, "step": 673 }, { "epoch": 0.04, "grad_norm": 1.180584192276001, "learning_rate": 9.995227799776926e-06, "loss": 0.5953, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.3072441816329956, "learning_rate": 9.995182030891007e-06, "loss": 0.5845, "step": 675 }, { "epoch": 0.04, "grad_norm": 1.2619361877441406, "learning_rate": 9.995136043678566e-06, "loss": 0.5958, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.2260758876800537, "learning_rate": 9.995089838141619e-06, "loss": 0.6332, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.3020881414413452, "learning_rate": 9.995043414282182e-06, "loss": 0.6379, "step": 678 }, { "epoch": 0.04, "grad_norm": 1.2657369375228882, "learning_rate": 9.994996772102284e-06, "loss": 0.6364, "step": 679 }, { "epoch": 0.04, "grad_norm": 1.3133066892623901, "learning_rate": 9.994949911603965e-06, "loss": 0.6431, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.3301125764846802, "learning_rate": 9.994902832789272e-06, "loss": 0.6189, "step": 681 }, { "epoch": 0.04, "grad_norm": 1.3152416944503784, "learning_rate": 9.994855535660265e-06, "loss": 0.6437, "step": 682 }, { "epoch": 0.04, "grad_norm": 1.2814825773239136, "learning_rate": 9.994808020219007e-06, "loss": 0.6189, "step": 683 }, { "epoch": 0.04, "grad_norm": 1.2831264734268188, "learning_rate": 9.994760286467578e-06, "loss": 0.6236, "step": 684 }, { "epoch": 0.04, "grad_norm": 1.3100135326385498, "learning_rate": 9.994712334408063e-06, "loss": 0.6355, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.1557562351226807, "learning_rate": 9.99466416404256e-06, "loss": 0.5821, "step": 686 }, { "epoch": 0.04, "grad_norm": 1.1266735792160034, "learning_rate": 9.99461577537317e-06, "loss": 0.6068, "step": 687 }, { "epoch": 0.04, "grad_norm": 1.1948823928833008, "learning_rate": 9.994567168402014e-06, "loss": 0.5687, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.3634592294692993, "learning_rate": 9.994518343131212e-06, "loss": 0.6647, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.1781846284866333, "learning_rate": 9.9944692995629e-06, "loss": 0.5871, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.1810084581375122, "learning_rate": 9.994420037699219e-06, "loss": 0.5743, "step": 691 }, { "epoch": 0.04, "grad_norm": 1.3490301370620728, "learning_rate": 9.994370557542326e-06, "loss": 0.7233, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.2057101726531982, "learning_rate": 9.99432085909438e-06, "loss": 0.6074, "step": 693 }, { "epoch": 0.04, "grad_norm": 1.2959293127059937, "learning_rate": 9.994270942357554e-06, "loss": 0.5768, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.2555745840072632, "learning_rate": 9.994220807334032e-06, "loss": 0.637, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.2791564464569092, "learning_rate": 9.994170454026004e-06, "loss": 0.6517, "step": 696 }, { "epoch": 0.04, "grad_norm": 1.179724931716919, "learning_rate": 9.99411988243567e-06, "loss": 0.5982, "step": 697 }, { "epoch": 0.05, "grad_norm": 1.3064098358154297, "learning_rate": 9.994069092565241e-06, "loss": 0.5965, "step": 698 }, { "epoch": 0.05, "grad_norm": 1.3212107419967651, "learning_rate": 9.994018084416937e-06, "loss": 0.6145, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.2585086822509766, "learning_rate": 9.993966857992988e-06, "loss": 0.5268, "step": 700 }, { "epoch": 0.05, "grad_norm": 1.365216612815857, "learning_rate": 9.99391541329563e-06, "loss": 0.6055, "step": 701 }, { "epoch": 0.05, "grad_norm": 1.223777413368225, "learning_rate": 9.993863750327116e-06, "loss": 0.6273, "step": 702 }, { "epoch": 0.05, "grad_norm": 1.3158364295959473, "learning_rate": 9.9938118690897e-06, "loss": 0.5958, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.347861886024475, "learning_rate": 9.993759769585654e-06, "loss": 0.6456, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.400897741317749, "learning_rate": 9.99370745181725e-06, "loss": 0.5992, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.1967368125915527, "learning_rate": 9.993654915786777e-06, "loss": 0.584, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.290209174156189, "learning_rate": 9.993602161496534e-06, "loss": 0.5773, "step": 707 }, { "epoch": 0.05, "grad_norm": 1.2717548608779907, "learning_rate": 9.993549188948823e-06, "loss": 0.6207, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.4441581964492798, "learning_rate": 9.99349599814596e-06, "loss": 0.6826, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.2555763721466064, "learning_rate": 9.99344258909027e-06, "loss": 0.6518, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.2035099267959595, "learning_rate": 9.993388961784087e-06, "loss": 0.6267, "step": 711 }, { "epoch": 0.05, "grad_norm": 1.2162823677062988, "learning_rate": 9.993335116229758e-06, "loss": 0.5549, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.2453747987747192, "learning_rate": 9.993281052429633e-06, "loss": 0.648, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.3581827878952026, "learning_rate": 9.993226770386075e-06, "loss": 0.6033, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.4765944480895996, "learning_rate": 9.99317227010146e-06, "loss": 0.6332, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.2027941942214966, "learning_rate": 9.993117551578164e-06, "loss": 0.611, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.270143985748291, "learning_rate": 9.993062614818586e-06, "loss": 0.6058, "step": 717 }, { "epoch": 0.05, "grad_norm": 1.3551231622695923, "learning_rate": 9.993007459825122e-06, "loss": 0.6259, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.2765908241271973, "learning_rate": 9.992952086600182e-06, "loss": 0.5825, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.5044827461242676, "learning_rate": 9.992896495146192e-06, "loss": 0.6082, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.2510666847229004, "learning_rate": 9.992840685465575e-06, "loss": 0.6378, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.20393705368042, "learning_rate": 9.992784657560774e-06, "loss": 0.6226, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.2273883819580078, "learning_rate": 9.992728411434238e-06, "loss": 0.6168, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.358774185180664, "learning_rate": 9.992671947088424e-06, "loss": 0.6242, "step": 724 }, { "epoch": 0.05, "grad_norm": 1.41788649559021, "learning_rate": 9.9926152645258e-06, "loss": 0.6155, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.4462772607803345, "learning_rate": 9.992558363748846e-06, "loss": 0.6405, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.3435741662979126, "learning_rate": 9.992501244760044e-06, "loss": 0.6438, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.3797948360443115, "learning_rate": 9.992443907561895e-06, "loss": 0.6193, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.2988684177398682, "learning_rate": 9.992386352156903e-06, "loss": 0.6705, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.320826530456543, "learning_rate": 9.992328578547585e-06, "loss": 0.6123, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.2240113019943237, "learning_rate": 9.992270586736464e-06, "loss": 0.6341, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.2287269830703735, "learning_rate": 9.992212376726077e-06, "loss": 0.6242, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.2742928266525269, "learning_rate": 9.992153948518967e-06, "loss": 0.5943, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.3095282316207886, "learning_rate": 9.992095302117687e-06, "loss": 0.5788, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.4033558368682861, "learning_rate": 9.992036437524801e-06, "loss": 0.6249, "step": 735 }, { "epoch": 0.05, "grad_norm": 1.2717691659927368, "learning_rate": 9.991977354742883e-06, "loss": 0.5675, "step": 736 }, { "epoch": 0.05, "grad_norm": 1.2348840236663818, "learning_rate": 9.991918053774514e-06, "loss": 0.6153, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.362776279449463, "learning_rate": 9.991858534622285e-06, "loss": 0.5804, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.2267539501190186, "learning_rate": 9.9917987972888e-06, "loss": 0.5638, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.2984521389007568, "learning_rate": 9.991738841776668e-06, "loss": 0.6315, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.2296321392059326, "learning_rate": 9.99167866808851e-06, "loss": 0.6387, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.1935471296310425, "learning_rate": 9.991618276226958e-06, "loss": 0.6091, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.3240699768066406, "learning_rate": 9.991557666194647e-06, "loss": 0.6591, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.1959114074707031, "learning_rate": 9.99149683799423e-06, "loss": 0.5792, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.3239802122116089, "learning_rate": 9.991435791628363e-06, "loss": 0.6576, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.260502815246582, "learning_rate": 9.991374527099717e-06, "loss": 0.5768, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.180129051208496, "learning_rate": 9.991313044410968e-06, "loss": 0.5994, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.4211536645889282, "learning_rate": 9.991251343564805e-06, "loss": 0.6654, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.1996296644210815, "learning_rate": 9.991189424563923e-06, "loss": 0.5733, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.0855921506881714, "learning_rate": 9.991127287411027e-06, "loss": 0.5675, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.2112547159194946, "learning_rate": 9.991064932108836e-06, "loss": 0.5728, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.4121824502944946, "learning_rate": 9.991002358660074e-06, "loss": 0.6236, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.307271957397461, "learning_rate": 9.990939567067475e-06, "loss": 0.5876, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.324796199798584, "learning_rate": 9.990876557333783e-06, "loss": 0.5609, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.3254085779190063, "learning_rate": 9.990813329461755e-06, "loss": 0.5953, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.250463604927063, "learning_rate": 9.990749883454154e-06, "loss": 0.5906, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.2544255256652832, "learning_rate": 9.99068621931375e-06, "loss": 0.5226, "step": 757 }, { "epoch": 0.05, "grad_norm": 1.1885249614715576, "learning_rate": 9.990622337043328e-06, "loss": 0.5549, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.1991541385650635, "learning_rate": 9.99055823664568e-06, "loss": 0.5708, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.1789265871047974, "learning_rate": 9.990493918123607e-06, "loss": 0.5793, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.3204694986343384, "learning_rate": 9.99042938147992e-06, "loss": 0.612, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.316900372505188, "learning_rate": 9.990364626717441e-06, "loss": 0.6292, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.1159744262695312, "learning_rate": 9.990299653839e-06, "loss": 0.5954, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.2961786985397339, "learning_rate": 9.990234462847435e-06, "loss": 0.6131, "step": 764 }, { "epoch": 0.05, "grad_norm": 1.4107635021209717, "learning_rate": 9.990169053745597e-06, "loss": 0.6075, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.2866042852401733, "learning_rate": 9.990103426536344e-06, "loss": 0.5739, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.3043292760849, "learning_rate": 9.990037581222545e-06, "loss": 0.6086, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.204859733581543, "learning_rate": 9.989971517807078e-06, "loss": 0.6214, "step": 768 }, { "epoch": 0.05, "grad_norm": 1.2650980949401855, "learning_rate": 9.989905236292832e-06, "loss": 0.6187, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.3149570226669312, "learning_rate": 9.9898387366827e-06, "loss": 0.6296, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.2332391738891602, "learning_rate": 9.989772018979591e-06, "loss": 0.6276, "step": 771 }, { "epoch": 0.05, "grad_norm": 1.2414213418960571, "learning_rate": 9.989705083186422e-06, "loss": 0.5936, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.317628264427185, "learning_rate": 9.989637929306118e-06, "loss": 0.6287, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.255882978439331, "learning_rate": 9.98957055734161e-06, "loss": 0.6135, "step": 774 }, { "epoch": 0.05, "grad_norm": 1.4007010459899902, "learning_rate": 9.98950296729585e-06, "loss": 0.6493, "step": 775 }, { "epoch": 0.05, "grad_norm": 1.3377958536148071, "learning_rate": 9.989435159171786e-06, "loss": 0.5756, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.1720798015594482, "learning_rate": 9.989367132972385e-06, "loss": 0.6093, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.1775676012039185, "learning_rate": 9.989298888700621e-06, "loss": 0.5786, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.41486394405365, "learning_rate": 9.989230426359472e-06, "loss": 0.6336, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.2969028949737549, "learning_rate": 9.989161745951936e-06, "loss": 0.5975, "step": 780 }, { "epoch": 0.05, "grad_norm": 1.2303740978240967, "learning_rate": 9.98909284748101e-06, "loss": 0.6232, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.3672415018081665, "learning_rate": 9.98902373094971e-06, "loss": 0.6557, "step": 782 }, { "epoch": 0.05, "grad_norm": 1.3400826454162598, "learning_rate": 9.988954396361053e-06, "loss": 0.5442, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.2115839719772339, "learning_rate": 9.988884843718072e-06, "loss": 0.5946, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.2874220609664917, "learning_rate": 9.988815073023806e-06, "loss": 0.5989, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.2748186588287354, "learning_rate": 9.988745084281302e-06, "loss": 0.6319, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.2610372304916382, "learning_rate": 9.988674877493625e-06, "loss": 0.5938, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.4561867713928223, "learning_rate": 9.988604452663837e-06, "loss": 0.6502, "step": 788 }, { "epoch": 0.05, "grad_norm": 1.2633576393127441, "learning_rate": 9.988533809795022e-06, "loss": 0.5688, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.2195382118225098, "learning_rate": 9.988462948890262e-06, "loss": 0.6012, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.1686161756515503, "learning_rate": 9.988391869952659e-06, "loss": 0.5658, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.3605598211288452, "learning_rate": 9.988320572985317e-06, "loss": 0.6365, "step": 792 }, { "epoch": 0.05, "grad_norm": 1.2631806135177612, "learning_rate": 9.988249057991353e-06, "loss": 0.6023, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.2865442037582397, "learning_rate": 9.988177324973891e-06, "loss": 0.6388, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.253281593322754, "learning_rate": 9.98810537393607e-06, "loss": 0.6333, "step": 795 }, { "epoch": 0.05, "grad_norm": 1.2490798234939575, "learning_rate": 9.98803320488103e-06, "loss": 0.5382, "step": 796 }, { "epoch": 0.05, "grad_norm": 1.248786211013794, "learning_rate": 9.98796081781193e-06, "loss": 0.617, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.2232046127319336, "learning_rate": 9.987888212731932e-06, "loss": 0.6267, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.2543914318084717, "learning_rate": 9.98781538964421e-06, "loss": 0.547, "step": 799 }, { "epoch": 0.05, "grad_norm": 1.2306928634643555, "learning_rate": 9.987742348551943e-06, "loss": 0.607, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.4710330963134766, "learning_rate": 9.987669089458327e-06, "loss": 0.632, "step": 801 }, { "epoch": 0.05, "grad_norm": 1.1867766380310059, "learning_rate": 9.987595612366566e-06, "loss": 0.6139, "step": 802 }, { "epoch": 0.05, "grad_norm": 1.193033218383789, "learning_rate": 9.987521917279866e-06, "loss": 0.6109, "step": 803 }, { "epoch": 0.05, "grad_norm": 1.3625398874282837, "learning_rate": 9.987448004201453e-06, "loss": 0.6437, "step": 804 }, { "epoch": 0.05, "grad_norm": 1.3329142332077026, "learning_rate": 9.987373873134555e-06, "loss": 0.6336, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.1992052793502808, "learning_rate": 9.987299524082413e-06, "loss": 0.6299, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.2588757276535034, "learning_rate": 9.987224957048275e-06, "loss": 0.6256, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.3142235279083252, "learning_rate": 9.987150172035402e-06, "loss": 0.5856, "step": 808 }, { "epoch": 0.05, "grad_norm": 1.2910722494125366, "learning_rate": 9.987075169047063e-06, "loss": 0.626, "step": 809 }, { "epoch": 0.05, "grad_norm": 1.2415562868118286, "learning_rate": 9.986999948086533e-06, "loss": 0.603, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.2316285371780396, "learning_rate": 9.986924509157103e-06, "loss": 0.6209, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.352343201637268, "learning_rate": 9.98684885226207e-06, "loss": 0.6348, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.2484899759292603, "learning_rate": 9.986772977404739e-06, "loss": 0.6277, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.3277833461761475, "learning_rate": 9.986696884588428e-06, "loss": 0.5901, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.2897406816482544, "learning_rate": 9.986620573816461e-06, "loss": 0.5907, "step": 815 }, { "epoch": 0.05, "grad_norm": 1.3730381727218628, "learning_rate": 9.986544045092175e-06, "loss": 0.6157, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.2355902194976807, "learning_rate": 9.986467298418914e-06, "loss": 0.619, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.3027957677841187, "learning_rate": 9.98639033380003e-06, "loss": 0.6003, "step": 818 }, { "epoch": 0.05, "grad_norm": 1.300930380821228, "learning_rate": 9.986313151238893e-06, "loss": 0.6181, "step": 819 }, { "epoch": 0.05, "grad_norm": 1.2643738985061646, "learning_rate": 9.986235750738872e-06, "loss": 0.623, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.3172075748443604, "learning_rate": 9.986158132303353e-06, "loss": 0.6284, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.161787509918213, "learning_rate": 9.986080295935723e-06, "loss": 0.6176, "step": 822 }, { "epoch": 0.05, "grad_norm": 1.2196667194366455, "learning_rate": 9.98600224163939e-06, "loss": 0.6464, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.3472455739974976, "learning_rate": 9.985923969417763e-06, "loss": 0.5998, "step": 824 }, { "epoch": 0.05, "grad_norm": 1.185465931892395, "learning_rate": 9.985845479274262e-06, "loss": 0.6066, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.3540101051330566, "learning_rate": 9.98576677121232e-06, "loss": 0.6288, "step": 826 }, { "epoch": 0.05, "grad_norm": 1.1765334606170654, "learning_rate": 9.985687845235375e-06, "loss": 0.5739, "step": 827 }, { "epoch": 0.05, "grad_norm": 1.5631078481674194, "learning_rate": 9.985608701346877e-06, "loss": 0.6337, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.3698357343673706, "learning_rate": 9.985529339550286e-06, "loss": 0.5838, "step": 829 }, { "epoch": 0.05, "grad_norm": 1.3335708379745483, "learning_rate": 9.98544975984907e-06, "loss": 0.5609, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.3372868299484253, "learning_rate": 9.985369962246709e-06, "loss": 0.6076, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.226194143295288, "learning_rate": 9.98528994674669e-06, "loss": 0.6588, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.281947135925293, "learning_rate": 9.985209713352509e-06, "loss": 0.6326, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.3255478143692017, "learning_rate": 9.985129262067672e-06, "loss": 0.6214, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.3235219717025757, "learning_rate": 9.985048592895697e-06, "loss": 0.5943, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.557449221611023, "learning_rate": 9.98496770584011e-06, "loss": 0.5421, "step": 836 }, { "epoch": 0.05, "grad_norm": 1.2526482343673706, "learning_rate": 9.984886600904446e-06, "loss": 0.6195, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.4403488636016846, "learning_rate": 9.984805278092252e-06, "loss": 0.6202, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.261703610420227, "learning_rate": 9.984723737407078e-06, "loss": 0.6821, "step": 839 }, { "epoch": 0.05, "grad_norm": 1.4564203023910522, "learning_rate": 9.98464197885249e-06, "loss": 0.5997, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.5623525381088257, "learning_rate": 9.984560002432062e-06, "loss": 0.6123, "step": 841 }, { "epoch": 0.05, "grad_norm": 1.3804877996444702, "learning_rate": 9.984477808149376e-06, "loss": 0.5947, "step": 842 }, { "epoch": 0.05, "grad_norm": 1.1786019802093506, "learning_rate": 9.984395396008027e-06, "loss": 0.57, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.2866415977478027, "learning_rate": 9.984312766011613e-06, "loss": 0.6288, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.1051063537597656, "learning_rate": 9.98422991816375e-06, "loss": 0.6086, "step": 845 }, { "epoch": 0.05, "grad_norm": 1.5923144817352295, "learning_rate": 9.984146852468055e-06, "loss": 0.6505, "step": 846 }, { "epoch": 0.05, "grad_norm": 1.3788366317749023, "learning_rate": 9.98406356892816e-06, "loss": 0.5991, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.2025681734085083, "learning_rate": 9.983980067547708e-06, "loss": 0.6157, "step": 848 }, { "epoch": 0.05, "grad_norm": 1.19566011428833, "learning_rate": 9.983896348330343e-06, "loss": 0.5495, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.2052993774414062, "learning_rate": 9.983812411279728e-06, "loss": 0.6017, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.3187018632888794, "learning_rate": 9.983728256399532e-06, "loss": 0.6181, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.4236623048782349, "learning_rate": 9.983643883693432e-06, "loss": 0.6766, "step": 852 }, { "epoch": 0.06, "grad_norm": 1.2790628671646118, "learning_rate": 9.983559293165115e-06, "loss": 0.6468, "step": 853 }, { "epoch": 0.06, "grad_norm": 1.2686830759048462, "learning_rate": 9.98347448481828e-06, "loss": 0.6033, "step": 854 }, { "epoch": 0.06, "grad_norm": 1.2375627756118774, "learning_rate": 9.983389458656631e-06, "loss": 0.6213, "step": 855 }, { "epoch": 0.06, "grad_norm": 1.2139925956726074, "learning_rate": 9.98330421468389e-06, "loss": 0.5533, "step": 856 }, { "epoch": 0.06, "grad_norm": 1.2773964405059814, "learning_rate": 9.983218752903776e-06, "loss": 0.6413, "step": 857 }, { "epoch": 0.06, "grad_norm": 1.2184851169586182, "learning_rate": 9.983133073320025e-06, "loss": 0.632, "step": 858 }, { "epoch": 0.06, "grad_norm": 1.2012836933135986, "learning_rate": 9.983047175936388e-06, "loss": 0.5852, "step": 859 }, { "epoch": 0.06, "grad_norm": 1.3325608968734741, "learning_rate": 9.982961060756614e-06, "loss": 0.6957, "step": 860 }, { "epoch": 0.06, "grad_norm": 1.2660789489746094, "learning_rate": 9.982874727784469e-06, "loss": 0.5885, "step": 861 }, { "epoch": 0.06, "grad_norm": 1.3176480531692505, "learning_rate": 9.982788177023724e-06, "loss": 0.6524, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.2478309869766235, "learning_rate": 9.982701408478164e-06, "loss": 0.6506, "step": 863 }, { "epoch": 0.06, "grad_norm": 1.2376103401184082, "learning_rate": 9.982614422151582e-06, "loss": 0.6187, "step": 864 }, { "epoch": 0.06, "grad_norm": 1.2478013038635254, "learning_rate": 9.982527218047777e-06, "loss": 0.6269, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.2169177532196045, "learning_rate": 9.982439796170565e-06, "loss": 0.5917, "step": 866 }, { "epoch": 0.06, "grad_norm": 1.2722738981246948, "learning_rate": 9.982352156523762e-06, "loss": 0.6448, "step": 867 }, { "epoch": 0.06, "grad_norm": 1.2766400575637817, "learning_rate": 9.982264299111202e-06, "loss": 0.5578, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.1523939371109009, "learning_rate": 9.982176223936724e-06, "loss": 0.6134, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.2455285787582397, "learning_rate": 9.982087931004177e-06, "loss": 0.6789, "step": 870 }, { "epoch": 0.06, "grad_norm": 1.1005405187606812, "learning_rate": 9.981999420317419e-06, "loss": 0.5496, "step": 871 }, { "epoch": 0.06, "grad_norm": 1.3222668170928955, "learning_rate": 9.981910691880324e-06, "loss": 0.6705, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.2242785692214966, "learning_rate": 9.981821745696762e-06, "loss": 0.5954, "step": 873 }, { "epoch": 0.06, "grad_norm": 1.2519280910491943, "learning_rate": 9.981732581770626e-06, "loss": 0.5915, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.2866630554199219, "learning_rate": 9.981643200105813e-06, "loss": 0.6463, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.1915504932403564, "learning_rate": 9.981553600706228e-06, "loss": 0.5832, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.2576707601547241, "learning_rate": 9.981463783575788e-06, "loss": 0.6181, "step": 877 }, { "epoch": 0.06, "grad_norm": 1.1597962379455566, "learning_rate": 9.98137374871842e-06, "loss": 0.5947, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.3411592245101929, "learning_rate": 9.981283496138055e-06, "loss": 0.5975, "step": 879 }, { "epoch": 0.06, "grad_norm": 1.3354560136795044, "learning_rate": 9.98119302583864e-06, "loss": 0.5935, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.248321771621704, "learning_rate": 9.981102337824131e-06, "loss": 0.6146, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.20414400100708, "learning_rate": 9.98101143209849e-06, "loss": 0.5823, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.4060609340667725, "learning_rate": 9.98092030866569e-06, "loss": 0.6115, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.298300862312317, "learning_rate": 9.980828967529714e-06, "loss": 0.6532, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.4017025232315063, "learning_rate": 9.980737408694554e-06, "loss": 0.6704, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.4661026000976562, "learning_rate": 9.980645632164214e-06, "loss": 0.592, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.354820728302002, "learning_rate": 9.980553637942702e-06, "loss": 0.6293, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.231047511100769, "learning_rate": 9.980461426034042e-06, "loss": 0.6107, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.2608742713928223, "learning_rate": 9.980368996442262e-06, "loss": 0.6382, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.2965399026870728, "learning_rate": 9.980276349171404e-06, "loss": 0.6304, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.3508367538452148, "learning_rate": 9.980183484225515e-06, "loss": 0.6247, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.2987724542617798, "learning_rate": 9.980090401608655e-06, "loss": 0.5833, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.2296866178512573, "learning_rate": 9.979997101324893e-06, "loss": 0.6158, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.2844258546829224, "learning_rate": 9.979903583378305e-06, "loss": 0.5949, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.1637974977493286, "learning_rate": 9.979809847772981e-06, "loss": 0.5606, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.2235151529312134, "learning_rate": 9.979715894513016e-06, "loss": 0.6159, "step": 896 }, { "epoch": 0.06, "grad_norm": 1.3089059591293335, "learning_rate": 9.979621723602516e-06, "loss": 0.651, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.2659716606140137, "learning_rate": 9.979527335045599e-06, "loss": 0.6007, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.2856205701828003, "learning_rate": 9.97943272884639e-06, "loss": 0.607, "step": 899 }, { "epoch": 0.06, "grad_norm": 1.1709691286087036, "learning_rate": 9.979337905009023e-06, "loss": 0.6347, "step": 900 }, { "epoch": 0.06, "grad_norm": 1.2775671482086182, "learning_rate": 9.979242863537644e-06, "loss": 0.603, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.1570857763290405, "learning_rate": 9.979147604436405e-06, "loss": 0.5932, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.2721045017242432, "learning_rate": 9.97905212770947e-06, "loss": 0.5544, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.1412490606307983, "learning_rate": 9.978956433361013e-06, "loss": 0.59, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.3590960502624512, "learning_rate": 9.978860521395218e-06, "loss": 0.6133, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.2698040008544922, "learning_rate": 9.978764391816274e-06, "loss": 0.6024, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.4201289415359497, "learning_rate": 9.978668044628383e-06, "loss": 0.6247, "step": 907 }, { "epoch": 0.06, "grad_norm": 1.383396029472351, "learning_rate": 9.978571479835757e-06, "loss": 0.6411, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.2004008293151855, "learning_rate": 9.978474697442617e-06, "loss": 0.6079, "step": 909 }, { "epoch": 0.06, "grad_norm": 1.2527267932891846, "learning_rate": 9.978377697453191e-06, "loss": 0.6119, "step": 910 }, { "epoch": 0.06, "grad_norm": 1.1821072101593018, "learning_rate": 9.978280479871723e-06, "loss": 0.6179, "step": 911 }, { "epoch": 0.06, "grad_norm": 1.3939571380615234, "learning_rate": 9.97818304470246e-06, "loss": 0.6492, "step": 912 }, { "epoch": 0.06, "grad_norm": 1.5267562866210938, "learning_rate": 9.978085391949657e-06, "loss": 0.6136, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.2458933591842651, "learning_rate": 9.977987521617588e-06, "loss": 0.6084, "step": 914 }, { "epoch": 0.06, "grad_norm": 1.2722402811050415, "learning_rate": 9.977889433710525e-06, "loss": 0.6376, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.4034563302993774, "learning_rate": 9.977791128232758e-06, "loss": 0.6288, "step": 916 }, { "epoch": 0.06, "grad_norm": 1.4008065462112427, "learning_rate": 9.977692605188585e-06, "loss": 0.6504, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.406072735786438, "learning_rate": 9.977593864582311e-06, "loss": 0.6053, "step": 918 }, { "epoch": 0.06, "grad_norm": 1.2448539733886719, "learning_rate": 9.977494906418252e-06, "loss": 0.623, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.2532833814620972, "learning_rate": 9.977395730700733e-06, "loss": 0.6789, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.1808061599731445, "learning_rate": 9.977296337434087e-06, "loss": 0.5308, "step": 921 }, { "epoch": 0.06, "grad_norm": 1.2976723909378052, "learning_rate": 9.977196726622659e-06, "loss": 0.6208, "step": 922 }, { "epoch": 0.06, "grad_norm": 1.2247024774551392, "learning_rate": 9.977096898270804e-06, "loss": 0.6671, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.249965786933899, "learning_rate": 9.976996852382887e-06, "loss": 0.6491, "step": 924 }, { "epoch": 0.06, "grad_norm": 1.3198792934417725, "learning_rate": 9.976896588963276e-06, "loss": 0.633, "step": 925 }, { "epoch": 0.06, "grad_norm": 1.2567161321640015, "learning_rate": 9.976796108016355e-06, "loss": 0.5729, "step": 926 }, { "epoch": 0.06, "grad_norm": 1.2342407703399658, "learning_rate": 9.97669540954652e-06, "loss": 0.6499, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.2322415113449097, "learning_rate": 9.976594493558166e-06, "loss": 0.5746, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.1950874328613281, "learning_rate": 9.976493360055706e-06, "loss": 0.6108, "step": 929 }, { "epoch": 0.06, "grad_norm": 1.3008131980895996, "learning_rate": 9.976392009043562e-06, "loss": 0.5964, "step": 930 }, { "epoch": 0.06, "grad_norm": 1.3301632404327393, "learning_rate": 9.976290440526161e-06, "loss": 0.6508, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.2326123714447021, "learning_rate": 9.976188654507945e-06, "loss": 0.5785, "step": 932 }, { "epoch": 0.06, "grad_norm": 1.209626317024231, "learning_rate": 9.976086650993361e-06, "loss": 0.5608, "step": 933 }, { "epoch": 0.06, "grad_norm": 1.1961774826049805, "learning_rate": 9.97598442998687e-06, "loss": 0.5513, "step": 934 }, { "epoch": 0.06, "grad_norm": 1.324852466583252, "learning_rate": 9.975881991492938e-06, "loss": 0.6189, "step": 935 }, { "epoch": 0.06, "grad_norm": 1.2580069303512573, "learning_rate": 9.97577933551604e-06, "loss": 0.5941, "step": 936 }, { "epoch": 0.06, "grad_norm": 1.30707848072052, "learning_rate": 9.975676462060663e-06, "loss": 0.6213, "step": 937 }, { "epoch": 0.06, "grad_norm": 1.2888063192367554, "learning_rate": 9.975573371131309e-06, "loss": 0.6093, "step": 938 }, { "epoch": 0.06, "grad_norm": 1.2579505443572998, "learning_rate": 9.975470062732479e-06, "loss": 0.571, "step": 939 }, { "epoch": 0.06, "grad_norm": 1.2957708835601807, "learning_rate": 9.975366536868689e-06, "loss": 0.6733, "step": 940 }, { "epoch": 0.06, "grad_norm": 1.325613021850586, "learning_rate": 9.975262793544465e-06, "loss": 0.6397, "step": 941 }, { "epoch": 0.06, "grad_norm": 1.2037620544433594, "learning_rate": 9.97515883276434e-06, "loss": 0.5763, "step": 942 }, { "epoch": 0.06, "grad_norm": 1.2128825187683105, "learning_rate": 9.975054654532858e-06, "loss": 0.6062, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.2934964895248413, "learning_rate": 9.974950258854575e-06, "loss": 0.5695, "step": 944 }, { "epoch": 0.06, "grad_norm": 1.2614521980285645, "learning_rate": 9.974845645734049e-06, "loss": 0.6339, "step": 945 }, { "epoch": 0.06, "grad_norm": 1.1881873607635498, "learning_rate": 9.974740815175856e-06, "loss": 0.6351, "step": 946 }, { "epoch": 0.06, "grad_norm": 1.3223987817764282, "learning_rate": 9.974635767184578e-06, "loss": 0.581, "step": 947 }, { "epoch": 0.06, "grad_norm": 1.2861248254776, "learning_rate": 9.974530501764806e-06, "loss": 0.6193, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.3506149053573608, "learning_rate": 9.974425018921138e-06, "loss": 0.611, "step": 949 }, { "epoch": 0.06, "grad_norm": 1.2428369522094727, "learning_rate": 9.974319318658188e-06, "loss": 0.606, "step": 950 }, { "epoch": 0.06, "grad_norm": 1.1606550216674805, "learning_rate": 9.974213400980573e-06, "loss": 0.5928, "step": 951 }, { "epoch": 0.06, "grad_norm": 1.2925598621368408, "learning_rate": 9.974107265892925e-06, "loss": 0.6209, "step": 952 }, { "epoch": 0.06, "grad_norm": 1.3577920198440552, "learning_rate": 9.97400091339988e-06, "loss": 0.6211, "step": 953 }, { "epoch": 0.06, "grad_norm": 1.2649238109588623, "learning_rate": 9.973894343506092e-06, "loss": 0.5946, "step": 954 }, { "epoch": 0.06, "grad_norm": 1.2924957275390625, "learning_rate": 9.973787556216213e-06, "loss": 0.6267, "step": 955 }, { "epoch": 0.06, "grad_norm": 1.162206768989563, "learning_rate": 9.973680551534912e-06, "loss": 0.5587, "step": 956 }, { "epoch": 0.06, "grad_norm": 1.2520642280578613, "learning_rate": 9.973573329466867e-06, "loss": 0.6136, "step": 957 }, { "epoch": 0.06, "grad_norm": 1.2471672296524048, "learning_rate": 9.973465890016762e-06, "loss": 0.585, "step": 958 }, { "epoch": 0.06, "grad_norm": 1.1193711757659912, "learning_rate": 9.973358233189297e-06, "loss": 0.557, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.1878868341445923, "learning_rate": 9.973250358989175e-06, "loss": 0.6061, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.2156562805175781, "learning_rate": 9.97314226742111e-06, "loss": 0.6131, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.2843506336212158, "learning_rate": 9.973033958489828e-06, "loss": 0.6243, "step": 962 }, { "epoch": 0.06, "grad_norm": 1.2564018964767456, "learning_rate": 9.97292543220006e-06, "loss": 0.611, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.1566494703292847, "learning_rate": 9.972816688556555e-06, "loss": 0.5436, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.2930046319961548, "learning_rate": 9.97270772756406e-06, "loss": 0.6121, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.2738231420516968, "learning_rate": 9.972598549227342e-06, "loss": 0.6157, "step": 966 }, { "epoch": 0.06, "grad_norm": 1.2901301383972168, "learning_rate": 9.97248915355117e-06, "loss": 0.6088, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.2126623392105103, "learning_rate": 9.972379540540325e-06, "loss": 0.6036, "step": 968 }, { "epoch": 0.06, "grad_norm": 1.3718812465667725, "learning_rate": 9.9722697101996e-06, "loss": 0.6368, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.2617267370224, "learning_rate": 9.972159662533796e-06, "loss": 0.6698, "step": 970 }, { "epoch": 0.06, "grad_norm": 1.1576164960861206, "learning_rate": 9.97204939754772e-06, "loss": 0.6025, "step": 971 }, { "epoch": 0.06, "grad_norm": 1.2549669742584229, "learning_rate": 9.971938915246194e-06, "loss": 0.6139, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.2359199523925781, "learning_rate": 9.971828215634044e-06, "loss": 0.5576, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.2803503274917603, "learning_rate": 9.971717298716113e-06, "loss": 0.5579, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.271308422088623, "learning_rate": 9.971606164497243e-06, "loss": 0.6084, "step": 975 }, { "epoch": 0.06, "grad_norm": 1.3497918844223022, "learning_rate": 9.971494812982297e-06, "loss": 0.5917, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.2264504432678223, "learning_rate": 9.971383244176139e-06, "loss": 0.5919, "step": 977 }, { "epoch": 0.06, "grad_norm": 1.1865934133529663, "learning_rate": 9.971271458083644e-06, "loss": 0.6266, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.1579093933105469, "learning_rate": 9.9711594547097e-06, "loss": 0.568, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.2030593156814575, "learning_rate": 9.971047234059203e-06, "loss": 0.553, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.2436498403549194, "learning_rate": 9.970934796137058e-06, "loss": 0.6264, "step": 981 }, { "epoch": 0.06, "grad_norm": 1.377514362335205, "learning_rate": 9.970822140948176e-06, "loss": 0.6169, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.2259386777877808, "learning_rate": 9.970709268497483e-06, "loss": 0.5914, "step": 983 }, { "epoch": 0.06, "grad_norm": 1.238358497619629, "learning_rate": 9.970596178789913e-06, "loss": 0.6271, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.3627619743347168, "learning_rate": 9.970482871830409e-06, "loss": 0.6563, "step": 985 }, { "epoch": 0.06, "grad_norm": 1.279189109802246, "learning_rate": 9.970369347623923e-06, "loss": 0.6422, "step": 986 }, { "epoch": 0.06, "grad_norm": 1.1940354108810425, "learning_rate": 9.970255606175416e-06, "loss": 0.6251, "step": 987 }, { "epoch": 0.06, "grad_norm": 1.2056682109832764, "learning_rate": 9.970141647489859e-06, "loss": 0.6334, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.1727417707443237, "learning_rate": 9.970027471572234e-06, "loss": 0.5446, "step": 989 }, { "epoch": 0.06, "grad_norm": 1.259385108947754, "learning_rate": 9.969913078427533e-06, "loss": 0.6396, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.2069865465164185, "learning_rate": 9.969798468060752e-06, "loss": 0.6323, "step": 991 }, { "epoch": 0.06, "grad_norm": 1.3006101846694946, "learning_rate": 9.969683640476903e-06, "loss": 0.5693, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.1139830350875854, "learning_rate": 9.969568595681003e-06, "loss": 0.5588, "step": 993 }, { "epoch": 0.06, "grad_norm": 1.1768721342086792, "learning_rate": 9.969453333678084e-06, "loss": 0.5944, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.3618686199188232, "learning_rate": 9.969337854473177e-06, "loss": 0.6458, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.2257293462753296, "learning_rate": 9.969222158071337e-06, "loss": 0.6289, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.347925066947937, "learning_rate": 9.969106244477616e-06, "loss": 0.617, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.1878228187561035, "learning_rate": 9.96899011369708e-06, "loss": 0.5917, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.197739601135254, "learning_rate": 9.968873765734808e-06, "loss": 0.5911, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.2393320798873901, "learning_rate": 9.968757200595883e-06, "loss": 0.5865, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.3428075313568115, "learning_rate": 9.9686404182854e-06, "loss": 0.6276, "step": 1001 }, { "epoch": 0.06, "grad_norm": 1.357546329498291, "learning_rate": 9.968523418808464e-06, "loss": 0.5843, "step": 1002 }, { "epoch": 0.06, "grad_norm": 1.3540008068084717, "learning_rate": 9.968406202170189e-06, "loss": 0.6248, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.3305245637893677, "learning_rate": 9.968288768375699e-06, "loss": 0.6257, "step": 1004 }, { "epoch": 0.06, "grad_norm": 1.244650959968567, "learning_rate": 9.968171117430121e-06, "loss": 0.6481, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.2405517101287842, "learning_rate": 9.968053249338605e-06, "loss": 0.6009, "step": 1006 }, { "epoch": 0.07, "grad_norm": 1.2346045970916748, "learning_rate": 9.967935164106297e-06, "loss": 0.5861, "step": 1007 }, { "epoch": 0.07, "grad_norm": 1.1866501569747925, "learning_rate": 9.967816861738363e-06, "loss": 0.5443, "step": 1008 }, { "epoch": 0.07, "grad_norm": 1.2267893552780151, "learning_rate": 9.967698342239968e-06, "loss": 0.6267, "step": 1009 }, { "epoch": 0.07, "grad_norm": 1.4667688608169556, "learning_rate": 9.967579605616297e-06, "loss": 0.6058, "step": 1010 }, { "epoch": 0.07, "grad_norm": 1.2920573949813843, "learning_rate": 9.967460651872537e-06, "loss": 0.5531, "step": 1011 }, { "epoch": 0.07, "grad_norm": 1.338931679725647, "learning_rate": 9.96734148101389e-06, "loss": 0.6034, "step": 1012 }, { "epoch": 0.07, "grad_norm": 1.3478633165359497, "learning_rate": 9.967222093045562e-06, "loss": 0.5894, "step": 1013 }, { "epoch": 0.07, "grad_norm": 1.3318928480148315, "learning_rate": 9.967102487972773e-06, "loss": 0.5639, "step": 1014 }, { "epoch": 0.07, "grad_norm": 1.4014052152633667, "learning_rate": 9.966982665800747e-06, "loss": 0.5992, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.1974468231201172, "learning_rate": 9.966862626534725e-06, "loss": 0.5718, "step": 1016 }, { "epoch": 0.07, "grad_norm": 1.3457157611846924, "learning_rate": 9.966742370179951e-06, "loss": 0.6895, "step": 1017 }, { "epoch": 0.07, "grad_norm": 1.2093976736068726, "learning_rate": 9.966621896741683e-06, "loss": 0.5606, "step": 1018 }, { "epoch": 0.07, "grad_norm": 1.2151882648468018, "learning_rate": 9.966501206225187e-06, "loss": 0.6066, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.2843255996704102, "learning_rate": 9.966380298635737e-06, "loss": 0.5823, "step": 1020 }, { "epoch": 0.07, "grad_norm": 1.4036842584609985, "learning_rate": 9.966259173978617e-06, "loss": 0.6514, "step": 1021 }, { "epoch": 0.07, "grad_norm": 1.2982702255249023, "learning_rate": 9.966137832259121e-06, "loss": 0.6558, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.296386957168579, "learning_rate": 9.966016273482556e-06, "loss": 0.6071, "step": 1023 }, { "epoch": 0.07, "grad_norm": 1.210015892982483, "learning_rate": 9.96589449765423e-06, "loss": 0.5496, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.2903902530670166, "learning_rate": 9.965772504779467e-06, "loss": 0.6435, "step": 1025 }, { "epoch": 0.07, "grad_norm": 1.2653001546859741, "learning_rate": 9.9656502948636e-06, "loss": 0.5733, "step": 1026 }, { "epoch": 0.07, "grad_norm": 1.363677740097046, "learning_rate": 9.96552786791197e-06, "loss": 0.6307, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.3014277219772339, "learning_rate": 9.965405223929929e-06, "loss": 0.592, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.2786054611206055, "learning_rate": 9.965282362922837e-06, "loss": 0.6103, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.1366921663284302, "learning_rate": 9.965159284896063e-06, "loss": 0.5584, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.299695372581482, "learning_rate": 9.965035989854985e-06, "loss": 0.5814, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.3521603345870972, "learning_rate": 9.964912477804995e-06, "loss": 0.6863, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.2169584035873413, "learning_rate": 9.964788748751493e-06, "loss": 0.5892, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.2113759517669678, "learning_rate": 9.964664802699881e-06, "loss": 0.6127, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.3748409748077393, "learning_rate": 9.96454063965558e-06, "loss": 0.6217, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.253067135810852, "learning_rate": 9.964416259624017e-06, "loss": 0.6603, "step": 1036 }, { "epoch": 0.07, "grad_norm": 1.2329555749893188, "learning_rate": 9.964291662610628e-06, "loss": 0.6134, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.3657525777816772, "learning_rate": 9.964166848620857e-06, "loss": 0.6371, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.2161109447479248, "learning_rate": 9.964041817660162e-06, "loss": 0.6279, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.245696783065796, "learning_rate": 9.963916569734006e-06, "loss": 0.5917, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.2491488456726074, "learning_rate": 9.963791104847864e-06, "loss": 0.6229, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.1881153583526611, "learning_rate": 9.963665423007219e-06, "loss": 0.5878, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.3814630508422852, "learning_rate": 9.963539524217567e-06, "loss": 0.5866, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.2681533098220825, "learning_rate": 9.963413408484406e-06, "loss": 0.6701, "step": 1044 }, { "epoch": 0.07, "grad_norm": 1.2393368482589722, "learning_rate": 9.96328707581325e-06, "loss": 0.6141, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.1737481355667114, "learning_rate": 9.963160526209624e-06, "loss": 0.61, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.274159550666809, "learning_rate": 9.963033759679056e-06, "loss": 0.6157, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.1347925662994385, "learning_rate": 9.962906776227085e-06, "loss": 0.6295, "step": 1048 }, { "epoch": 0.07, "grad_norm": 1.2593406438827515, "learning_rate": 9.962779575859266e-06, "loss": 0.6175, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.3389242887496948, "learning_rate": 9.962652158581155e-06, "loss": 0.637, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.4090609550476074, "learning_rate": 9.96252452439832e-06, "loss": 0.6351, "step": 1051 }, { "epoch": 0.07, "grad_norm": 1.2360295057296753, "learning_rate": 9.962396673316343e-06, "loss": 0.6583, "step": 1052 }, { "epoch": 0.07, "grad_norm": 1.3091259002685547, "learning_rate": 9.962268605340812e-06, "loss": 0.5795, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.358341097831726, "learning_rate": 9.962140320477323e-06, "loss": 0.6201, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.3342446088790894, "learning_rate": 9.962011818731482e-06, "loss": 0.5535, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.3756113052368164, "learning_rate": 9.961883100108905e-06, "loss": 0.6129, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.3789933919906616, "learning_rate": 9.96175416461522e-06, "loss": 0.5987, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.299752950668335, "learning_rate": 9.961625012256065e-06, "loss": 0.6417, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.2664552927017212, "learning_rate": 9.961495643037079e-06, "loss": 0.6343, "step": 1059 }, { "epoch": 0.07, "grad_norm": 1.3910688161849976, "learning_rate": 9.96136605696392e-06, "loss": 0.606, "step": 1060 }, { "epoch": 0.07, "grad_norm": 1.27036452293396, "learning_rate": 9.961236254042251e-06, "loss": 0.6144, "step": 1061 }, { "epoch": 0.07, "grad_norm": 1.291195034980774, "learning_rate": 9.961106234277746e-06, "loss": 0.611, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.216302752494812, "learning_rate": 9.960975997676088e-06, "loss": 0.5839, "step": 1063 }, { "epoch": 0.07, "grad_norm": 1.4464287757873535, "learning_rate": 9.960845544242968e-06, "loss": 0.6347, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.4188205003738403, "learning_rate": 9.960714873984088e-06, "loss": 0.6593, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.2355942726135254, "learning_rate": 9.960583986905159e-06, "loss": 0.5864, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.2308704853057861, "learning_rate": 9.960452883011904e-06, "loss": 0.6218, "step": 1067 }, { "epoch": 0.07, "grad_norm": 1.3424538373947144, "learning_rate": 9.96032156231005e-06, "loss": 0.6265, "step": 1068 }, { "epoch": 0.07, "grad_norm": 1.198171854019165, "learning_rate": 9.960190024805339e-06, "loss": 0.5479, "step": 1069 }, { "epoch": 0.07, "grad_norm": 1.2114673852920532, "learning_rate": 9.96005827050352e-06, "loss": 0.6232, "step": 1070 }, { "epoch": 0.07, "grad_norm": 1.2386096715927124, "learning_rate": 9.95992629941035e-06, "loss": 0.6318, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.1560373306274414, "learning_rate": 9.9597941115316e-06, "loss": 0.6004, "step": 1072 }, { "epoch": 0.07, "grad_norm": 1.1394177675247192, "learning_rate": 9.959661706873046e-06, "loss": 0.6005, "step": 1073 }, { "epoch": 0.07, "grad_norm": 1.300654649734497, "learning_rate": 9.959529085440472e-06, "loss": 0.6055, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.207750916481018, "learning_rate": 9.959396247239678e-06, "loss": 0.6033, "step": 1075 }, { "epoch": 0.07, "grad_norm": 1.171964406967163, "learning_rate": 9.959263192276472e-06, "loss": 0.609, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.329696536064148, "learning_rate": 9.959129920556664e-06, "loss": 0.5932, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.2454519271850586, "learning_rate": 9.958996432086085e-06, "loss": 0.6178, "step": 1078 }, { "epoch": 0.07, "grad_norm": 1.1543397903442383, "learning_rate": 9.958862726870565e-06, "loss": 0.5157, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.2717963457107544, "learning_rate": 9.95872880491595e-06, "loss": 0.6012, "step": 1080 }, { "epoch": 0.07, "grad_norm": 1.239335536956787, "learning_rate": 9.95859466622809e-06, "loss": 0.5887, "step": 1081 }, { "epoch": 0.07, "grad_norm": 1.2856870889663696, "learning_rate": 9.958460310812852e-06, "loss": 0.6468, "step": 1082 }, { "epoch": 0.07, "grad_norm": 1.2318378686904907, "learning_rate": 9.958325738676106e-06, "loss": 0.5877, "step": 1083 }, { "epoch": 0.07, "grad_norm": 1.14888596534729, "learning_rate": 9.958190949823736e-06, "loss": 0.5723, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.1683262586593628, "learning_rate": 9.958055944261632e-06, "loss": 0.6478, "step": 1085 }, { "epoch": 0.07, "grad_norm": 1.2754720449447632, "learning_rate": 9.957920721995695e-06, "loss": 0.6033, "step": 1086 }, { "epoch": 0.07, "grad_norm": 1.213649034500122, "learning_rate": 9.957785283031833e-06, "loss": 0.5839, "step": 1087 }, { "epoch": 0.07, "grad_norm": 1.2099649906158447, "learning_rate": 9.957649627375969e-06, "loss": 0.5965, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.2237528562545776, "learning_rate": 9.957513755034029e-06, "loss": 0.594, "step": 1089 }, { "epoch": 0.07, "grad_norm": 1.309736967086792, "learning_rate": 9.957377666011955e-06, "loss": 0.5856, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.3431192636489868, "learning_rate": 9.957241360315692e-06, "loss": 0.646, "step": 1091 }, { "epoch": 0.07, "grad_norm": 1.0927166938781738, "learning_rate": 9.957104837951201e-06, "loss": 0.5455, "step": 1092 }, { "epoch": 0.07, "grad_norm": 1.2971012592315674, "learning_rate": 9.956968098924445e-06, "loss": 0.6305, "step": 1093 }, { "epoch": 0.07, "grad_norm": 1.2305786609649658, "learning_rate": 9.956831143241404e-06, "loss": 0.6175, "step": 1094 }, { "epoch": 0.07, "grad_norm": 1.210850477218628, "learning_rate": 9.956693970908062e-06, "loss": 0.6597, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.216955542564392, "learning_rate": 9.956556581930414e-06, "loss": 0.6312, "step": 1096 }, { "epoch": 0.07, "grad_norm": 1.2221180200576782, "learning_rate": 9.956418976314469e-06, "loss": 0.5775, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.1291214227676392, "learning_rate": 9.956281154066234e-06, "loss": 0.5747, "step": 1098 }, { "epoch": 0.07, "grad_norm": 1.2486892938613892, "learning_rate": 9.956143115191741e-06, "loss": 0.5806, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.2354475259780884, "learning_rate": 9.956004859697017e-06, "loss": 0.6218, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.3598357439041138, "learning_rate": 9.955866387588109e-06, "loss": 0.6246, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.202012538909912, "learning_rate": 9.955727698871065e-06, "loss": 0.5991, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.3942759037017822, "learning_rate": 9.955588793551952e-06, "loss": 0.6265, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.1680580377578735, "learning_rate": 9.955449671636836e-06, "loss": 0.5615, "step": 1104 }, { "epoch": 0.07, "grad_norm": 1.1628893613815308, "learning_rate": 9.955310333131802e-06, "loss": 0.59, "step": 1105 }, { "epoch": 0.07, "grad_norm": 1.2387324571609497, "learning_rate": 9.955170778042936e-06, "loss": 0.6422, "step": 1106 }, { "epoch": 0.07, "grad_norm": 1.262616515159607, "learning_rate": 9.95503100637634e-06, "loss": 0.6339, "step": 1107 }, { "epoch": 0.07, "grad_norm": 1.464924693107605, "learning_rate": 9.954891018138127e-06, "loss": 0.6275, "step": 1108 }, { "epoch": 0.07, "grad_norm": 1.2714338302612305, "learning_rate": 9.954750813334407e-06, "loss": 0.6136, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.2849901914596558, "learning_rate": 9.954610391971313e-06, "loss": 0.6153, "step": 1110 }, { "epoch": 0.07, "grad_norm": 1.2063571214675903, "learning_rate": 9.954469754054982e-06, "loss": 0.5592, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.2415201663970947, "learning_rate": 9.954328899591564e-06, "loss": 0.6167, "step": 1112 }, { "epoch": 0.07, "grad_norm": 1.213470697402954, "learning_rate": 9.954187828587209e-06, "loss": 0.629, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.2028242349624634, "learning_rate": 9.954046541048086e-06, "loss": 0.5853, "step": 1114 }, { "epoch": 0.07, "grad_norm": 1.1989924907684326, "learning_rate": 9.953905036980371e-06, "loss": 0.5512, "step": 1115 }, { "epoch": 0.07, "grad_norm": 1.2092164754867554, "learning_rate": 9.953763316390248e-06, "loss": 0.5502, "step": 1116 }, { "epoch": 0.07, "grad_norm": 1.3489493131637573, "learning_rate": 9.953621379283911e-06, "loss": 0.6322, "step": 1117 }, { "epoch": 0.07, "grad_norm": 1.2174055576324463, "learning_rate": 9.953479225667564e-06, "loss": 0.605, "step": 1118 }, { "epoch": 0.07, "grad_norm": 1.3762885332107544, "learning_rate": 9.953336855547421e-06, "loss": 0.5954, "step": 1119 }, { "epoch": 0.07, "grad_norm": 1.3273640871047974, "learning_rate": 9.953194268929701e-06, "loss": 0.6125, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.260887861251831, "learning_rate": 9.953051465820644e-06, "loss": 0.659, "step": 1121 }, { "epoch": 0.07, "grad_norm": 1.3008610010147095, "learning_rate": 9.952908446226483e-06, "loss": 0.6207, "step": 1122 }, { "epoch": 0.07, "grad_norm": 1.38685941696167, "learning_rate": 9.952765210153472e-06, "loss": 0.6859, "step": 1123 }, { "epoch": 0.07, "grad_norm": 1.1753040552139282, "learning_rate": 9.952621757607873e-06, "loss": 0.5591, "step": 1124 }, { "epoch": 0.07, "grad_norm": 1.2671395540237427, "learning_rate": 9.952478088595956e-06, "loss": 0.6101, "step": 1125 }, { "epoch": 0.07, "grad_norm": 1.1865808963775635, "learning_rate": 9.952334203123999e-06, "loss": 0.6207, "step": 1126 }, { "epoch": 0.07, "grad_norm": 1.315147042274475, "learning_rate": 9.952190101198291e-06, "loss": 0.5653, "step": 1127 }, { "epoch": 0.07, "grad_norm": 1.1798144578933716, "learning_rate": 9.952045782825128e-06, "loss": 0.6244, "step": 1128 }, { "epoch": 0.07, "grad_norm": 1.238917350769043, "learning_rate": 9.951901248010823e-06, "loss": 0.5606, "step": 1129 }, { "epoch": 0.07, "grad_norm": 1.4371790885925293, "learning_rate": 9.951756496761691e-06, "loss": 0.6191, "step": 1130 }, { "epoch": 0.07, "grad_norm": 1.2647961378097534, "learning_rate": 9.951611529084055e-06, "loss": 0.573, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.4267727136611938, "learning_rate": 9.951466344984258e-06, "loss": 0.6205, "step": 1132 }, { "epoch": 0.07, "grad_norm": 1.290429711341858, "learning_rate": 9.95132094446864e-06, "loss": 0.6136, "step": 1133 }, { "epoch": 0.07, "grad_norm": 1.333771824836731, "learning_rate": 9.951175327543558e-06, "loss": 0.5876, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.205845594406128, "learning_rate": 9.951029494215379e-06, "loss": 0.6556, "step": 1135 }, { "epoch": 0.07, "grad_norm": 1.2222832441329956, "learning_rate": 9.950883444490471e-06, "loss": 0.5564, "step": 1136 }, { "epoch": 0.07, "grad_norm": 1.217978835105896, "learning_rate": 9.950737178375222e-06, "loss": 0.6352, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.354812502861023, "learning_rate": 9.950590695876025e-06, "loss": 0.5903, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.23276948928833, "learning_rate": 9.950443996999283e-06, "loss": 0.5698, "step": 1139 }, { "epoch": 0.07, "grad_norm": 1.2025983333587646, "learning_rate": 9.950297081751404e-06, "loss": 0.6349, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.302459478378296, "learning_rate": 9.950149950138813e-06, "loss": 0.5806, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.0920779705047607, "learning_rate": 9.950002602167938e-06, "loss": 0.5821, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.2081817388534546, "learning_rate": 9.94985503784522e-06, "loss": 0.5752, "step": 1143 }, { "epoch": 0.07, "grad_norm": 1.2345128059387207, "learning_rate": 9.94970725717711e-06, "loss": 0.6072, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.2252671718597412, "learning_rate": 9.949559260170066e-06, "loss": 0.5616, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.227883219718933, "learning_rate": 9.949411046830558e-06, "loss": 0.6453, "step": 1146 }, { "epoch": 0.07, "grad_norm": 1.3198540210723877, "learning_rate": 9.94926261716506e-06, "loss": 0.6193, "step": 1147 }, { "epoch": 0.07, "grad_norm": 1.2231109142303467, "learning_rate": 9.949113971180064e-06, "loss": 0.6274, "step": 1148 }, { "epoch": 0.07, "grad_norm": 1.4483445882797241, "learning_rate": 9.948965108882065e-06, "loss": 0.6257, "step": 1149 }, { "epoch": 0.07, "grad_norm": 1.2410904169082642, "learning_rate": 9.94881603027757e-06, "loss": 0.6394, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.2814027070999146, "learning_rate": 9.948666735373094e-06, "loss": 0.6019, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.4309295415878296, "learning_rate": 9.948517224175163e-06, "loss": 0.6369, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.2618697881698608, "learning_rate": 9.948367496690309e-06, "loss": 0.6314, "step": 1153 }, { "epoch": 0.07, "grad_norm": 1.3757891654968262, "learning_rate": 9.948217552925082e-06, "loss": 0.6269, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.4503875970840454, "learning_rate": 9.948067392886031e-06, "loss": 0.6132, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.2787419557571411, "learning_rate": 9.947917016579721e-06, "loss": 0.6399, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.2426716089248657, "learning_rate": 9.947766424012723e-06, "loss": 0.6166, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.1562877893447876, "learning_rate": 9.94761561519162e-06, "loss": 0.6341, "step": 1158 }, { "epoch": 0.07, "grad_norm": 1.1737455129623413, "learning_rate": 9.947464590123007e-06, "loss": 0.5923, "step": 1159 }, { "epoch": 0.07, "grad_norm": 1.1416984796524048, "learning_rate": 9.947313348813476e-06, "loss": 0.5817, "step": 1160 }, { "epoch": 0.07, "grad_norm": 1.1958719491958618, "learning_rate": 9.947161891269646e-06, "loss": 0.6428, "step": 1161 }, { "epoch": 0.08, "grad_norm": 1.2372400760650635, "learning_rate": 9.947010217498134e-06, "loss": 0.5541, "step": 1162 }, { "epoch": 0.08, "grad_norm": 1.4296022653579712, "learning_rate": 9.946858327505568e-06, "loss": 0.5839, "step": 1163 }, { "epoch": 0.08, "grad_norm": 1.242598295211792, "learning_rate": 9.946706221298586e-06, "loss": 0.6371, "step": 1164 }, { "epoch": 0.08, "grad_norm": 1.279944658279419, "learning_rate": 9.946553898883839e-06, "loss": 0.5576, "step": 1165 }, { "epoch": 0.08, "grad_norm": 1.278002142906189, "learning_rate": 9.946401360267983e-06, "loss": 0.5974, "step": 1166 }, { "epoch": 0.08, "grad_norm": 1.2121338844299316, "learning_rate": 9.946248605457686e-06, "loss": 0.5876, "step": 1167 }, { "epoch": 0.08, "grad_norm": 1.2274916172027588, "learning_rate": 9.946095634459625e-06, "loss": 0.5622, "step": 1168 }, { "epoch": 0.08, "grad_norm": 1.2876677513122559, "learning_rate": 9.945942447280483e-06, "loss": 0.6213, "step": 1169 }, { "epoch": 0.08, "grad_norm": 1.35683012008667, "learning_rate": 9.94578904392696e-06, "loss": 0.5758, "step": 1170 }, { "epoch": 0.08, "grad_norm": 1.239996075630188, "learning_rate": 9.945635424405756e-06, "loss": 0.6153, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.1328368186950684, "learning_rate": 9.945481588723587e-06, "loss": 0.5679, "step": 1172 }, { "epoch": 0.08, "grad_norm": 1.304167628288269, "learning_rate": 9.945327536887177e-06, "loss": 0.6074, "step": 1173 }, { "epoch": 0.08, "grad_norm": 1.2355586290359497, "learning_rate": 9.94517326890326e-06, "loss": 0.5859, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.1539759635925293, "learning_rate": 9.94501878477858e-06, "loss": 0.5644, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.2151204347610474, "learning_rate": 9.944864084519885e-06, "loss": 0.5906, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.2463668584823608, "learning_rate": 9.94470916813394e-06, "loss": 0.6197, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.1774426698684692, "learning_rate": 9.944554035627514e-06, "loss": 0.5977, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.3505401611328125, "learning_rate": 9.944398687007389e-06, "loss": 0.6085, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.1915570497512817, "learning_rate": 9.944243122280354e-06, "loss": 0.5725, "step": 1180 }, { "epoch": 0.08, "grad_norm": 1.1261628866195679, "learning_rate": 9.944087341453207e-06, "loss": 0.6091, "step": 1181 }, { "epoch": 0.08, "grad_norm": 1.2659204006195068, "learning_rate": 9.94393134453276e-06, "loss": 0.6088, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.4092804193496704, "learning_rate": 9.94377513152583e-06, "loss": 0.605, "step": 1183 }, { "epoch": 0.08, "grad_norm": 1.1249440908432007, "learning_rate": 9.943618702439241e-06, "loss": 0.5993, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.1321388483047485, "learning_rate": 9.943462057279837e-06, "loss": 0.591, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.140833854675293, "learning_rate": 9.943305196054458e-06, "loss": 0.5788, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.1963233947753906, "learning_rate": 9.943148118769965e-06, "loss": 0.5891, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.1984602212905884, "learning_rate": 9.942990825433218e-06, "loss": 0.6, "step": 1188 }, { "epoch": 0.08, "grad_norm": 1.1524772644042969, "learning_rate": 9.942833316051097e-06, "loss": 0.5865, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.1810294389724731, "learning_rate": 9.942675590630486e-06, "loss": 0.633, "step": 1190 }, { "epoch": 0.08, "grad_norm": 1.2878679037094116, "learning_rate": 9.942517649178277e-06, "loss": 0.6084, "step": 1191 }, { "epoch": 0.08, "grad_norm": 1.1494908332824707, "learning_rate": 9.942359491701372e-06, "loss": 0.572, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.215349793434143, "learning_rate": 9.942201118206684e-06, "loss": 0.6229, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.2534223794937134, "learning_rate": 9.942042528701137e-06, "loss": 0.5985, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.1506727933883667, "learning_rate": 9.941883723191662e-06, "loss": 0.6239, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.2900757789611816, "learning_rate": 9.941724701685201e-06, "loss": 0.58, "step": 1196 }, { "epoch": 0.08, "grad_norm": 1.2046412229537964, "learning_rate": 9.941565464188701e-06, "loss": 0.5716, "step": 1197 }, { "epoch": 0.08, "grad_norm": 1.2513456344604492, "learning_rate": 9.941406010709125e-06, "loss": 0.6267, "step": 1198 }, { "epoch": 0.08, "grad_norm": 1.3233616352081299, "learning_rate": 9.941246341253442e-06, "loss": 0.5892, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.2798506021499634, "learning_rate": 9.941086455828628e-06, "loss": 0.5844, "step": 1200 }, { "epoch": 0.08, "grad_norm": 1.1577019691467285, "learning_rate": 9.940926354441675e-06, "loss": 0.6546, "step": 1201 }, { "epoch": 0.08, "grad_norm": 1.2945011854171753, "learning_rate": 9.940766037099579e-06, "loss": 0.5674, "step": 1202 }, { "epoch": 0.08, "grad_norm": 1.3123979568481445, "learning_rate": 9.940605503809344e-06, "loss": 0.6086, "step": 1203 }, { "epoch": 0.08, "grad_norm": 1.174729347229004, "learning_rate": 9.94044475457799e-06, "loss": 0.6028, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.1893833875656128, "learning_rate": 9.940283789412546e-06, "loss": 0.5573, "step": 1205 }, { "epoch": 0.08, "grad_norm": 1.2774114608764648, "learning_rate": 9.940122608320042e-06, "loss": 0.591, "step": 1206 }, { "epoch": 0.08, "grad_norm": 1.1773425340652466, "learning_rate": 9.939961211307523e-06, "loss": 0.5841, "step": 1207 }, { "epoch": 0.08, "grad_norm": 1.358311653137207, "learning_rate": 9.939799598382045e-06, "loss": 0.6338, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.1452255249023438, "learning_rate": 9.939637769550671e-06, "loss": 0.6095, "step": 1209 }, { "epoch": 0.08, "grad_norm": 1.184670090675354, "learning_rate": 9.939475724820478e-06, "loss": 0.5749, "step": 1210 }, { "epoch": 0.08, "grad_norm": 1.0979782342910767, "learning_rate": 9.939313464198542e-06, "loss": 0.5736, "step": 1211 }, { "epoch": 0.08, "grad_norm": 1.4760690927505493, "learning_rate": 9.93915098769196e-06, "loss": 0.6085, "step": 1212 }, { "epoch": 0.08, "grad_norm": 1.298526406288147, "learning_rate": 9.938988295307828e-06, "loss": 0.6046, "step": 1213 }, { "epoch": 0.08, "grad_norm": 1.2093422412872314, "learning_rate": 9.938825387053264e-06, "loss": 0.5612, "step": 1214 }, { "epoch": 0.08, "grad_norm": 1.237298846244812, "learning_rate": 9.938662262935381e-06, "loss": 0.6059, "step": 1215 }, { "epoch": 0.08, "grad_norm": 1.3047606945037842, "learning_rate": 9.938498922961315e-06, "loss": 0.5811, "step": 1216 }, { "epoch": 0.08, "grad_norm": 1.1493282318115234, "learning_rate": 9.938335367138203e-06, "loss": 0.5852, "step": 1217 }, { "epoch": 0.08, "grad_norm": 1.237396240234375, "learning_rate": 9.938171595473191e-06, "loss": 0.6238, "step": 1218 }, { "epoch": 0.08, "grad_norm": 1.1792484521865845, "learning_rate": 9.93800760797344e-06, "loss": 0.5918, "step": 1219 }, { "epoch": 0.08, "grad_norm": 1.2433832883834839, "learning_rate": 9.937843404646119e-06, "loss": 0.5908, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.1755752563476562, "learning_rate": 9.9376789854984e-06, "loss": 0.6056, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.2071483135223389, "learning_rate": 9.937514350537474e-06, "loss": 0.6012, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.2030067443847656, "learning_rate": 9.937349499770533e-06, "loss": 0.5458, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.1606976985931396, "learning_rate": 9.937184433204786e-06, "loss": 0.6306, "step": 1224 }, { "epoch": 0.08, "grad_norm": 1.3053094148635864, "learning_rate": 9.937019150847442e-06, "loss": 0.6172, "step": 1225 }, { "epoch": 0.08, "grad_norm": 1.191264033317566, "learning_rate": 9.936853652705731e-06, "loss": 0.5447, "step": 1226 }, { "epoch": 0.08, "grad_norm": 1.1574187278747559, "learning_rate": 9.936687938786884e-06, "loss": 0.5239, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.2872692346572876, "learning_rate": 9.936522009098143e-06, "loss": 0.6229, "step": 1228 }, { "epoch": 0.08, "grad_norm": 1.204621434211731, "learning_rate": 9.936355863646762e-06, "loss": 0.538, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.1980400085449219, "learning_rate": 9.936189502440002e-06, "loss": 0.5763, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.2224180698394775, "learning_rate": 9.936022925485133e-06, "loss": 0.5707, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.249347448348999, "learning_rate": 9.93585613278944e-06, "loss": 0.5954, "step": 1232 }, { "epoch": 0.08, "grad_norm": 1.2420600652694702, "learning_rate": 9.935689124360209e-06, "loss": 0.6144, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.1555384397506714, "learning_rate": 9.93552190020474e-06, "loss": 0.6041, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.2262178659439087, "learning_rate": 9.935354460330342e-06, "loss": 0.6276, "step": 1235 }, { "epoch": 0.08, "grad_norm": 1.2257685661315918, "learning_rate": 9.935186804744335e-06, "loss": 0.5627, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.1296343803405762, "learning_rate": 9.935018933454047e-06, "loss": 0.5806, "step": 1237 }, { "epoch": 0.08, "grad_norm": 1.2694883346557617, "learning_rate": 9.934850846466811e-06, "loss": 0.6473, "step": 1238 }, { "epoch": 0.08, "grad_norm": 1.2068825960159302, "learning_rate": 9.93468254378998e-06, "loss": 0.5787, "step": 1239 }, { "epoch": 0.08, "grad_norm": 1.2978535890579224, "learning_rate": 9.934514025430903e-06, "loss": 0.6068, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.2446930408477783, "learning_rate": 9.934345291396953e-06, "loss": 0.6127, "step": 1241 }, { "epoch": 0.08, "grad_norm": 1.1574113368988037, "learning_rate": 9.934176341695496e-06, "loss": 0.5918, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.3890352249145508, "learning_rate": 9.934007176333926e-06, "loss": 0.5901, "step": 1243 }, { "epoch": 0.08, "grad_norm": 1.3218815326690674, "learning_rate": 9.93383779531963e-06, "loss": 0.6431, "step": 1244 }, { "epoch": 0.08, "grad_norm": 1.2749485969543457, "learning_rate": 9.933668198660014e-06, "loss": 0.6336, "step": 1245 }, { "epoch": 0.08, "grad_norm": 1.2037899494171143, "learning_rate": 9.93349838636249e-06, "loss": 0.6168, "step": 1246 }, { "epoch": 0.08, "grad_norm": 1.1300768852233887, "learning_rate": 9.93332835843448e-06, "loss": 0.6117, "step": 1247 }, { "epoch": 0.08, "grad_norm": 1.2544656991958618, "learning_rate": 9.933158114883416e-06, "loss": 0.5971, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.394278645515442, "learning_rate": 9.93298765571674e-06, "loss": 0.6115, "step": 1249 }, { "epoch": 0.08, "grad_norm": 1.21902596950531, "learning_rate": 9.932816980941897e-06, "loss": 0.5639, "step": 1250 }, { "epoch": 0.08, "grad_norm": 1.2034516334533691, "learning_rate": 9.932646090566353e-06, "loss": 0.5817, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.2308204174041748, "learning_rate": 9.932474984597575e-06, "loss": 0.6241, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.2496633529663086, "learning_rate": 9.932303663043042e-06, "loss": 0.6028, "step": 1253 }, { "epoch": 0.08, "grad_norm": 1.1465222835540771, "learning_rate": 9.93213212591024e-06, "loss": 0.5992, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.2311006784439087, "learning_rate": 9.931960373206669e-06, "loss": 0.5987, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.2218912839889526, "learning_rate": 9.931788404939832e-06, "loss": 0.5751, "step": 1256 }, { "epoch": 0.08, "grad_norm": 1.3498971462249756, "learning_rate": 9.93161622111725e-06, "loss": 0.6786, "step": 1257 }, { "epoch": 0.08, "grad_norm": 1.1559092998504639, "learning_rate": 9.931443821746448e-06, "loss": 0.5924, "step": 1258 }, { "epoch": 0.08, "grad_norm": 1.3162919282913208, "learning_rate": 9.931271206834958e-06, "loss": 0.6261, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.3471404314041138, "learning_rate": 9.931098376390327e-06, "loss": 0.6483, "step": 1260 }, { "epoch": 0.08, "grad_norm": 1.1124467849731445, "learning_rate": 9.930925330420107e-06, "loss": 0.5179, "step": 1261 }, { "epoch": 0.08, "grad_norm": 1.268761396408081, "learning_rate": 9.930752068931867e-06, "loss": 0.6208, "step": 1262 }, { "epoch": 0.08, "grad_norm": 1.2396506071090698, "learning_rate": 9.93057859193317e-06, "loss": 0.5623, "step": 1263 }, { "epoch": 0.08, "grad_norm": 1.4115159511566162, "learning_rate": 9.930404899431606e-06, "loss": 0.6108, "step": 1264 }, { "epoch": 0.08, "grad_norm": 1.400389313697815, "learning_rate": 9.930230991434766e-06, "loss": 0.6094, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.2969456911087036, "learning_rate": 9.930056867950248e-06, "loss": 0.6025, "step": 1266 }, { "epoch": 0.08, "grad_norm": 1.2843972444534302, "learning_rate": 9.929882528985664e-06, "loss": 0.607, "step": 1267 }, { "epoch": 0.08, "grad_norm": 1.1398098468780518, "learning_rate": 9.929707974548635e-06, "loss": 0.5345, "step": 1268 }, { "epoch": 0.08, "grad_norm": 1.2696526050567627, "learning_rate": 9.929533204646788e-06, "loss": 0.5665, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.1087722778320312, "learning_rate": 9.929358219287765e-06, "loss": 0.5596, "step": 1270 }, { "epoch": 0.08, "grad_norm": 1.224719762802124, "learning_rate": 9.929183018479211e-06, "loss": 0.5754, "step": 1271 }, { "epoch": 0.08, "grad_norm": 1.2370352745056152, "learning_rate": 9.929007602228784e-06, "loss": 0.6167, "step": 1272 }, { "epoch": 0.08, "grad_norm": 1.4100638628005981, "learning_rate": 9.928831970544153e-06, "loss": 0.6165, "step": 1273 }, { "epoch": 0.08, "grad_norm": 1.1472617387771606, "learning_rate": 9.928656123432992e-06, "loss": 0.5911, "step": 1274 }, { "epoch": 0.08, "grad_norm": 1.25579035282135, "learning_rate": 9.928480060902987e-06, "loss": 0.5826, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.2979007959365845, "learning_rate": 9.928303782961836e-06, "loss": 0.6183, "step": 1276 }, { "epoch": 0.08, "grad_norm": 1.2668291330337524, "learning_rate": 9.92812728961724e-06, "loss": 0.5978, "step": 1277 }, { "epoch": 0.08, "grad_norm": 1.2836849689483643, "learning_rate": 9.927950580876917e-06, "loss": 0.6244, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.1505980491638184, "learning_rate": 9.927773656748588e-06, "loss": 0.5798, "step": 1279 }, { "epoch": 0.08, "grad_norm": 1.1351232528686523, "learning_rate": 9.927596517239985e-06, "loss": 0.5444, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.1692378520965576, "learning_rate": 9.927419162358853e-06, "loss": 0.5995, "step": 1281 }, { "epoch": 0.08, "grad_norm": 1.2706658840179443, "learning_rate": 9.927241592112941e-06, "loss": 0.6747, "step": 1282 }, { "epoch": 0.08, "grad_norm": 1.211156964302063, "learning_rate": 9.927063806510013e-06, "loss": 0.627, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.2252941131591797, "learning_rate": 9.926885805557837e-06, "loss": 0.6122, "step": 1284 }, { "epoch": 0.08, "grad_norm": 1.3342771530151367, "learning_rate": 9.926707589264194e-06, "loss": 0.6461, "step": 1285 }, { "epoch": 0.08, "grad_norm": 1.3061991930007935, "learning_rate": 9.926529157636875e-06, "loss": 0.6764, "step": 1286 }, { "epoch": 0.08, "grad_norm": 1.2106966972351074, "learning_rate": 9.926350510683676e-06, "loss": 0.5812, "step": 1287 }, { "epoch": 0.08, "grad_norm": 1.2828856706619263, "learning_rate": 9.926171648412406e-06, "loss": 0.629, "step": 1288 }, { "epoch": 0.08, "grad_norm": 1.3199076652526855, "learning_rate": 9.925992570830883e-06, "loss": 0.6182, "step": 1289 }, { "epoch": 0.08, "grad_norm": 1.1955420970916748, "learning_rate": 9.925813277946933e-06, "loss": 0.588, "step": 1290 }, { "epoch": 0.08, "grad_norm": 1.1848267316818237, "learning_rate": 9.925633769768395e-06, "loss": 0.575, "step": 1291 }, { "epoch": 0.08, "grad_norm": 1.1698644161224365, "learning_rate": 9.925454046303113e-06, "loss": 0.623, "step": 1292 }, { "epoch": 0.08, "grad_norm": 1.1342787742614746, "learning_rate": 9.92527410755894e-06, "loss": 0.5831, "step": 1293 }, { "epoch": 0.08, "grad_norm": 1.1299183368682861, "learning_rate": 9.925093953543746e-06, "loss": 0.5855, "step": 1294 }, { "epoch": 0.08, "grad_norm": 1.2719546556472778, "learning_rate": 9.924913584265401e-06, "loss": 0.6217, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.2416824102401733, "learning_rate": 9.92473299973179e-06, "loss": 0.6047, "step": 1296 }, { "epoch": 0.08, "grad_norm": 1.2099891901016235, "learning_rate": 9.924552199950806e-06, "loss": 0.62, "step": 1297 }, { "epoch": 0.08, "grad_norm": 1.1273183822631836, "learning_rate": 9.92437118493035e-06, "loss": 0.6168, "step": 1298 }, { "epoch": 0.08, "grad_norm": 1.5190417766571045, "learning_rate": 9.924189954678337e-06, "loss": 0.6412, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.1770684719085693, "learning_rate": 9.924008509202683e-06, "loss": 0.5783, "step": 1300 }, { "epoch": 0.08, "grad_norm": 1.1578876972198486, "learning_rate": 9.923826848511322e-06, "loss": 0.5521, "step": 1301 }, { "epoch": 0.08, "grad_norm": 1.3003042936325073, "learning_rate": 9.923644972612192e-06, "loss": 0.6416, "step": 1302 }, { "epoch": 0.08, "grad_norm": 1.1745306253433228, "learning_rate": 9.923462881513245e-06, "loss": 0.6016, "step": 1303 }, { "epoch": 0.08, "grad_norm": 1.1779744625091553, "learning_rate": 9.923280575222438e-06, "loss": 0.5951, "step": 1304 }, { "epoch": 0.08, "grad_norm": 1.1394075155258179, "learning_rate": 9.923098053747738e-06, "loss": 0.6033, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.1281049251556396, "learning_rate": 9.922915317097125e-06, "loss": 0.5805, "step": 1306 }, { "epoch": 0.08, "grad_norm": 1.2899863719940186, "learning_rate": 9.922732365278584e-06, "loss": 0.6058, "step": 1307 }, { "epoch": 0.08, "grad_norm": 1.2637683153152466, "learning_rate": 9.922549198300112e-06, "loss": 0.6402, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.3487480878829956, "learning_rate": 9.922365816169717e-06, "loss": 0.6528, "step": 1309 }, { "epoch": 0.08, "grad_norm": 1.1554546356201172, "learning_rate": 9.92218221889541e-06, "loss": 0.5926, "step": 1310 }, { "epoch": 0.08, "grad_norm": 1.370485782623291, "learning_rate": 9.921998406485218e-06, "loss": 0.6097, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.1664847135543823, "learning_rate": 9.921814378947174e-06, "loss": 0.6144, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.229067325592041, "learning_rate": 9.921630136289324e-06, "loss": 0.5502, "step": 1313 }, { "epoch": 0.08, "grad_norm": 1.2478543519973755, "learning_rate": 9.921445678519716e-06, "loss": 0.6005, "step": 1314 }, { "epoch": 0.08, "grad_norm": 1.3926351070404053, "learning_rate": 9.921261005646416e-06, "loss": 0.6533, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.2271709442138672, "learning_rate": 9.921076117677494e-06, "loss": 0.5758, "step": 1316 }, { "epoch": 0.09, "grad_norm": 1.142340064048767, "learning_rate": 9.920891014621034e-06, "loss": 0.5787, "step": 1317 }, { "epoch": 0.09, "grad_norm": 1.1740518808364868, "learning_rate": 9.920705696485123e-06, "loss": 0.6367, "step": 1318 }, { "epoch": 0.09, "grad_norm": 1.3332388401031494, "learning_rate": 9.92052016327786e-06, "loss": 0.6275, "step": 1319 }, { "epoch": 0.09, "grad_norm": 1.406020164489746, "learning_rate": 9.920334415007355e-06, "loss": 0.6455, "step": 1320 }, { "epoch": 0.09, "grad_norm": 1.2960386276245117, "learning_rate": 9.92014845168173e-06, "loss": 0.5853, "step": 1321 }, { "epoch": 0.09, "grad_norm": 1.0942329168319702, "learning_rate": 9.919962273309108e-06, "loss": 0.5741, "step": 1322 }, { "epoch": 0.09, "grad_norm": 1.1257399320602417, "learning_rate": 9.91977587989763e-06, "loss": 0.6065, "step": 1323 }, { "epoch": 0.09, "grad_norm": 1.2218865156173706, "learning_rate": 9.91958927145544e-06, "loss": 0.5771, "step": 1324 }, { "epoch": 0.09, "grad_norm": 1.4911431074142456, "learning_rate": 9.919402447990699e-06, "loss": 0.6061, "step": 1325 }, { "epoch": 0.09, "grad_norm": 1.2108649015426636, "learning_rate": 9.919215409511567e-06, "loss": 0.6625, "step": 1326 }, { "epoch": 0.09, "grad_norm": 1.228149652481079, "learning_rate": 9.919028156026221e-06, "loss": 0.5963, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.1931962966918945, "learning_rate": 9.918840687542845e-06, "loss": 0.5661, "step": 1328 }, { "epoch": 0.09, "grad_norm": 1.331695556640625, "learning_rate": 9.918653004069634e-06, "loss": 0.6193, "step": 1329 }, { "epoch": 0.09, "grad_norm": 1.1368035078048706, "learning_rate": 9.918465105614793e-06, "loss": 0.5704, "step": 1330 }, { "epoch": 0.09, "grad_norm": 1.188506007194519, "learning_rate": 9.91827699218653e-06, "loss": 0.5909, "step": 1331 }, { "epoch": 0.09, "grad_norm": 1.1858216524124146, "learning_rate": 9.918088663793066e-06, "loss": 0.6208, "step": 1332 }, { "epoch": 0.09, "grad_norm": 1.0762604475021362, "learning_rate": 9.917900120442639e-06, "loss": 0.5711, "step": 1333 }, { "epoch": 0.09, "grad_norm": 1.3254930973052979, "learning_rate": 9.917711362143485e-06, "loss": 0.5918, "step": 1334 }, { "epoch": 0.09, "grad_norm": 1.2188199758529663, "learning_rate": 9.917522388903856e-06, "loss": 0.5761, "step": 1335 }, { "epoch": 0.09, "grad_norm": 1.2372572422027588, "learning_rate": 9.917333200732008e-06, "loss": 0.6057, "step": 1336 }, { "epoch": 0.09, "grad_norm": 1.2899612188339233, "learning_rate": 9.917143797636216e-06, "loss": 0.6079, "step": 1337 }, { "epoch": 0.09, "grad_norm": 1.2372413873672485, "learning_rate": 9.916954179624754e-06, "loss": 0.5962, "step": 1338 }, { "epoch": 0.09, "grad_norm": 1.2036871910095215, "learning_rate": 9.916764346705911e-06, "loss": 0.5697, "step": 1339 }, { "epoch": 0.09, "grad_norm": 1.3442552089691162, "learning_rate": 9.916574298887983e-06, "loss": 0.6144, "step": 1340 }, { "epoch": 0.09, "grad_norm": 1.2108653783798218, "learning_rate": 9.916384036179278e-06, "loss": 0.5742, "step": 1341 }, { "epoch": 0.09, "grad_norm": 1.270803451538086, "learning_rate": 9.91619355858811e-06, "loss": 0.5926, "step": 1342 }, { "epoch": 0.09, "grad_norm": 1.142386555671692, "learning_rate": 9.916002866122807e-06, "loss": 0.5524, "step": 1343 }, { "epoch": 0.09, "grad_norm": 1.2903927564620972, "learning_rate": 9.915811958791702e-06, "loss": 0.5905, "step": 1344 }, { "epoch": 0.09, "grad_norm": 1.2446751594543457, "learning_rate": 9.91562083660314e-06, "loss": 0.6164, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.314069390296936, "learning_rate": 9.915429499565471e-06, "loss": 0.577, "step": 1346 }, { "epoch": 0.09, "grad_norm": 1.224804401397705, "learning_rate": 9.915237947687062e-06, "loss": 0.5614, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.2544323205947876, "learning_rate": 9.915046180976284e-06, "loss": 0.6722, "step": 1348 }, { "epoch": 0.09, "grad_norm": 1.2671366930007935, "learning_rate": 9.914854199441519e-06, "loss": 0.644, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.3213587999343872, "learning_rate": 9.914662003091155e-06, "loss": 0.5775, "step": 1350 }, { "epoch": 0.09, "grad_norm": 1.298176646232605, "learning_rate": 9.914469591933598e-06, "loss": 0.6076, "step": 1351 }, { "epoch": 0.09, "grad_norm": 1.1902104616165161, "learning_rate": 9.914276965977253e-06, "loss": 0.6346, "step": 1352 }, { "epoch": 0.09, "grad_norm": 1.1221067905426025, "learning_rate": 9.914084125230542e-06, "loss": 0.582, "step": 1353 }, { "epoch": 0.09, "grad_norm": 1.268727421760559, "learning_rate": 9.913891069701891e-06, "loss": 0.624, "step": 1354 }, { "epoch": 0.09, "grad_norm": 1.2640374898910522, "learning_rate": 9.91369779939974e-06, "loss": 0.6107, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.3178941011428833, "learning_rate": 9.913504314332534e-06, "loss": 0.6049, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.2250274419784546, "learning_rate": 9.913310614508733e-06, "loss": 0.5846, "step": 1357 }, { "epoch": 0.09, "grad_norm": 1.269561529159546, "learning_rate": 9.913116699936803e-06, "loss": 0.5918, "step": 1358 }, { "epoch": 0.09, "grad_norm": 1.3976479768753052, "learning_rate": 9.912922570625215e-06, "loss": 0.6317, "step": 1359 }, { "epoch": 0.09, "grad_norm": 1.2637001276016235, "learning_rate": 9.91272822658246e-06, "loss": 0.59, "step": 1360 }, { "epoch": 0.09, "grad_norm": 1.142849087715149, "learning_rate": 9.912533667817026e-06, "loss": 0.5613, "step": 1361 }, { "epoch": 0.09, "grad_norm": 1.233920693397522, "learning_rate": 9.912338894337422e-06, "loss": 0.628, "step": 1362 }, { "epoch": 0.09, "grad_norm": 1.2992721796035767, "learning_rate": 9.912143906152158e-06, "loss": 0.6065, "step": 1363 }, { "epoch": 0.09, "grad_norm": 1.1000620126724243, "learning_rate": 9.911948703269759e-06, "loss": 0.5735, "step": 1364 }, { "epoch": 0.09, "grad_norm": 1.285275936126709, "learning_rate": 9.911753285698753e-06, "loss": 0.5974, "step": 1365 }, { "epoch": 0.09, "grad_norm": 1.2329814434051514, "learning_rate": 9.911557653447685e-06, "loss": 0.5961, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.1743911504745483, "learning_rate": 9.911361806525104e-06, "loss": 0.5668, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.2161704301834106, "learning_rate": 9.911165744939568e-06, "loss": 0.573, "step": 1368 }, { "epoch": 0.09, "grad_norm": 1.2367308139801025, "learning_rate": 9.91096946869965e-06, "loss": 0.559, "step": 1369 }, { "epoch": 0.09, "grad_norm": 1.3890738487243652, "learning_rate": 9.910772977813926e-06, "loss": 0.6382, "step": 1370 }, { "epoch": 0.09, "grad_norm": 1.1394225358963013, "learning_rate": 9.910576272290986e-06, "loss": 0.5893, "step": 1371 }, { "epoch": 0.09, "grad_norm": 1.2969026565551758, "learning_rate": 9.910379352139426e-06, "loss": 0.6063, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.2506924867630005, "learning_rate": 9.910182217367854e-06, "loss": 0.6057, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.251685380935669, "learning_rate": 9.909984867984886e-06, "loss": 0.6274, "step": 1374 }, { "epoch": 0.09, "grad_norm": 1.159958004951477, "learning_rate": 9.909787303999146e-06, "loss": 0.5982, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.2782355546951294, "learning_rate": 9.909589525419273e-06, "loss": 0.5787, "step": 1376 }, { "epoch": 0.09, "grad_norm": 1.2314502000808716, "learning_rate": 9.909391532253905e-06, "loss": 0.5644, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.153383731842041, "learning_rate": 9.909193324511701e-06, "loss": 0.6322, "step": 1378 }, { "epoch": 0.09, "grad_norm": 1.1569026708602905, "learning_rate": 9.908994902201323e-06, "loss": 0.5281, "step": 1379 }, { "epoch": 0.09, "grad_norm": 1.166937232017517, "learning_rate": 9.908796265331444e-06, "loss": 0.6106, "step": 1380 }, { "epoch": 0.09, "grad_norm": 1.1953495740890503, "learning_rate": 9.908597413910744e-06, "loss": 0.5812, "step": 1381 }, { "epoch": 0.09, "grad_norm": 1.2017443180084229, "learning_rate": 9.908398347947917e-06, "loss": 0.5895, "step": 1382 }, { "epoch": 0.09, "grad_norm": 1.2407464981079102, "learning_rate": 9.908199067451661e-06, "loss": 0.5839, "step": 1383 }, { "epoch": 0.09, "grad_norm": 1.3900781869888306, "learning_rate": 9.907999572430686e-06, "loss": 0.6416, "step": 1384 }, { "epoch": 0.09, "grad_norm": 1.1338984966278076, "learning_rate": 9.907799862893713e-06, "loss": 0.5761, "step": 1385 }, { "epoch": 0.09, "grad_norm": 1.288590669631958, "learning_rate": 9.907599938849471e-06, "loss": 0.5858, "step": 1386 }, { "epoch": 0.09, "grad_norm": 1.4527939558029175, "learning_rate": 9.907399800306697e-06, "loss": 0.5998, "step": 1387 }, { "epoch": 0.09, "grad_norm": 1.134479284286499, "learning_rate": 9.90719944727414e-06, "loss": 0.5652, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.1434121131896973, "learning_rate": 9.906998879760553e-06, "loss": 0.6087, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.3742902278900146, "learning_rate": 9.906798097774708e-06, "loss": 0.6511, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.2752196788787842, "learning_rate": 9.906597101325379e-06, "loss": 0.5775, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.2043203115463257, "learning_rate": 9.906395890421348e-06, "loss": 0.5815, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.3055998086929321, "learning_rate": 9.90619446507141e-06, "loss": 0.6011, "step": 1393 }, { "epoch": 0.09, "grad_norm": 1.215103030204773, "learning_rate": 9.905992825284371e-06, "loss": 0.6394, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.1929900646209717, "learning_rate": 9.905790971069046e-06, "loss": 0.5614, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.1683778762817383, "learning_rate": 9.905588902434251e-06, "loss": 0.5803, "step": 1396 }, { "epoch": 0.09, "grad_norm": 1.2433338165283203, "learning_rate": 9.905386619388823e-06, "loss": 0.5805, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.190555214881897, "learning_rate": 9.905184121941602e-06, "loss": 0.6056, "step": 1398 }, { "epoch": 0.09, "grad_norm": 1.226736307144165, "learning_rate": 9.904981410101437e-06, "loss": 0.5807, "step": 1399 }, { "epoch": 0.09, "grad_norm": 1.4835413694381714, "learning_rate": 9.904778483877192e-06, "loss": 0.6379, "step": 1400 }, { "epoch": 0.09, "grad_norm": 1.3903262615203857, "learning_rate": 9.904575343277734e-06, "loss": 0.5972, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.2369725704193115, "learning_rate": 9.904371988311941e-06, "loss": 0.5735, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.3741146326065063, "learning_rate": 9.9041684189887e-06, "loss": 0.6476, "step": 1403 }, { "epoch": 0.09, "grad_norm": 1.18125319480896, "learning_rate": 9.903964635316912e-06, "loss": 0.612, "step": 1404 }, { "epoch": 0.09, "grad_norm": 1.1938815116882324, "learning_rate": 9.903760637305483e-06, "loss": 0.6059, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.3406604528427124, "learning_rate": 9.903556424963327e-06, "loss": 0.6127, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.2442644834518433, "learning_rate": 9.903351998299372e-06, "loss": 0.6087, "step": 1407 }, { "epoch": 0.09, "grad_norm": 1.1817282438278198, "learning_rate": 9.903147357322552e-06, "loss": 0.5832, "step": 1408 }, { "epoch": 0.09, "grad_norm": 1.3694915771484375, "learning_rate": 9.90294250204181e-06, "loss": 0.6294, "step": 1409 }, { "epoch": 0.09, "grad_norm": 1.246484398841858, "learning_rate": 9.902737432466102e-06, "loss": 0.5857, "step": 1410 }, { "epoch": 0.09, "grad_norm": 1.1182383298873901, "learning_rate": 9.90253214860439e-06, "loss": 0.6024, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.2228224277496338, "learning_rate": 9.902326650465646e-06, "loss": 0.5697, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.2210710048675537, "learning_rate": 9.902120938058853e-06, "loss": 0.5623, "step": 1413 }, { "epoch": 0.09, "grad_norm": 1.2547786235809326, "learning_rate": 9.901915011393002e-06, "loss": 0.626, "step": 1414 }, { "epoch": 0.09, "grad_norm": 1.1960078477859497, "learning_rate": 9.901708870477092e-06, "loss": 0.5306, "step": 1415 }, { "epoch": 0.09, "grad_norm": 1.224565863609314, "learning_rate": 9.901502515320134e-06, "loss": 0.5652, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.2310221195220947, "learning_rate": 9.901295945931146e-06, "loss": 0.6442, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.3717966079711914, "learning_rate": 9.901089162319159e-06, "loss": 0.5926, "step": 1418 }, { "epoch": 0.09, "grad_norm": 1.2791608572006226, "learning_rate": 9.900882164493208e-06, "loss": 0.6142, "step": 1419 }, { "epoch": 0.09, "grad_norm": 1.376478910446167, "learning_rate": 9.900674952462344e-06, "loss": 0.6204, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.1442561149597168, "learning_rate": 9.900467526235621e-06, "loss": 0.5397, "step": 1421 }, { "epoch": 0.09, "grad_norm": 1.1930209398269653, "learning_rate": 9.900259885822106e-06, "loss": 0.6211, "step": 1422 }, { "epoch": 0.09, "grad_norm": 1.1905287504196167, "learning_rate": 9.900052031230876e-06, "loss": 0.6101, "step": 1423 }, { "epoch": 0.09, "grad_norm": 1.3073949813842773, "learning_rate": 9.899843962471013e-06, "loss": 0.5724, "step": 1424 }, { "epoch": 0.09, "grad_norm": 1.1167266368865967, "learning_rate": 9.89963567955161e-06, "loss": 0.6295, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.2495468854904175, "learning_rate": 9.899427182481774e-06, "loss": 0.5753, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.2608726024627686, "learning_rate": 9.899218471270617e-06, "loss": 0.6002, "step": 1427 }, { "epoch": 0.09, "grad_norm": 1.310968279838562, "learning_rate": 9.89900954592726e-06, "loss": 0.6338, "step": 1428 }, { "epoch": 0.09, "grad_norm": 1.0840809345245361, "learning_rate": 9.898800406460836e-06, "loss": 0.5686, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.2564557790756226, "learning_rate": 9.898591052880485e-06, "loss": 0.5535, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.2745634317398071, "learning_rate": 9.898381485195358e-06, "loss": 0.579, "step": 1431 }, { "epoch": 0.09, "grad_norm": 1.2917166948318481, "learning_rate": 9.898171703414615e-06, "loss": 0.6485, "step": 1432 }, { "epoch": 0.09, "grad_norm": 1.2004352807998657, "learning_rate": 9.897961707547421e-06, "loss": 0.5456, "step": 1433 }, { "epoch": 0.09, "grad_norm": 1.2040636539459229, "learning_rate": 9.89775149760296e-06, "loss": 0.6529, "step": 1434 }, { "epoch": 0.09, "grad_norm": 1.2933907508850098, "learning_rate": 9.897541073590417e-06, "loss": 0.5854, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.210796594619751, "learning_rate": 9.897330435518991e-06, "loss": 0.6085, "step": 1436 }, { "epoch": 0.09, "grad_norm": 1.2024592161178589, "learning_rate": 9.897119583397885e-06, "loss": 0.5837, "step": 1437 }, { "epoch": 0.09, "grad_norm": 1.2362140417099, "learning_rate": 9.896908517236317e-06, "loss": 0.5967, "step": 1438 }, { "epoch": 0.09, "grad_norm": 1.1461296081542969, "learning_rate": 9.896697237043513e-06, "loss": 0.5496, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.2329217195510864, "learning_rate": 9.896485742828706e-06, "loss": 0.5582, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.3159077167510986, "learning_rate": 9.896274034601141e-06, "loss": 0.6207, "step": 1441 }, { "epoch": 0.09, "grad_norm": 1.2670873403549194, "learning_rate": 9.896062112370068e-06, "loss": 0.6187, "step": 1442 }, { "epoch": 0.09, "grad_norm": 1.2508444786071777, "learning_rate": 9.895849976144755e-06, "loss": 0.5929, "step": 1443 }, { "epoch": 0.09, "grad_norm": 1.3067532777786255, "learning_rate": 9.895637625934468e-06, "loss": 0.6458, "step": 1444 }, { "epoch": 0.09, "grad_norm": 1.3452790975570679, "learning_rate": 9.895425061748494e-06, "loss": 0.5652, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.3184771537780762, "learning_rate": 9.89521228359612e-06, "loss": 0.585, "step": 1446 }, { "epoch": 0.09, "grad_norm": 1.2842870950698853, "learning_rate": 9.894999291486646e-06, "loss": 0.5806, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.3472026586532593, "learning_rate": 9.894786085429384e-06, "loss": 0.5444, "step": 1448 }, { "epoch": 0.09, "grad_norm": 1.2253612279891968, "learning_rate": 9.89457266543365e-06, "loss": 0.5896, "step": 1449 }, { "epoch": 0.09, "grad_norm": 1.3309968709945679, "learning_rate": 9.894359031508773e-06, "loss": 0.6554, "step": 1450 }, { "epoch": 0.09, "grad_norm": 1.1999526023864746, "learning_rate": 9.89414518366409e-06, "loss": 0.5832, "step": 1451 }, { "epoch": 0.09, "grad_norm": 1.1408838033676147, "learning_rate": 9.893931121908948e-06, "loss": 0.5742, "step": 1452 }, { "epoch": 0.09, "grad_norm": 1.2690621614456177, "learning_rate": 9.893716846252705e-06, "loss": 0.6279, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.2093309164047241, "learning_rate": 9.893502356704722e-06, "loss": 0.5715, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.265531301498413, "learning_rate": 9.893287653274377e-06, "loss": 0.5821, "step": 1455 }, { "epoch": 0.09, "grad_norm": 1.186239242553711, "learning_rate": 9.893072735971055e-06, "loss": 0.613, "step": 1456 }, { "epoch": 0.09, "grad_norm": 1.1899338960647583, "learning_rate": 9.892857604804147e-06, "loss": 0.5905, "step": 1457 }, { "epoch": 0.09, "grad_norm": 1.2428808212280273, "learning_rate": 9.892642259783056e-06, "loss": 0.5986, "step": 1458 }, { "epoch": 0.09, "grad_norm": 1.3476642370224, "learning_rate": 9.892426700917197e-06, "loss": 0.6021, "step": 1459 }, { "epoch": 0.09, "grad_norm": 1.19772207736969, "learning_rate": 9.892210928215989e-06, "loss": 0.5492, "step": 1460 }, { "epoch": 0.09, "grad_norm": 1.292283058166504, "learning_rate": 9.891994941688864e-06, "loss": 0.6175, "step": 1461 }, { "epoch": 0.09, "grad_norm": 1.3098551034927368, "learning_rate": 9.891778741345259e-06, "loss": 0.5832, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.1375858783721924, "learning_rate": 9.891562327194628e-06, "loss": 0.6048, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.17124605178833, "learning_rate": 9.891345699246426e-06, "loss": 0.5984, "step": 1464 }, { "epoch": 0.09, "grad_norm": 1.1034057140350342, "learning_rate": 9.891128857510126e-06, "loss": 0.5744, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.2086035013198853, "learning_rate": 9.890911801995201e-06, "loss": 0.5962, "step": 1466 }, { "epoch": 0.09, "grad_norm": 1.451258897781372, "learning_rate": 9.890694532711142e-06, "loss": 0.5903, "step": 1467 }, { "epoch": 0.09, "grad_norm": 1.16914963722229, "learning_rate": 9.890477049667442e-06, "loss": 0.6359, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.13645601272583, "learning_rate": 9.890259352873608e-06, "loss": 0.5514, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.179618000984192, "learning_rate": 9.890041442339154e-06, "loss": 0.5822, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.2337076663970947, "learning_rate": 9.889823318073605e-06, "loss": 0.5552, "step": 1471 }, { "epoch": 0.1, "grad_norm": 1.2454975843429565, "learning_rate": 9.889604980086496e-06, "loss": 0.5723, "step": 1472 }, { "epoch": 0.1, "grad_norm": 1.3138099908828735, "learning_rate": 9.889386428387366e-06, "loss": 0.5756, "step": 1473 }, { "epoch": 0.1, "grad_norm": 1.2695106267929077, "learning_rate": 9.889167662985773e-06, "loss": 0.5698, "step": 1474 }, { "epoch": 0.1, "grad_norm": 1.4973758459091187, "learning_rate": 9.888948683891275e-06, "loss": 0.6231, "step": 1475 }, { "epoch": 0.1, "grad_norm": 1.2770239114761353, "learning_rate": 9.888729491113442e-06, "loss": 0.6013, "step": 1476 }, { "epoch": 0.1, "grad_norm": 1.1922467947006226, "learning_rate": 9.888510084661857e-06, "loss": 0.5746, "step": 1477 }, { "epoch": 0.1, "grad_norm": 1.1241368055343628, "learning_rate": 9.88829046454611e-06, "loss": 0.6389, "step": 1478 }, { "epoch": 0.1, "grad_norm": 1.3098241090774536, "learning_rate": 9.888070630775797e-06, "loss": 0.6451, "step": 1479 }, { "epoch": 0.1, "grad_norm": 1.3364472389221191, "learning_rate": 9.88785058336053e-06, "loss": 0.573, "step": 1480 }, { "epoch": 0.1, "grad_norm": 1.2521919012069702, "learning_rate": 9.887630322309923e-06, "loss": 0.5909, "step": 1481 }, { "epoch": 0.1, "grad_norm": 1.2521965503692627, "learning_rate": 9.887409847633606e-06, "loss": 0.6073, "step": 1482 }, { "epoch": 0.1, "grad_norm": 1.1471753120422363, "learning_rate": 9.887189159341213e-06, "loss": 0.5669, "step": 1483 }, { "epoch": 0.1, "grad_norm": 1.234128475189209, "learning_rate": 9.886968257442392e-06, "loss": 0.6105, "step": 1484 }, { "epoch": 0.1, "grad_norm": 1.2409793138504028, "learning_rate": 9.886747141946796e-06, "loss": 0.618, "step": 1485 }, { "epoch": 0.1, "grad_norm": 1.2051841020584106, "learning_rate": 9.886525812864092e-06, "loss": 0.6195, "step": 1486 }, { "epoch": 0.1, "grad_norm": 1.3550246953964233, "learning_rate": 9.88630427020395e-06, "loss": 0.5989, "step": 1487 }, { "epoch": 0.1, "grad_norm": 1.1795477867126465, "learning_rate": 9.886082513976058e-06, "loss": 0.5876, "step": 1488 }, { "epoch": 0.1, "grad_norm": 1.1432788372039795, "learning_rate": 9.885860544190102e-06, "loss": 0.5859, "step": 1489 }, { "epoch": 0.1, "grad_norm": 1.2418277263641357, "learning_rate": 9.88563836085579e-06, "loss": 0.6008, "step": 1490 }, { "epoch": 0.1, "grad_norm": 1.0825963020324707, "learning_rate": 9.88541596398283e-06, "loss": 0.5144, "step": 1491 }, { "epoch": 0.1, "grad_norm": 1.261075496673584, "learning_rate": 9.885193353580942e-06, "loss": 0.5915, "step": 1492 }, { "epoch": 0.1, "grad_norm": 1.3487164974212646, "learning_rate": 9.884970529659857e-06, "loss": 0.6802, "step": 1493 }, { "epoch": 0.1, "grad_norm": 1.1903024911880493, "learning_rate": 9.884747492229313e-06, "loss": 0.6122, "step": 1494 }, { "epoch": 0.1, "grad_norm": 1.225196123123169, "learning_rate": 9.88452424129906e-06, "loss": 0.5932, "step": 1495 }, { "epoch": 0.1, "grad_norm": 1.220460057258606, "learning_rate": 9.884300776878854e-06, "loss": 0.6292, "step": 1496 }, { "epoch": 0.1, "grad_norm": 1.1758430004119873, "learning_rate": 9.884077098978462e-06, "loss": 0.6029, "step": 1497 }, { "epoch": 0.1, "grad_norm": 1.2802691459655762, "learning_rate": 9.883853207607664e-06, "loss": 0.5888, "step": 1498 }, { "epoch": 0.1, "grad_norm": 1.2714301347732544, "learning_rate": 9.88362910277624e-06, "loss": 0.6129, "step": 1499 }, { "epoch": 0.1, "grad_norm": 1.1713032722473145, "learning_rate": 9.883404784493988e-06, "loss": 0.6189, "step": 1500 }, { "epoch": 0.1, "grad_norm": 1.1886054277420044, "learning_rate": 9.883180252770712e-06, "loss": 0.5862, "step": 1501 }, { "epoch": 0.1, "grad_norm": 1.1249240636825562, "learning_rate": 9.882955507616227e-06, "loss": 0.5582, "step": 1502 }, { "epoch": 0.1, "grad_norm": 1.1792271137237549, "learning_rate": 9.882730549040354e-06, "loss": 0.5447, "step": 1503 }, { "epoch": 0.1, "grad_norm": 1.3244144916534424, "learning_rate": 9.882505377052926e-06, "loss": 0.5838, "step": 1504 }, { "epoch": 0.1, "grad_norm": 1.1658576726913452, "learning_rate": 9.882279991663789e-06, "loss": 0.6075, "step": 1505 }, { "epoch": 0.1, "grad_norm": 1.1007020473480225, "learning_rate": 9.882054392882786e-06, "loss": 0.5711, "step": 1506 }, { "epoch": 0.1, "grad_norm": 1.0528419017791748, "learning_rate": 9.88182858071978e-06, "loss": 0.5729, "step": 1507 }, { "epoch": 0.1, "grad_norm": 1.199800968170166, "learning_rate": 9.881602555184646e-06, "loss": 0.6003, "step": 1508 }, { "epoch": 0.1, "grad_norm": 1.1541804075241089, "learning_rate": 9.881376316287257e-06, "loss": 0.5915, "step": 1509 }, { "epoch": 0.1, "grad_norm": 1.2529112100601196, "learning_rate": 9.881149864037503e-06, "loss": 0.6127, "step": 1510 }, { "epoch": 0.1, "grad_norm": 1.2549484968185425, "learning_rate": 9.880923198445283e-06, "loss": 0.5991, "step": 1511 }, { "epoch": 0.1, "grad_norm": 1.187235713005066, "learning_rate": 9.880696319520503e-06, "loss": 0.6278, "step": 1512 }, { "epoch": 0.1, "grad_norm": 1.2124847173690796, "learning_rate": 9.880469227273078e-06, "loss": 0.5845, "step": 1513 }, { "epoch": 0.1, "grad_norm": 1.1810718774795532, "learning_rate": 9.880241921712936e-06, "loss": 0.6209, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.262187123298645, "learning_rate": 9.880014402850012e-06, "loss": 0.6376, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.2561676502227783, "learning_rate": 9.879786670694247e-06, "loss": 0.644, "step": 1516 }, { "epoch": 0.1, "grad_norm": 1.303969383239746, "learning_rate": 9.879558725255596e-06, "loss": 0.6281, "step": 1517 }, { "epoch": 0.1, "grad_norm": 1.1965657472610474, "learning_rate": 9.879330566544024e-06, "loss": 0.5916, "step": 1518 }, { "epoch": 0.1, "grad_norm": 1.2936269044876099, "learning_rate": 9.879102194569501e-06, "loss": 0.5729, "step": 1519 }, { "epoch": 0.1, "grad_norm": 1.2426971197128296, "learning_rate": 9.87887360934201e-06, "loss": 0.6239, "step": 1520 }, { "epoch": 0.1, "grad_norm": 1.265256404876709, "learning_rate": 9.878644810871541e-06, "loss": 0.6333, "step": 1521 }, { "epoch": 0.1, "grad_norm": 1.1365193128585815, "learning_rate": 9.878415799168096e-06, "loss": 0.5793, "step": 1522 }, { "epoch": 0.1, "grad_norm": 1.1782954931259155, "learning_rate": 9.87818657424168e-06, "loss": 0.6289, "step": 1523 }, { "epoch": 0.1, "grad_norm": 1.1961904764175415, "learning_rate": 9.877957136102316e-06, "loss": 0.6277, "step": 1524 }, { "epoch": 0.1, "grad_norm": 1.2232673168182373, "learning_rate": 9.877727484760031e-06, "loss": 0.5896, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.3815410137176514, "learning_rate": 9.877497620224861e-06, "loss": 0.6333, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.246081829071045, "learning_rate": 9.877267542506857e-06, "loss": 0.595, "step": 1527 }, { "epoch": 0.1, "grad_norm": 1.1852705478668213, "learning_rate": 9.87703725161607e-06, "loss": 0.5874, "step": 1528 }, { "epoch": 0.1, "grad_norm": 1.175665259361267, "learning_rate": 9.876806747562568e-06, "loss": 0.5812, "step": 1529 }, { "epoch": 0.1, "grad_norm": 1.2205634117126465, "learning_rate": 9.876576030356426e-06, "loss": 0.5996, "step": 1530 }, { "epoch": 0.1, "grad_norm": 1.3353437185287476, "learning_rate": 9.876345100007727e-06, "loss": 0.5901, "step": 1531 }, { "epoch": 0.1, "grad_norm": 1.2110484838485718, "learning_rate": 9.876113956526564e-06, "loss": 0.6024, "step": 1532 }, { "epoch": 0.1, "grad_norm": 1.160301685333252, "learning_rate": 9.87588259992304e-06, "loss": 0.5483, "step": 1533 }, { "epoch": 0.1, "grad_norm": 1.2269184589385986, "learning_rate": 9.875651030207272e-06, "loss": 0.5576, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.2944875955581665, "learning_rate": 9.875419247389373e-06, "loss": 0.5788, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.3173109292984009, "learning_rate": 9.875187251479479e-06, "loss": 0.5837, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.2115665674209595, "learning_rate": 9.874955042487726e-06, "loss": 0.6145, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.2951889038085938, "learning_rate": 9.874722620424267e-06, "loss": 0.535, "step": 1538 }, { "epoch": 0.1, "grad_norm": 1.1566593647003174, "learning_rate": 9.87448998529926e-06, "loss": 0.568, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.2274761199951172, "learning_rate": 9.874257137122871e-06, "loss": 0.6261, "step": 1540 }, { "epoch": 0.1, "grad_norm": 1.4117940664291382, "learning_rate": 9.87402407590528e-06, "loss": 0.6013, "step": 1541 }, { "epoch": 0.1, "grad_norm": 1.1532979011535645, "learning_rate": 9.87379080165667e-06, "loss": 0.6031, "step": 1542 }, { "epoch": 0.1, "grad_norm": 1.1534514427185059, "learning_rate": 9.87355731438724e-06, "loss": 0.6006, "step": 1543 }, { "epoch": 0.1, "grad_norm": 1.2513148784637451, "learning_rate": 9.873323614107191e-06, "loss": 0.6149, "step": 1544 }, { "epoch": 0.1, "grad_norm": 1.3031493425369263, "learning_rate": 9.873089700826743e-06, "loss": 0.5903, "step": 1545 }, { "epoch": 0.1, "grad_norm": 1.2332762479782104, "learning_rate": 9.872855574556116e-06, "loss": 0.5836, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.301178216934204, "learning_rate": 9.872621235305544e-06, "loss": 0.6303, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.2518656253814697, "learning_rate": 9.872386683085269e-06, "loss": 0.5586, "step": 1548 }, { "epoch": 0.1, "grad_norm": 1.1783102750778198, "learning_rate": 9.872151917905544e-06, "loss": 0.5692, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.2796142101287842, "learning_rate": 9.871916939776628e-06, "loss": 0.5861, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.0579824447631836, "learning_rate": 9.871681748708792e-06, "loss": 0.6146, "step": 1551 }, { "epoch": 0.1, "grad_norm": 1.1381760835647583, "learning_rate": 9.871446344712317e-06, "loss": 0.5957, "step": 1552 }, { "epoch": 0.1, "grad_norm": 1.2102080583572388, "learning_rate": 9.87121072779749e-06, "loss": 0.592, "step": 1553 }, { "epoch": 0.1, "grad_norm": 1.1942956447601318, "learning_rate": 9.87097489797461e-06, "loss": 0.5827, "step": 1554 }, { "epoch": 0.1, "grad_norm": 1.2404890060424805, "learning_rate": 9.870738855253986e-06, "loss": 0.6189, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.2993141412734985, "learning_rate": 9.870502599645932e-06, "loss": 0.5967, "step": 1556 }, { "epoch": 0.1, "grad_norm": 1.2148311138153076, "learning_rate": 9.870266131160775e-06, "loss": 0.6009, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.283898949623108, "learning_rate": 9.87002944980885e-06, "loss": 0.6533, "step": 1558 }, { "epoch": 0.1, "grad_norm": 1.3579005002975464, "learning_rate": 9.869792555600504e-06, "loss": 0.6185, "step": 1559 }, { "epoch": 0.1, "grad_norm": 1.2581266164779663, "learning_rate": 9.86955544854609e-06, "loss": 0.5986, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.1519769430160522, "learning_rate": 9.869318128655971e-06, "loss": 0.6119, "step": 1561 }, { "epoch": 0.1, "grad_norm": 1.1922305822372437, "learning_rate": 9.869080595940518e-06, "loss": 0.6056, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.1239254474639893, "learning_rate": 9.868842850410116e-06, "loss": 0.6153, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.2687342166900635, "learning_rate": 9.868604892075156e-06, "loss": 0.6206, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.211658000946045, "learning_rate": 9.868366720946034e-06, "loss": 0.5697, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.2686394453048706, "learning_rate": 9.868128337033164e-06, "loss": 0.6355, "step": 1566 }, { "epoch": 0.1, "grad_norm": 1.136846661567688, "learning_rate": 9.867889740346968e-06, "loss": 0.5691, "step": 1567 }, { "epoch": 0.1, "grad_norm": 1.1114858388900757, "learning_rate": 9.867650930897868e-06, "loss": 0.5486, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.1914831399917603, "learning_rate": 9.867411908696304e-06, "loss": 0.5703, "step": 1569 }, { "epoch": 0.1, "grad_norm": 1.2432221174240112, "learning_rate": 9.867172673752723e-06, "loss": 0.5904, "step": 1570 }, { "epoch": 0.1, "grad_norm": 1.1566762924194336, "learning_rate": 9.866933226077584e-06, "loss": 0.5969, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.155065894126892, "learning_rate": 9.86669356568135e-06, "loss": 0.6318, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.1487606763839722, "learning_rate": 9.866453692574496e-06, "loss": 0.5507, "step": 1573 }, { "epoch": 0.1, "grad_norm": 1.1993380784988403, "learning_rate": 9.866213606767506e-06, "loss": 0.6699, "step": 1574 }, { "epoch": 0.1, "grad_norm": 1.2528865337371826, "learning_rate": 9.865973308270875e-06, "loss": 0.6299, "step": 1575 }, { "epoch": 0.1, "grad_norm": 1.189642071723938, "learning_rate": 9.865732797095104e-06, "loss": 0.6076, "step": 1576 }, { "epoch": 0.1, "grad_norm": 1.182338833808899, "learning_rate": 9.865492073250708e-06, "loss": 0.563, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.1485732793807983, "learning_rate": 9.865251136748205e-06, "loss": 0.5541, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.143183946609497, "learning_rate": 9.86500998759813e-06, "loss": 0.5463, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.0978708267211914, "learning_rate": 9.864768625811018e-06, "loss": 0.5852, "step": 1580 }, { "epoch": 0.1, "grad_norm": 1.4457873106002808, "learning_rate": 9.864527051397421e-06, "loss": 0.6314, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.256028175354004, "learning_rate": 9.864285264367897e-06, "loss": 0.6066, "step": 1582 }, { "epoch": 0.1, "grad_norm": 1.3697810173034668, "learning_rate": 9.864043264733016e-06, "loss": 0.6191, "step": 1583 }, { "epoch": 0.1, "grad_norm": 1.1514053344726562, "learning_rate": 9.86380105250335e-06, "loss": 0.564, "step": 1584 }, { "epoch": 0.1, "grad_norm": 1.412618637084961, "learning_rate": 9.863558627689491e-06, "loss": 0.5796, "step": 1585 }, { "epoch": 0.1, "grad_norm": 1.3705692291259766, "learning_rate": 9.863315990302033e-06, "loss": 0.559, "step": 1586 }, { "epoch": 0.1, "grad_norm": 1.1389447450637817, "learning_rate": 9.86307314035158e-06, "loss": 0.5265, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.2678003311157227, "learning_rate": 9.862830077848747e-06, "loss": 0.5915, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.2446562051773071, "learning_rate": 9.862586802804157e-06, "loss": 0.5801, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.073177695274353, "learning_rate": 9.862343315228442e-06, "loss": 0.5335, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.2572104930877686, "learning_rate": 9.862099615132249e-06, "loss": 0.5838, "step": 1591 }, { "epoch": 0.1, "grad_norm": 1.1833642721176147, "learning_rate": 9.861855702526224e-06, "loss": 0.6278, "step": 1592 }, { "epoch": 0.1, "grad_norm": 1.0327873229980469, "learning_rate": 9.861611577421031e-06, "loss": 0.5791, "step": 1593 }, { "epoch": 0.1, "grad_norm": 1.1374962329864502, "learning_rate": 9.861367239827338e-06, "loss": 0.5997, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.2182788848876953, "learning_rate": 9.861122689755827e-06, "loss": 0.5721, "step": 1595 }, { "epoch": 0.1, "grad_norm": 1.1235955953598022, "learning_rate": 9.860877927217183e-06, "loss": 0.5886, "step": 1596 }, { "epoch": 0.1, "grad_norm": 1.3224833011627197, "learning_rate": 9.86063295222211e-06, "loss": 0.6, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.2497245073318481, "learning_rate": 9.860387764781307e-06, "loss": 0.5679, "step": 1598 }, { "epoch": 0.1, "grad_norm": 1.3004792928695679, "learning_rate": 9.860142364905496e-06, "loss": 0.6321, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.3017524480819702, "learning_rate": 9.859896752605402e-06, "loss": 0.6137, "step": 1600 }, { "epoch": 0.1, "grad_norm": 1.0982877016067505, "learning_rate": 9.859650927891759e-06, "loss": 0.5441, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.296007513999939, "learning_rate": 9.859404890775313e-06, "loss": 0.6477, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.180212140083313, "learning_rate": 9.859158641266817e-06, "loss": 0.5725, "step": 1603 }, { "epoch": 0.1, "grad_norm": 1.1615076065063477, "learning_rate": 9.858912179377032e-06, "loss": 0.5716, "step": 1604 }, { "epoch": 0.1, "grad_norm": 1.201949119567871, "learning_rate": 9.858665505116733e-06, "loss": 0.6143, "step": 1605 }, { "epoch": 0.1, "grad_norm": 1.2330421209335327, "learning_rate": 9.858418618496698e-06, "loss": 0.6413, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.2913992404937744, "learning_rate": 9.858171519527723e-06, "loss": 0.597, "step": 1607 }, { "epoch": 0.1, "grad_norm": 1.023423433303833, "learning_rate": 9.857924208220604e-06, "loss": 0.5487, "step": 1608 }, { "epoch": 0.1, "grad_norm": 1.2909882068634033, "learning_rate": 9.857676684586153e-06, "loss": 0.6044, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.2663614749908447, "learning_rate": 9.857428948635184e-06, "loss": 0.5906, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.254685878753662, "learning_rate": 9.857181000378531e-06, "loss": 0.5934, "step": 1611 }, { "epoch": 0.1, "grad_norm": 1.221717357635498, "learning_rate": 9.856932839827026e-06, "loss": 0.574, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.296129822731018, "learning_rate": 9.856684466991518e-06, "loss": 0.59, "step": 1613 }, { "epoch": 0.1, "grad_norm": 1.1928893327713013, "learning_rate": 9.856435881882863e-06, "loss": 0.6359, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.4271719455718994, "learning_rate": 9.856187084511926e-06, "loss": 0.6419, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.2145817279815674, "learning_rate": 9.85593807488958e-06, "loss": 0.5788, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.3335951566696167, "learning_rate": 9.855688853026708e-06, "loss": 0.5972, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.1328668594360352, "learning_rate": 9.855439418934206e-06, "loss": 0.5876, "step": 1618 }, { "epoch": 0.1, "grad_norm": 1.3216484785079956, "learning_rate": 9.855189772622973e-06, "loss": 0.6142, "step": 1619 }, { "epoch": 0.1, "grad_norm": 1.2194607257843018, "learning_rate": 9.85493991410392e-06, "loss": 0.5821, "step": 1620 }, { "epoch": 0.1, "grad_norm": 1.240233302116394, "learning_rate": 9.854689843387972e-06, "loss": 0.598, "step": 1621 }, { "epoch": 0.1, "grad_norm": 1.3720879554748535, "learning_rate": 9.854439560486055e-06, "loss": 0.5898, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.2370030879974365, "learning_rate": 9.85418906540911e-06, "loss": 0.5905, "step": 1623 }, { "epoch": 0.1, "grad_norm": 1.2424322366714478, "learning_rate": 9.853938358168086e-06, "loss": 0.5679, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.495632290840149, "learning_rate": 9.853687438773937e-06, "loss": 0.6231, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.2532908916473389, "learning_rate": 9.853436307237635e-06, "loss": 0.5704, "step": 1626 }, { "epoch": 0.11, "grad_norm": 1.1445719003677368, "learning_rate": 9.853184963570155e-06, "loss": 0.5345, "step": 1627 }, { "epoch": 0.11, "grad_norm": 1.1609220504760742, "learning_rate": 9.85293340778248e-06, "loss": 0.5965, "step": 1628 }, { "epoch": 0.11, "grad_norm": 1.1851009130477905, "learning_rate": 9.852681639885605e-06, "loss": 0.5711, "step": 1629 }, { "epoch": 0.11, "grad_norm": 1.3028347492218018, "learning_rate": 9.852429659890537e-06, "loss": 0.6116, "step": 1630 }, { "epoch": 0.11, "grad_norm": 1.3037755489349365, "learning_rate": 9.852177467808288e-06, "loss": 0.5618, "step": 1631 }, { "epoch": 0.11, "grad_norm": 1.1700178384780884, "learning_rate": 9.85192506364988e-06, "loss": 0.6164, "step": 1632 }, { "epoch": 0.11, "grad_norm": 1.3166167736053467, "learning_rate": 9.851672447426346e-06, "loss": 0.5808, "step": 1633 }, { "epoch": 0.11, "grad_norm": 1.2025984525680542, "learning_rate": 9.851419619148728e-06, "loss": 0.5774, "step": 1634 }, { "epoch": 0.11, "grad_norm": 1.2808984518051147, "learning_rate": 9.851166578828072e-06, "loss": 0.6354, "step": 1635 }, { "epoch": 0.11, "grad_norm": 1.1881158351898193, "learning_rate": 9.850913326475446e-06, "loss": 0.552, "step": 1636 }, { "epoch": 0.11, "grad_norm": 1.2352209091186523, "learning_rate": 9.85065986210191e-06, "loss": 0.599, "step": 1637 }, { "epoch": 0.11, "grad_norm": 1.2624303102493286, "learning_rate": 9.850406185718548e-06, "loss": 0.6262, "step": 1638 }, { "epoch": 0.11, "grad_norm": 1.1493905782699585, "learning_rate": 9.850152297336447e-06, "loss": 0.5677, "step": 1639 }, { "epoch": 0.11, "grad_norm": 1.4481414556503296, "learning_rate": 9.8498981969667e-06, "loss": 0.6142, "step": 1640 }, { "epoch": 0.11, "grad_norm": 1.2876156568527222, "learning_rate": 9.849643884620416e-06, "loss": 0.5596, "step": 1641 }, { "epoch": 0.11, "grad_norm": 1.217887282371521, "learning_rate": 9.849389360308713e-06, "loss": 0.5864, "step": 1642 }, { "epoch": 0.11, "grad_norm": 1.407006859779358, "learning_rate": 9.84913462404271e-06, "loss": 0.6271, "step": 1643 }, { "epoch": 0.11, "grad_norm": 1.2175565958023071, "learning_rate": 9.848879675833542e-06, "loss": 0.6243, "step": 1644 }, { "epoch": 0.11, "grad_norm": 1.159474492073059, "learning_rate": 9.848624515692357e-06, "loss": 0.5622, "step": 1645 }, { "epoch": 0.11, "grad_norm": 1.3027385473251343, "learning_rate": 9.848369143630302e-06, "loss": 0.6517, "step": 1646 }, { "epoch": 0.11, "grad_norm": 1.2940566539764404, "learning_rate": 9.848113559658541e-06, "loss": 0.5643, "step": 1647 }, { "epoch": 0.11, "grad_norm": 1.108911395072937, "learning_rate": 9.847857763788245e-06, "loss": 0.5944, "step": 1648 }, { "epoch": 0.11, "grad_norm": 1.2476303577423096, "learning_rate": 9.847601756030594e-06, "loss": 0.6269, "step": 1649 }, { "epoch": 0.11, "grad_norm": 1.3277695178985596, "learning_rate": 9.847345536396778e-06, "loss": 0.5334, "step": 1650 }, { "epoch": 0.11, "grad_norm": 1.1887693405151367, "learning_rate": 9.847089104897993e-06, "loss": 0.6154, "step": 1651 }, { "epoch": 0.11, "grad_norm": 1.4732482433319092, "learning_rate": 9.84683246154545e-06, "loss": 0.568, "step": 1652 }, { "epoch": 0.11, "grad_norm": 1.343870759010315, "learning_rate": 9.846575606350366e-06, "loss": 0.5464, "step": 1653 }, { "epoch": 0.11, "grad_norm": 1.2465907335281372, "learning_rate": 9.846318539323965e-06, "loss": 0.6092, "step": 1654 }, { "epoch": 0.11, "grad_norm": 1.2118966579437256, "learning_rate": 9.846061260477484e-06, "loss": 0.5967, "step": 1655 }, { "epoch": 0.11, "grad_norm": 1.294382095336914, "learning_rate": 9.84580376982217e-06, "loss": 0.5637, "step": 1656 }, { "epoch": 0.11, "grad_norm": 1.2222330570220947, "learning_rate": 9.845546067369274e-06, "loss": 0.5479, "step": 1657 }, { "epoch": 0.11, "grad_norm": 1.463549256324768, "learning_rate": 9.845288153130061e-06, "loss": 0.6441, "step": 1658 }, { "epoch": 0.11, "grad_norm": 1.3649464845657349, "learning_rate": 9.845030027115805e-06, "loss": 0.589, "step": 1659 }, { "epoch": 0.11, "grad_norm": 1.2942241430282593, "learning_rate": 9.844771689337786e-06, "loss": 0.6075, "step": 1660 }, { "epoch": 0.11, "grad_norm": 1.243156909942627, "learning_rate": 9.844513139807298e-06, "loss": 0.5973, "step": 1661 }, { "epoch": 0.11, "grad_norm": 1.3308113813400269, "learning_rate": 9.844254378535638e-06, "loss": 0.6001, "step": 1662 }, { "epoch": 0.11, "grad_norm": 1.2282259464263916, "learning_rate": 9.843995405534117e-06, "loss": 0.5997, "step": 1663 }, { "epoch": 0.11, "grad_norm": 1.2038732767105103, "learning_rate": 9.843736220814055e-06, "loss": 0.5421, "step": 1664 }, { "epoch": 0.11, "grad_norm": 1.268788456916809, "learning_rate": 9.84347682438678e-06, "loss": 0.5811, "step": 1665 }, { "epoch": 0.11, "grad_norm": 1.1237473487854004, "learning_rate": 9.84321721626363e-06, "loss": 0.5883, "step": 1666 }, { "epoch": 0.11, "grad_norm": 1.0995349884033203, "learning_rate": 9.84295739645595e-06, "loss": 0.5562, "step": 1667 }, { "epoch": 0.11, "grad_norm": 1.2692179679870605, "learning_rate": 9.842697364975098e-06, "loss": 0.6059, "step": 1668 }, { "epoch": 0.11, "grad_norm": 1.0873079299926758, "learning_rate": 9.842437121832439e-06, "loss": 0.5731, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.188362956047058, "learning_rate": 9.842176667039345e-06, "loss": 0.6067, "step": 1670 }, { "epoch": 0.11, "grad_norm": 1.2905356884002686, "learning_rate": 9.841916000607204e-06, "loss": 0.5865, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.2030761241912842, "learning_rate": 9.841655122547407e-06, "loss": 0.5659, "step": 1672 }, { "epoch": 0.11, "grad_norm": 1.3774031400680542, "learning_rate": 9.841394032871354e-06, "loss": 0.6212, "step": 1673 }, { "epoch": 0.11, "grad_norm": 1.2183440923690796, "learning_rate": 9.84113273159046e-06, "loss": 0.5894, "step": 1674 }, { "epoch": 0.11, "grad_norm": 1.2259937524795532, "learning_rate": 9.840871218716147e-06, "loss": 0.5835, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.1156138181686401, "learning_rate": 9.840609494259839e-06, "loss": 0.5453, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.2008438110351562, "learning_rate": 9.84034755823298e-06, "loss": 0.5494, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.350527048110962, "learning_rate": 9.840085410647017e-06, "loss": 0.6119, "step": 1678 }, { "epoch": 0.11, "grad_norm": 1.3061323165893555, "learning_rate": 9.83982305151341e-06, "loss": 0.5516, "step": 1679 }, { "epoch": 0.11, "grad_norm": 1.1768683195114136, "learning_rate": 9.839560480843623e-06, "loss": 0.5669, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.1863226890563965, "learning_rate": 9.839297698649134e-06, "loss": 0.5852, "step": 1681 }, { "epoch": 0.11, "grad_norm": 1.264375925064087, "learning_rate": 9.839034704941429e-06, "loss": 0.6416, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.1849948167800903, "learning_rate": 9.838771499732e-06, "loss": 0.5566, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.2997300624847412, "learning_rate": 9.838508083032354e-06, "loss": 0.6183, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.1810988187789917, "learning_rate": 9.838244454854003e-06, "loss": 0.5925, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.208022117614746, "learning_rate": 9.83798061520847e-06, "loss": 0.6125, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.1619939804077148, "learning_rate": 9.837716564107286e-06, "loss": 0.6018, "step": 1687 }, { "epoch": 0.11, "grad_norm": 1.2063145637512207, "learning_rate": 9.837452301561994e-06, "loss": 0.6043, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.1886957883834839, "learning_rate": 9.83718782758414e-06, "loss": 0.5794, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.3087600469589233, "learning_rate": 9.836923142185289e-06, "loss": 0.6558, "step": 1690 }, { "epoch": 0.11, "grad_norm": 1.2035701274871826, "learning_rate": 9.836658245377004e-06, "loss": 0.5809, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.4134624004364014, "learning_rate": 9.836393137170867e-06, "loss": 0.5959, "step": 1692 }, { "epoch": 0.11, "grad_norm": 1.2199617624282837, "learning_rate": 9.836127817578465e-06, "loss": 0.6105, "step": 1693 }, { "epoch": 0.11, "grad_norm": 1.2704510688781738, "learning_rate": 9.835862286611393e-06, "loss": 0.6614, "step": 1694 }, { "epoch": 0.11, "grad_norm": 1.1761891841888428, "learning_rate": 9.835596544281258e-06, "loss": 0.6311, "step": 1695 }, { "epoch": 0.11, "grad_norm": 1.2178045511245728, "learning_rate": 9.835330590599673e-06, "loss": 0.6057, "step": 1696 }, { "epoch": 0.11, "grad_norm": 1.2920424938201904, "learning_rate": 9.835064425578264e-06, "loss": 0.5925, "step": 1697 }, { "epoch": 0.11, "grad_norm": 1.3268074989318848, "learning_rate": 9.834798049228663e-06, "loss": 0.6589, "step": 1698 }, { "epoch": 0.11, "grad_norm": 1.1684081554412842, "learning_rate": 9.834531461562512e-06, "loss": 0.6027, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.2588365077972412, "learning_rate": 9.834264662591467e-06, "loss": 0.569, "step": 1700 }, { "epoch": 0.11, "grad_norm": 1.1364084482192993, "learning_rate": 9.833997652327182e-06, "loss": 0.6094, "step": 1701 }, { "epoch": 0.11, "grad_norm": 1.3792811632156372, "learning_rate": 9.833730430781334e-06, "loss": 0.6289, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.283807396888733, "learning_rate": 9.8334629979656e-06, "loss": 0.6034, "step": 1703 }, { "epoch": 0.11, "grad_norm": 1.292319893836975, "learning_rate": 9.833195353891669e-06, "loss": 0.6144, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.395006775856018, "learning_rate": 9.832927498571239e-06, "loss": 0.5737, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.2824573516845703, "learning_rate": 9.832659432016017e-06, "loss": 0.647, "step": 1706 }, { "epoch": 0.11, "grad_norm": 1.3002039194107056, "learning_rate": 9.83239115423772e-06, "loss": 0.6385, "step": 1707 }, { "epoch": 0.11, "grad_norm": 1.3234755992889404, "learning_rate": 9.832122665248071e-06, "loss": 0.5885, "step": 1708 }, { "epoch": 0.11, "grad_norm": 1.2229434251785278, "learning_rate": 9.83185396505881e-06, "loss": 0.6518, "step": 1709 }, { "epoch": 0.11, "grad_norm": 1.2459099292755127, "learning_rate": 9.831585053681678e-06, "loss": 0.6295, "step": 1710 }, { "epoch": 0.11, "grad_norm": 1.3014956712722778, "learning_rate": 9.83131593112843e-06, "loss": 0.578, "step": 1711 }, { "epoch": 0.11, "grad_norm": 1.2456477880477905, "learning_rate": 9.831046597410825e-06, "loss": 0.5294, "step": 1712 }, { "epoch": 0.11, "grad_norm": 1.1718806028366089, "learning_rate": 9.830777052540638e-06, "loss": 0.5929, "step": 1713 }, { "epoch": 0.11, "grad_norm": 1.2079806327819824, "learning_rate": 9.830507296529653e-06, "loss": 0.5971, "step": 1714 }, { "epoch": 0.11, "grad_norm": 1.126344919204712, "learning_rate": 9.830237329389653e-06, "loss": 0.5894, "step": 1715 }, { "epoch": 0.11, "grad_norm": 1.2044068574905396, "learning_rate": 9.829967151132442e-06, "loss": 0.5867, "step": 1716 }, { "epoch": 0.11, "grad_norm": 1.1369727849960327, "learning_rate": 9.829696761769829e-06, "loss": 0.5679, "step": 1717 }, { "epoch": 0.11, "grad_norm": 1.2235556840896606, "learning_rate": 9.829426161313634e-06, "loss": 0.6098, "step": 1718 }, { "epoch": 0.11, "grad_norm": 1.2267147302627563, "learning_rate": 9.829155349775677e-06, "loss": 0.5536, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.1325905323028564, "learning_rate": 9.828884327167802e-06, "loss": 0.5906, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.1781032085418701, "learning_rate": 9.828613093501851e-06, "loss": 0.5544, "step": 1721 }, { "epoch": 0.11, "grad_norm": 1.1325215101242065, "learning_rate": 9.82834164878968e-06, "loss": 0.5893, "step": 1722 }, { "epoch": 0.11, "grad_norm": 1.2007848024368286, "learning_rate": 9.828069993043152e-06, "loss": 0.5526, "step": 1723 }, { "epoch": 0.11, "grad_norm": 1.183667540550232, "learning_rate": 9.827798126274143e-06, "loss": 0.5901, "step": 1724 }, { "epoch": 0.11, "grad_norm": 1.2066534757614136, "learning_rate": 9.82752604849453e-06, "loss": 0.6021, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.1987782716751099, "learning_rate": 9.827253759716212e-06, "loss": 0.5633, "step": 1726 }, { "epoch": 0.11, "grad_norm": 1.1328158378601074, "learning_rate": 9.826981259951085e-06, "loss": 0.5733, "step": 1727 }, { "epoch": 0.11, "grad_norm": 1.2324339151382446, "learning_rate": 9.826708549211061e-06, "loss": 0.5872, "step": 1728 }, { "epoch": 0.11, "grad_norm": 1.1205248832702637, "learning_rate": 9.826435627508059e-06, "loss": 0.5771, "step": 1729 }, { "epoch": 0.11, "grad_norm": 1.194175124168396, "learning_rate": 9.82616249485401e-06, "loss": 0.5239, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.3403139114379883, "learning_rate": 9.825889151260848e-06, "loss": 0.6328, "step": 1731 }, { "epoch": 0.11, "grad_norm": 1.1104638576507568, "learning_rate": 9.825615596740524e-06, "loss": 0.5873, "step": 1732 }, { "epoch": 0.11, "grad_norm": 1.2295886278152466, "learning_rate": 9.825341831304992e-06, "loss": 0.6149, "step": 1733 }, { "epoch": 0.11, "grad_norm": 1.2176268100738525, "learning_rate": 9.825067854966217e-06, "loss": 0.5859, "step": 1734 }, { "epoch": 0.11, "grad_norm": 1.3564367294311523, "learning_rate": 9.824793667736176e-06, "loss": 0.6464, "step": 1735 }, { "epoch": 0.11, "grad_norm": 1.0689332485198975, "learning_rate": 9.824519269626853e-06, "loss": 0.5601, "step": 1736 }, { "epoch": 0.11, "grad_norm": 1.1518312692642212, "learning_rate": 9.824244660650237e-06, "loss": 0.5878, "step": 1737 }, { "epoch": 0.11, "grad_norm": 1.2771142721176147, "learning_rate": 9.823969840818336e-06, "loss": 0.6211, "step": 1738 }, { "epoch": 0.11, "grad_norm": 1.4493962526321411, "learning_rate": 9.823694810143159e-06, "loss": 0.6089, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.1796424388885498, "learning_rate": 9.823419568636726e-06, "loss": 0.5857, "step": 1740 }, { "epoch": 0.11, "grad_norm": 1.300942063331604, "learning_rate": 9.823144116311069e-06, "loss": 0.6056, "step": 1741 }, { "epoch": 0.11, "grad_norm": 1.30216646194458, "learning_rate": 9.822868453178227e-06, "loss": 0.6387, "step": 1742 }, { "epoch": 0.11, "grad_norm": 1.245396375656128, "learning_rate": 9.822592579250247e-06, "loss": 0.587, "step": 1743 }, { "epoch": 0.11, "grad_norm": 1.1605384349822998, "learning_rate": 9.82231649453919e-06, "loss": 0.5846, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.1767277717590332, "learning_rate": 9.82204019905712e-06, "loss": 0.5905, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.2600938081741333, "learning_rate": 9.82176369281611e-06, "loss": 0.6094, "step": 1746 }, { "epoch": 0.11, "grad_norm": 1.3714780807495117, "learning_rate": 9.821486975828253e-06, "loss": 0.6282, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.4202486276626587, "learning_rate": 9.821210048105641e-06, "loss": 0.6175, "step": 1748 }, { "epoch": 0.11, "grad_norm": 1.1300503015518188, "learning_rate": 9.820932909660375e-06, "loss": 0.5543, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.2003231048583984, "learning_rate": 9.820655560504569e-06, "loss": 0.5537, "step": 1750 }, { "epoch": 0.11, "grad_norm": 1.2054706811904907, "learning_rate": 9.820378000650345e-06, "loss": 0.6004, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.1762956380844116, "learning_rate": 9.820100230109839e-06, "loss": 0.669, "step": 1752 }, { "epoch": 0.11, "grad_norm": 1.333722472190857, "learning_rate": 9.819822248895186e-06, "loss": 0.5992, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.1734741926193237, "learning_rate": 9.819544057018538e-06, "loss": 0.5909, "step": 1754 }, { "epoch": 0.11, "grad_norm": 1.443820595741272, "learning_rate": 9.819265654492054e-06, "loss": 0.6207, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.1394968032836914, "learning_rate": 9.818987041327901e-06, "loss": 0.5424, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.1818708181381226, "learning_rate": 9.818708217538257e-06, "loss": 0.601, "step": 1757 }, { "epoch": 0.11, "grad_norm": 1.1496292352676392, "learning_rate": 9.818429183135311e-06, "loss": 0.5799, "step": 1758 }, { "epoch": 0.11, "grad_norm": 1.2901744842529297, "learning_rate": 9.818149938131257e-06, "loss": 0.622, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.2696551084518433, "learning_rate": 9.8178704825383e-06, "loss": 0.5915, "step": 1760 }, { "epoch": 0.11, "grad_norm": 1.3116565942764282, "learning_rate": 9.817590816368654e-06, "loss": 0.654, "step": 1761 }, { "epoch": 0.11, "grad_norm": 1.2050001621246338, "learning_rate": 9.817310939634544e-06, "loss": 0.6225, "step": 1762 }, { "epoch": 0.11, "grad_norm": 1.131482481956482, "learning_rate": 9.817030852348199e-06, "loss": 0.5901, "step": 1763 }, { "epoch": 0.11, "grad_norm": 1.0855544805526733, "learning_rate": 9.816750554521866e-06, "loss": 0.5387, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.2041516304016113, "learning_rate": 9.816470046167795e-06, "loss": 0.6375, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.367276668548584, "learning_rate": 9.816189327298243e-06, "loss": 0.6187, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.1542452573776245, "learning_rate": 9.81590839792548e-06, "loss": 0.6086, "step": 1767 }, { "epoch": 0.11, "grad_norm": 1.3830000162124634, "learning_rate": 9.815627258061788e-06, "loss": 0.6176, "step": 1768 }, { "epoch": 0.11, "grad_norm": 1.1650502681732178, "learning_rate": 9.815345907719452e-06, "loss": 0.5687, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.211534857749939, "learning_rate": 9.815064346910772e-06, "loss": 0.6172, "step": 1770 }, { "epoch": 0.11, "grad_norm": 1.1737220287322998, "learning_rate": 9.81478257564805e-06, "loss": 0.5586, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.1077094078063965, "learning_rate": 9.814500593943606e-06, "loss": 0.5576, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.1498066186904907, "learning_rate": 9.814218401809761e-06, "loss": 0.5461, "step": 1773 }, { "epoch": 0.11, "grad_norm": 1.211664080619812, "learning_rate": 9.813935999258852e-06, "loss": 0.5937, "step": 1774 }, { "epoch": 0.11, "grad_norm": 1.1827080249786377, "learning_rate": 9.81365338630322e-06, "loss": 0.569, "step": 1775 }, { "epoch": 0.11, "grad_norm": 1.1528174877166748, "learning_rate": 9.813370562955218e-06, "loss": 0.5391, "step": 1776 }, { "epoch": 0.11, "grad_norm": 1.2820100784301758, "learning_rate": 9.813087529227207e-06, "loss": 0.6256, "step": 1777 }, { "epoch": 0.11, "grad_norm": 1.2478814125061035, "learning_rate": 9.812804285131557e-06, "loss": 0.6247, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.174739122390747, "learning_rate": 9.81252083068065e-06, "loss": 0.5877, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.1819592714309692, "learning_rate": 9.812237165886875e-06, "loss": 0.6092, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.2541784048080444, "learning_rate": 9.811953290762628e-06, "loss": 0.6184, "step": 1781 }, { "epoch": 0.12, "grad_norm": 1.1574395895004272, "learning_rate": 9.811669205320317e-06, "loss": 0.6122, "step": 1782 }, { "epoch": 0.12, "grad_norm": 1.1396318674087524, "learning_rate": 9.811384909572361e-06, "loss": 0.5837, "step": 1783 }, { "epoch": 0.12, "grad_norm": 1.2214239835739136, "learning_rate": 9.811100403531184e-06, "loss": 0.61, "step": 1784 }, { "epoch": 0.12, "grad_norm": 1.1596806049346924, "learning_rate": 9.81081568720922e-06, "loss": 0.6099, "step": 1785 }, { "epoch": 0.12, "grad_norm": 1.206365704536438, "learning_rate": 9.810530760618914e-06, "loss": 0.5714, "step": 1786 }, { "epoch": 0.12, "grad_norm": 1.2649949789047241, "learning_rate": 9.81024562377272e-06, "loss": 0.5666, "step": 1787 }, { "epoch": 0.12, "grad_norm": 1.1424615383148193, "learning_rate": 9.809960276683102e-06, "loss": 0.6079, "step": 1788 }, { "epoch": 0.12, "grad_norm": 1.157280683517456, "learning_rate": 9.80967471936253e-06, "loss": 0.5679, "step": 1789 }, { "epoch": 0.12, "grad_norm": 1.3008993864059448, "learning_rate": 9.809388951823484e-06, "loss": 0.5849, "step": 1790 }, { "epoch": 0.12, "grad_norm": 1.2529852390289307, "learning_rate": 9.809102974078455e-06, "loss": 0.6291, "step": 1791 }, { "epoch": 0.12, "grad_norm": 1.1538463830947876, "learning_rate": 9.808816786139942e-06, "loss": 0.5932, "step": 1792 }, { "epoch": 0.12, "grad_norm": 1.217225432395935, "learning_rate": 9.808530388020457e-06, "loss": 0.6025, "step": 1793 }, { "epoch": 0.12, "grad_norm": 1.315435767173767, "learning_rate": 9.808243779732513e-06, "loss": 0.6088, "step": 1794 }, { "epoch": 0.12, "grad_norm": 1.1977806091308594, "learning_rate": 9.80795696128864e-06, "loss": 0.633, "step": 1795 }, { "epoch": 0.12, "grad_norm": 1.3618017435073853, "learning_rate": 9.807669932701371e-06, "loss": 0.6113, "step": 1796 }, { "epoch": 0.12, "grad_norm": 1.141076683998108, "learning_rate": 9.807382693983255e-06, "loss": 0.5794, "step": 1797 }, { "epoch": 0.12, "grad_norm": 1.1997485160827637, "learning_rate": 9.807095245146845e-06, "loss": 0.6319, "step": 1798 }, { "epoch": 0.12, "grad_norm": 1.2793564796447754, "learning_rate": 9.806807586204703e-06, "loss": 0.5832, "step": 1799 }, { "epoch": 0.12, "grad_norm": 1.1895166635513306, "learning_rate": 9.806519717169402e-06, "loss": 0.5613, "step": 1800 }, { "epoch": 0.12, "grad_norm": 1.2331781387329102, "learning_rate": 9.806231638053527e-06, "loss": 0.6012, "step": 1801 }, { "epoch": 0.12, "grad_norm": 1.5186675786972046, "learning_rate": 9.805943348869667e-06, "loss": 0.5894, "step": 1802 }, { "epoch": 0.12, "grad_norm": 1.2060879468917847, "learning_rate": 9.805654849630424e-06, "loss": 0.5296, "step": 1803 }, { "epoch": 0.12, "grad_norm": 1.257850170135498, "learning_rate": 9.805366140348404e-06, "loss": 0.6127, "step": 1804 }, { "epoch": 0.12, "grad_norm": 1.239624261856079, "learning_rate": 9.80507722103623e-06, "loss": 0.6449, "step": 1805 }, { "epoch": 0.12, "grad_norm": 1.1184827089309692, "learning_rate": 9.804788091706524e-06, "loss": 0.6308, "step": 1806 }, { "epoch": 0.12, "grad_norm": 1.2520917654037476, "learning_rate": 9.804498752371932e-06, "loss": 0.6011, "step": 1807 }, { "epoch": 0.12, "grad_norm": 1.1933866739273071, "learning_rate": 9.804209203045093e-06, "loss": 0.5766, "step": 1808 }, { "epoch": 0.12, "grad_norm": 1.2684961557388306, "learning_rate": 9.803919443738665e-06, "loss": 0.559, "step": 1809 }, { "epoch": 0.12, "grad_norm": 1.337185263633728, "learning_rate": 9.803629474465312e-06, "loss": 0.6073, "step": 1810 }, { "epoch": 0.12, "grad_norm": 1.1790227890014648, "learning_rate": 9.80333929523771e-06, "loss": 0.6064, "step": 1811 }, { "epoch": 0.12, "grad_norm": 1.4305166006088257, "learning_rate": 9.803048906068537e-06, "loss": 0.5901, "step": 1812 }, { "epoch": 0.12, "grad_norm": 1.2857648134231567, "learning_rate": 9.80275830697049e-06, "loss": 0.6056, "step": 1813 }, { "epoch": 0.12, "grad_norm": 1.2211490869522095, "learning_rate": 9.80246749795627e-06, "loss": 0.6181, "step": 1814 }, { "epoch": 0.12, "grad_norm": 1.3095903396606445, "learning_rate": 9.802176479038586e-06, "loss": 0.6007, "step": 1815 }, { "epoch": 0.12, "grad_norm": 1.238431453704834, "learning_rate": 9.801885250230156e-06, "loss": 0.6334, "step": 1816 }, { "epoch": 0.12, "grad_norm": 1.2219985723495483, "learning_rate": 9.801593811543712e-06, "loss": 0.5866, "step": 1817 }, { "epoch": 0.12, "grad_norm": 1.1383734941482544, "learning_rate": 9.80130216299199e-06, "loss": 0.6003, "step": 1818 }, { "epoch": 0.12, "grad_norm": 1.1527336835861206, "learning_rate": 9.801010304587737e-06, "loss": 0.6139, "step": 1819 }, { "epoch": 0.12, "grad_norm": 1.1978318691253662, "learning_rate": 9.800718236343712e-06, "loss": 0.5677, "step": 1820 }, { "epoch": 0.12, "grad_norm": 1.2662427425384521, "learning_rate": 9.800425958272678e-06, "loss": 0.5939, "step": 1821 }, { "epoch": 0.12, "grad_norm": 1.1646467447280884, "learning_rate": 9.800133470387413e-06, "loss": 0.6126, "step": 1822 }, { "epoch": 0.12, "grad_norm": 1.1200039386749268, "learning_rate": 9.799840772700697e-06, "loss": 0.5672, "step": 1823 }, { "epoch": 0.12, "grad_norm": 1.2349265813827515, "learning_rate": 9.799547865225323e-06, "loss": 0.6018, "step": 1824 }, { "epoch": 0.12, "grad_norm": 1.1154634952545166, "learning_rate": 9.799254747974096e-06, "loss": 0.6204, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.1331678628921509, "learning_rate": 9.798961420959827e-06, "loss": 0.5658, "step": 1826 }, { "epoch": 0.12, "grad_norm": 1.2341803312301636, "learning_rate": 9.798667884195335e-06, "loss": 0.5744, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.3615559339523315, "learning_rate": 9.798374137693452e-06, "loss": 0.5899, "step": 1828 }, { "epoch": 0.12, "grad_norm": 1.17841374874115, "learning_rate": 9.798080181467013e-06, "loss": 0.5386, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.2372050285339355, "learning_rate": 9.79778601552887e-06, "loss": 0.5797, "step": 1830 }, { "epoch": 0.12, "grad_norm": 1.2119255065917969, "learning_rate": 9.797491639891878e-06, "loss": 0.5761, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.2502695322036743, "learning_rate": 9.797197054568905e-06, "loss": 0.6043, "step": 1832 }, { "epoch": 0.12, "grad_norm": 1.082972764968872, "learning_rate": 9.796902259572825e-06, "loss": 0.5528, "step": 1833 }, { "epoch": 0.12, "grad_norm": 1.2734301090240479, "learning_rate": 9.796607254916523e-06, "loss": 0.5827, "step": 1834 }, { "epoch": 0.12, "grad_norm": 1.3130910396575928, "learning_rate": 9.796312040612895e-06, "loss": 0.6318, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.2829433679580688, "learning_rate": 9.796016616674839e-06, "loss": 0.5977, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.1965216398239136, "learning_rate": 9.795720983115273e-06, "loss": 0.5613, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.163360834121704, "learning_rate": 9.795425139947117e-06, "loss": 0.6035, "step": 1838 }, { "epoch": 0.12, "grad_norm": 1.2033567428588867, "learning_rate": 9.795129087183299e-06, "loss": 0.5701, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.1722532510757446, "learning_rate": 9.79483282483676e-06, "loss": 0.6346, "step": 1840 }, { "epoch": 0.12, "grad_norm": 1.2542644739151, "learning_rate": 9.79453635292045e-06, "loss": 0.577, "step": 1841 }, { "epoch": 0.12, "grad_norm": 1.2719043493270874, "learning_rate": 9.794239671447324e-06, "loss": 0.5959, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.2264877557754517, "learning_rate": 9.793942780430353e-06, "loss": 0.5358, "step": 1843 }, { "epoch": 0.12, "grad_norm": 1.2445122003555298, "learning_rate": 9.79364567988251e-06, "loss": 0.5761, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.2479488849639893, "learning_rate": 9.793348369816785e-06, "loss": 0.5743, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.25471830368042, "learning_rate": 9.793050850246168e-06, "loss": 0.5722, "step": 1846 }, { "epoch": 0.12, "grad_norm": 1.1929798126220703, "learning_rate": 9.792753121183664e-06, "loss": 0.5624, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.1586002111434937, "learning_rate": 9.792455182642289e-06, "loss": 0.5734, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.2492027282714844, "learning_rate": 9.79215703463506e-06, "loss": 0.6388, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.3708807229995728, "learning_rate": 9.791858677175012e-06, "loss": 0.6091, "step": 1850 }, { "epoch": 0.12, "grad_norm": 1.294802188873291, "learning_rate": 9.791560110275184e-06, "loss": 0.672, "step": 1851 }, { "epoch": 0.12, "grad_norm": 1.1364479064941406, "learning_rate": 9.791261333948627e-06, "loss": 0.5974, "step": 1852 }, { "epoch": 0.12, "grad_norm": 1.2587543725967407, "learning_rate": 9.790962348208397e-06, "loss": 0.5575, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.4076374769210815, "learning_rate": 9.790663153067566e-06, "loss": 0.58, "step": 1854 }, { "epoch": 0.12, "grad_norm": 1.3175101280212402, "learning_rate": 9.790363748539206e-06, "loss": 0.5679, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.0765372514724731, "learning_rate": 9.790064134636408e-06, "loss": 0.5669, "step": 1856 }, { "epoch": 0.12, "grad_norm": 1.1945635080337524, "learning_rate": 9.789764311372265e-06, "loss": 0.5994, "step": 1857 }, { "epoch": 0.12, "grad_norm": 1.0711615085601807, "learning_rate": 9.78946427875988e-06, "loss": 0.5511, "step": 1858 }, { "epoch": 0.12, "grad_norm": 1.179155945777893, "learning_rate": 9.789164036812368e-06, "loss": 0.5814, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.2020055055618286, "learning_rate": 9.788863585542854e-06, "loss": 0.599, "step": 1860 }, { "epoch": 0.12, "grad_norm": 1.2612744569778442, "learning_rate": 9.788562924964469e-06, "loss": 0.5839, "step": 1861 }, { "epoch": 0.12, "grad_norm": 1.1645230054855347, "learning_rate": 9.788262055090352e-06, "loss": 0.5418, "step": 1862 }, { "epoch": 0.12, "grad_norm": 1.1743762493133545, "learning_rate": 9.787960975933653e-06, "loss": 0.5936, "step": 1863 }, { "epoch": 0.12, "grad_norm": 1.1046277284622192, "learning_rate": 9.787659687507535e-06, "loss": 0.5737, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.225908875465393, "learning_rate": 9.787358189825163e-06, "loss": 0.592, "step": 1865 }, { "epoch": 0.12, "grad_norm": 1.16697359085083, "learning_rate": 9.787056482899717e-06, "loss": 0.6113, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.2172986268997192, "learning_rate": 9.786754566744383e-06, "loss": 0.5911, "step": 1867 }, { "epoch": 0.12, "grad_norm": 1.2916276454925537, "learning_rate": 9.786452441372356e-06, "loss": 0.5386, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.498077392578125, "learning_rate": 9.786150106796842e-06, "loss": 0.6593, "step": 1869 }, { "epoch": 0.12, "grad_norm": 1.0891796350479126, "learning_rate": 9.785847563031058e-06, "loss": 0.5983, "step": 1870 }, { "epoch": 0.12, "grad_norm": 1.2302461862564087, "learning_rate": 9.785544810088221e-06, "loss": 0.6024, "step": 1871 }, { "epoch": 0.12, "grad_norm": 1.1951425075531006, "learning_rate": 9.785241847981568e-06, "loss": 0.5996, "step": 1872 }, { "epoch": 0.12, "grad_norm": 1.3715685606002808, "learning_rate": 9.78493867672434e-06, "loss": 0.5739, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.209067940711975, "learning_rate": 9.78463529632979e-06, "loss": 0.5969, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.216176152229309, "learning_rate": 9.784331706811171e-06, "loss": 0.5794, "step": 1875 }, { "epoch": 0.12, "grad_norm": 1.192907452583313, "learning_rate": 9.784027908181762e-06, "loss": 0.5774, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.2134099006652832, "learning_rate": 9.783723900454833e-06, "loss": 0.5821, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.2365938425064087, "learning_rate": 9.783419683643675e-06, "loss": 0.6199, "step": 1878 }, { "epoch": 0.12, "grad_norm": 1.19040846824646, "learning_rate": 9.783115257761584e-06, "loss": 0.4957, "step": 1879 }, { "epoch": 0.12, "grad_norm": 1.1710668802261353, "learning_rate": 9.782810622821867e-06, "loss": 0.6263, "step": 1880 }, { "epoch": 0.12, "grad_norm": 1.080352544784546, "learning_rate": 9.782505778837837e-06, "loss": 0.5724, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.223713755607605, "learning_rate": 9.782200725822816e-06, "loss": 0.6481, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.152010202407837, "learning_rate": 9.781895463790142e-06, "loss": 0.6114, "step": 1883 }, { "epoch": 0.12, "grad_norm": 1.1771653890609741, "learning_rate": 9.781589992753156e-06, "loss": 0.5374, "step": 1884 }, { "epoch": 0.12, "grad_norm": 1.168131947517395, "learning_rate": 9.781284312725205e-06, "loss": 0.5845, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.15181303024292, "learning_rate": 9.780978423719653e-06, "loss": 0.592, "step": 1886 }, { "epoch": 0.12, "grad_norm": 1.2802611589431763, "learning_rate": 9.780672325749872e-06, "loss": 0.63, "step": 1887 }, { "epoch": 0.12, "grad_norm": 1.1834124326705933, "learning_rate": 9.780366018829235e-06, "loss": 0.6207, "step": 1888 }, { "epoch": 0.12, "grad_norm": 1.1239713430404663, "learning_rate": 9.780059502971135e-06, "loss": 0.5779, "step": 1889 }, { "epoch": 0.12, "grad_norm": 1.12791109085083, "learning_rate": 9.779752778188965e-06, "loss": 0.5289, "step": 1890 }, { "epoch": 0.12, "grad_norm": 1.1639926433563232, "learning_rate": 9.779445844496134e-06, "loss": 0.5697, "step": 1891 }, { "epoch": 0.12, "grad_norm": 1.1105413436889648, "learning_rate": 9.779138701906054e-06, "loss": 0.5792, "step": 1892 }, { "epoch": 0.12, "grad_norm": 1.2124277353286743, "learning_rate": 9.778831350432155e-06, "loss": 0.6444, "step": 1893 }, { "epoch": 0.12, "grad_norm": 1.251900553703308, "learning_rate": 9.778523790087867e-06, "loss": 0.6353, "step": 1894 }, { "epoch": 0.12, "grad_norm": 1.1610510349273682, "learning_rate": 9.77821602088663e-06, "loss": 0.5998, "step": 1895 }, { "epoch": 0.12, "grad_norm": 1.2242954969406128, "learning_rate": 9.777908042841902e-06, "loss": 0.5791, "step": 1896 }, { "epoch": 0.12, "grad_norm": 1.0866413116455078, "learning_rate": 9.777599855967137e-06, "loss": 0.5975, "step": 1897 }, { "epoch": 0.12, "grad_norm": 1.1534159183502197, "learning_rate": 9.777291460275812e-06, "loss": 0.5637, "step": 1898 }, { "epoch": 0.12, "grad_norm": 1.2229399681091309, "learning_rate": 9.7769828557814e-06, "loss": 0.6007, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.21675443649292, "learning_rate": 9.776674042497394e-06, "loss": 0.5716, "step": 1900 }, { "epoch": 0.12, "grad_norm": 1.285149335861206, "learning_rate": 9.77636502043729e-06, "loss": 0.63, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.1994400024414062, "learning_rate": 9.776055789614594e-06, "loss": 0.5806, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.089572787284851, "learning_rate": 9.775746350042821e-06, "loss": 0.5938, "step": 1903 }, { "epoch": 0.12, "grad_norm": 1.2980462312698364, "learning_rate": 9.775436701735497e-06, "loss": 0.5763, "step": 1904 }, { "epoch": 0.12, "grad_norm": 1.126521348953247, "learning_rate": 9.775126844706155e-06, "loss": 0.5562, "step": 1905 }, { "epoch": 0.12, "grad_norm": 1.2230972051620483, "learning_rate": 9.77481677896834e-06, "loss": 0.6223, "step": 1906 }, { "epoch": 0.12, "grad_norm": 1.0674346685409546, "learning_rate": 9.774506504535601e-06, "loss": 0.5762, "step": 1907 }, { "epoch": 0.12, "grad_norm": 1.1464256048202515, "learning_rate": 9.774196021421503e-06, "loss": 0.5712, "step": 1908 }, { "epoch": 0.12, "grad_norm": 1.3527413606643677, "learning_rate": 9.773885329639613e-06, "loss": 0.5758, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.1456685066223145, "learning_rate": 9.773574429203512e-06, "loss": 0.6165, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.244727373123169, "learning_rate": 9.773263320126789e-06, "loss": 0.6054, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.204087495803833, "learning_rate": 9.772952002423043e-06, "loss": 0.5533, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.2312511205673218, "learning_rate": 9.772640476105878e-06, "loss": 0.5637, "step": 1913 }, { "epoch": 0.12, "grad_norm": 1.2567503452301025, "learning_rate": 9.77232874118891e-06, "loss": 0.5744, "step": 1914 }, { "epoch": 0.12, "grad_norm": 1.1682435274124146, "learning_rate": 9.772016797685766e-06, "loss": 0.6359, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.105617880821228, "learning_rate": 9.77170464561008e-06, "loss": 0.5542, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.218342900276184, "learning_rate": 9.771392284975496e-06, "loss": 0.604, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.4933998584747314, "learning_rate": 9.771079715795666e-06, "loss": 0.6307, "step": 1918 }, { "epoch": 0.12, "grad_norm": 1.2912859916687012, "learning_rate": 9.77076693808425e-06, "loss": 0.586, "step": 1919 }, { "epoch": 0.12, "grad_norm": 1.2062814235687256, "learning_rate": 9.770453951854922e-06, "loss": 0.5805, "step": 1920 }, { "epoch": 0.12, "grad_norm": 1.1371450424194336, "learning_rate": 9.770140757121356e-06, "loss": 0.5745, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.0856950283050537, "learning_rate": 9.769827353897248e-06, "loss": 0.5412, "step": 1922 }, { "epoch": 0.12, "grad_norm": 1.2271087169647217, "learning_rate": 9.76951374219629e-06, "loss": 0.6168, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.3473663330078125, "learning_rate": 9.769199922032194e-06, "loss": 0.5617, "step": 1924 }, { "epoch": 0.12, "grad_norm": 1.1641062498092651, "learning_rate": 9.768885893418673e-06, "loss": 0.5806, "step": 1925 }, { "epoch": 0.12, "grad_norm": 1.3029451370239258, "learning_rate": 9.768571656369455e-06, "loss": 0.625, "step": 1926 }, { "epoch": 0.12, "grad_norm": 1.1845316886901855, "learning_rate": 9.768257210898271e-06, "loss": 0.5752, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.178580403327942, "learning_rate": 9.767942557018866e-06, "loss": 0.5499, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.1758016347885132, "learning_rate": 9.767627694744994e-06, "loss": 0.5764, "step": 1929 }, { "epoch": 0.12, "grad_norm": 1.247825264930725, "learning_rate": 9.767312624090416e-06, "loss": 0.5898, "step": 1930 }, { "epoch": 0.12, "grad_norm": 1.1585880517959595, "learning_rate": 9.766997345068905e-06, "loss": 0.6082, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.191214680671692, "learning_rate": 9.766681857694238e-06, "loss": 0.5468, "step": 1932 }, { "epoch": 0.12, "grad_norm": 1.2548198699951172, "learning_rate": 9.766366161980205e-06, "loss": 0.5777, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.3551255464553833, "learning_rate": 9.766050257940605e-06, "loss": 0.5769, "step": 1934 }, { "epoch": 0.12, "grad_norm": 1.201601266860962, "learning_rate": 9.765734145589244e-06, "loss": 0.6289, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.2427717447280884, "learning_rate": 9.76541782493994e-06, "loss": 0.5828, "step": 1936 }, { "epoch": 0.13, "grad_norm": 1.4107544422149658, "learning_rate": 9.765101296006516e-06, "loss": 0.6725, "step": 1937 }, { "epoch": 0.13, "grad_norm": 1.3809185028076172, "learning_rate": 9.76478455880281e-06, "loss": 0.5283, "step": 1938 }, { "epoch": 0.13, "grad_norm": 1.4332635402679443, "learning_rate": 9.764467613342665e-06, "loss": 0.6111, "step": 1939 }, { "epoch": 0.13, "grad_norm": 1.2492848634719849, "learning_rate": 9.764150459639932e-06, "loss": 0.5822, "step": 1940 }, { "epoch": 0.13, "grad_norm": 1.152356743812561, "learning_rate": 9.763833097708474e-06, "loss": 0.6329, "step": 1941 }, { "epoch": 0.13, "grad_norm": 1.1547986268997192, "learning_rate": 9.763515527562162e-06, "loss": 0.6062, "step": 1942 }, { "epoch": 0.13, "grad_norm": 1.1530935764312744, "learning_rate": 9.763197749214877e-06, "loss": 0.6121, "step": 1943 }, { "epoch": 0.13, "grad_norm": 1.365071177482605, "learning_rate": 9.762879762680507e-06, "loss": 0.6499, "step": 1944 }, { "epoch": 0.13, "grad_norm": 1.1773236989974976, "learning_rate": 9.76256156797295e-06, "loss": 0.5523, "step": 1945 }, { "epoch": 0.13, "grad_norm": 1.3046311140060425, "learning_rate": 9.762243165106117e-06, "loss": 0.5998, "step": 1946 }, { "epoch": 0.13, "grad_norm": 1.2973753213882446, "learning_rate": 9.76192455409392e-06, "loss": 0.5554, "step": 1947 }, { "epoch": 0.13, "grad_norm": 1.1038380861282349, "learning_rate": 9.761605734950288e-06, "loss": 0.5528, "step": 1948 }, { "epoch": 0.13, "grad_norm": 1.1694159507751465, "learning_rate": 9.761286707689154e-06, "loss": 0.6232, "step": 1949 }, { "epoch": 0.13, "grad_norm": 1.20974600315094, "learning_rate": 9.760967472324462e-06, "loss": 0.6007, "step": 1950 }, { "epoch": 0.13, "grad_norm": 1.2994108200073242, "learning_rate": 9.760648028870165e-06, "loss": 0.6388, "step": 1951 }, { "epoch": 0.13, "grad_norm": 1.3223092555999756, "learning_rate": 9.760328377340225e-06, "loss": 0.6139, "step": 1952 }, { "epoch": 0.13, "grad_norm": 1.192766547203064, "learning_rate": 9.760008517748615e-06, "loss": 0.5607, "step": 1953 }, { "epoch": 0.13, "grad_norm": 1.1761480569839478, "learning_rate": 9.759688450109313e-06, "loss": 0.5448, "step": 1954 }, { "epoch": 0.13, "grad_norm": 1.1308060884475708, "learning_rate": 9.759368174436308e-06, "loss": 0.5758, "step": 1955 }, { "epoch": 0.13, "grad_norm": 1.2122818231582642, "learning_rate": 9.759047690743601e-06, "loss": 0.5797, "step": 1956 }, { "epoch": 0.13, "grad_norm": 1.1595383882522583, "learning_rate": 9.758726999045196e-06, "loss": 0.5959, "step": 1957 }, { "epoch": 0.13, "grad_norm": 1.064066767692566, "learning_rate": 9.758406099355112e-06, "loss": 0.5356, "step": 1958 }, { "epoch": 0.13, "grad_norm": 1.195454716682434, "learning_rate": 9.758084991687376e-06, "loss": 0.5919, "step": 1959 }, { "epoch": 0.13, "grad_norm": 1.148880124092102, "learning_rate": 9.75776367605602e-06, "loss": 0.5778, "step": 1960 }, { "epoch": 0.13, "grad_norm": 1.0648914575576782, "learning_rate": 9.75744215247509e-06, "loss": 0.5821, "step": 1961 }, { "epoch": 0.13, "grad_norm": 1.4260010719299316, "learning_rate": 9.757120420958636e-06, "loss": 0.5788, "step": 1962 }, { "epoch": 0.13, "grad_norm": 1.1921131610870361, "learning_rate": 9.756798481520721e-06, "loss": 0.5733, "step": 1963 }, { "epoch": 0.13, "grad_norm": 1.1922229528427124, "learning_rate": 9.75647633417542e-06, "loss": 0.6132, "step": 1964 }, { "epoch": 0.13, "grad_norm": 1.2829664945602417, "learning_rate": 9.756153978936809e-06, "loss": 0.6437, "step": 1965 }, { "epoch": 0.13, "grad_norm": 1.113026738166809, "learning_rate": 9.75583141581898e-06, "loss": 0.5993, "step": 1966 }, { "epoch": 0.13, "grad_norm": 1.1411738395690918, "learning_rate": 9.755508644836027e-06, "loss": 0.5649, "step": 1967 }, { "epoch": 0.13, "grad_norm": 1.3464363813400269, "learning_rate": 9.755185666002062e-06, "loss": 0.6034, "step": 1968 }, { "epoch": 0.13, "grad_norm": 1.1553435325622559, "learning_rate": 9.754862479331201e-06, "loss": 0.5696, "step": 1969 }, { "epoch": 0.13, "grad_norm": 1.189777135848999, "learning_rate": 9.75453908483757e-06, "loss": 0.5686, "step": 1970 }, { "epoch": 0.13, "grad_norm": 1.1378498077392578, "learning_rate": 9.754215482535298e-06, "loss": 0.536, "step": 1971 }, { "epoch": 0.13, "grad_norm": 1.1271471977233887, "learning_rate": 9.753891672438535e-06, "loss": 0.5699, "step": 1972 }, { "epoch": 0.13, "grad_norm": 1.1585646867752075, "learning_rate": 9.753567654561434e-06, "loss": 0.5601, "step": 1973 }, { "epoch": 0.13, "grad_norm": 1.2544840574264526, "learning_rate": 9.753243428918155e-06, "loss": 0.5579, "step": 1974 }, { "epoch": 0.13, "grad_norm": 1.3290568590164185, "learning_rate": 9.752918995522868e-06, "loss": 0.5819, "step": 1975 }, { "epoch": 0.13, "grad_norm": 1.2787573337554932, "learning_rate": 9.752594354389755e-06, "loss": 0.5798, "step": 1976 }, { "epoch": 0.13, "grad_norm": 1.272091269493103, "learning_rate": 9.752269505533004e-06, "loss": 0.5618, "step": 1977 }, { "epoch": 0.13, "grad_norm": 1.1292022466659546, "learning_rate": 9.751944448966814e-06, "loss": 0.5551, "step": 1978 }, { "epoch": 0.13, "grad_norm": 1.199550986289978, "learning_rate": 9.751619184705394e-06, "loss": 0.59, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.1900676488876343, "learning_rate": 9.751293712762957e-06, "loss": 0.5878, "step": 1980 }, { "epoch": 0.13, "grad_norm": 1.2554727792739868, "learning_rate": 9.75096803315373e-06, "loss": 0.5809, "step": 1981 }, { "epoch": 0.13, "grad_norm": 1.0797898769378662, "learning_rate": 9.750642145891948e-06, "loss": 0.6371, "step": 1982 }, { "epoch": 0.13, "grad_norm": 1.2203460931777954, "learning_rate": 9.750316050991857e-06, "loss": 0.5853, "step": 1983 }, { "epoch": 0.13, "grad_norm": 1.2497738599777222, "learning_rate": 9.749989748467706e-06, "loss": 0.601, "step": 1984 }, { "epoch": 0.13, "grad_norm": 1.2147797346115112, "learning_rate": 9.749663238333758e-06, "loss": 0.5769, "step": 1985 }, { "epoch": 0.13, "grad_norm": 1.1599591970443726, "learning_rate": 9.749336520604283e-06, "loss": 0.6049, "step": 1986 }, { "epoch": 0.13, "grad_norm": 1.2406138181686401, "learning_rate": 9.749009595293563e-06, "loss": 0.5989, "step": 1987 }, { "epoch": 0.13, "grad_norm": 1.3209755420684814, "learning_rate": 9.748682462415887e-06, "loss": 0.6119, "step": 1988 }, { "epoch": 0.13, "grad_norm": 1.3994982242584229, "learning_rate": 9.748355121985551e-06, "loss": 0.5936, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.1552916765213013, "learning_rate": 9.748027574016865e-06, "loss": 0.6199, "step": 1990 }, { "epoch": 0.13, "grad_norm": 1.2872016429901123, "learning_rate": 9.747699818524143e-06, "loss": 0.6534, "step": 1991 }, { "epoch": 0.13, "grad_norm": 1.2162638902664185, "learning_rate": 9.747371855521711e-06, "loss": 0.5598, "step": 1992 }, { "epoch": 0.13, "grad_norm": 1.1957683563232422, "learning_rate": 9.747043685023904e-06, "loss": 0.6557, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.2172402143478394, "learning_rate": 9.746715307045065e-06, "loss": 0.5652, "step": 1994 }, { "epoch": 0.13, "grad_norm": 1.313804030418396, "learning_rate": 9.746386721599549e-06, "loss": 0.5933, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.2846256494522095, "learning_rate": 9.746057928701711e-06, "loss": 0.5623, "step": 1996 }, { "epoch": 0.13, "grad_norm": 1.0977898836135864, "learning_rate": 9.745728928365927e-06, "loss": 0.6258, "step": 1997 }, { "epoch": 0.13, "grad_norm": 1.0920761823654175, "learning_rate": 9.745399720606577e-06, "loss": 0.5069, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.179329752922058, "learning_rate": 9.745070305438048e-06, "loss": 0.5726, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.2191249132156372, "learning_rate": 9.744740682874738e-06, "loss": 0.5436, "step": 2000 }, { "epoch": 0.13, "grad_norm": 1.2866204977035522, "learning_rate": 9.744410852931055e-06, "loss": 0.6069, "step": 2001 }, { "epoch": 0.13, "grad_norm": 1.2020988464355469, "learning_rate": 9.744080815621414e-06, "loss": 0.6208, "step": 2002 }, { "epoch": 0.13, "grad_norm": 1.1342108249664307, "learning_rate": 9.74375057096024e-06, "loss": 0.5923, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.20370352268219, "learning_rate": 9.743420118961968e-06, "loss": 0.5863, "step": 2004 }, { "epoch": 0.13, "grad_norm": 1.1196153163909912, "learning_rate": 9.743089459641041e-06, "loss": 0.5516, "step": 2005 }, { "epoch": 0.13, "grad_norm": 1.193854808807373, "learning_rate": 9.742758593011911e-06, "loss": 0.6488, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.1780997514724731, "learning_rate": 9.74242751908904e-06, "loss": 0.6111, "step": 2007 }, { "epoch": 0.13, "grad_norm": 1.1587319374084473, "learning_rate": 9.742096237886896e-06, "loss": 0.5686, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.3498541116714478, "learning_rate": 9.741764749419961e-06, "loss": 0.6174, "step": 2009 }, { "epoch": 0.13, "grad_norm": 1.1922597885131836, "learning_rate": 9.741433053702724e-06, "loss": 0.617, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.1798304319381714, "learning_rate": 9.74110115074968e-06, "loss": 0.5921, "step": 2011 }, { "epoch": 0.13, "grad_norm": 1.1314383745193481, "learning_rate": 9.740769040575338e-06, "loss": 0.561, "step": 2012 }, { "epoch": 0.13, "grad_norm": 1.142041802406311, "learning_rate": 9.74043672319421e-06, "loss": 0.5866, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.1899900436401367, "learning_rate": 9.740104198620826e-06, "loss": 0.595, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.0696144104003906, "learning_rate": 9.739771466869716e-06, "loss": 0.571, "step": 2015 }, { "epoch": 0.13, "grad_norm": 1.1752692461013794, "learning_rate": 9.739438527955425e-06, "loss": 0.6537, "step": 2016 }, { "epoch": 0.13, "grad_norm": 1.123524785041809, "learning_rate": 9.739105381892502e-06, "loss": 0.5338, "step": 2017 }, { "epoch": 0.13, "grad_norm": 1.1988874673843384, "learning_rate": 9.738772028695512e-06, "loss": 0.6237, "step": 2018 }, { "epoch": 0.13, "grad_norm": 1.1691352128982544, "learning_rate": 9.738438468379022e-06, "loss": 0.6153, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.2462188005447388, "learning_rate": 9.73810470095761e-06, "loss": 0.5881, "step": 2020 }, { "epoch": 0.13, "grad_norm": 1.2658747434616089, "learning_rate": 9.737770726445867e-06, "loss": 0.6282, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.2728310823440552, "learning_rate": 9.73743654485839e-06, "loss": 0.5831, "step": 2022 }, { "epoch": 0.13, "grad_norm": 1.2244644165039062, "learning_rate": 9.737102156209785e-06, "loss": 0.5632, "step": 2023 }, { "epoch": 0.13, "grad_norm": 1.2438349723815918, "learning_rate": 9.736767560514665e-06, "loss": 0.5555, "step": 2024 }, { "epoch": 0.13, "grad_norm": 1.2554168701171875, "learning_rate": 9.736432757787657e-06, "loss": 0.529, "step": 2025 }, { "epoch": 0.13, "grad_norm": 1.1841615438461304, "learning_rate": 9.736097748043393e-06, "loss": 0.6197, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.2643036842346191, "learning_rate": 9.735762531296515e-06, "loss": 0.5847, "step": 2027 }, { "epoch": 0.13, "grad_norm": 1.2292779684066772, "learning_rate": 9.735427107561677e-06, "loss": 0.6421, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.2440773248672485, "learning_rate": 9.735091476853535e-06, "loss": 0.5997, "step": 2029 }, { "epoch": 0.13, "grad_norm": 1.0587130784988403, "learning_rate": 9.734755639186763e-06, "loss": 0.5691, "step": 2030 }, { "epoch": 0.13, "grad_norm": 1.244220495223999, "learning_rate": 9.73441959457604e-06, "loss": 0.6006, "step": 2031 }, { "epoch": 0.13, "grad_norm": 1.3211719989776611, "learning_rate": 9.734083343036047e-06, "loss": 0.5809, "step": 2032 }, { "epoch": 0.13, "grad_norm": 1.207969069480896, "learning_rate": 9.733746884581488e-06, "loss": 0.5906, "step": 2033 }, { "epoch": 0.13, "grad_norm": 1.2793160676956177, "learning_rate": 9.733410219227065e-06, "loss": 0.5993, "step": 2034 }, { "epoch": 0.13, "grad_norm": 1.2942639589309692, "learning_rate": 9.733073346987494e-06, "loss": 0.6184, "step": 2035 }, { "epoch": 0.13, "grad_norm": 1.1735544204711914, "learning_rate": 9.732736267877498e-06, "loss": 0.5982, "step": 2036 }, { "epoch": 0.13, "grad_norm": 1.2089018821716309, "learning_rate": 9.73239898191181e-06, "loss": 0.536, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.2405835390090942, "learning_rate": 9.732061489105173e-06, "loss": 0.5815, "step": 2038 }, { "epoch": 0.13, "grad_norm": 1.204485535621643, "learning_rate": 9.731723789472339e-06, "loss": 0.6019, "step": 2039 }, { "epoch": 0.13, "grad_norm": 1.2071596384048462, "learning_rate": 9.731385883028063e-06, "loss": 0.6132, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.1869359016418457, "learning_rate": 9.73104776978712e-06, "loss": 0.5681, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.1906927824020386, "learning_rate": 9.730709449764281e-06, "loss": 0.5888, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.1529316902160645, "learning_rate": 9.730370922974342e-06, "loss": 0.5799, "step": 2043 }, { "epoch": 0.13, "grad_norm": 1.23030686378479, "learning_rate": 9.730032189432092e-06, "loss": 0.6176, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.2065575122833252, "learning_rate": 9.72969324915234e-06, "loss": 0.5942, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.2848626375198364, "learning_rate": 9.729354102149898e-06, "loss": 0.5751, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.1230099201202393, "learning_rate": 9.729014748439589e-06, "loss": 0.5709, "step": 2047 }, { "epoch": 0.13, "grad_norm": 1.0825053453445435, "learning_rate": 9.728675188036248e-06, "loss": 0.5856, "step": 2048 }, { "epoch": 0.13, "grad_norm": 1.1398372650146484, "learning_rate": 9.728335420954714e-06, "loss": 0.5708, "step": 2049 }, { "epoch": 0.13, "grad_norm": 1.1103241443634033, "learning_rate": 9.727995447209839e-06, "loss": 0.5954, "step": 2050 }, { "epoch": 0.13, "grad_norm": 1.2232799530029297, "learning_rate": 9.72765526681648e-06, "loss": 0.6058, "step": 2051 }, { "epoch": 0.13, "grad_norm": 1.152854323387146, "learning_rate": 9.727314879789508e-06, "loss": 0.6099, "step": 2052 }, { "epoch": 0.13, "grad_norm": 1.4232659339904785, "learning_rate": 9.726974286143799e-06, "loss": 0.5757, "step": 2053 }, { "epoch": 0.13, "grad_norm": 1.2357492446899414, "learning_rate": 9.72663348589424e-06, "loss": 0.53, "step": 2054 }, { "epoch": 0.13, "grad_norm": 1.4366317987442017, "learning_rate": 9.726292479055724e-06, "loss": 0.6454, "step": 2055 }, { "epoch": 0.13, "grad_norm": 1.3121484518051147, "learning_rate": 9.72595126564316e-06, "loss": 0.6017, "step": 2056 }, { "epoch": 0.13, "grad_norm": 1.0917526483535767, "learning_rate": 9.725609845671459e-06, "loss": 0.562, "step": 2057 }, { "epoch": 0.13, "grad_norm": 1.2124441862106323, "learning_rate": 9.725268219155544e-06, "loss": 0.5827, "step": 2058 }, { "epoch": 0.13, "grad_norm": 1.2750352621078491, "learning_rate": 9.724926386110345e-06, "loss": 0.5684, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.0652910470962524, "learning_rate": 9.724584346550804e-06, "loss": 0.5673, "step": 2060 }, { "epoch": 0.13, "grad_norm": 1.169337511062622, "learning_rate": 9.724242100491873e-06, "loss": 0.6277, "step": 2061 }, { "epoch": 0.13, "grad_norm": 1.2647048234939575, "learning_rate": 9.723899647948508e-06, "loss": 0.5858, "step": 2062 }, { "epoch": 0.13, "grad_norm": 1.2766200304031372, "learning_rate": 9.723556988935676e-06, "loss": 0.5548, "step": 2063 }, { "epoch": 0.13, "grad_norm": 1.3076955080032349, "learning_rate": 9.723214123468356e-06, "loss": 0.585, "step": 2064 }, { "epoch": 0.13, "grad_norm": 1.1276370286941528, "learning_rate": 9.722871051561532e-06, "loss": 0.5582, "step": 2065 }, { "epoch": 0.13, "grad_norm": 1.2164969444274902, "learning_rate": 9.722527773230198e-06, "loss": 0.556, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.2816568613052368, "learning_rate": 9.722184288489362e-06, "loss": 0.5533, "step": 2067 }, { "epoch": 0.13, "grad_norm": 1.2416677474975586, "learning_rate": 9.721840597354035e-06, "loss": 0.6388, "step": 2068 }, { "epoch": 0.13, "grad_norm": 1.1497312784194946, "learning_rate": 9.721496699839235e-06, "loss": 0.6087, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.074936032295227, "learning_rate": 9.721152595959998e-06, "loss": 0.5538, "step": 2070 }, { "epoch": 0.13, "grad_norm": 1.2752249240875244, "learning_rate": 9.720808285731361e-06, "loss": 0.6355, "step": 2071 }, { "epoch": 0.13, "grad_norm": 1.3994945287704468, "learning_rate": 9.720463769168373e-06, "loss": 0.6044, "step": 2072 }, { "epoch": 0.13, "grad_norm": 1.165314793586731, "learning_rate": 9.720119046286094e-06, "loss": 0.6333, "step": 2073 }, { "epoch": 0.13, "grad_norm": 1.2002086639404297, "learning_rate": 9.719774117099588e-06, "loss": 0.6153, "step": 2074 }, { "epoch": 0.13, "grad_norm": 1.2226753234863281, "learning_rate": 9.719428981623933e-06, "loss": 0.6101, "step": 2075 }, { "epoch": 0.13, "grad_norm": 1.1796828508377075, "learning_rate": 9.719083639874215e-06, "loss": 0.5961, "step": 2076 }, { "epoch": 0.13, "grad_norm": 1.1390496492385864, "learning_rate": 9.718738091865525e-06, "loss": 0.5737, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.1025030612945557, "learning_rate": 9.718392337612967e-06, "loss": 0.5914, "step": 2078 }, { "epoch": 0.13, "grad_norm": 1.112886667251587, "learning_rate": 9.718046377131655e-06, "loss": 0.573, "step": 2079 }, { "epoch": 0.13, "grad_norm": 1.2831130027770996, "learning_rate": 9.717700210436708e-06, "loss": 0.5622, "step": 2080 }, { "epoch": 0.13, "grad_norm": 1.092162847518921, "learning_rate": 9.717353837543257e-06, "loss": 0.5433, "step": 2081 }, { "epoch": 0.13, "grad_norm": 1.17490816116333, "learning_rate": 9.71700725846644e-06, "loss": 0.5884, "step": 2082 }, { "epoch": 0.13, "grad_norm": 1.2060863971710205, "learning_rate": 9.716660473221406e-06, "loss": 0.5351, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.1564937829971313, "learning_rate": 9.716313481823312e-06, "loss": 0.583, "step": 2084 }, { "epoch": 0.13, "grad_norm": 1.1747978925704956, "learning_rate": 9.715966284287324e-06, "loss": 0.5342, "step": 2085 }, { "epoch": 0.13, "grad_norm": 1.1724556684494019, "learning_rate": 9.715618880628617e-06, "loss": 0.5783, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.3507583141326904, "learning_rate": 9.715271270862374e-06, "loss": 0.5875, "step": 2087 }, { "epoch": 0.13, "grad_norm": 1.2433457374572754, "learning_rate": 9.71492345500379e-06, "loss": 0.6083, "step": 2088 }, { "epoch": 0.13, "grad_norm": 1.1535879373550415, "learning_rate": 9.714575433068068e-06, "loss": 0.5656, "step": 2089 }, { "epoch": 0.13, "grad_norm": 1.2157607078552246, "learning_rate": 9.714227205070417e-06, "loss": 0.5464, "step": 2090 }, { "epoch": 0.13, "grad_norm": 1.1673665046691895, "learning_rate": 9.713878771026057e-06, "loss": 0.5708, "step": 2091 }, { "epoch": 0.14, "grad_norm": 1.0877575874328613, "learning_rate": 9.713530130950218e-06, "loss": 0.5724, "step": 2092 }, { "epoch": 0.14, "grad_norm": 1.2449144124984741, "learning_rate": 9.713181284858137e-06, "loss": 0.6163, "step": 2093 }, { "epoch": 0.14, "grad_norm": 1.2327903509140015, "learning_rate": 9.712832232765064e-06, "loss": 0.5667, "step": 2094 }, { "epoch": 0.14, "grad_norm": 1.091889500617981, "learning_rate": 9.712482974686252e-06, "loss": 0.5468, "step": 2095 }, { "epoch": 0.14, "grad_norm": 1.2650656700134277, "learning_rate": 9.71213351063697e-06, "loss": 0.6099, "step": 2096 }, { "epoch": 0.14, "grad_norm": 1.174810767173767, "learning_rate": 9.711783840632488e-06, "loss": 0.6118, "step": 2097 }, { "epoch": 0.14, "grad_norm": 1.1754891872406006, "learning_rate": 9.71143396468809e-06, "loss": 0.6227, "step": 2098 }, { "epoch": 0.14, "grad_norm": 1.1657497882843018, "learning_rate": 9.71108388281907e-06, "loss": 0.6099, "step": 2099 }, { "epoch": 0.14, "grad_norm": 1.2339386940002441, "learning_rate": 9.710733595040727e-06, "loss": 0.6491, "step": 2100 }, { "epoch": 0.14, "grad_norm": 1.3545541763305664, "learning_rate": 9.710383101368374e-06, "loss": 0.5932, "step": 2101 }, { "epoch": 0.14, "grad_norm": 1.1897122859954834, "learning_rate": 9.710032401817329e-06, "loss": 0.5849, "step": 2102 }, { "epoch": 0.14, "grad_norm": 1.1122865676879883, "learning_rate": 9.709681496402918e-06, "loss": 0.5606, "step": 2103 }, { "epoch": 0.14, "grad_norm": 1.278057336807251, "learning_rate": 9.709330385140481e-06, "loss": 0.6312, "step": 2104 }, { "epoch": 0.14, "grad_norm": 1.1670362949371338, "learning_rate": 9.708979068045362e-06, "loss": 0.5737, "step": 2105 }, { "epoch": 0.14, "grad_norm": 1.2482125759124756, "learning_rate": 9.70862754513292e-06, "loss": 0.6134, "step": 2106 }, { "epoch": 0.14, "grad_norm": 1.1683716773986816, "learning_rate": 9.708275816418515e-06, "loss": 0.5973, "step": 2107 }, { "epoch": 0.14, "grad_norm": 1.1827702522277832, "learning_rate": 9.70792388191752e-06, "loss": 0.602, "step": 2108 }, { "epoch": 0.14, "grad_norm": 1.223944067955017, "learning_rate": 9.70757174164532e-06, "loss": 0.5968, "step": 2109 }, { "epoch": 0.14, "grad_norm": 1.2361655235290527, "learning_rate": 9.707219395617305e-06, "loss": 0.5619, "step": 2110 }, { "epoch": 0.14, "grad_norm": 1.4503616094589233, "learning_rate": 9.706866843848875e-06, "loss": 0.6605, "step": 2111 }, { "epoch": 0.14, "grad_norm": 1.2156580686569214, "learning_rate": 9.706514086355439e-06, "loss": 0.5864, "step": 2112 }, { "epoch": 0.14, "grad_norm": 1.2232630252838135, "learning_rate": 9.706161123152416e-06, "loss": 0.5644, "step": 2113 }, { "epoch": 0.14, "grad_norm": 1.1901229619979858, "learning_rate": 9.705807954255232e-06, "loss": 0.6521, "step": 2114 }, { "epoch": 0.14, "grad_norm": 1.215620756149292, "learning_rate": 9.705454579679321e-06, "loss": 0.54, "step": 2115 }, { "epoch": 0.14, "grad_norm": 1.311279535293579, "learning_rate": 9.705100999440134e-06, "loss": 0.625, "step": 2116 }, { "epoch": 0.14, "grad_norm": 1.1694912910461426, "learning_rate": 9.70474721355312e-06, "loss": 0.5655, "step": 2117 }, { "epoch": 0.14, "grad_norm": 1.271690845489502, "learning_rate": 9.704393222033743e-06, "loss": 0.5857, "step": 2118 }, { "epoch": 0.14, "grad_norm": 1.2445465326309204, "learning_rate": 9.704039024897479e-06, "loss": 0.5707, "step": 2119 }, { "epoch": 0.14, "grad_norm": 1.1620159149169922, "learning_rate": 9.7036846221598e-06, "loss": 0.5677, "step": 2120 }, { "epoch": 0.14, "grad_norm": 1.316856026649475, "learning_rate": 9.703330013836208e-06, "loss": 0.612, "step": 2121 }, { "epoch": 0.14, "grad_norm": 1.2427668571472168, "learning_rate": 9.702975199942193e-06, "loss": 0.6107, "step": 2122 }, { "epoch": 0.14, "grad_norm": 1.179787516593933, "learning_rate": 9.702620180493265e-06, "loss": 0.6408, "step": 2123 }, { "epoch": 0.14, "grad_norm": 1.1718987226486206, "learning_rate": 9.702264955504944e-06, "loss": 0.5942, "step": 2124 }, { "epoch": 0.14, "grad_norm": 1.1083205938339233, "learning_rate": 9.701909524992753e-06, "loss": 0.5813, "step": 2125 }, { "epoch": 0.14, "grad_norm": 1.1077829599380493, "learning_rate": 9.701553888972225e-06, "loss": 0.5518, "step": 2126 }, { "epoch": 0.14, "grad_norm": 1.2746981382369995, "learning_rate": 9.701198047458911e-06, "loss": 0.6267, "step": 2127 }, { "epoch": 0.14, "grad_norm": 1.1209900379180908, "learning_rate": 9.700842000468359e-06, "loss": 0.5898, "step": 2128 }, { "epoch": 0.14, "grad_norm": 1.119003176689148, "learning_rate": 9.70048574801613e-06, "loss": 0.5626, "step": 2129 }, { "epoch": 0.14, "grad_norm": 1.1010662317276, "learning_rate": 9.700129290117795e-06, "loss": 0.5753, "step": 2130 }, { "epoch": 0.14, "grad_norm": 1.2070597410202026, "learning_rate": 9.699772626788936e-06, "loss": 0.5152, "step": 2131 }, { "epoch": 0.14, "grad_norm": 1.128394365310669, "learning_rate": 9.699415758045143e-06, "loss": 0.5546, "step": 2132 }, { "epoch": 0.14, "grad_norm": 1.1831237077713013, "learning_rate": 9.699058683902011e-06, "loss": 0.5883, "step": 2133 }, { "epoch": 0.14, "grad_norm": 1.1919171810150146, "learning_rate": 9.698701404375147e-06, "loss": 0.6089, "step": 2134 }, { "epoch": 0.14, "grad_norm": 1.2737705707550049, "learning_rate": 9.698343919480167e-06, "loss": 0.5592, "step": 2135 }, { "epoch": 0.14, "grad_norm": 1.347837209701538, "learning_rate": 9.697986229232697e-06, "loss": 0.5966, "step": 2136 }, { "epoch": 0.14, "grad_norm": 1.109228253364563, "learning_rate": 9.69762833364837e-06, "loss": 0.5419, "step": 2137 }, { "epoch": 0.14, "grad_norm": 1.1786959171295166, "learning_rate": 9.69727023274283e-06, "loss": 0.6033, "step": 2138 }, { "epoch": 0.14, "grad_norm": 1.1419713497161865, "learning_rate": 9.696911926531725e-06, "loss": 0.5667, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.3804514408111572, "learning_rate": 9.696553415030719e-06, "loss": 0.6008, "step": 2140 }, { "epoch": 0.14, "grad_norm": 1.1668514013290405, "learning_rate": 9.696194698255478e-06, "loss": 0.5728, "step": 2141 }, { "epoch": 0.14, "grad_norm": 1.2694405317306519, "learning_rate": 9.695835776221688e-06, "loss": 0.5596, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.1472735404968262, "learning_rate": 9.695476648945028e-06, "loss": 0.5662, "step": 2143 }, { "epoch": 0.14, "grad_norm": 1.3438668251037598, "learning_rate": 9.695117316441199e-06, "loss": 0.6339, "step": 2144 }, { "epoch": 0.14, "grad_norm": 1.2501195669174194, "learning_rate": 9.694757778725905e-06, "loss": 0.5886, "step": 2145 }, { "epoch": 0.14, "grad_norm": 1.1210740804672241, "learning_rate": 9.694398035814861e-06, "loss": 0.5466, "step": 2146 }, { "epoch": 0.14, "grad_norm": 1.2090566158294678, "learning_rate": 9.694038087723792e-06, "loss": 0.5905, "step": 2147 }, { "epoch": 0.14, "grad_norm": 1.1767598390579224, "learning_rate": 9.693677934468429e-06, "loss": 0.6053, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.3352558612823486, "learning_rate": 9.693317576064511e-06, "loss": 0.6025, "step": 2149 }, { "epoch": 0.14, "grad_norm": 1.3900476694107056, "learning_rate": 9.692957012527793e-06, "loss": 0.6535, "step": 2150 }, { "epoch": 0.14, "grad_norm": 1.2568825483322144, "learning_rate": 9.692596243874031e-06, "loss": 0.5403, "step": 2151 }, { "epoch": 0.14, "grad_norm": 1.07401704788208, "learning_rate": 9.692235270118994e-06, "loss": 0.5653, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.1871598958969116, "learning_rate": 9.691874091278461e-06, "loss": 0.6346, "step": 2153 }, { "epoch": 0.14, "grad_norm": 1.175493836402893, "learning_rate": 9.691512707368215e-06, "loss": 0.5599, "step": 2154 }, { "epoch": 0.14, "grad_norm": 1.1201562881469727, "learning_rate": 9.691151118404055e-06, "loss": 0.5488, "step": 2155 }, { "epoch": 0.14, "grad_norm": 1.1859079599380493, "learning_rate": 9.690789324401781e-06, "loss": 0.6651, "step": 2156 }, { "epoch": 0.14, "grad_norm": 1.274428367614746, "learning_rate": 9.690427325377207e-06, "loss": 0.6098, "step": 2157 }, { "epoch": 0.14, "grad_norm": 1.1705232858657837, "learning_rate": 9.690065121346158e-06, "loss": 0.5738, "step": 2158 }, { "epoch": 0.14, "grad_norm": 1.387885570526123, "learning_rate": 9.689702712324463e-06, "loss": 0.5654, "step": 2159 }, { "epoch": 0.14, "grad_norm": 1.1325868368148804, "learning_rate": 9.689340098327962e-06, "loss": 0.6017, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.1068469285964966, "learning_rate": 9.688977279372503e-06, "loss": 0.5551, "step": 2161 }, { "epoch": 0.14, "grad_norm": 1.1701345443725586, "learning_rate": 9.688614255473945e-06, "loss": 0.6032, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.1879956722259521, "learning_rate": 9.688251026648156e-06, "loss": 0.6059, "step": 2163 }, { "epoch": 0.14, "grad_norm": 1.2474135160446167, "learning_rate": 9.68788759291101e-06, "loss": 0.5451, "step": 2164 }, { "epoch": 0.14, "grad_norm": 1.1238291263580322, "learning_rate": 9.68752395427839e-06, "loss": 0.5341, "step": 2165 }, { "epoch": 0.14, "grad_norm": 1.3546820878982544, "learning_rate": 9.687160110766195e-06, "loss": 0.5538, "step": 2166 }, { "epoch": 0.14, "grad_norm": 1.1101995706558228, "learning_rate": 9.686796062390323e-06, "loss": 0.591, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.2363179922103882, "learning_rate": 9.686431809166687e-06, "loss": 0.5751, "step": 2168 }, { "epoch": 0.14, "grad_norm": 1.3079355955123901, "learning_rate": 9.68606735111121e-06, "loss": 0.5895, "step": 2169 }, { "epoch": 0.14, "grad_norm": 1.186081886291504, "learning_rate": 9.685702688239816e-06, "loss": 0.5751, "step": 2170 }, { "epoch": 0.14, "grad_norm": 1.3072503805160522, "learning_rate": 9.68533782056845e-06, "loss": 0.5607, "step": 2171 }, { "epoch": 0.14, "grad_norm": 1.1418944597244263, "learning_rate": 9.684972748113053e-06, "loss": 0.5711, "step": 2172 }, { "epoch": 0.14, "grad_norm": 1.2492132186889648, "learning_rate": 9.684607470889586e-06, "loss": 0.5841, "step": 2173 }, { "epoch": 0.14, "grad_norm": 1.2312240600585938, "learning_rate": 9.684241988914014e-06, "loss": 0.5639, "step": 2174 }, { "epoch": 0.14, "grad_norm": 1.2081174850463867, "learning_rate": 9.683876302202309e-06, "loss": 0.5348, "step": 2175 }, { "epoch": 0.14, "grad_norm": 1.163665533065796, "learning_rate": 9.683510410770456e-06, "loss": 0.5473, "step": 2176 }, { "epoch": 0.14, "grad_norm": 1.3641669750213623, "learning_rate": 9.683144314634445e-06, "loss": 0.6139, "step": 2177 }, { "epoch": 0.14, "grad_norm": 1.4038560390472412, "learning_rate": 9.68277801381028e-06, "loss": 0.5872, "step": 2178 }, { "epoch": 0.14, "grad_norm": 1.2168116569519043, "learning_rate": 9.682411508313971e-06, "loss": 0.5977, "step": 2179 }, { "epoch": 0.14, "grad_norm": 1.2337149381637573, "learning_rate": 9.682044798161533e-06, "loss": 0.6326, "step": 2180 }, { "epoch": 0.14, "grad_norm": 1.1140501499176025, "learning_rate": 9.681677883368998e-06, "loss": 0.554, "step": 2181 }, { "epoch": 0.14, "grad_norm": 1.2084343433380127, "learning_rate": 9.681310763952402e-06, "loss": 0.626, "step": 2182 }, { "epoch": 0.14, "grad_norm": 1.232305645942688, "learning_rate": 9.68094343992779e-06, "loss": 0.6609, "step": 2183 }, { "epoch": 0.14, "grad_norm": 1.096994400024414, "learning_rate": 9.680575911311218e-06, "loss": 0.5836, "step": 2184 }, { "epoch": 0.14, "grad_norm": 1.1455693244934082, "learning_rate": 9.680208178118748e-06, "loss": 0.5884, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.1986902952194214, "learning_rate": 9.679840240366454e-06, "loss": 0.6182, "step": 2186 }, { "epoch": 0.14, "grad_norm": 1.4357197284698486, "learning_rate": 9.679472098070418e-06, "loss": 0.5712, "step": 2187 }, { "epoch": 0.14, "grad_norm": 1.306462049484253, "learning_rate": 9.679103751246728e-06, "loss": 0.5986, "step": 2188 }, { "epoch": 0.14, "grad_norm": 1.2238514423370361, "learning_rate": 9.678735199911487e-06, "loss": 0.5991, "step": 2189 }, { "epoch": 0.14, "grad_norm": 1.2337170839309692, "learning_rate": 9.6783664440808e-06, "loss": 0.577, "step": 2190 }, { "epoch": 0.14, "grad_norm": 1.0751526355743408, "learning_rate": 9.677997483770787e-06, "loss": 0.5378, "step": 2191 }, { "epoch": 0.14, "grad_norm": 1.2564570903778076, "learning_rate": 9.677628318997573e-06, "loss": 0.576, "step": 2192 }, { "epoch": 0.14, "grad_norm": 1.130933165550232, "learning_rate": 9.677258949777295e-06, "loss": 0.5704, "step": 2193 }, { "epoch": 0.14, "grad_norm": 1.1988259553909302, "learning_rate": 9.676889376126095e-06, "loss": 0.5842, "step": 2194 }, { "epoch": 0.14, "grad_norm": 1.2559714317321777, "learning_rate": 9.676519598060126e-06, "loss": 0.5732, "step": 2195 }, { "epoch": 0.14, "grad_norm": 1.1779346466064453, "learning_rate": 9.676149615595553e-06, "loss": 0.5922, "step": 2196 }, { "epoch": 0.14, "grad_norm": 1.2648823261260986, "learning_rate": 9.675779428748543e-06, "loss": 0.5782, "step": 2197 }, { "epoch": 0.14, "grad_norm": 1.2942867279052734, "learning_rate": 9.675409037535278e-06, "loss": 0.5217, "step": 2198 }, { "epoch": 0.14, "grad_norm": 1.1636743545532227, "learning_rate": 9.675038441971948e-06, "loss": 0.5746, "step": 2199 }, { "epoch": 0.14, "grad_norm": 1.4208171367645264, "learning_rate": 9.674667642074749e-06, "loss": 0.5883, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.2245789766311646, "learning_rate": 9.674296637859886e-06, "loss": 0.5468, "step": 2201 }, { "epoch": 0.14, "grad_norm": 1.2102820873260498, "learning_rate": 9.67392542934358e-06, "loss": 0.5978, "step": 2202 }, { "epoch": 0.14, "grad_norm": 1.2647851705551147, "learning_rate": 9.673554016542051e-06, "loss": 0.6154, "step": 2203 }, { "epoch": 0.14, "grad_norm": 1.196533203125, "learning_rate": 9.673182399471534e-06, "loss": 0.5677, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.1122523546218872, "learning_rate": 9.672810578148271e-06, "loss": 0.5714, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.166050910949707, "learning_rate": 9.672438552588513e-06, "loss": 0.5815, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.2241495847702026, "learning_rate": 9.672066322808522e-06, "loss": 0.6136, "step": 2207 }, { "epoch": 0.14, "grad_norm": 1.2251735925674438, "learning_rate": 9.671693888824565e-06, "loss": 0.5525, "step": 2208 }, { "epoch": 0.14, "grad_norm": 1.1838442087173462, "learning_rate": 9.671321250652923e-06, "loss": 0.5452, "step": 2209 }, { "epoch": 0.14, "grad_norm": 1.1296935081481934, "learning_rate": 9.67094840830988e-06, "loss": 0.5895, "step": 2210 }, { "epoch": 0.14, "grad_norm": 1.2288850545883179, "learning_rate": 9.670575361811733e-06, "loss": 0.5473, "step": 2211 }, { "epoch": 0.14, "grad_norm": 1.1106959581375122, "learning_rate": 9.670202111174789e-06, "loss": 0.5918, "step": 2212 }, { "epoch": 0.14, "grad_norm": 1.3567800521850586, "learning_rate": 9.669828656415359e-06, "loss": 0.6384, "step": 2213 }, { "epoch": 0.14, "grad_norm": 1.1584184169769287, "learning_rate": 9.669454997549766e-06, "loss": 0.5468, "step": 2214 }, { "epoch": 0.14, "grad_norm": 1.1624596118927002, "learning_rate": 9.669081134594342e-06, "loss": 0.633, "step": 2215 }, { "epoch": 0.14, "grad_norm": 1.156111717224121, "learning_rate": 9.66870706756543e-06, "loss": 0.5335, "step": 2216 }, { "epoch": 0.14, "grad_norm": 1.1214386224746704, "learning_rate": 9.668332796479376e-06, "loss": 0.5611, "step": 2217 }, { "epoch": 0.14, "grad_norm": 1.2296617031097412, "learning_rate": 9.667958321352541e-06, "loss": 0.6319, "step": 2218 }, { "epoch": 0.14, "grad_norm": 1.215032696723938, "learning_rate": 9.66758364220129e-06, "loss": 0.5805, "step": 2219 }, { "epoch": 0.14, "grad_norm": 1.1912299394607544, "learning_rate": 9.667208759042002e-06, "loss": 0.569, "step": 2220 }, { "epoch": 0.14, "grad_norm": 1.1137878894805908, "learning_rate": 9.66683367189106e-06, "loss": 0.5528, "step": 2221 }, { "epoch": 0.14, "grad_norm": 1.1516443490982056, "learning_rate": 9.666458380764859e-06, "loss": 0.5714, "step": 2222 }, { "epoch": 0.14, "grad_norm": 1.1689178943634033, "learning_rate": 9.6660828856798e-06, "loss": 0.5791, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.1673811674118042, "learning_rate": 9.6657071866523e-06, "loss": 0.5071, "step": 2224 }, { "epoch": 0.14, "grad_norm": 1.1908152103424072, "learning_rate": 9.665331283698773e-06, "loss": 0.6362, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.360812783241272, "learning_rate": 9.664955176835655e-06, "loss": 0.5592, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.2002570629119873, "learning_rate": 9.664578866079381e-06, "loss": 0.5989, "step": 2227 }, { "epoch": 0.14, "grad_norm": 1.3235416412353516, "learning_rate": 9.6642023514464e-06, "loss": 0.6365, "step": 2228 }, { "epoch": 0.14, "grad_norm": 1.1452131271362305, "learning_rate": 9.663825632953168e-06, "loss": 0.5968, "step": 2229 }, { "epoch": 0.14, "grad_norm": 1.2377922534942627, "learning_rate": 9.663448710616149e-06, "loss": 0.5864, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.0504796504974365, "learning_rate": 9.66307158445182e-06, "loss": 0.5477, "step": 2231 }, { "epoch": 0.14, "grad_norm": 1.1861096620559692, "learning_rate": 9.662694254476661e-06, "loss": 0.6131, "step": 2232 }, { "epoch": 0.14, "grad_norm": 1.1508762836456299, "learning_rate": 9.66231672070717e-06, "loss": 0.5759, "step": 2233 }, { "epoch": 0.14, "grad_norm": 1.2405844926834106, "learning_rate": 9.661938983159841e-06, "loss": 0.5775, "step": 2234 }, { "epoch": 0.14, "grad_norm": 1.2654834985733032, "learning_rate": 9.661561041851187e-06, "loss": 0.5936, "step": 2235 }, { "epoch": 0.14, "grad_norm": 1.0740017890930176, "learning_rate": 9.661182896797728e-06, "loss": 0.5553, "step": 2236 }, { "epoch": 0.14, "grad_norm": 1.084961175918579, "learning_rate": 9.66080454801599e-06, "loss": 0.5849, "step": 2237 }, { "epoch": 0.14, "grad_norm": 1.250914216041565, "learning_rate": 9.66042599552251e-06, "loss": 0.5934, "step": 2238 }, { "epoch": 0.14, "grad_norm": 1.2627133131027222, "learning_rate": 9.660047239333836e-06, "loss": 0.5863, "step": 2239 }, { "epoch": 0.14, "grad_norm": 1.1759318113327026, "learning_rate": 9.659668279466518e-06, "loss": 0.5556, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.226062297821045, "learning_rate": 9.659289115937122e-06, "loss": 0.5878, "step": 2241 }, { "epoch": 0.14, "grad_norm": 1.2720131874084473, "learning_rate": 9.658909748762219e-06, "loss": 0.5635, "step": 2242 }, { "epoch": 0.14, "grad_norm": 1.2249107360839844, "learning_rate": 9.658530177958393e-06, "loss": 0.6195, "step": 2243 }, { "epoch": 0.14, "grad_norm": 1.1146769523620605, "learning_rate": 9.65815040354223e-06, "loss": 0.5715, "step": 2244 }, { "epoch": 0.14, "grad_norm": 1.2355473041534424, "learning_rate": 9.657770425530332e-06, "loss": 0.5814, "step": 2245 }, { "epoch": 0.14, "grad_norm": 1.2329134941101074, "learning_rate": 9.657390243939307e-06, "loss": 0.5946, "step": 2246 }, { "epoch": 0.15, "grad_norm": 1.0972994565963745, "learning_rate": 9.65700985878577e-06, "loss": 0.5254, "step": 2247 }, { "epoch": 0.15, "grad_norm": 1.1316407918930054, "learning_rate": 9.656629270086346e-06, "loss": 0.6062, "step": 2248 }, { "epoch": 0.15, "grad_norm": 1.3041565418243408, "learning_rate": 9.656248477857673e-06, "loss": 0.6087, "step": 2249 }, { "epoch": 0.15, "grad_norm": 1.1561964750289917, "learning_rate": 9.655867482116391e-06, "loss": 0.5418, "step": 2250 }, { "epoch": 0.15, "grad_norm": 1.2940165996551514, "learning_rate": 9.655486282879154e-06, "loss": 0.6074, "step": 2251 }, { "epoch": 0.15, "grad_norm": 1.2002413272857666, "learning_rate": 9.655104880162623e-06, "loss": 0.6045, "step": 2252 }, { "epoch": 0.15, "grad_norm": 1.1031769514083862, "learning_rate": 9.654723273983468e-06, "loss": 0.5698, "step": 2253 }, { "epoch": 0.15, "grad_norm": 1.1894335746765137, "learning_rate": 9.654341464358368e-06, "loss": 0.5292, "step": 2254 }, { "epoch": 0.15, "grad_norm": 1.2523802518844604, "learning_rate": 9.65395945130401e-06, "loss": 0.5676, "step": 2255 }, { "epoch": 0.15, "grad_norm": 1.234782099723816, "learning_rate": 9.653577234837093e-06, "loss": 0.6017, "step": 2256 }, { "epoch": 0.15, "grad_norm": 1.206821084022522, "learning_rate": 9.65319481497432e-06, "loss": 0.6105, "step": 2257 }, { "epoch": 0.15, "grad_norm": 1.2092132568359375, "learning_rate": 9.652812191732409e-06, "loss": 0.5769, "step": 2258 }, { "epoch": 0.15, "grad_norm": 1.1007497310638428, "learning_rate": 9.65242936512808e-06, "loss": 0.542, "step": 2259 }, { "epoch": 0.15, "grad_norm": 1.2156715393066406, "learning_rate": 9.652046335178068e-06, "loss": 0.6004, "step": 2260 }, { "epoch": 0.15, "grad_norm": 1.0622273683547974, "learning_rate": 9.651663101899112e-06, "loss": 0.5647, "step": 2261 }, { "epoch": 0.15, "grad_norm": 1.3196736574172974, "learning_rate": 9.651279665307964e-06, "loss": 0.5576, "step": 2262 }, { "epoch": 0.15, "grad_norm": 1.4540339708328247, "learning_rate": 9.650896025421382e-06, "loss": 0.6636, "step": 2263 }, { "epoch": 0.15, "grad_norm": 1.2159194946289062, "learning_rate": 9.650512182256135e-06, "loss": 0.563, "step": 2264 }, { "epoch": 0.15, "grad_norm": 1.3409762382507324, "learning_rate": 9.650128135828998e-06, "loss": 0.5734, "step": 2265 }, { "epoch": 0.15, "grad_norm": 1.1743658781051636, "learning_rate": 9.649743886156756e-06, "loss": 0.5153, "step": 2266 }, { "epoch": 0.15, "grad_norm": 1.223360300064087, "learning_rate": 9.64935943325621e-06, "loss": 0.6058, "step": 2267 }, { "epoch": 0.15, "grad_norm": 1.185068130493164, "learning_rate": 9.648974777144156e-06, "loss": 0.5801, "step": 2268 }, { "epoch": 0.15, "grad_norm": 1.1315251588821411, "learning_rate": 9.648589917837408e-06, "loss": 0.5871, "step": 2269 }, { "epoch": 0.15, "grad_norm": 1.3091223239898682, "learning_rate": 9.648204855352789e-06, "loss": 0.6188, "step": 2270 }, { "epoch": 0.15, "grad_norm": 1.2279126644134521, "learning_rate": 9.647819589707128e-06, "loss": 0.5941, "step": 2271 }, { "epoch": 0.15, "grad_norm": 1.2125509977340698, "learning_rate": 9.647434120917265e-06, "loss": 0.5668, "step": 2272 }, { "epoch": 0.15, "grad_norm": 1.220141053199768, "learning_rate": 9.647048449000047e-06, "loss": 0.5979, "step": 2273 }, { "epoch": 0.15, "grad_norm": 1.3222029209136963, "learning_rate": 9.64666257397233e-06, "loss": 0.6039, "step": 2274 }, { "epoch": 0.15, "grad_norm": 1.2216689586639404, "learning_rate": 9.646276495850982e-06, "loss": 0.5686, "step": 2275 }, { "epoch": 0.15, "grad_norm": 1.1674383878707886, "learning_rate": 9.645890214652875e-06, "loss": 0.5949, "step": 2276 }, { "epoch": 0.15, "grad_norm": 1.2532529830932617, "learning_rate": 9.645503730394894e-06, "loss": 0.6457, "step": 2277 }, { "epoch": 0.15, "grad_norm": 1.1914644241333008, "learning_rate": 9.64511704309393e-06, "loss": 0.5515, "step": 2278 }, { "epoch": 0.15, "grad_norm": 1.2239466905593872, "learning_rate": 9.644730152766883e-06, "loss": 0.5698, "step": 2279 }, { "epoch": 0.15, "grad_norm": 1.3683842420578003, "learning_rate": 9.644343059430667e-06, "loss": 0.6272, "step": 2280 }, { "epoch": 0.15, "grad_norm": 1.1989634037017822, "learning_rate": 9.643955763102197e-06, "loss": 0.5337, "step": 2281 }, { "epoch": 0.15, "grad_norm": 1.276391863822937, "learning_rate": 9.643568263798402e-06, "loss": 0.5939, "step": 2282 }, { "epoch": 0.15, "grad_norm": 1.0613101720809937, "learning_rate": 9.64318056153622e-06, "loss": 0.5586, "step": 2283 }, { "epoch": 0.15, "grad_norm": 1.298568606376648, "learning_rate": 9.642792656332593e-06, "loss": 0.6011, "step": 2284 }, { "epoch": 0.15, "grad_norm": 1.193249225616455, "learning_rate": 9.64240454820448e-06, "loss": 0.5653, "step": 2285 }, { "epoch": 0.15, "grad_norm": 1.1913903951644897, "learning_rate": 9.642016237168841e-06, "loss": 0.5457, "step": 2286 }, { "epoch": 0.15, "grad_norm": 1.2240753173828125, "learning_rate": 9.641627723242648e-06, "loss": 0.6159, "step": 2287 }, { "epoch": 0.15, "grad_norm": 1.1722757816314697, "learning_rate": 9.641239006442883e-06, "loss": 0.5703, "step": 2288 }, { "epoch": 0.15, "grad_norm": 1.186187744140625, "learning_rate": 9.640850086786538e-06, "loss": 0.6463, "step": 2289 }, { "epoch": 0.15, "grad_norm": 1.266506314277649, "learning_rate": 9.640460964290607e-06, "loss": 0.5902, "step": 2290 }, { "epoch": 0.15, "grad_norm": 1.0640276670455933, "learning_rate": 9.640071638972099e-06, "loss": 0.5303, "step": 2291 }, { "epoch": 0.15, "grad_norm": 1.1695890426635742, "learning_rate": 9.639682110848033e-06, "loss": 0.617, "step": 2292 }, { "epoch": 0.15, "grad_norm": 1.2259774208068848, "learning_rate": 9.639292379935432e-06, "loss": 0.5797, "step": 2293 }, { "epoch": 0.15, "grad_norm": 1.3102351427078247, "learning_rate": 9.63890244625133e-06, "loss": 0.5929, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.1235060691833496, "learning_rate": 9.63851230981277e-06, "loss": 0.5892, "step": 2295 }, { "epoch": 0.15, "grad_norm": 1.2014795541763306, "learning_rate": 9.638121970636807e-06, "loss": 0.5878, "step": 2296 }, { "epoch": 0.15, "grad_norm": 1.2503260374069214, "learning_rate": 9.637731428740498e-06, "loss": 0.5536, "step": 2297 }, { "epoch": 0.15, "grad_norm": 1.1821098327636719, "learning_rate": 9.637340684140913e-06, "loss": 0.5733, "step": 2298 }, { "epoch": 0.15, "grad_norm": 1.155981183052063, "learning_rate": 9.636949736855133e-06, "loss": 0.5605, "step": 2299 }, { "epoch": 0.15, "grad_norm": 1.184302568435669, "learning_rate": 9.63655858690024e-06, "loss": 0.5892, "step": 2300 }, { "epoch": 0.15, "grad_norm": 1.29524827003479, "learning_rate": 9.636167234293337e-06, "loss": 0.6289, "step": 2301 }, { "epoch": 0.15, "grad_norm": 1.148553490638733, "learning_rate": 9.635775679051525e-06, "loss": 0.5699, "step": 2302 }, { "epoch": 0.15, "grad_norm": 1.2261972427368164, "learning_rate": 9.635383921191916e-06, "loss": 0.5881, "step": 2303 }, { "epoch": 0.15, "grad_norm": 1.3043020963668823, "learning_rate": 9.634991960731637e-06, "loss": 0.5594, "step": 2304 }, { "epoch": 0.15, "grad_norm": 1.2717756032943726, "learning_rate": 9.63459979768782e-06, "loss": 0.5443, "step": 2305 }, { "epoch": 0.15, "grad_norm": 1.1735923290252686, "learning_rate": 9.6342074320776e-06, "loss": 0.5878, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.152470588684082, "learning_rate": 9.633814863918131e-06, "loss": 0.5517, "step": 2307 }, { "epoch": 0.15, "grad_norm": 1.2501254081726074, "learning_rate": 9.63342209322657e-06, "loss": 0.5921, "step": 2308 }, { "epoch": 0.15, "grad_norm": 1.238396167755127, "learning_rate": 9.633029120020083e-06, "loss": 0.6005, "step": 2309 }, { "epoch": 0.15, "grad_norm": 1.1722015142440796, "learning_rate": 9.632635944315847e-06, "loss": 0.5762, "step": 2310 }, { "epoch": 0.15, "grad_norm": 1.300792932510376, "learning_rate": 9.632242566131046e-06, "loss": 0.5811, "step": 2311 }, { "epoch": 0.15, "grad_norm": 1.276161789894104, "learning_rate": 9.631848985482874e-06, "loss": 0.5849, "step": 2312 }, { "epoch": 0.15, "grad_norm": 1.1701931953430176, "learning_rate": 9.631455202388534e-06, "loss": 0.6073, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.2132573127746582, "learning_rate": 9.631061216865234e-06, "loss": 0.594, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.2747352123260498, "learning_rate": 9.630667028930199e-06, "loss": 0.6166, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.25980544090271, "learning_rate": 9.630272638600653e-06, "loss": 0.5808, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.1480998992919922, "learning_rate": 9.629878045893838e-06, "loss": 0.5257, "step": 2317 }, { "epoch": 0.15, "grad_norm": 1.2112675905227661, "learning_rate": 9.629483250826998e-06, "loss": 0.5909, "step": 2318 }, { "epoch": 0.15, "grad_norm": 1.1010713577270508, "learning_rate": 9.62908825341739e-06, "loss": 0.608, "step": 2319 }, { "epoch": 0.15, "grad_norm": 1.1835966110229492, "learning_rate": 9.628693053682277e-06, "loss": 0.5739, "step": 2320 }, { "epoch": 0.15, "grad_norm": 1.2383214235305786, "learning_rate": 9.628297651638934e-06, "loss": 0.603, "step": 2321 }, { "epoch": 0.15, "grad_norm": 1.125900387763977, "learning_rate": 9.62790204730464e-06, "loss": 0.5449, "step": 2322 }, { "epoch": 0.15, "grad_norm": 1.170491337776184, "learning_rate": 9.62750624069669e-06, "loss": 0.5796, "step": 2323 }, { "epoch": 0.15, "grad_norm": 1.2171053886413574, "learning_rate": 9.62711023183238e-06, "loss": 0.5569, "step": 2324 }, { "epoch": 0.15, "grad_norm": 1.1159672737121582, "learning_rate": 9.62671402072902e-06, "loss": 0.5393, "step": 2325 }, { "epoch": 0.15, "grad_norm": 1.163318395614624, "learning_rate": 9.626317607403926e-06, "loss": 0.5869, "step": 2326 }, { "epoch": 0.15, "grad_norm": 1.2236030101776123, "learning_rate": 9.625920991874426e-06, "loss": 0.5509, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.054452657699585, "learning_rate": 9.625524174157857e-06, "loss": 0.5693, "step": 2328 }, { "epoch": 0.15, "grad_norm": 1.2802915573120117, "learning_rate": 9.625127154271557e-06, "loss": 0.5868, "step": 2329 }, { "epoch": 0.15, "grad_norm": 1.1007636785507202, "learning_rate": 9.624729932232883e-06, "loss": 0.5614, "step": 2330 }, { "epoch": 0.15, "grad_norm": 1.0766950845718384, "learning_rate": 9.624332508059198e-06, "loss": 0.5883, "step": 2331 }, { "epoch": 0.15, "grad_norm": 1.2294318675994873, "learning_rate": 9.623934881767867e-06, "loss": 0.572, "step": 2332 }, { "epoch": 0.15, "grad_norm": 1.214994192123413, "learning_rate": 9.623537053376272e-06, "loss": 0.5815, "step": 2333 }, { "epoch": 0.15, "grad_norm": 1.1389684677124023, "learning_rate": 9.623139022901803e-06, "loss": 0.5718, "step": 2334 }, { "epoch": 0.15, "grad_norm": 1.2773017883300781, "learning_rate": 9.622740790361855e-06, "loss": 0.6133, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.3492622375488281, "learning_rate": 9.622342355773836e-06, "loss": 0.6252, "step": 2336 }, { "epoch": 0.15, "grad_norm": 1.2195011377334595, "learning_rate": 9.621943719155156e-06, "loss": 0.6197, "step": 2337 }, { "epoch": 0.15, "grad_norm": 1.1517257690429688, "learning_rate": 9.621544880523239e-06, "loss": 0.5982, "step": 2338 }, { "epoch": 0.15, "grad_norm": 1.2267946004867554, "learning_rate": 9.621145839895522e-06, "loss": 0.5912, "step": 2339 }, { "epoch": 0.15, "grad_norm": 1.2591512203216553, "learning_rate": 9.620746597289442e-06, "loss": 0.6486, "step": 2340 }, { "epoch": 0.15, "grad_norm": 1.17159104347229, "learning_rate": 9.62034715272245e-06, "loss": 0.6514, "step": 2341 }, { "epoch": 0.15, "grad_norm": 1.1566944122314453, "learning_rate": 9.619947506212003e-06, "loss": 0.6296, "step": 2342 }, { "epoch": 0.15, "grad_norm": 1.0615812540054321, "learning_rate": 9.619547657775573e-06, "loss": 0.566, "step": 2343 }, { "epoch": 0.15, "grad_norm": 1.1773704290390015, "learning_rate": 9.619147607430633e-06, "loss": 0.5876, "step": 2344 }, { "epoch": 0.15, "grad_norm": 1.1975544691085815, "learning_rate": 9.618747355194666e-06, "loss": 0.6194, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.1461758613586426, "learning_rate": 9.618346901085172e-06, "loss": 0.5889, "step": 2346 }, { "epoch": 0.15, "grad_norm": 1.2713910341262817, "learning_rate": 9.617946245119648e-06, "loss": 0.6818, "step": 2347 }, { "epoch": 0.15, "grad_norm": 1.2057169675827026, "learning_rate": 9.617545387315609e-06, "loss": 0.6513, "step": 2348 }, { "epoch": 0.15, "grad_norm": 1.2911337614059448, "learning_rate": 9.617144327690576e-06, "loss": 0.6094, "step": 2349 }, { "epoch": 0.15, "grad_norm": 1.0805693864822388, "learning_rate": 9.616743066262073e-06, "loss": 0.5539, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.1306039094924927, "learning_rate": 9.616341603047645e-06, "loss": 0.6269, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.180198311805725, "learning_rate": 9.615939938064836e-06, "loss": 0.5968, "step": 2352 }, { "epoch": 0.15, "grad_norm": 1.0913161039352417, "learning_rate": 9.6155380713312e-06, "loss": 0.5456, "step": 2353 }, { "epoch": 0.15, "grad_norm": 1.161391258239746, "learning_rate": 9.615136002864304e-06, "loss": 0.5919, "step": 2354 }, { "epoch": 0.15, "grad_norm": 1.1742092370986938, "learning_rate": 9.61473373268172e-06, "loss": 0.5735, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.1232725381851196, "learning_rate": 9.614331260801031e-06, "loss": 0.5552, "step": 2356 }, { "epoch": 0.15, "grad_norm": 1.245816707611084, "learning_rate": 9.613928587239827e-06, "loss": 0.5736, "step": 2357 }, { "epoch": 0.15, "grad_norm": 1.207488775253296, "learning_rate": 9.61352571201571e-06, "loss": 0.6021, "step": 2358 }, { "epoch": 0.15, "grad_norm": 1.1427611112594604, "learning_rate": 9.613122635146286e-06, "loss": 0.5276, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.176019549369812, "learning_rate": 9.612719356649174e-06, "loss": 0.5703, "step": 2360 }, { "epoch": 0.15, "grad_norm": 1.2842025756835938, "learning_rate": 9.612315876542002e-06, "loss": 0.6061, "step": 2361 }, { "epoch": 0.15, "grad_norm": 1.1274480819702148, "learning_rate": 9.6119121948424e-06, "loss": 0.5836, "step": 2362 }, { "epoch": 0.15, "grad_norm": 1.2169731855392456, "learning_rate": 9.611508311568016e-06, "loss": 0.5574, "step": 2363 }, { "epoch": 0.15, "grad_norm": 1.2078649997711182, "learning_rate": 9.611104226736501e-06, "loss": 0.6287, "step": 2364 }, { "epoch": 0.15, "grad_norm": 1.2356092929840088, "learning_rate": 9.610699940365517e-06, "loss": 0.5617, "step": 2365 }, { "epoch": 0.15, "grad_norm": 1.1695395708084106, "learning_rate": 9.610295452472735e-06, "loss": 0.6139, "step": 2366 }, { "epoch": 0.15, "grad_norm": 1.182448387145996, "learning_rate": 9.609890763075835e-06, "loss": 0.6256, "step": 2367 }, { "epoch": 0.15, "grad_norm": 1.2179927825927734, "learning_rate": 9.609485872192501e-06, "loss": 0.6112, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.3259958028793335, "learning_rate": 9.609080779840434e-06, "loss": 0.5777, "step": 2369 }, { "epoch": 0.15, "grad_norm": 1.0966123342514038, "learning_rate": 9.608675486037336e-06, "loss": 0.5814, "step": 2370 }, { "epoch": 0.15, "grad_norm": 1.2526428699493408, "learning_rate": 9.608269990800923e-06, "loss": 0.5929, "step": 2371 }, { "epoch": 0.15, "grad_norm": 1.1556177139282227, "learning_rate": 9.607864294148918e-06, "loss": 0.595, "step": 2372 }, { "epoch": 0.15, "grad_norm": 1.1329914331436157, "learning_rate": 9.607458396099055e-06, "loss": 0.5981, "step": 2373 }, { "epoch": 0.15, "grad_norm": 1.1797242164611816, "learning_rate": 9.607052296669072e-06, "loss": 0.5657, "step": 2374 }, { "epoch": 0.15, "grad_norm": 1.174989104270935, "learning_rate": 9.60664599587672e-06, "loss": 0.5786, "step": 2375 }, { "epoch": 0.15, "grad_norm": 1.1873347759246826, "learning_rate": 9.606239493739755e-06, "loss": 0.5981, "step": 2376 }, { "epoch": 0.15, "grad_norm": 1.1610052585601807, "learning_rate": 9.605832790275947e-06, "loss": 0.6112, "step": 2377 }, { "epoch": 0.15, "grad_norm": 1.3393940925598145, "learning_rate": 9.605425885503073e-06, "loss": 0.5374, "step": 2378 }, { "epoch": 0.15, "grad_norm": 1.2151825428009033, "learning_rate": 9.605018779438913e-06, "loss": 0.5992, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.240666151046753, "learning_rate": 9.604611472101263e-06, "loss": 0.5797, "step": 2380 }, { "epoch": 0.15, "grad_norm": 1.2075368165969849, "learning_rate": 9.604203963507928e-06, "loss": 0.498, "step": 2381 }, { "epoch": 0.15, "grad_norm": 1.154213547706604, "learning_rate": 9.603796253676715e-06, "loss": 0.5738, "step": 2382 }, { "epoch": 0.15, "grad_norm": 1.3954514265060425, "learning_rate": 9.603388342625446e-06, "loss": 0.587, "step": 2383 }, { "epoch": 0.15, "grad_norm": 1.1665178537368774, "learning_rate": 9.60298023037195e-06, "loss": 0.6192, "step": 2384 }, { "epoch": 0.15, "grad_norm": 1.1172940731048584, "learning_rate": 9.602571916934064e-06, "loss": 0.5695, "step": 2385 }, { "epoch": 0.15, "grad_norm": 1.2571096420288086, "learning_rate": 9.602163402329633e-06, "loss": 0.5884, "step": 2386 }, { "epoch": 0.15, "grad_norm": 1.2629085779190063, "learning_rate": 9.601754686576514e-06, "loss": 0.5643, "step": 2387 }, { "epoch": 0.15, "grad_norm": 1.284449815750122, "learning_rate": 9.601345769692572e-06, "loss": 0.6051, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.3072805404663086, "learning_rate": 9.600936651695676e-06, "loss": 0.5824, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.2642266750335693, "learning_rate": 9.60052733260371e-06, "loss": 0.5701, "step": 2390 }, { "epoch": 0.15, "grad_norm": 1.1206753253936768, "learning_rate": 9.600117812434565e-06, "loss": 0.6123, "step": 2391 }, { "epoch": 0.15, "grad_norm": 1.1296064853668213, "learning_rate": 9.599708091206137e-06, "loss": 0.5227, "step": 2392 }, { "epoch": 0.15, "grad_norm": 1.1835646629333496, "learning_rate": 9.599298168936338e-06, "loss": 0.6445, "step": 2393 }, { "epoch": 0.15, "grad_norm": 1.171296238899231, "learning_rate": 9.59888804564308e-06, "loss": 0.5663, "step": 2394 }, { "epoch": 0.15, "grad_norm": 1.387462854385376, "learning_rate": 9.598477721344293e-06, "loss": 0.6085, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.3124113082885742, "learning_rate": 9.598067196057907e-06, "loss": 0.5523, "step": 2396 }, { "epoch": 0.15, "grad_norm": 1.352789044380188, "learning_rate": 9.597656469801868e-06, "loss": 0.6173, "step": 2397 }, { "epoch": 0.15, "grad_norm": 1.3375335931777954, "learning_rate": 9.597245542594127e-06, "loss": 0.5638, "step": 2398 }, { "epoch": 0.15, "grad_norm": 1.1788907051086426, "learning_rate": 9.596834414452642e-06, "loss": 0.5638, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.1633975505828857, "learning_rate": 9.596423085395388e-06, "loss": 0.5659, "step": 2400 }, { "epoch": 0.15, "grad_norm": 1.1793577671051025, "learning_rate": 9.596011555440338e-06, "loss": 0.5607, "step": 2401 }, { "epoch": 0.16, "grad_norm": 1.2971506118774414, "learning_rate": 9.595599824605482e-06, "loss": 0.5701, "step": 2402 }, { "epoch": 0.16, "grad_norm": 1.303275227546692, "learning_rate": 9.595187892908814e-06, "loss": 0.5907, "step": 2403 }, { "epoch": 0.16, "grad_norm": 1.2554751634597778, "learning_rate": 9.59477576036834e-06, "loss": 0.4878, "step": 2404 }, { "epoch": 0.16, "grad_norm": 1.2538014650344849, "learning_rate": 9.594363427002071e-06, "loss": 0.5723, "step": 2405 }, { "epoch": 0.16, "grad_norm": 1.0948325395584106, "learning_rate": 9.59395089282803e-06, "loss": 0.5686, "step": 2406 }, { "epoch": 0.16, "grad_norm": 1.1462067365646362, "learning_rate": 9.593538157864249e-06, "loss": 0.5455, "step": 2407 }, { "epoch": 0.16, "grad_norm": 1.2591557502746582, "learning_rate": 9.593125222128766e-06, "loss": 0.5398, "step": 2408 }, { "epoch": 0.16, "grad_norm": 1.3519773483276367, "learning_rate": 9.592712085639629e-06, "loss": 0.613, "step": 2409 }, { "epoch": 0.16, "grad_norm": 1.4649313688278198, "learning_rate": 9.592298748414896e-06, "loss": 0.5898, "step": 2410 }, { "epoch": 0.16, "grad_norm": 1.232566475868225, "learning_rate": 9.591885210472635e-06, "loss": 0.5859, "step": 2411 }, { "epoch": 0.16, "grad_norm": 1.1930354833602905, "learning_rate": 9.591471471830918e-06, "loss": 0.5797, "step": 2412 }, { "epoch": 0.16, "grad_norm": 1.2238423824310303, "learning_rate": 9.591057532507828e-06, "loss": 0.5662, "step": 2413 }, { "epoch": 0.16, "grad_norm": 1.1885817050933838, "learning_rate": 9.59064339252146e-06, "loss": 0.5504, "step": 2414 }, { "epoch": 0.16, "grad_norm": 1.1914902925491333, "learning_rate": 9.590229051889912e-06, "loss": 0.5862, "step": 2415 }, { "epoch": 0.16, "grad_norm": 1.3648478984832764, "learning_rate": 9.589814510631297e-06, "loss": 0.5724, "step": 2416 }, { "epoch": 0.16, "grad_norm": 1.2601268291473389, "learning_rate": 9.589399768763729e-06, "loss": 0.634, "step": 2417 }, { "epoch": 0.16, "grad_norm": 1.2579594850540161, "learning_rate": 9.588984826305338e-06, "loss": 0.554, "step": 2418 }, { "epoch": 0.16, "grad_norm": 1.203633189201355, "learning_rate": 9.588569683274262e-06, "loss": 0.6143, "step": 2419 }, { "epoch": 0.16, "grad_norm": 1.2667453289031982, "learning_rate": 9.588154339688643e-06, "loss": 0.6519, "step": 2420 }, { "epoch": 0.16, "grad_norm": 1.146646499633789, "learning_rate": 9.587738795566636e-06, "loss": 0.5618, "step": 2421 }, { "epoch": 0.16, "grad_norm": 1.1369293928146362, "learning_rate": 9.587323050926403e-06, "loss": 0.5662, "step": 2422 }, { "epoch": 0.16, "grad_norm": 1.0919575691223145, "learning_rate": 9.586907105786113e-06, "loss": 0.5775, "step": 2423 }, { "epoch": 0.16, "grad_norm": 1.1444002389907837, "learning_rate": 9.586490960163948e-06, "loss": 0.5731, "step": 2424 }, { "epoch": 0.16, "grad_norm": 1.326550006866455, "learning_rate": 9.586074614078097e-06, "loss": 0.5789, "step": 2425 }, { "epoch": 0.16, "grad_norm": 1.1407272815704346, "learning_rate": 9.585658067546758e-06, "loss": 0.6029, "step": 2426 }, { "epoch": 0.16, "grad_norm": 1.3507165908813477, "learning_rate": 9.585241320588135e-06, "loss": 0.5879, "step": 2427 }, { "epoch": 0.16, "grad_norm": 1.2903763055801392, "learning_rate": 9.584824373220444e-06, "loss": 0.5745, "step": 2428 }, { "epoch": 0.16, "grad_norm": 1.2051911354064941, "learning_rate": 9.584407225461909e-06, "loss": 0.5759, "step": 2429 }, { "epoch": 0.16, "grad_norm": 1.0642040967941284, "learning_rate": 9.583989877330762e-06, "loss": 0.5965, "step": 2430 }, { "epoch": 0.16, "grad_norm": 1.1974881887435913, "learning_rate": 9.583572328845244e-06, "loss": 0.5557, "step": 2431 }, { "epoch": 0.16, "grad_norm": 1.275938868522644, "learning_rate": 9.583154580023604e-06, "loss": 0.6216, "step": 2432 }, { "epoch": 0.16, "grad_norm": 1.2237924337387085, "learning_rate": 9.582736630884104e-06, "loss": 0.5915, "step": 2433 }, { "epoch": 0.16, "grad_norm": 1.1187081336975098, "learning_rate": 9.58231848144501e-06, "loss": 0.5937, "step": 2434 }, { "epoch": 0.16, "grad_norm": 1.1804053783416748, "learning_rate": 9.581900131724595e-06, "loss": 0.6164, "step": 2435 }, { "epoch": 0.16, "grad_norm": 1.1840201616287231, "learning_rate": 9.581481581741148e-06, "loss": 0.594, "step": 2436 }, { "epoch": 0.16, "grad_norm": 1.184220314025879, "learning_rate": 9.581062831512962e-06, "loss": 0.5387, "step": 2437 }, { "epoch": 0.16, "grad_norm": 1.233590006828308, "learning_rate": 9.58064388105834e-06, "loss": 0.5972, "step": 2438 }, { "epoch": 0.16, "grad_norm": 1.331649661064148, "learning_rate": 9.580224730395591e-06, "loss": 0.5684, "step": 2439 }, { "epoch": 0.16, "grad_norm": 1.2073395252227783, "learning_rate": 9.579805379543034e-06, "loss": 0.6012, "step": 2440 }, { "epoch": 0.16, "grad_norm": 1.2319092750549316, "learning_rate": 9.579385828519003e-06, "loss": 0.5999, "step": 2441 }, { "epoch": 0.16, "grad_norm": 1.2141683101654053, "learning_rate": 9.578966077341831e-06, "loss": 0.6017, "step": 2442 }, { "epoch": 0.16, "grad_norm": 1.2273662090301514, "learning_rate": 9.578546126029867e-06, "loss": 0.5554, "step": 2443 }, { "epoch": 0.16, "grad_norm": 1.3439258337020874, "learning_rate": 9.578125974601463e-06, "loss": 0.6097, "step": 2444 }, { "epoch": 0.16, "grad_norm": 1.1471935510635376, "learning_rate": 9.577705623074985e-06, "loss": 0.5907, "step": 2445 }, { "epoch": 0.16, "grad_norm": 1.235077142715454, "learning_rate": 9.577285071468804e-06, "loss": 0.5887, "step": 2446 }, { "epoch": 0.16, "grad_norm": 1.2324485778808594, "learning_rate": 9.576864319801304e-06, "loss": 0.6062, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.1022775173187256, "learning_rate": 9.576443368090872e-06, "loss": 0.5565, "step": 2448 }, { "epoch": 0.16, "grad_norm": 1.0839921236038208, "learning_rate": 9.576022216355908e-06, "loss": 0.5726, "step": 2449 }, { "epoch": 0.16, "grad_norm": 1.2400449514389038, "learning_rate": 9.575600864614817e-06, "loss": 0.5936, "step": 2450 }, { "epoch": 0.16, "grad_norm": 1.30100417137146, "learning_rate": 9.57517931288602e-06, "loss": 0.6256, "step": 2451 }, { "epoch": 0.16, "grad_norm": 1.2404543161392212, "learning_rate": 9.57475756118794e-06, "loss": 0.5272, "step": 2452 }, { "epoch": 0.16, "grad_norm": 1.3581851720809937, "learning_rate": 9.574335609539008e-06, "loss": 0.6306, "step": 2453 }, { "epoch": 0.16, "grad_norm": 1.1166975498199463, "learning_rate": 9.57391345795767e-06, "loss": 0.5877, "step": 2454 }, { "epoch": 0.16, "grad_norm": 1.1518793106079102, "learning_rate": 9.573491106462373e-06, "loss": 0.5541, "step": 2455 }, { "epoch": 0.16, "grad_norm": 1.0700889825820923, "learning_rate": 9.573068555071582e-06, "loss": 0.5449, "step": 2456 }, { "epoch": 0.16, "grad_norm": 1.1026482582092285, "learning_rate": 9.572645803803764e-06, "loss": 0.5738, "step": 2457 }, { "epoch": 0.16, "grad_norm": 1.2258366346359253, "learning_rate": 9.572222852677393e-06, "loss": 0.5781, "step": 2458 }, { "epoch": 0.16, "grad_norm": 1.1687825918197632, "learning_rate": 9.571799701710958e-06, "loss": 0.585, "step": 2459 }, { "epoch": 0.16, "grad_norm": 1.2129442691802979, "learning_rate": 9.571376350922954e-06, "loss": 0.6027, "step": 2460 }, { "epoch": 0.16, "grad_norm": 1.2414922714233398, "learning_rate": 9.570952800331885e-06, "loss": 0.5953, "step": 2461 }, { "epoch": 0.16, "grad_norm": 1.136260986328125, "learning_rate": 9.57052904995626e-06, "loss": 0.568, "step": 2462 }, { "epoch": 0.16, "grad_norm": 1.2485984563827515, "learning_rate": 9.570105099814603e-06, "loss": 0.5397, "step": 2463 }, { "epoch": 0.16, "grad_norm": 1.251671314239502, "learning_rate": 9.569680949925446e-06, "loss": 0.557, "step": 2464 }, { "epoch": 0.16, "grad_norm": 1.3902578353881836, "learning_rate": 9.569256600307321e-06, "loss": 0.5969, "step": 2465 }, { "epoch": 0.16, "grad_norm": 1.1324799060821533, "learning_rate": 9.56883205097878e-06, "loss": 0.5246, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.2816050052642822, "learning_rate": 9.56840730195838e-06, "loss": 0.6504, "step": 2467 }, { "epoch": 0.16, "grad_norm": 1.2287261486053467, "learning_rate": 9.56798235326468e-06, "loss": 0.5787, "step": 2468 }, { "epoch": 0.16, "grad_norm": 1.1798900365829468, "learning_rate": 9.567557204916259e-06, "loss": 0.549, "step": 2469 }, { "epoch": 0.16, "grad_norm": 1.3203095197677612, "learning_rate": 9.567131856931696e-06, "loss": 0.6033, "step": 2470 }, { "epoch": 0.16, "grad_norm": 1.312439203262329, "learning_rate": 9.566706309329582e-06, "loss": 0.589, "step": 2471 }, { "epoch": 0.16, "grad_norm": 1.2188448905944824, "learning_rate": 9.56628056212852e-06, "loss": 0.5545, "step": 2472 }, { "epoch": 0.16, "grad_norm": 1.228562593460083, "learning_rate": 9.565854615347114e-06, "loss": 0.5686, "step": 2473 }, { "epoch": 0.16, "grad_norm": 1.2963674068450928, "learning_rate": 9.565428469003984e-06, "loss": 0.6181, "step": 2474 }, { "epoch": 0.16, "grad_norm": 1.1843525171279907, "learning_rate": 9.565002123117755e-06, "loss": 0.6001, "step": 2475 }, { "epoch": 0.16, "grad_norm": 1.3385945558547974, "learning_rate": 9.56457557770706e-06, "loss": 0.6228, "step": 2476 }, { "epoch": 0.16, "grad_norm": 1.2868698835372925, "learning_rate": 9.564148832790542e-06, "loss": 0.6141, "step": 2477 }, { "epoch": 0.16, "grad_norm": 1.1217089891433716, "learning_rate": 9.563721888386856e-06, "loss": 0.5923, "step": 2478 }, { "epoch": 0.16, "grad_norm": 1.166597604751587, "learning_rate": 9.563294744514661e-06, "loss": 0.6036, "step": 2479 }, { "epoch": 0.16, "grad_norm": 1.4099596738815308, "learning_rate": 9.562867401192626e-06, "loss": 0.5813, "step": 2480 }, { "epoch": 0.16, "grad_norm": 1.341156005859375, "learning_rate": 9.562439858439427e-06, "loss": 0.5988, "step": 2481 }, { "epoch": 0.16, "grad_norm": 1.3242703676223755, "learning_rate": 9.562012116273756e-06, "loss": 0.5943, "step": 2482 }, { "epoch": 0.16, "grad_norm": 1.0768381357192993, "learning_rate": 9.561584174714304e-06, "loss": 0.5814, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.0851430892944336, "learning_rate": 9.561156033779778e-06, "loss": 0.5656, "step": 2484 }, { "epoch": 0.16, "grad_norm": 1.0235633850097656, "learning_rate": 9.560727693488886e-06, "loss": 0.5418, "step": 2485 }, { "epoch": 0.16, "grad_norm": 1.114150047302246, "learning_rate": 9.560299153860358e-06, "loss": 0.5472, "step": 2486 }, { "epoch": 0.16, "grad_norm": 1.1835178136825562, "learning_rate": 9.559870414912917e-06, "loss": 0.6155, "step": 2487 }, { "epoch": 0.16, "grad_norm": 1.1818758249282837, "learning_rate": 9.559441476665307e-06, "loss": 0.5542, "step": 2488 }, { "epoch": 0.16, "grad_norm": 1.2377281188964844, "learning_rate": 9.559012339136272e-06, "loss": 0.5908, "step": 2489 }, { "epoch": 0.16, "grad_norm": 1.1090458631515503, "learning_rate": 9.55858300234457e-06, "loss": 0.5631, "step": 2490 }, { "epoch": 0.16, "grad_norm": 1.1184855699539185, "learning_rate": 9.558153466308965e-06, "loss": 0.5583, "step": 2491 }, { "epoch": 0.16, "grad_norm": 1.1607929468154907, "learning_rate": 9.557723731048236e-06, "loss": 0.5479, "step": 2492 }, { "epoch": 0.16, "grad_norm": 1.1228171586990356, "learning_rate": 9.55729379658116e-06, "loss": 0.5627, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.1218681335449219, "learning_rate": 9.556863662926528e-06, "loss": 0.5305, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.0797961950302124, "learning_rate": 9.556433330103145e-06, "loss": 0.5621, "step": 2495 }, { "epoch": 0.16, "grad_norm": 1.3196650743484497, "learning_rate": 9.556002798129815e-06, "loss": 0.5869, "step": 2496 }, { "epoch": 0.16, "grad_norm": 1.1642191410064697, "learning_rate": 9.555572067025359e-06, "loss": 0.6103, "step": 2497 }, { "epoch": 0.16, "grad_norm": 1.1072341203689575, "learning_rate": 9.5551411368086e-06, "loss": 0.5599, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.220579981803894, "learning_rate": 9.554710007498374e-06, "loss": 0.6064, "step": 2499 }, { "epoch": 0.16, "grad_norm": 1.1366773843765259, "learning_rate": 9.554278679113527e-06, "loss": 0.5343, "step": 2500 }, { "epoch": 0.16, "grad_norm": 1.2511848211288452, "learning_rate": 9.553847151672906e-06, "loss": 0.6371, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.126974105834961, "learning_rate": 9.553415425195378e-06, "loss": 0.615, "step": 2502 }, { "epoch": 0.16, "grad_norm": 1.2973774671554565, "learning_rate": 9.552983499699808e-06, "loss": 0.6135, "step": 2503 }, { "epoch": 0.16, "grad_norm": 1.6068154573440552, "learning_rate": 9.552551375205077e-06, "loss": 0.6946, "step": 2504 }, { "epoch": 0.16, "grad_norm": 1.24983811378479, "learning_rate": 9.55211905173007e-06, "loss": 0.5703, "step": 2505 }, { "epoch": 0.16, "grad_norm": 1.3054136037826538, "learning_rate": 9.551686529293686e-06, "loss": 0.629, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.214113712310791, "learning_rate": 9.551253807914827e-06, "loss": 0.6295, "step": 2507 }, { "epoch": 0.16, "grad_norm": 1.0953962802886963, "learning_rate": 9.550820887612405e-06, "loss": 0.5653, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.269182801246643, "learning_rate": 9.550387768405342e-06, "loss": 0.5774, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.165313959121704, "learning_rate": 9.549954450312574e-06, "loss": 0.5659, "step": 2510 }, { "epoch": 0.16, "grad_norm": 1.2927206754684448, "learning_rate": 9.549520933353032e-06, "loss": 0.5505, "step": 2511 }, { "epoch": 0.16, "grad_norm": 1.2049999237060547, "learning_rate": 9.54908721754567e-06, "loss": 0.5976, "step": 2512 }, { "epoch": 0.16, "grad_norm": 1.111923098564148, "learning_rate": 9.548653302909441e-06, "loss": 0.5248, "step": 2513 }, { "epoch": 0.16, "grad_norm": 1.170030951499939, "learning_rate": 9.548219189463315e-06, "loss": 0.5555, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.1970703601837158, "learning_rate": 9.54778487722626e-06, "loss": 0.608, "step": 2515 }, { "epoch": 0.16, "grad_norm": 1.4273903369903564, "learning_rate": 9.547350366217262e-06, "loss": 0.5474, "step": 2516 }, { "epoch": 0.16, "grad_norm": 1.2627336978912354, "learning_rate": 9.54691565645531e-06, "loss": 0.5823, "step": 2517 }, { "epoch": 0.16, "grad_norm": 1.2696326971054077, "learning_rate": 9.54648074795941e-06, "loss": 0.6013, "step": 2518 }, { "epoch": 0.16, "grad_norm": 1.2052234411239624, "learning_rate": 9.546045640748564e-06, "loss": 0.5673, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.0646984577178955, "learning_rate": 9.545610334841793e-06, "loss": 0.5335, "step": 2520 }, { "epoch": 0.16, "grad_norm": 1.6619070768356323, "learning_rate": 9.54517483025812e-06, "loss": 0.6061, "step": 2521 }, { "epoch": 0.16, "grad_norm": 1.1323552131652832, "learning_rate": 9.544739127016585e-06, "loss": 0.6144, "step": 2522 }, { "epoch": 0.16, "grad_norm": 1.2304025888442993, "learning_rate": 9.544303225136225e-06, "loss": 0.5281, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.1401559114456177, "learning_rate": 9.543867124636097e-06, "loss": 0.5854, "step": 2524 }, { "epoch": 0.16, "grad_norm": 1.1529144048690796, "learning_rate": 9.54343082553526e-06, "loss": 0.5778, "step": 2525 }, { "epoch": 0.16, "grad_norm": 1.53265380859375, "learning_rate": 9.542994327852786e-06, "loss": 0.5685, "step": 2526 }, { "epoch": 0.16, "grad_norm": 1.2379817962646484, "learning_rate": 9.542557631607749e-06, "loss": 0.6227, "step": 2527 }, { "epoch": 0.16, "grad_norm": 1.2443184852600098, "learning_rate": 9.542120736819239e-06, "loss": 0.5519, "step": 2528 }, { "epoch": 0.16, "grad_norm": 1.2494230270385742, "learning_rate": 9.541683643506348e-06, "loss": 0.6177, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.1469321250915527, "learning_rate": 9.541246351688185e-06, "loss": 0.5559, "step": 2530 }, { "epoch": 0.16, "grad_norm": 1.1554194688796997, "learning_rate": 9.54080886138386e-06, "loss": 0.5827, "step": 2531 }, { "epoch": 0.16, "grad_norm": 0.9898285865783691, "learning_rate": 9.540371172612494e-06, "loss": 0.5448, "step": 2532 }, { "epoch": 0.16, "grad_norm": 1.196150779724121, "learning_rate": 9.53993328539322e-06, "loss": 0.5607, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.3222897052764893, "learning_rate": 9.539495199745174e-06, "loss": 0.6049, "step": 2534 }, { "epoch": 0.16, "grad_norm": 1.231536865234375, "learning_rate": 9.539056915687507e-06, "loss": 0.5977, "step": 2535 }, { "epoch": 0.16, "grad_norm": 1.188865303993225, "learning_rate": 9.53861843323937e-06, "loss": 0.593, "step": 2536 }, { "epoch": 0.16, "grad_norm": 1.0745172500610352, "learning_rate": 9.538179752419933e-06, "loss": 0.5795, "step": 2537 }, { "epoch": 0.16, "grad_norm": 1.3019804954528809, "learning_rate": 9.537740873248368e-06, "loss": 0.6235, "step": 2538 }, { "epoch": 0.16, "grad_norm": 1.1516578197479248, "learning_rate": 9.537301795743856e-06, "loss": 0.6083, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.1924949884414673, "learning_rate": 9.536862519925589e-06, "loss": 0.6416, "step": 2540 }, { "epoch": 0.16, "grad_norm": 1.147863745689392, "learning_rate": 9.536423045812767e-06, "loss": 0.5562, "step": 2541 }, { "epoch": 0.16, "grad_norm": 1.258541226387024, "learning_rate": 9.535983373424597e-06, "loss": 0.5898, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.1942275762557983, "learning_rate": 9.535543502780299e-06, "loss": 0.5762, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.1596394777297974, "learning_rate": 9.535103433899093e-06, "loss": 0.6232, "step": 2544 }, { "epoch": 0.16, "grad_norm": 1.150648832321167, "learning_rate": 9.534663166800217e-06, "loss": 0.6113, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.2039610147476196, "learning_rate": 9.534222701502915e-06, "loss": 0.5826, "step": 2546 }, { "epoch": 0.16, "grad_norm": 1.2565468549728394, "learning_rate": 9.533782038026437e-06, "loss": 0.5552, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.0641077756881714, "learning_rate": 9.533341176390044e-06, "loss": 0.5525, "step": 2548 }, { "epoch": 0.16, "grad_norm": 1.3133704662322998, "learning_rate": 9.532900116613004e-06, "loss": 0.5475, "step": 2549 }, { "epoch": 0.16, "grad_norm": 1.2720632553100586, "learning_rate": 9.532458858714595e-06, "loss": 0.5859, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.2480316162109375, "learning_rate": 9.532017402714103e-06, "loss": 0.5645, "step": 2551 }, { "epoch": 0.16, "grad_norm": 1.6285128593444824, "learning_rate": 9.531575748630823e-06, "loss": 0.6039, "step": 2552 }, { "epoch": 0.16, "grad_norm": 1.099246621131897, "learning_rate": 9.531133896484058e-06, "loss": 0.567, "step": 2553 }, { "epoch": 0.16, "grad_norm": 1.14531409740448, "learning_rate": 9.53069184629312e-06, "loss": 0.5416, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.2851897478103638, "learning_rate": 9.530249598077334e-06, "loss": 0.5152, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.3031326532363892, "learning_rate": 9.529807151856025e-06, "loss": 0.6301, "step": 2556 }, { "epoch": 0.17, "grad_norm": 1.219534158706665, "learning_rate": 9.529364507648531e-06, "loss": 0.5846, "step": 2557 }, { "epoch": 0.17, "grad_norm": 1.2995967864990234, "learning_rate": 9.528921665474202e-06, "loss": 0.5433, "step": 2558 }, { "epoch": 0.17, "grad_norm": 1.1896851062774658, "learning_rate": 9.52847862535239e-06, "loss": 0.6122, "step": 2559 }, { "epoch": 0.17, "grad_norm": 1.367789626121521, "learning_rate": 9.528035387302463e-06, "loss": 0.5761, "step": 2560 }, { "epoch": 0.17, "grad_norm": 1.2353503704071045, "learning_rate": 9.52759195134379e-06, "loss": 0.5898, "step": 2561 }, { "epoch": 0.17, "grad_norm": 1.1959826946258545, "learning_rate": 9.527148317495755e-06, "loss": 0.5577, "step": 2562 }, { "epoch": 0.17, "grad_norm": 1.2017732858657837, "learning_rate": 9.526704485777746e-06, "loss": 0.5987, "step": 2563 }, { "epoch": 0.17, "grad_norm": 1.1690130233764648, "learning_rate": 9.526260456209165e-06, "loss": 0.5744, "step": 2564 }, { "epoch": 0.17, "grad_norm": 1.3340396881103516, "learning_rate": 9.525816228809417e-06, "loss": 0.5734, "step": 2565 }, { "epoch": 0.17, "grad_norm": 1.1704057455062866, "learning_rate": 9.525371803597915e-06, "loss": 0.5643, "step": 2566 }, { "epoch": 0.17, "grad_norm": 1.2626147270202637, "learning_rate": 9.524927180594092e-06, "loss": 0.5939, "step": 2567 }, { "epoch": 0.17, "grad_norm": 1.1945152282714844, "learning_rate": 9.524482359817372e-06, "loss": 0.6149, "step": 2568 }, { "epoch": 0.17, "grad_norm": 1.1107969284057617, "learning_rate": 9.524037341287205e-06, "loss": 0.533, "step": 2569 }, { "epoch": 0.17, "grad_norm": 1.1398972272872925, "learning_rate": 9.523592125023037e-06, "loss": 0.5106, "step": 2570 }, { "epoch": 0.17, "grad_norm": 1.3202319145202637, "learning_rate": 9.523146711044328e-06, "loss": 0.6117, "step": 2571 }, { "epoch": 0.17, "grad_norm": 1.2760744094848633, "learning_rate": 9.522701099370547e-06, "loss": 0.5726, "step": 2572 }, { "epoch": 0.17, "grad_norm": 1.0537011623382568, "learning_rate": 9.522255290021168e-06, "loss": 0.5799, "step": 2573 }, { "epoch": 0.17, "grad_norm": 1.2995222806930542, "learning_rate": 9.521809283015677e-06, "loss": 0.5772, "step": 2574 }, { "epoch": 0.17, "grad_norm": 1.1384162902832031, "learning_rate": 9.52136307837357e-06, "loss": 0.6037, "step": 2575 }, { "epoch": 0.17, "grad_norm": 1.2756437063217163, "learning_rate": 9.520916676114349e-06, "loss": 0.5528, "step": 2576 }, { "epoch": 0.17, "grad_norm": 1.2420053482055664, "learning_rate": 9.520470076257523e-06, "loss": 0.6156, "step": 2577 }, { "epoch": 0.17, "grad_norm": 1.1618478298187256, "learning_rate": 9.520023278822614e-06, "loss": 0.6144, "step": 2578 }, { "epoch": 0.17, "grad_norm": 1.252786636352539, "learning_rate": 9.519576283829149e-06, "loss": 0.5888, "step": 2579 }, { "epoch": 0.17, "grad_norm": 1.218207836151123, "learning_rate": 9.519129091296665e-06, "loss": 0.5705, "step": 2580 }, { "epoch": 0.17, "grad_norm": 1.1556298732757568, "learning_rate": 9.518681701244708e-06, "loss": 0.5635, "step": 2581 }, { "epoch": 0.17, "grad_norm": 1.3035030364990234, "learning_rate": 9.518234113692832e-06, "loss": 0.5784, "step": 2582 }, { "epoch": 0.17, "grad_norm": 1.2182934284210205, "learning_rate": 9.5177863286606e-06, "loss": 0.6026, "step": 2583 }, { "epoch": 0.17, "grad_norm": 1.3267921209335327, "learning_rate": 9.517338346167583e-06, "loss": 0.5906, "step": 2584 }, { "epoch": 0.17, "grad_norm": 1.287968397140503, "learning_rate": 9.516890166233363e-06, "loss": 0.5376, "step": 2585 }, { "epoch": 0.17, "grad_norm": 1.2724294662475586, "learning_rate": 9.516441788877528e-06, "loss": 0.5898, "step": 2586 }, { "epoch": 0.17, "grad_norm": 1.1332453489303589, "learning_rate": 9.515993214119674e-06, "loss": 0.5634, "step": 2587 }, { "epoch": 0.17, "grad_norm": 1.2103838920593262, "learning_rate": 9.515544441979408e-06, "loss": 0.6067, "step": 2588 }, { "epoch": 0.17, "grad_norm": 1.191411018371582, "learning_rate": 9.515095472476346e-06, "loss": 0.5808, "step": 2589 }, { "epoch": 0.17, "grad_norm": 1.1705389022827148, "learning_rate": 9.514646305630109e-06, "loss": 0.5975, "step": 2590 }, { "epoch": 0.17, "grad_norm": 1.4450870752334595, "learning_rate": 9.514196941460328e-06, "loss": 0.6626, "step": 2591 }, { "epoch": 0.17, "grad_norm": 1.3833931684494019, "learning_rate": 9.513747379986648e-06, "loss": 0.6168, "step": 2592 }, { "epoch": 0.17, "grad_norm": 1.2048050165176392, "learning_rate": 9.513297621228715e-06, "loss": 0.5996, "step": 2593 }, { "epoch": 0.17, "grad_norm": 1.3787448406219482, "learning_rate": 9.512847665206187e-06, "loss": 0.6015, "step": 2594 }, { "epoch": 0.17, "grad_norm": 1.098470687866211, "learning_rate": 9.512397511938732e-06, "loss": 0.6185, "step": 2595 }, { "epoch": 0.17, "grad_norm": 1.0388381481170654, "learning_rate": 9.511947161446023e-06, "loss": 0.5372, "step": 2596 }, { "epoch": 0.17, "grad_norm": 1.1598339080810547, "learning_rate": 9.511496613747744e-06, "loss": 0.5588, "step": 2597 }, { "epoch": 0.17, "grad_norm": 1.19161057472229, "learning_rate": 9.511045868863589e-06, "loss": 0.6291, "step": 2598 }, { "epoch": 0.17, "grad_norm": 1.1932748556137085, "learning_rate": 9.510594926813256e-06, "loss": 0.6063, "step": 2599 }, { "epoch": 0.17, "grad_norm": 1.1906355619430542, "learning_rate": 9.510143787616457e-06, "loss": 0.5465, "step": 2600 }, { "epoch": 0.17, "grad_norm": 1.1515790224075317, "learning_rate": 9.509692451292911e-06, "loss": 0.5482, "step": 2601 }, { "epoch": 0.17, "grad_norm": 1.200319766998291, "learning_rate": 9.509240917862342e-06, "loss": 0.5616, "step": 2602 }, { "epoch": 0.17, "grad_norm": 1.2267327308654785, "learning_rate": 9.508789187344487e-06, "loss": 0.566, "step": 2603 }, { "epoch": 0.17, "grad_norm": 1.1952672004699707, "learning_rate": 9.508337259759089e-06, "loss": 0.5857, "step": 2604 }, { "epoch": 0.17, "grad_norm": 1.5102087259292603, "learning_rate": 9.5078851351259e-06, "loss": 0.6354, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.2865387201309204, "learning_rate": 9.507432813464683e-06, "loss": 0.6216, "step": 2606 }, { "epoch": 0.17, "grad_norm": 1.0875012874603271, "learning_rate": 9.506980294795207e-06, "loss": 0.5502, "step": 2607 }, { "epoch": 0.17, "grad_norm": 1.1343448162078857, "learning_rate": 9.506527579137251e-06, "loss": 0.5856, "step": 2608 }, { "epoch": 0.17, "grad_norm": 1.0917409658432007, "learning_rate": 9.506074666510601e-06, "loss": 0.581, "step": 2609 }, { "epoch": 0.17, "grad_norm": 1.3169499635696411, "learning_rate": 9.505621556935054e-06, "loss": 0.623, "step": 2610 }, { "epoch": 0.17, "grad_norm": 1.2079845666885376, "learning_rate": 9.505168250430413e-06, "loss": 0.5854, "step": 2611 }, { "epoch": 0.17, "grad_norm": 1.0869313478469849, "learning_rate": 9.50471474701649e-06, "loss": 0.5737, "step": 2612 }, { "epoch": 0.17, "grad_norm": 1.1895477771759033, "learning_rate": 9.50426104671311e-06, "loss": 0.5835, "step": 2613 }, { "epoch": 0.17, "grad_norm": 1.1035124063491821, "learning_rate": 9.503807149540099e-06, "loss": 0.5412, "step": 2614 }, { "epoch": 0.17, "grad_norm": 1.2044121026992798, "learning_rate": 9.503353055517298e-06, "loss": 0.5545, "step": 2615 }, { "epoch": 0.17, "grad_norm": 1.2886959314346313, "learning_rate": 9.502898764664554e-06, "loss": 0.5934, "step": 2616 }, { "epoch": 0.17, "grad_norm": 1.1154125928878784, "learning_rate": 9.502444277001721e-06, "loss": 0.5907, "step": 2617 }, { "epoch": 0.17, "grad_norm": 1.1298993825912476, "learning_rate": 9.501989592548667e-06, "loss": 0.5511, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.1265640258789062, "learning_rate": 9.501534711325264e-06, "loss": 0.6129, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.1314294338226318, "learning_rate": 9.50107963335139e-06, "loss": 0.5841, "step": 2620 }, { "epoch": 0.17, "grad_norm": 1.1604255437850952, "learning_rate": 9.50062435864694e-06, "loss": 0.5922, "step": 2621 }, { "epoch": 0.17, "grad_norm": 1.0979526042938232, "learning_rate": 9.500168887231811e-06, "loss": 0.5843, "step": 2622 }, { "epoch": 0.17, "grad_norm": 1.2290834188461304, "learning_rate": 9.49971321912591e-06, "loss": 0.5726, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.1622973680496216, "learning_rate": 9.499257354349154e-06, "loss": 0.5886, "step": 2624 }, { "epoch": 0.17, "grad_norm": 1.249164342880249, "learning_rate": 9.498801292921468e-06, "loss": 0.5865, "step": 2625 }, { "epoch": 0.17, "grad_norm": 1.1223578453063965, "learning_rate": 9.498345034862783e-06, "loss": 0.5845, "step": 2626 }, { "epoch": 0.17, "grad_norm": 1.1762012243270874, "learning_rate": 9.497888580193044e-06, "loss": 0.5293, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.3448225259780884, "learning_rate": 9.497431928932198e-06, "loss": 0.5942, "step": 2628 }, { "epoch": 0.17, "grad_norm": 1.4903903007507324, "learning_rate": 9.496975081100209e-06, "loss": 0.5434, "step": 2629 }, { "epoch": 0.17, "grad_norm": 1.166146993637085, "learning_rate": 9.496518036717039e-06, "loss": 0.5225, "step": 2630 }, { "epoch": 0.17, "grad_norm": 1.0657511949539185, "learning_rate": 9.496060795802666e-06, "loss": 0.5867, "step": 2631 }, { "epoch": 0.17, "grad_norm": 1.1992499828338623, "learning_rate": 9.495603358377076e-06, "loss": 0.584, "step": 2632 }, { "epoch": 0.17, "grad_norm": 1.2528856992721558, "learning_rate": 9.495145724460264e-06, "loss": 0.6186, "step": 2633 }, { "epoch": 0.17, "grad_norm": 1.1239551305770874, "learning_rate": 9.49468789407223e-06, "loss": 0.5749, "step": 2634 }, { "epoch": 0.17, "grad_norm": 1.1624659299850464, "learning_rate": 9.494229867232983e-06, "loss": 0.578, "step": 2635 }, { "epoch": 0.17, "grad_norm": 1.5341432094573975, "learning_rate": 9.493771643962544e-06, "loss": 0.5862, "step": 2636 }, { "epoch": 0.17, "grad_norm": 1.3715137243270874, "learning_rate": 9.493313224280941e-06, "loss": 0.5837, "step": 2637 }, { "epoch": 0.17, "grad_norm": 1.212304711341858, "learning_rate": 9.49285460820821e-06, "loss": 0.5441, "step": 2638 }, { "epoch": 0.17, "grad_norm": 1.1002659797668457, "learning_rate": 9.492395795764395e-06, "loss": 0.5648, "step": 2639 }, { "epoch": 0.17, "grad_norm": 1.1910524368286133, "learning_rate": 9.491936786969551e-06, "loss": 0.5689, "step": 2640 }, { "epoch": 0.17, "grad_norm": 1.1938706636428833, "learning_rate": 9.49147758184374e-06, "loss": 0.541, "step": 2641 }, { "epoch": 0.17, "grad_norm": 1.3202756643295288, "learning_rate": 9.49101818040703e-06, "loss": 0.563, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.3568052053451538, "learning_rate": 9.490558582679504e-06, "loss": 0.5889, "step": 2643 }, { "epoch": 0.17, "grad_norm": 1.3219631910324097, "learning_rate": 9.490098788681248e-06, "loss": 0.6498, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.153999924659729, "learning_rate": 9.489638798432359e-06, "loss": 0.543, "step": 2645 }, { "epoch": 0.17, "grad_norm": 1.1970717906951904, "learning_rate": 9.489178611952941e-06, "loss": 0.5547, "step": 2646 }, { "epoch": 0.17, "grad_norm": 1.1156574487686157, "learning_rate": 9.488718229263108e-06, "loss": 0.5569, "step": 2647 }, { "epoch": 0.17, "grad_norm": 1.239400029182434, "learning_rate": 9.488257650382982e-06, "loss": 0.576, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.2509996891021729, "learning_rate": 9.487796875332694e-06, "loss": 0.5971, "step": 2649 }, { "epoch": 0.17, "grad_norm": 1.2698935270309448, "learning_rate": 9.487335904132382e-06, "loss": 0.6112, "step": 2650 }, { "epoch": 0.17, "grad_norm": 1.1949275732040405, "learning_rate": 9.486874736802198e-06, "loss": 0.527, "step": 2651 }, { "epoch": 0.17, "grad_norm": 1.2495685815811157, "learning_rate": 9.486413373362293e-06, "loss": 0.5392, "step": 2652 }, { "epoch": 0.17, "grad_norm": 1.0476047992706299, "learning_rate": 9.485951813832837e-06, "loss": 0.5821, "step": 2653 }, { "epoch": 0.17, "grad_norm": 1.1095595359802246, "learning_rate": 9.485490058234e-06, "loss": 0.5817, "step": 2654 }, { "epoch": 0.17, "grad_norm": 1.1426481008529663, "learning_rate": 9.485028106585962e-06, "loss": 0.5672, "step": 2655 }, { "epoch": 0.17, "grad_norm": 1.1074433326721191, "learning_rate": 9.484565958908922e-06, "loss": 0.5556, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.24777352809906, "learning_rate": 9.484103615223073e-06, "loss": 0.6236, "step": 2657 }, { "epoch": 0.17, "grad_norm": 1.210693359375, "learning_rate": 9.483641075548623e-06, "loss": 0.5578, "step": 2658 }, { "epoch": 0.17, "grad_norm": 1.1600110530853271, "learning_rate": 9.48317833990579e-06, "loss": 0.5697, "step": 2659 }, { "epoch": 0.17, "grad_norm": 1.1327612400054932, "learning_rate": 9.482715408314798e-06, "loss": 0.5532, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.1619102954864502, "learning_rate": 9.48225228079588e-06, "loss": 0.569, "step": 2661 }, { "epoch": 0.17, "grad_norm": 1.1871973276138306, "learning_rate": 9.481788957369282e-06, "loss": 0.5745, "step": 2662 }, { "epoch": 0.17, "grad_norm": 1.184667944908142, "learning_rate": 9.48132543805525e-06, "loss": 0.5586, "step": 2663 }, { "epoch": 0.17, "grad_norm": 1.2991572618484497, "learning_rate": 9.480861722874046e-06, "loss": 0.6182, "step": 2664 }, { "epoch": 0.17, "grad_norm": 1.2716294527053833, "learning_rate": 9.480397811845937e-06, "loss": 0.6416, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.1946678161621094, "learning_rate": 9.479933704991198e-06, "loss": 0.6042, "step": 2666 }, { "epoch": 0.17, "grad_norm": 1.1701993942260742, "learning_rate": 9.479469402330116e-06, "loss": 0.5334, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.1790430545806885, "learning_rate": 9.479004903882983e-06, "loss": 0.5687, "step": 2668 }, { "epoch": 0.17, "grad_norm": 1.1515802145004272, "learning_rate": 9.478540209670103e-06, "loss": 0.5946, "step": 2669 }, { "epoch": 0.17, "grad_norm": 1.1485549211502075, "learning_rate": 9.478075319711783e-06, "loss": 0.5997, "step": 2670 }, { "epoch": 0.17, "grad_norm": 1.2641661167144775, "learning_rate": 9.477610234028345e-06, "loss": 0.6467, "step": 2671 }, { "epoch": 0.17, "grad_norm": 1.2315998077392578, "learning_rate": 9.477144952640119e-06, "loss": 0.5884, "step": 2672 }, { "epoch": 0.17, "grad_norm": 1.2709180116653442, "learning_rate": 9.476679475567435e-06, "loss": 0.6327, "step": 2673 }, { "epoch": 0.17, "grad_norm": 1.310104250907898, "learning_rate": 9.476213802830643e-06, "loss": 0.5467, "step": 2674 }, { "epoch": 0.17, "grad_norm": 1.1905173063278198, "learning_rate": 9.475747934450093e-06, "loss": 0.5459, "step": 2675 }, { "epoch": 0.17, "grad_norm": 1.2256768941879272, "learning_rate": 9.47528187044615e-06, "loss": 0.5693, "step": 2676 }, { "epoch": 0.17, "grad_norm": 1.2138664722442627, "learning_rate": 9.474815610839181e-06, "loss": 0.541, "step": 2677 }, { "epoch": 0.17, "grad_norm": 1.1218913793563843, "learning_rate": 9.474349155649567e-06, "loss": 0.5881, "step": 2678 }, { "epoch": 0.17, "grad_norm": 1.3002818822860718, "learning_rate": 9.473882504897695e-06, "loss": 0.5535, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.1741089820861816, "learning_rate": 9.473415658603964e-06, "loss": 0.5248, "step": 2680 }, { "epoch": 0.17, "grad_norm": 1.0695812702178955, "learning_rate": 9.472948616788774e-06, "loss": 0.5341, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.2945890426635742, "learning_rate": 9.472481379472542e-06, "loss": 0.5864, "step": 2682 }, { "epoch": 0.17, "grad_norm": 1.0914008617401123, "learning_rate": 9.472013946675686e-06, "loss": 0.5632, "step": 2683 }, { "epoch": 0.17, "grad_norm": 1.0893633365631104, "learning_rate": 9.471546318418638e-06, "loss": 0.5593, "step": 2684 }, { "epoch": 0.17, "grad_norm": 1.240759253501892, "learning_rate": 9.471078494721839e-06, "loss": 0.5519, "step": 2685 }, { "epoch": 0.17, "grad_norm": 1.3396077156066895, "learning_rate": 9.470610475605735e-06, "loss": 0.5725, "step": 2686 }, { "epoch": 0.17, "grad_norm": 1.15183424949646, "learning_rate": 9.47014226109078e-06, "loss": 0.5132, "step": 2687 }, { "epoch": 0.17, "grad_norm": 1.2507182359695435, "learning_rate": 9.46967385119744e-06, "loss": 0.5926, "step": 2688 }, { "epoch": 0.17, "grad_norm": 1.1482182741165161, "learning_rate": 9.469205245946189e-06, "loss": 0.5975, "step": 2689 }, { "epoch": 0.17, "grad_norm": 1.2644466161727905, "learning_rate": 9.468736445357508e-06, "loss": 0.5653, "step": 2690 }, { "epoch": 0.17, "grad_norm": 1.1158133745193481, "learning_rate": 9.468267449451884e-06, "loss": 0.5647, "step": 2691 }, { "epoch": 0.17, "grad_norm": 1.1978696584701538, "learning_rate": 9.46779825824982e-06, "loss": 0.5886, "step": 2692 }, { "epoch": 0.17, "grad_norm": 1.1782023906707764, "learning_rate": 9.46732887177182e-06, "loss": 0.59, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.1475554704666138, "learning_rate": 9.4668592900384e-06, "loss": 0.5482, "step": 2694 }, { "epoch": 0.17, "grad_norm": 1.1675236225128174, "learning_rate": 9.466389513070087e-06, "loss": 0.5881, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.140379786491394, "learning_rate": 9.465919540887412e-06, "loss": 0.5612, "step": 2696 }, { "epoch": 0.17, "grad_norm": 1.1408241987228394, "learning_rate": 9.465449373510914e-06, "loss": 0.5428, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.2039185762405396, "learning_rate": 9.464979010961146e-06, "loss": 0.5921, "step": 2698 }, { "epoch": 0.17, "grad_norm": 1.2679848670959473, "learning_rate": 9.464508453258666e-06, "loss": 0.5857, "step": 2699 }, { "epoch": 0.17, "grad_norm": 1.1255581378936768, "learning_rate": 9.464037700424039e-06, "loss": 0.5719, "step": 2700 }, { "epoch": 0.17, "grad_norm": 1.1759068965911865, "learning_rate": 9.463566752477842e-06, "loss": 0.6264, "step": 2701 }, { "epoch": 0.17, "grad_norm": 1.2654836177825928, "learning_rate": 9.463095609440658e-06, "loss": 0.5893, "step": 2702 }, { "epoch": 0.17, "grad_norm": 1.162479281425476, "learning_rate": 9.46262427133308e-06, "loss": 0.5555, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.1726983785629272, "learning_rate": 9.46215273817571e-06, "loss": 0.5764, "step": 2704 }, { "epoch": 0.17, "grad_norm": 1.5929243564605713, "learning_rate": 9.461681009989154e-06, "loss": 0.579, "step": 2705 }, { "epoch": 0.17, "grad_norm": 1.078702688217163, "learning_rate": 9.461209086794034e-06, "loss": 0.5663, "step": 2706 }, { "epoch": 0.17, "grad_norm": 1.2228714227676392, "learning_rate": 9.460736968610975e-06, "loss": 0.6394, "step": 2707 }, { "epoch": 0.17, "grad_norm": 1.1938327550888062, "learning_rate": 9.460264655460612e-06, "loss": 0.6036, "step": 2708 }, { "epoch": 0.17, "grad_norm": 1.2131589651107788, "learning_rate": 9.459792147363587e-06, "loss": 0.581, "step": 2709 }, { "epoch": 0.17, "grad_norm": 1.2034437656402588, "learning_rate": 9.459319444340556e-06, "loss": 0.5536, "step": 2710 }, { "epoch": 0.17, "grad_norm": 1.1392070055007935, "learning_rate": 9.458846546412176e-06, "loss": 0.5307, "step": 2711 }, { "epoch": 0.18, "grad_norm": 1.2768619060516357, "learning_rate": 9.458373453599117e-06, "loss": 0.6171, "step": 2712 }, { "epoch": 0.18, "grad_norm": 1.3124780654907227, "learning_rate": 9.457900165922058e-06, "loss": 0.5716, "step": 2713 }, { "epoch": 0.18, "grad_norm": 1.2576943635940552, "learning_rate": 9.457426683401684e-06, "loss": 0.5716, "step": 2714 }, { "epoch": 0.18, "grad_norm": 1.2021201848983765, "learning_rate": 9.456953006058689e-06, "loss": 0.6095, "step": 2715 }, { "epoch": 0.18, "grad_norm": 1.199843406677246, "learning_rate": 9.456479133913779e-06, "loss": 0.631, "step": 2716 }, { "epoch": 0.18, "grad_norm": 1.2181880474090576, "learning_rate": 9.456005066987663e-06, "loss": 0.5138, "step": 2717 }, { "epoch": 0.18, "grad_norm": 1.3215869665145874, "learning_rate": 9.45553080530106e-06, "loss": 0.5965, "step": 2718 }, { "epoch": 0.18, "grad_norm": 1.2311134338378906, "learning_rate": 9.455056348874705e-06, "loss": 0.5535, "step": 2719 }, { "epoch": 0.18, "grad_norm": 1.1771721839904785, "learning_rate": 9.454581697729328e-06, "loss": 0.5632, "step": 2720 }, { "epoch": 0.18, "grad_norm": 1.2477269172668457, "learning_rate": 9.45410685188568e-06, "loss": 0.5631, "step": 2721 }, { "epoch": 0.18, "grad_norm": 1.1553986072540283, "learning_rate": 9.453631811364512e-06, "loss": 0.5016, "step": 2722 }, { "epoch": 0.18, "grad_norm": 1.1619908809661865, "learning_rate": 9.453156576186589e-06, "loss": 0.5917, "step": 2723 }, { "epoch": 0.18, "grad_norm": 1.2018332481384277, "learning_rate": 9.45268114637268e-06, "loss": 0.6516, "step": 2724 }, { "epoch": 0.18, "grad_norm": 1.2420735359191895, "learning_rate": 9.452205521943568e-06, "loss": 0.5702, "step": 2725 }, { "epoch": 0.18, "grad_norm": 1.0900835990905762, "learning_rate": 9.451729702920038e-06, "loss": 0.631, "step": 2726 }, { "epoch": 0.18, "grad_norm": 1.188190221786499, "learning_rate": 9.451253689322889e-06, "loss": 0.4918, "step": 2727 }, { "epoch": 0.18, "grad_norm": 1.162914514541626, "learning_rate": 9.450777481172925e-06, "loss": 0.5752, "step": 2728 }, { "epoch": 0.18, "grad_norm": 1.2556612491607666, "learning_rate": 9.450301078490961e-06, "loss": 0.5961, "step": 2729 }, { "epoch": 0.18, "grad_norm": 1.2612981796264648, "learning_rate": 9.449824481297818e-06, "loss": 0.594, "step": 2730 }, { "epoch": 0.18, "grad_norm": 1.1960320472717285, "learning_rate": 9.44934768961433e-06, "loss": 0.576, "step": 2731 }, { "epoch": 0.18, "grad_norm": 1.2497767210006714, "learning_rate": 9.448870703461332e-06, "loss": 0.6038, "step": 2732 }, { "epoch": 0.18, "grad_norm": 1.0192683935165405, "learning_rate": 9.448393522859674e-06, "loss": 0.5614, "step": 2733 }, { "epoch": 0.18, "grad_norm": 1.2100322246551514, "learning_rate": 9.447916147830211e-06, "loss": 0.5548, "step": 2734 }, { "epoch": 0.18, "grad_norm": 1.1710808277130127, "learning_rate": 9.44743857839381e-06, "loss": 0.6161, "step": 2735 }, { "epoch": 0.18, "grad_norm": 1.189526915550232, "learning_rate": 9.446960814571345e-06, "loss": 0.5503, "step": 2736 }, { "epoch": 0.18, "grad_norm": 1.1572608947753906, "learning_rate": 9.446482856383694e-06, "loss": 0.49, "step": 2737 }, { "epoch": 0.18, "grad_norm": 1.340907096862793, "learning_rate": 9.44600470385175e-06, "loss": 0.62, "step": 2738 }, { "epoch": 0.18, "grad_norm": 1.4251705408096313, "learning_rate": 9.44552635699641e-06, "loss": 0.5632, "step": 2739 }, { "epoch": 0.18, "grad_norm": 1.2287760972976685, "learning_rate": 9.445047815838584e-06, "loss": 0.642, "step": 2740 }, { "epoch": 0.18, "grad_norm": 1.245848536491394, "learning_rate": 9.444569080399186e-06, "loss": 0.5565, "step": 2741 }, { "epoch": 0.18, "grad_norm": 1.196528673171997, "learning_rate": 9.444090150699141e-06, "loss": 0.5574, "step": 2742 }, { "epoch": 0.18, "grad_norm": 1.1877288818359375, "learning_rate": 9.443611026759382e-06, "loss": 0.6327, "step": 2743 }, { "epoch": 0.18, "grad_norm": 1.135682225227356, "learning_rate": 9.443131708600848e-06, "loss": 0.5535, "step": 2744 }, { "epoch": 0.18, "grad_norm": 1.2959237098693848, "learning_rate": 9.442652196244492e-06, "loss": 0.6297, "step": 2745 }, { "epoch": 0.18, "grad_norm": 1.1451187133789062, "learning_rate": 9.44217248971127e-06, "loss": 0.5894, "step": 2746 }, { "epoch": 0.18, "grad_norm": 1.2292540073394775, "learning_rate": 9.441692589022149e-06, "loss": 0.5978, "step": 2747 }, { "epoch": 0.18, "grad_norm": 1.1195772886276245, "learning_rate": 9.441212494198105e-06, "loss": 0.5838, "step": 2748 }, { "epoch": 0.18, "grad_norm": 1.0957353115081787, "learning_rate": 9.440732205260122e-06, "loss": 0.5878, "step": 2749 }, { "epoch": 0.18, "grad_norm": 1.1966701745986938, "learning_rate": 9.44025172222919e-06, "loss": 0.6414, "step": 2750 }, { "epoch": 0.18, "grad_norm": 1.109217882156372, "learning_rate": 9.439771045126313e-06, "loss": 0.5442, "step": 2751 }, { "epoch": 0.18, "grad_norm": 1.1428214311599731, "learning_rate": 9.439290173972497e-06, "loss": 0.5735, "step": 2752 }, { "epoch": 0.18, "grad_norm": 1.1126614809036255, "learning_rate": 9.438809108788763e-06, "loss": 0.5854, "step": 2753 }, { "epoch": 0.18, "grad_norm": 1.14987051486969, "learning_rate": 9.438327849596133e-06, "loss": 0.6296, "step": 2754 }, { "epoch": 0.18, "grad_norm": 1.1793761253356934, "learning_rate": 9.437846396415645e-06, "loss": 0.5731, "step": 2755 }, { "epoch": 0.18, "grad_norm": 1.3440132141113281, "learning_rate": 9.437364749268339e-06, "loss": 0.6138, "step": 2756 }, { "epoch": 0.18, "grad_norm": 1.185097575187683, "learning_rate": 9.43688290817527e-06, "loss": 0.5848, "step": 2757 }, { "epoch": 0.18, "grad_norm": 1.19084894657135, "learning_rate": 9.436400873157497e-06, "loss": 0.5763, "step": 2758 }, { "epoch": 0.18, "grad_norm": 1.213240146636963, "learning_rate": 9.435918644236087e-06, "loss": 0.6288, "step": 2759 }, { "epoch": 0.18, "grad_norm": 1.3685234785079956, "learning_rate": 9.435436221432117e-06, "loss": 0.579, "step": 2760 }, { "epoch": 0.18, "grad_norm": 1.2178014516830444, "learning_rate": 9.434953604766674e-06, "loss": 0.551, "step": 2761 }, { "epoch": 0.18, "grad_norm": 1.2747143507003784, "learning_rate": 9.43447079426085e-06, "loss": 0.5515, "step": 2762 }, { "epoch": 0.18, "grad_norm": 1.1970715522766113, "learning_rate": 9.433987789935751e-06, "loss": 0.574, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.1485799551010132, "learning_rate": 9.433504591812484e-06, "loss": 0.6265, "step": 2764 }, { "epoch": 0.18, "grad_norm": 1.2030504941940308, "learning_rate": 9.433021199912171e-06, "loss": 0.5967, "step": 2765 }, { "epoch": 0.18, "grad_norm": 1.3622645139694214, "learning_rate": 9.43253761425594e-06, "loss": 0.6107, "step": 2766 }, { "epoch": 0.18, "grad_norm": 1.3222451210021973, "learning_rate": 9.432053834864923e-06, "loss": 0.5876, "step": 2767 }, { "epoch": 0.18, "grad_norm": 1.1943845748901367, "learning_rate": 9.431569861760268e-06, "loss": 0.6547, "step": 2768 }, { "epoch": 0.18, "grad_norm": 1.196097493171692, "learning_rate": 9.43108569496313e-06, "loss": 0.5596, "step": 2769 }, { "epoch": 0.18, "grad_norm": 1.1524646282196045, "learning_rate": 9.430601334494666e-06, "loss": 0.5836, "step": 2770 }, { "epoch": 0.18, "grad_norm": 1.1589257717132568, "learning_rate": 9.43011678037605e-06, "loss": 0.5694, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.1357413530349731, "learning_rate": 9.429632032628462e-06, "loss": 0.551, "step": 2772 }, { "epoch": 0.18, "grad_norm": 1.1627849340438843, "learning_rate": 9.429147091273083e-06, "loss": 0.5796, "step": 2773 }, { "epoch": 0.18, "grad_norm": 1.110267162322998, "learning_rate": 9.428661956331112e-06, "loss": 0.5252, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.1115282773971558, "learning_rate": 9.428176627823755e-06, "loss": 0.527, "step": 2775 }, { "epoch": 0.18, "grad_norm": 1.2917879819869995, "learning_rate": 9.427691105772223e-06, "loss": 0.6156, "step": 2776 }, { "epoch": 0.18, "grad_norm": 1.3258658647537231, "learning_rate": 9.427205390197733e-06, "loss": 0.5798, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.1071501970291138, "learning_rate": 9.42671948112152e-06, "loss": 0.5249, "step": 2778 }, { "epoch": 0.18, "grad_norm": 1.3028857707977295, "learning_rate": 9.42623337856482e-06, "loss": 0.599, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.1291759014129639, "learning_rate": 9.425747082548877e-06, "loss": 0.5616, "step": 2780 }, { "epoch": 0.18, "grad_norm": 1.2540651559829712, "learning_rate": 9.42526059309495e-06, "loss": 0.5154, "step": 2781 }, { "epoch": 0.18, "grad_norm": 1.1140118837356567, "learning_rate": 9.424773910224298e-06, "loss": 0.5733, "step": 2782 }, { "epoch": 0.18, "grad_norm": 1.0975762605667114, "learning_rate": 9.424287033958195e-06, "loss": 0.5175, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.0847573280334473, "learning_rate": 9.42379996431792e-06, "loss": 0.5372, "step": 2784 }, { "epoch": 0.18, "grad_norm": 1.1600698232650757, "learning_rate": 9.423312701324762e-06, "loss": 0.6044, "step": 2785 }, { "epoch": 0.18, "grad_norm": 1.1670947074890137, "learning_rate": 9.422825245000018e-06, "loss": 0.5371, "step": 2786 }, { "epoch": 0.18, "grad_norm": 1.0987075567245483, "learning_rate": 9.422337595364995e-06, "loss": 0.5448, "step": 2787 }, { "epoch": 0.18, "grad_norm": 1.1839911937713623, "learning_rate": 9.421849752441006e-06, "loss": 0.6046, "step": 2788 }, { "epoch": 0.18, "grad_norm": 1.217233657836914, "learning_rate": 9.42136171624937e-06, "loss": 0.5476, "step": 2789 }, { "epoch": 0.18, "grad_norm": 1.1722315549850464, "learning_rate": 9.420873486811423e-06, "loss": 0.623, "step": 2790 }, { "epoch": 0.18, "grad_norm": 1.1580121517181396, "learning_rate": 9.420385064148501e-06, "loss": 0.5909, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.2243283987045288, "learning_rate": 9.419896448281954e-06, "loss": 0.5365, "step": 2792 }, { "epoch": 0.18, "grad_norm": 1.0989700555801392, "learning_rate": 9.419407639233135e-06, "loss": 0.5549, "step": 2793 }, { "epoch": 0.18, "grad_norm": 1.2920513153076172, "learning_rate": 9.418918637023411e-06, "loss": 0.575, "step": 2794 }, { "epoch": 0.18, "grad_norm": 1.1071674823760986, "learning_rate": 9.418429441674153e-06, "loss": 0.5592, "step": 2795 }, { "epoch": 0.18, "grad_norm": 1.1213735342025757, "learning_rate": 9.417940053206745e-06, "loss": 0.5384, "step": 2796 }, { "epoch": 0.18, "grad_norm": 1.203922152519226, "learning_rate": 9.417450471642576e-06, "loss": 0.5555, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.0976123809814453, "learning_rate": 9.416960697003043e-06, "loss": 0.5749, "step": 2798 }, { "epoch": 0.18, "grad_norm": 1.2168471813201904, "learning_rate": 9.416470729309555e-06, "loss": 0.5872, "step": 2799 }, { "epoch": 0.18, "grad_norm": 1.2129122018814087, "learning_rate": 9.415980568583525e-06, "loss": 0.6321, "step": 2800 }, { "epoch": 0.18, "grad_norm": 1.20623779296875, "learning_rate": 9.41549021484638e-06, "loss": 0.5785, "step": 2801 }, { "epoch": 0.18, "grad_norm": 1.1509629487991333, "learning_rate": 9.414999668119547e-06, "loss": 0.5839, "step": 2802 }, { "epoch": 0.18, "grad_norm": 1.2330867052078247, "learning_rate": 9.41450892842447e-06, "loss": 0.6011, "step": 2803 }, { "epoch": 0.18, "grad_norm": 1.3007259368896484, "learning_rate": 9.4140179957826e-06, "loss": 0.467, "step": 2804 }, { "epoch": 0.18, "grad_norm": 1.0944145917892456, "learning_rate": 9.413526870215389e-06, "loss": 0.514, "step": 2805 }, { "epoch": 0.18, "grad_norm": 1.3214709758758545, "learning_rate": 9.413035551744306e-06, "loss": 0.5788, "step": 2806 }, { "epoch": 0.18, "grad_norm": 1.1727725267410278, "learning_rate": 9.412544040390826e-06, "loss": 0.5426, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.2332642078399658, "learning_rate": 9.412052336176428e-06, "loss": 0.5726, "step": 2808 }, { "epoch": 0.18, "grad_norm": 1.149723768234253, "learning_rate": 9.411560439122607e-06, "loss": 0.5373, "step": 2809 }, { "epoch": 0.18, "grad_norm": 1.2076784372329712, "learning_rate": 9.411068349250862e-06, "loss": 0.5574, "step": 2810 }, { "epoch": 0.18, "grad_norm": 1.2135019302368164, "learning_rate": 9.410576066582699e-06, "loss": 0.5367, "step": 2811 }, { "epoch": 0.18, "grad_norm": 1.160821557044983, "learning_rate": 9.410083591139635e-06, "loss": 0.6125, "step": 2812 }, { "epoch": 0.18, "grad_norm": 1.2226682901382446, "learning_rate": 9.409590922943196e-06, "loss": 0.5516, "step": 2813 }, { "epoch": 0.18, "grad_norm": 1.097424030303955, "learning_rate": 9.409098062014916e-06, "loss": 0.5768, "step": 2814 }, { "epoch": 0.18, "grad_norm": 1.2785365581512451, "learning_rate": 9.408605008376335e-06, "loss": 0.5673, "step": 2815 }, { "epoch": 0.18, "grad_norm": 1.2448275089263916, "learning_rate": 9.408111762049004e-06, "loss": 0.5918, "step": 2816 }, { "epoch": 0.18, "grad_norm": 1.2085723876953125, "learning_rate": 9.407618323054479e-06, "loss": 0.5947, "step": 2817 }, { "epoch": 0.18, "grad_norm": 1.1574203968048096, "learning_rate": 9.407124691414329e-06, "loss": 0.6152, "step": 2818 }, { "epoch": 0.18, "grad_norm": 1.247474193572998, "learning_rate": 9.40663086715013e-06, "loss": 0.5901, "step": 2819 }, { "epoch": 0.18, "grad_norm": 1.1759177446365356, "learning_rate": 9.406136850283465e-06, "loss": 0.583, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.1762317419052124, "learning_rate": 9.405642640835926e-06, "loss": 0.5744, "step": 2821 }, { "epoch": 0.18, "grad_norm": 1.2425422668457031, "learning_rate": 9.405148238829115e-06, "loss": 0.653, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.107904076576233, "learning_rate": 9.404653644284639e-06, "loss": 0.5505, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.2631115913391113, "learning_rate": 9.404158857224116e-06, "loss": 0.512, "step": 2824 }, { "epoch": 0.18, "grad_norm": 1.3713531494140625, "learning_rate": 9.403663877669173e-06, "loss": 0.5458, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.2256793975830078, "learning_rate": 9.403168705641443e-06, "loss": 0.604, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.1356456279754639, "learning_rate": 9.402673341162569e-06, "loss": 0.6062, "step": 2827 }, { "epoch": 0.18, "grad_norm": 1.155469298362732, "learning_rate": 9.402177784254202e-06, "loss": 0.5477, "step": 2828 }, { "epoch": 0.18, "grad_norm": 1.1562204360961914, "learning_rate": 9.401682034938004e-06, "loss": 0.6061, "step": 2829 }, { "epoch": 0.18, "grad_norm": 1.3500123023986816, "learning_rate": 9.40118609323564e-06, "loss": 0.5847, "step": 2830 }, { "epoch": 0.18, "grad_norm": 1.316817283630371, "learning_rate": 9.400689959168784e-06, "loss": 0.603, "step": 2831 }, { "epoch": 0.18, "grad_norm": 1.2520564794540405, "learning_rate": 9.400193632759127e-06, "loss": 0.5638, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.236411690711975, "learning_rate": 9.399697114028359e-06, "loss": 0.5878, "step": 2833 }, { "epoch": 0.18, "grad_norm": 1.137420892715454, "learning_rate": 9.399200402998181e-06, "loss": 0.5741, "step": 2834 }, { "epoch": 0.18, "grad_norm": 1.2697514295578003, "learning_rate": 9.398703499690303e-06, "loss": 0.5698, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.149152159690857, "learning_rate": 9.398206404126445e-06, "loss": 0.6036, "step": 2836 }, { "epoch": 0.18, "grad_norm": 1.3826311826705933, "learning_rate": 9.397709116328331e-06, "loss": 0.6161, "step": 2837 }, { "epoch": 0.18, "grad_norm": 1.0539354085922241, "learning_rate": 9.397211636317698e-06, "loss": 0.521, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.1472941637039185, "learning_rate": 9.396713964116289e-06, "loss": 0.5985, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.2384448051452637, "learning_rate": 9.396216099745857e-06, "loss": 0.5641, "step": 2840 }, { "epoch": 0.18, "grad_norm": 1.2205621004104614, "learning_rate": 9.395718043228162e-06, "loss": 0.5696, "step": 2841 }, { "epoch": 0.18, "grad_norm": 1.1582468748092651, "learning_rate": 9.395219794584972e-06, "loss": 0.5745, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.0944122076034546, "learning_rate": 9.394721353838064e-06, "loss": 0.545, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.2447706460952759, "learning_rate": 9.394222721009225e-06, "loss": 0.5938, "step": 2844 }, { "epoch": 0.18, "grad_norm": 1.1702182292938232, "learning_rate": 9.393723896120249e-06, "loss": 0.5842, "step": 2845 }, { "epoch": 0.18, "grad_norm": 1.241391658782959, "learning_rate": 9.393224879192934e-06, "loss": 0.5659, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.2070845365524292, "learning_rate": 9.392725670249098e-06, "loss": 0.5782, "step": 2847 }, { "epoch": 0.18, "grad_norm": 1.1411547660827637, "learning_rate": 9.392226269310555e-06, "loss": 0.5301, "step": 2848 }, { "epoch": 0.18, "grad_norm": 1.3183263540267944, "learning_rate": 9.391726676399134e-06, "loss": 0.584, "step": 2849 }, { "epoch": 0.18, "grad_norm": 1.1551355123519897, "learning_rate": 9.39122689153667e-06, "loss": 0.5223, "step": 2850 }, { "epoch": 0.18, "grad_norm": 1.1311355829238892, "learning_rate": 9.39072691474501e-06, "loss": 0.5985, "step": 2851 }, { "epoch": 0.18, "grad_norm": 1.198630690574646, "learning_rate": 9.390226746046003e-06, "loss": 0.6217, "step": 2852 }, { "epoch": 0.18, "grad_norm": 1.2350906133651733, "learning_rate": 9.389726385461511e-06, "loss": 0.5491, "step": 2853 }, { "epoch": 0.18, "grad_norm": 1.2203447818756104, "learning_rate": 9.389225833013407e-06, "loss": 0.5834, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.1804877519607544, "learning_rate": 9.388725088723564e-06, "loss": 0.5732, "step": 2855 }, { "epoch": 0.18, "grad_norm": 1.1890244483947754, "learning_rate": 9.388224152613872e-06, "loss": 0.5504, "step": 2856 }, { "epoch": 0.18, "grad_norm": 1.0477262735366821, "learning_rate": 9.387723024706223e-06, "loss": 0.569, "step": 2857 }, { "epoch": 0.18, "grad_norm": 1.2671836614608765, "learning_rate": 9.38722170502252e-06, "loss": 0.6115, "step": 2858 }, { "epoch": 0.18, "grad_norm": 1.1620211601257324, "learning_rate": 9.386720193584678e-06, "loss": 0.5834, "step": 2859 }, { "epoch": 0.18, "grad_norm": 1.1124136447906494, "learning_rate": 9.386218490414613e-06, "loss": 0.5431, "step": 2860 }, { "epoch": 0.18, "grad_norm": 1.2678840160369873, "learning_rate": 9.385716595534255e-06, "loss": 0.5823, "step": 2861 }, { "epoch": 0.18, "grad_norm": 1.1195820569992065, "learning_rate": 9.38521450896554e-06, "loss": 0.6156, "step": 2862 }, { "epoch": 0.18, "grad_norm": 1.196832537651062, "learning_rate": 9.38471223073041e-06, "loss": 0.5976, "step": 2863 }, { "epoch": 0.18, "grad_norm": 1.0729109048843384, "learning_rate": 9.384209760850825e-06, "loss": 0.5547, "step": 2864 }, { "epoch": 0.18, "grad_norm": 1.3233648538589478, "learning_rate": 9.38370709934874e-06, "loss": 0.5797, "step": 2865 }, { "epoch": 0.18, "grad_norm": 1.1828008890151978, "learning_rate": 9.38320424624613e-06, "loss": 0.5904, "step": 2866 }, { "epoch": 0.19, "grad_norm": 1.1272587776184082, "learning_rate": 9.382701201564968e-06, "loss": 0.5865, "step": 2867 }, { "epoch": 0.19, "grad_norm": 1.1776225566864014, "learning_rate": 9.382197965327246e-06, "loss": 0.5936, "step": 2868 }, { "epoch": 0.19, "grad_norm": 1.2685368061065674, "learning_rate": 9.381694537554958e-06, "loss": 0.5984, "step": 2869 }, { "epoch": 0.19, "grad_norm": 1.3765861988067627, "learning_rate": 9.381190918270107e-06, "loss": 0.543, "step": 2870 }, { "epoch": 0.19, "grad_norm": 1.1642512083053589, "learning_rate": 9.380687107494703e-06, "loss": 0.5887, "step": 2871 }, { "epoch": 0.19, "grad_norm": 1.122883677482605, "learning_rate": 9.380183105250768e-06, "loss": 0.5728, "step": 2872 }, { "epoch": 0.19, "grad_norm": 1.178702473640442, "learning_rate": 9.37967891156033e-06, "loss": 0.644, "step": 2873 }, { "epoch": 0.19, "grad_norm": 1.2741936445236206, "learning_rate": 9.379174526445428e-06, "loss": 0.6171, "step": 2874 }, { "epoch": 0.19, "grad_norm": 1.1568049192428589, "learning_rate": 9.378669949928105e-06, "loss": 0.5908, "step": 2875 }, { "epoch": 0.19, "grad_norm": 1.1829923391342163, "learning_rate": 9.378165182030416e-06, "loss": 0.5743, "step": 2876 }, { "epoch": 0.19, "grad_norm": 1.156469464302063, "learning_rate": 9.377660222774423e-06, "loss": 0.5523, "step": 2877 }, { "epoch": 0.19, "grad_norm": 1.1698381900787354, "learning_rate": 9.377155072182195e-06, "loss": 0.6022, "step": 2878 }, { "epoch": 0.19, "grad_norm": 1.2272546291351318, "learning_rate": 9.376649730275813e-06, "loss": 0.6184, "step": 2879 }, { "epoch": 0.19, "grad_norm": 1.0850616693496704, "learning_rate": 9.376144197077364e-06, "loss": 0.539, "step": 2880 }, { "epoch": 0.19, "grad_norm": 1.176785945892334, "learning_rate": 9.37563847260894e-06, "loss": 0.6186, "step": 2881 }, { "epoch": 0.19, "grad_norm": 1.221956729888916, "learning_rate": 9.37513255689265e-06, "loss": 0.5485, "step": 2882 }, { "epoch": 0.19, "grad_norm": 1.1972384452819824, "learning_rate": 9.374626449950603e-06, "loss": 0.6153, "step": 2883 }, { "epoch": 0.19, "grad_norm": 1.22370183467865, "learning_rate": 9.374120151804922e-06, "loss": 0.5997, "step": 2884 }, { "epoch": 0.19, "grad_norm": 1.2997848987579346, "learning_rate": 9.373613662477733e-06, "loss": 0.6206, "step": 2885 }, { "epoch": 0.19, "grad_norm": 1.1134321689605713, "learning_rate": 9.373106981991174e-06, "loss": 0.5452, "step": 2886 }, { "epoch": 0.19, "grad_norm": 1.1048139333724976, "learning_rate": 9.372600110367394e-06, "loss": 0.5691, "step": 2887 }, { "epoch": 0.19, "grad_norm": 1.155002474784851, "learning_rate": 9.372093047628543e-06, "loss": 0.5524, "step": 2888 }, { "epoch": 0.19, "grad_norm": 1.0571845769882202, "learning_rate": 9.371585793796785e-06, "loss": 0.5655, "step": 2889 }, { "epoch": 0.19, "grad_norm": 1.0848690271377563, "learning_rate": 9.371078348894291e-06, "loss": 0.5756, "step": 2890 }, { "epoch": 0.19, "grad_norm": 1.218907356262207, "learning_rate": 9.370570712943242e-06, "loss": 0.5544, "step": 2891 }, { "epoch": 0.19, "grad_norm": 1.224337100982666, "learning_rate": 9.37006288596582e-06, "loss": 0.5595, "step": 2892 }, { "epoch": 0.19, "grad_norm": 1.2939366102218628, "learning_rate": 9.369554867984226e-06, "loss": 0.6452, "step": 2893 }, { "epoch": 0.19, "grad_norm": 1.0524688959121704, "learning_rate": 9.369046659020661e-06, "loss": 0.5233, "step": 2894 }, { "epoch": 0.19, "grad_norm": 1.1749889850616455, "learning_rate": 9.36853825909734e-06, "loss": 0.5577, "step": 2895 }, { "epoch": 0.19, "grad_norm": 1.277173638343811, "learning_rate": 9.368029668236483e-06, "loss": 0.5962, "step": 2896 }, { "epoch": 0.19, "grad_norm": 1.1980878114700317, "learning_rate": 9.367520886460318e-06, "loss": 0.5843, "step": 2897 }, { "epoch": 0.19, "grad_norm": 1.1643719673156738, "learning_rate": 9.367011913791084e-06, "loss": 0.5831, "step": 2898 }, { "epoch": 0.19, "grad_norm": 1.2895187139511108, "learning_rate": 9.366502750251027e-06, "loss": 0.5336, "step": 2899 }, { "epoch": 0.19, "grad_norm": 1.113747477531433, "learning_rate": 9.365993395862399e-06, "loss": 0.5742, "step": 2900 }, { "epoch": 0.19, "grad_norm": 1.1715010404586792, "learning_rate": 9.365483850647466e-06, "loss": 0.5558, "step": 2901 }, { "epoch": 0.19, "grad_norm": 1.249440312385559, "learning_rate": 9.364974114628494e-06, "loss": 0.572, "step": 2902 }, { "epoch": 0.19, "grad_norm": 1.2362877130508423, "learning_rate": 9.364464187827767e-06, "loss": 0.5973, "step": 2903 }, { "epoch": 0.19, "grad_norm": 1.1762300729751587, "learning_rate": 9.363954070267571e-06, "loss": 0.5377, "step": 2904 }, { "epoch": 0.19, "grad_norm": 1.2565759420394897, "learning_rate": 9.3634437619702e-06, "loss": 0.6274, "step": 2905 }, { "epoch": 0.19, "grad_norm": 1.2228507995605469, "learning_rate": 9.362933262957963e-06, "loss": 0.5594, "step": 2906 }, { "epoch": 0.19, "grad_norm": 1.2180100679397583, "learning_rate": 9.362422573253168e-06, "loss": 0.5698, "step": 2907 }, { "epoch": 0.19, "grad_norm": 1.1334160566329956, "learning_rate": 9.361911692878139e-06, "loss": 0.5594, "step": 2908 }, { "epoch": 0.19, "grad_norm": 1.2889267206192017, "learning_rate": 9.361400621855201e-06, "loss": 0.5807, "step": 2909 }, { "epoch": 0.19, "grad_norm": 1.304753303527832, "learning_rate": 9.360889360206695e-06, "loss": 0.5841, "step": 2910 }, { "epoch": 0.19, "grad_norm": 1.2329158782958984, "learning_rate": 9.360377907954967e-06, "loss": 0.5483, "step": 2911 }, { "epoch": 0.19, "grad_norm": 1.0986711978912354, "learning_rate": 9.359866265122372e-06, "loss": 0.512, "step": 2912 }, { "epoch": 0.19, "grad_norm": 1.1240533590316772, "learning_rate": 9.359354431731268e-06, "loss": 0.5659, "step": 2913 }, { "epoch": 0.19, "grad_norm": 1.1079602241516113, "learning_rate": 9.358842407804032e-06, "loss": 0.553, "step": 2914 }, { "epoch": 0.19, "grad_norm": 1.2468156814575195, "learning_rate": 9.35833019336304e-06, "loss": 0.6046, "step": 2915 }, { "epoch": 0.19, "grad_norm": 1.1682841777801514, "learning_rate": 9.357817788430678e-06, "loss": 0.5824, "step": 2916 }, { "epoch": 0.19, "grad_norm": 1.07853364944458, "learning_rate": 9.357305193029345e-06, "loss": 0.5951, "step": 2917 }, { "epoch": 0.19, "grad_norm": 1.0964773893356323, "learning_rate": 9.356792407181445e-06, "loss": 0.5506, "step": 2918 }, { "epoch": 0.19, "grad_norm": 1.2322684526443481, "learning_rate": 9.356279430909387e-06, "loss": 0.6236, "step": 2919 }, { "epoch": 0.19, "grad_norm": 1.2551063299179077, "learning_rate": 9.355766264235597e-06, "loss": 0.5885, "step": 2920 }, { "epoch": 0.19, "grad_norm": 1.2794201374053955, "learning_rate": 9.355252907182499e-06, "loss": 0.61, "step": 2921 }, { "epoch": 0.19, "grad_norm": 1.0687339305877686, "learning_rate": 9.354739359772535e-06, "loss": 0.546, "step": 2922 }, { "epoch": 0.19, "grad_norm": 1.1080917119979858, "learning_rate": 9.35422562202815e-06, "loss": 0.5156, "step": 2923 }, { "epoch": 0.19, "grad_norm": 1.1922023296356201, "learning_rate": 9.353711693971795e-06, "loss": 0.5354, "step": 2924 }, { "epoch": 0.19, "grad_norm": 1.1181724071502686, "learning_rate": 9.353197575625934e-06, "loss": 0.5519, "step": 2925 }, { "epoch": 0.19, "grad_norm": 1.2186487913131714, "learning_rate": 9.352683267013042e-06, "loss": 0.6392, "step": 2926 }, { "epoch": 0.19, "grad_norm": 1.1037554740905762, "learning_rate": 9.35216876815559e-06, "loss": 0.5627, "step": 2927 }, { "epoch": 0.19, "grad_norm": 1.1334657669067383, "learning_rate": 9.351654079076072e-06, "loss": 0.5813, "step": 2928 }, { "epoch": 0.19, "grad_norm": 1.1248846054077148, "learning_rate": 9.35113919979698e-06, "loss": 0.6143, "step": 2929 }, { "epoch": 0.19, "grad_norm": 1.3438016176223755, "learning_rate": 9.350624130340824e-06, "loss": 0.5364, "step": 2930 }, { "epoch": 0.19, "grad_norm": 1.1841721534729004, "learning_rate": 9.350108870730108e-06, "loss": 0.5998, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.0828105211257935, "learning_rate": 9.349593420987359e-06, "loss": 0.5722, "step": 2932 }, { "epoch": 0.19, "grad_norm": 1.1780786514282227, "learning_rate": 9.349077781135102e-06, "loss": 0.5881, "step": 2933 }, { "epoch": 0.19, "grad_norm": 1.0867847204208374, "learning_rate": 9.348561951195878e-06, "loss": 0.5733, "step": 2934 }, { "epoch": 0.19, "grad_norm": 1.1151291131973267, "learning_rate": 9.348045931192227e-06, "loss": 0.5396, "step": 2935 }, { "epoch": 0.19, "grad_norm": 1.2328193187713623, "learning_rate": 9.34752972114671e-06, "loss": 0.5928, "step": 2936 }, { "epoch": 0.19, "grad_norm": 1.1810543537139893, "learning_rate": 9.347013321081883e-06, "loss": 0.6557, "step": 2937 }, { "epoch": 0.19, "grad_norm": 1.3228814601898193, "learning_rate": 9.346496731020321e-06, "loss": 0.6061, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.1445622444152832, "learning_rate": 9.3459799509846e-06, "loss": 0.6046, "step": 2939 }, { "epoch": 0.19, "grad_norm": 1.0790482759475708, "learning_rate": 9.34546298099731e-06, "loss": 0.5663, "step": 2940 }, { "epoch": 0.19, "grad_norm": 1.1900007724761963, "learning_rate": 9.344945821081043e-06, "loss": 0.5971, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.2592957019805908, "learning_rate": 9.344428471258405e-06, "loss": 0.5931, "step": 2942 }, { "epoch": 0.19, "grad_norm": 1.143734097480774, "learning_rate": 9.343910931552005e-06, "loss": 0.539, "step": 2943 }, { "epoch": 0.19, "grad_norm": 1.1528781652450562, "learning_rate": 9.343393201984468e-06, "loss": 0.5456, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.2399744987487793, "learning_rate": 9.34287528257842e-06, "loss": 0.601, "step": 2945 }, { "epoch": 0.19, "grad_norm": 1.1470575332641602, "learning_rate": 9.342357173356498e-06, "loss": 0.5813, "step": 2946 }, { "epoch": 0.19, "grad_norm": 1.1988873481750488, "learning_rate": 9.341838874341347e-06, "loss": 0.6151, "step": 2947 }, { "epoch": 0.19, "grad_norm": 1.2890536785125732, "learning_rate": 9.34132038555562e-06, "loss": 0.5671, "step": 2948 }, { "epoch": 0.19, "grad_norm": 1.206046462059021, "learning_rate": 9.34080170702198e-06, "loss": 0.5869, "step": 2949 }, { "epoch": 0.19, "grad_norm": 1.1748250722885132, "learning_rate": 9.340282838763099e-06, "loss": 0.642, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.3690180778503418, "learning_rate": 9.339763780801651e-06, "loss": 0.6234, "step": 2951 }, { "epoch": 0.19, "grad_norm": 1.2870187759399414, "learning_rate": 9.339244533160326e-06, "loss": 0.6699, "step": 2952 }, { "epoch": 0.19, "grad_norm": 1.2061841487884521, "learning_rate": 9.338725095861816e-06, "loss": 0.5751, "step": 2953 }, { "epoch": 0.19, "grad_norm": 1.2316920757293701, "learning_rate": 9.338205468928829e-06, "loss": 0.552, "step": 2954 }, { "epoch": 0.19, "grad_norm": 1.1466740369796753, "learning_rate": 9.337685652384072e-06, "loss": 0.577, "step": 2955 }, { "epoch": 0.19, "grad_norm": 1.2185149192810059, "learning_rate": 9.337165646250267e-06, "loss": 0.6363, "step": 2956 }, { "epoch": 0.19, "grad_norm": 1.0338317155838013, "learning_rate": 9.336645450550141e-06, "loss": 0.4986, "step": 2957 }, { "epoch": 0.19, "grad_norm": 1.1843557357788086, "learning_rate": 9.336125065306432e-06, "loss": 0.6144, "step": 2958 }, { "epoch": 0.19, "grad_norm": 1.1464930772781372, "learning_rate": 9.335604490541883e-06, "loss": 0.6255, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.1258386373519897, "learning_rate": 9.335083726279248e-06, "loss": 0.567, "step": 2960 }, { "epoch": 0.19, "grad_norm": 1.3653255701065063, "learning_rate": 9.334562772541287e-06, "loss": 0.5729, "step": 2961 }, { "epoch": 0.19, "grad_norm": 1.1599277257919312, "learning_rate": 9.334041629350772e-06, "loss": 0.5761, "step": 2962 }, { "epoch": 0.19, "grad_norm": 1.1670660972595215, "learning_rate": 9.333520296730479e-06, "loss": 0.6017, "step": 2963 }, { "epoch": 0.19, "grad_norm": 1.0765753984451294, "learning_rate": 9.332998774703194e-06, "loss": 0.5774, "step": 2964 }, { "epoch": 0.19, "grad_norm": 1.1096965074539185, "learning_rate": 9.332477063291711e-06, "loss": 0.5417, "step": 2965 }, { "epoch": 0.19, "grad_norm": 1.186431884765625, "learning_rate": 9.331955162518836e-06, "loss": 0.6189, "step": 2966 }, { "epoch": 0.19, "grad_norm": 1.1953283548355103, "learning_rate": 9.331433072407375e-06, "loss": 0.5632, "step": 2967 }, { "epoch": 0.19, "grad_norm": 1.1119651794433594, "learning_rate": 9.330910792980151e-06, "loss": 0.5593, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.1223728656768799, "learning_rate": 9.330388324259991e-06, "loss": 0.5215, "step": 2969 }, { "epoch": 0.19, "grad_norm": 1.1270252466201782, "learning_rate": 9.32986566626973e-06, "loss": 0.5781, "step": 2970 }, { "epoch": 0.19, "grad_norm": 1.1030408143997192, "learning_rate": 9.329342819032207e-06, "loss": 0.538, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.2436364889144897, "learning_rate": 9.328819782570284e-06, "loss": 0.6383, "step": 2972 }, { "epoch": 0.19, "grad_norm": 1.2042878866195679, "learning_rate": 9.328296556906816e-06, "loss": 0.5704, "step": 2973 }, { "epoch": 0.19, "grad_norm": 1.2584311962127686, "learning_rate": 9.327773142064673e-06, "loss": 0.576, "step": 2974 }, { "epoch": 0.19, "grad_norm": 1.1827573776245117, "learning_rate": 9.327249538066731e-06, "loss": 0.5581, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.2250531911849976, "learning_rate": 9.326725744935877e-06, "loss": 0.5685, "step": 2976 }, { "epoch": 0.19, "grad_norm": 1.0631678104400635, "learning_rate": 9.326201762695002e-06, "loss": 0.5426, "step": 2977 }, { "epoch": 0.19, "grad_norm": 1.181078314781189, "learning_rate": 9.325677591367011e-06, "loss": 0.5348, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.423296332359314, "learning_rate": 9.325153230974812e-06, "loss": 0.6171, "step": 2979 }, { "epoch": 0.19, "grad_norm": 1.277420163154602, "learning_rate": 9.324628681541326e-06, "loss": 0.5398, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.0937138795852661, "learning_rate": 9.324103943089476e-06, "loss": 0.5683, "step": 2981 }, { "epoch": 0.19, "grad_norm": 1.1935259103775024, "learning_rate": 9.3235790156422e-06, "loss": 0.5347, "step": 2982 }, { "epoch": 0.19, "grad_norm": 1.281661868095398, "learning_rate": 9.32305389922244e-06, "loss": 0.5836, "step": 2983 }, { "epoch": 0.19, "grad_norm": 1.2085498571395874, "learning_rate": 9.322528593853148e-06, "loss": 0.6008, "step": 2984 }, { "epoch": 0.19, "grad_norm": 1.2592288255691528, "learning_rate": 9.322003099557284e-06, "loss": 0.5502, "step": 2985 }, { "epoch": 0.19, "grad_norm": 1.1743733882904053, "learning_rate": 9.321477416357815e-06, "loss": 0.6411, "step": 2986 }, { "epoch": 0.19, "grad_norm": 1.3260385990142822, "learning_rate": 9.320951544277717e-06, "loss": 0.6087, "step": 2987 }, { "epoch": 0.19, "grad_norm": 1.2637598514556885, "learning_rate": 9.320425483339974e-06, "loss": 0.5704, "step": 2988 }, { "epoch": 0.19, "grad_norm": 1.2062466144561768, "learning_rate": 9.319899233567583e-06, "loss": 0.5186, "step": 2989 }, { "epoch": 0.19, "grad_norm": 1.1679385900497437, "learning_rate": 9.319372794983541e-06, "loss": 0.5954, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.1733585596084595, "learning_rate": 9.318846167610858e-06, "loss": 0.5985, "step": 2991 }, { "epoch": 0.19, "grad_norm": 1.0535153150558472, "learning_rate": 9.318319351472551e-06, "loss": 0.5236, "step": 2992 }, { "epoch": 0.19, "grad_norm": 1.0893906354904175, "learning_rate": 9.317792346591647e-06, "loss": 0.5882, "step": 2993 }, { "epoch": 0.19, "grad_norm": 1.1199668645858765, "learning_rate": 9.31726515299118e-06, "loss": 0.5666, "step": 2994 }, { "epoch": 0.19, "grad_norm": 1.1628811359405518, "learning_rate": 9.31673777069419e-06, "loss": 0.6111, "step": 2995 }, { "epoch": 0.19, "grad_norm": 1.1851544380187988, "learning_rate": 9.316210199723731e-06, "loss": 0.5804, "step": 2996 }, { "epoch": 0.19, "grad_norm": 1.1588752269744873, "learning_rate": 9.315682440102861e-06, "loss": 0.5403, "step": 2997 }, { "epoch": 0.19, "grad_norm": 1.185451865196228, "learning_rate": 9.315154491854646e-06, "loss": 0.5456, "step": 2998 }, { "epoch": 0.19, "grad_norm": 1.266563057899475, "learning_rate": 9.31462635500216e-06, "loss": 0.5601, "step": 2999 }, { "epoch": 0.19, "grad_norm": 1.1245709657669067, "learning_rate": 9.314098029568488e-06, "loss": 0.5508, "step": 3000 }, { "epoch": 0.19, "grad_norm": 1.1974939107894897, "learning_rate": 9.313569515576724e-06, "loss": 0.5747, "step": 3001 }, { "epoch": 0.19, "grad_norm": 1.1739282608032227, "learning_rate": 9.313040813049963e-06, "loss": 0.5759, "step": 3002 }, { "epoch": 0.19, "grad_norm": 1.133509874343872, "learning_rate": 9.312511922011317e-06, "loss": 0.6191, "step": 3003 }, { "epoch": 0.19, "grad_norm": 1.098097324371338, "learning_rate": 9.311982842483902e-06, "loss": 0.5479, "step": 3004 }, { "epoch": 0.19, "grad_norm": 1.178916573524475, "learning_rate": 9.31145357449084e-06, "loss": 0.5223, "step": 3005 }, { "epoch": 0.19, "grad_norm": 1.2523831129074097, "learning_rate": 9.310924118055266e-06, "loss": 0.608, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.152325987815857, "learning_rate": 9.310394473200325e-06, "loss": 0.5748, "step": 3007 }, { "epoch": 0.19, "grad_norm": 1.2205924987792969, "learning_rate": 9.309864639949157e-06, "loss": 0.5949, "step": 3008 }, { "epoch": 0.19, "grad_norm": 1.239532232284546, "learning_rate": 9.30933461832493e-06, "loss": 0.5878, "step": 3009 }, { "epoch": 0.19, "grad_norm": 1.239559292793274, "learning_rate": 9.308804408350802e-06, "loss": 0.5609, "step": 3010 }, { "epoch": 0.19, "grad_norm": 1.2058849334716797, "learning_rate": 9.308274010049952e-06, "loss": 0.5848, "step": 3011 }, { "epoch": 0.19, "grad_norm": 1.101211428642273, "learning_rate": 9.307743423445558e-06, "loss": 0.5464, "step": 3012 }, { "epoch": 0.19, "grad_norm": 1.2484394311904907, "learning_rate": 9.307212648560814e-06, "loss": 0.5541, "step": 3013 }, { "epoch": 0.19, "grad_norm": 1.2418099641799927, "learning_rate": 9.306681685418918e-06, "loss": 0.5907, "step": 3014 }, { "epoch": 0.19, "grad_norm": 1.2999069690704346, "learning_rate": 9.30615053404308e-06, "loss": 0.6012, "step": 3015 }, { "epoch": 0.19, "grad_norm": 1.2580294609069824, "learning_rate": 9.30561919445651e-06, "loss": 0.5681, "step": 3016 }, { "epoch": 0.19, "grad_norm": 1.1577858924865723, "learning_rate": 9.305087666682432e-06, "loss": 0.5783, "step": 3017 }, { "epoch": 0.19, "grad_norm": 1.1218377351760864, "learning_rate": 9.30455595074408e-06, "loss": 0.5793, "step": 3018 }, { "epoch": 0.19, "grad_norm": 1.1909887790679932, "learning_rate": 9.304024046664695e-06, "loss": 0.6026, "step": 3019 }, { "epoch": 0.19, "grad_norm": 1.2145228385925293, "learning_rate": 9.303491954467523e-06, "loss": 0.6007, "step": 3020 }, { "epoch": 0.2, "grad_norm": 1.2191705703735352, "learning_rate": 9.30295967417582e-06, "loss": 0.6373, "step": 3021 }, { "epoch": 0.2, "grad_norm": 1.1846791505813599, "learning_rate": 9.302427205812851e-06, "loss": 0.5878, "step": 3022 }, { "epoch": 0.2, "grad_norm": 1.1642893552780151, "learning_rate": 9.30189454940189e-06, "loss": 0.6124, "step": 3023 }, { "epoch": 0.2, "grad_norm": 1.3540116548538208, "learning_rate": 9.301361704966219e-06, "loss": 0.5451, "step": 3024 }, { "epoch": 0.2, "grad_norm": 1.2095338106155396, "learning_rate": 9.300828672529123e-06, "loss": 0.5469, "step": 3025 }, { "epoch": 0.2, "grad_norm": 1.1362740993499756, "learning_rate": 9.300295452113903e-06, "loss": 0.5694, "step": 3026 }, { "epoch": 0.2, "grad_norm": 1.3384737968444824, "learning_rate": 9.299762043743863e-06, "loss": 0.5614, "step": 3027 }, { "epoch": 0.2, "grad_norm": 1.305235743522644, "learning_rate": 9.299228447442317e-06, "loss": 0.582, "step": 3028 }, { "epoch": 0.2, "grad_norm": 1.286002516746521, "learning_rate": 9.29869466323259e-06, "loss": 0.6044, "step": 3029 }, { "epoch": 0.2, "grad_norm": 1.020398497581482, "learning_rate": 9.29816069113801e-06, "loss": 0.5585, "step": 3030 }, { "epoch": 0.2, "grad_norm": 1.1046310663223267, "learning_rate": 9.297626531181913e-06, "loss": 0.5448, "step": 3031 }, { "epoch": 0.2, "grad_norm": 1.2101856470108032, "learning_rate": 9.29709218338765e-06, "loss": 0.5915, "step": 3032 }, { "epoch": 0.2, "grad_norm": 1.2830172777175903, "learning_rate": 9.296557647778574e-06, "loss": 0.5967, "step": 3033 }, { "epoch": 0.2, "grad_norm": 1.0934362411499023, "learning_rate": 9.29602292437805e-06, "loss": 0.5626, "step": 3034 }, { "epoch": 0.2, "grad_norm": 1.2219215631484985, "learning_rate": 9.295488013209445e-06, "loss": 0.5749, "step": 3035 }, { "epoch": 0.2, "grad_norm": 1.0162519216537476, "learning_rate": 9.294952914296142e-06, "loss": 0.5495, "step": 3036 }, { "epoch": 0.2, "grad_norm": 1.1108503341674805, "learning_rate": 9.294417627661531e-06, "loss": 0.5242, "step": 3037 }, { "epoch": 0.2, "grad_norm": 1.1682884693145752, "learning_rate": 9.293882153329003e-06, "loss": 0.5859, "step": 3038 }, { "epoch": 0.2, "grad_norm": 1.0244694948196411, "learning_rate": 9.293346491321965e-06, "loss": 0.5526, "step": 3039 }, { "epoch": 0.2, "grad_norm": 1.280411958694458, "learning_rate": 9.292810641663828e-06, "loss": 0.6234, "step": 3040 }, { "epoch": 0.2, "grad_norm": 1.1096441745758057, "learning_rate": 9.292274604378014e-06, "loss": 0.5681, "step": 3041 }, { "epoch": 0.2, "grad_norm": 1.1472351551055908, "learning_rate": 9.291738379487952e-06, "loss": 0.5795, "step": 3042 }, { "epoch": 0.2, "grad_norm": 1.1862273216247559, "learning_rate": 9.291201967017078e-06, "loss": 0.6192, "step": 3043 }, { "epoch": 0.2, "grad_norm": 1.1674014329910278, "learning_rate": 9.290665366988835e-06, "loss": 0.6204, "step": 3044 }, { "epoch": 0.2, "grad_norm": 1.103035807609558, "learning_rate": 9.290128579426683e-06, "loss": 0.5587, "step": 3045 }, { "epoch": 0.2, "grad_norm": 1.0917901992797852, "learning_rate": 9.289591604354076e-06, "loss": 0.5522, "step": 3046 }, { "epoch": 0.2, "grad_norm": 1.0927205085754395, "learning_rate": 9.289054441794489e-06, "loss": 0.6034, "step": 3047 }, { "epoch": 0.2, "grad_norm": 1.2954493761062622, "learning_rate": 9.2885170917714e-06, "loss": 0.6026, "step": 3048 }, { "epoch": 0.2, "grad_norm": 1.0598409175872803, "learning_rate": 9.28797955430829e-06, "loss": 0.5439, "step": 3049 }, { "epoch": 0.2, "grad_norm": 1.1734683513641357, "learning_rate": 9.287441829428659e-06, "loss": 0.5981, "step": 3050 }, { "epoch": 0.2, "grad_norm": 1.156785488128662, "learning_rate": 9.286903917156005e-06, "loss": 0.5404, "step": 3051 }, { "epoch": 0.2, "grad_norm": 1.1169028282165527, "learning_rate": 9.286365817513845e-06, "loss": 0.5949, "step": 3052 }, { "epoch": 0.2, "grad_norm": 1.1448049545288086, "learning_rate": 9.28582753052569e-06, "loss": 0.5416, "step": 3053 }, { "epoch": 0.2, "grad_norm": 1.2883778810501099, "learning_rate": 9.285289056215075e-06, "loss": 0.621, "step": 3054 }, { "epoch": 0.2, "grad_norm": 1.1667753458023071, "learning_rate": 9.284750394605528e-06, "loss": 0.5291, "step": 3055 }, { "epoch": 0.2, "grad_norm": 1.1072641611099243, "learning_rate": 9.284211545720599e-06, "loss": 0.5076, "step": 3056 }, { "epoch": 0.2, "grad_norm": 1.1863527297973633, "learning_rate": 9.283672509583834e-06, "loss": 0.5686, "step": 3057 }, { "epoch": 0.2, "grad_norm": 1.1698131561279297, "learning_rate": 9.283133286218797e-06, "loss": 0.5573, "step": 3058 }, { "epoch": 0.2, "grad_norm": 1.2004753351211548, "learning_rate": 9.282593875649055e-06, "loss": 0.592, "step": 3059 }, { "epoch": 0.2, "grad_norm": 1.1502668857574463, "learning_rate": 9.282054277898184e-06, "loss": 0.5915, "step": 3060 }, { "epoch": 0.2, "grad_norm": 1.2237268686294556, "learning_rate": 9.281514492989768e-06, "loss": 0.6162, "step": 3061 }, { "epoch": 0.2, "grad_norm": 1.0901283025741577, "learning_rate": 9.280974520947401e-06, "loss": 0.6074, "step": 3062 }, { "epoch": 0.2, "grad_norm": 1.1444658041000366, "learning_rate": 9.280434361794682e-06, "loss": 0.5899, "step": 3063 }, { "epoch": 0.2, "grad_norm": 1.0959869623184204, "learning_rate": 9.279894015555219e-06, "loss": 0.5525, "step": 3064 }, { "epoch": 0.2, "grad_norm": 1.46790611743927, "learning_rate": 9.279353482252633e-06, "loss": 0.5891, "step": 3065 }, { "epoch": 0.2, "grad_norm": 1.3077619075775146, "learning_rate": 9.278812761910547e-06, "loss": 0.5957, "step": 3066 }, { "epoch": 0.2, "grad_norm": 1.2311471700668335, "learning_rate": 9.278271854552594e-06, "loss": 0.5914, "step": 3067 }, { "epoch": 0.2, "grad_norm": 1.1823508739471436, "learning_rate": 9.277730760202417e-06, "loss": 0.531, "step": 3068 }, { "epoch": 0.2, "grad_norm": 1.150061845779419, "learning_rate": 9.277189478883663e-06, "loss": 0.5767, "step": 3069 }, { "epoch": 0.2, "grad_norm": 1.231200098991394, "learning_rate": 9.276648010619996e-06, "loss": 0.6227, "step": 3070 }, { "epoch": 0.2, "grad_norm": 1.1505855321884155, "learning_rate": 9.276106355435075e-06, "loss": 0.5565, "step": 3071 }, { "epoch": 0.2, "grad_norm": 1.17447030544281, "learning_rate": 9.27556451335258e-06, "loss": 0.5872, "step": 3072 }, { "epoch": 0.2, "grad_norm": 1.1657098531723022, "learning_rate": 9.27502248439619e-06, "loss": 0.6203, "step": 3073 }, { "epoch": 0.2, "grad_norm": 1.191632866859436, "learning_rate": 9.274480268589597e-06, "loss": 0.5558, "step": 3074 }, { "epoch": 0.2, "grad_norm": 1.166384220123291, "learning_rate": 9.273937865956499e-06, "loss": 0.5693, "step": 3075 }, { "epoch": 0.2, "grad_norm": 1.3121682405471802, "learning_rate": 9.273395276520605e-06, "loss": 0.5809, "step": 3076 }, { "epoch": 0.2, "grad_norm": 1.1731867790222168, "learning_rate": 9.272852500305629e-06, "loss": 0.5562, "step": 3077 }, { "epoch": 0.2, "grad_norm": 1.1400225162506104, "learning_rate": 9.272309537335293e-06, "loss": 0.6262, "step": 3078 }, { "epoch": 0.2, "grad_norm": 1.1633687019348145, "learning_rate": 9.271766387633329e-06, "loss": 0.6098, "step": 3079 }, { "epoch": 0.2, "grad_norm": 1.2402119636535645, "learning_rate": 9.271223051223482e-06, "loss": 0.6171, "step": 3080 }, { "epoch": 0.2, "grad_norm": 1.3098315000534058, "learning_rate": 9.270679528129491e-06, "loss": 0.5523, "step": 3081 }, { "epoch": 0.2, "grad_norm": 1.0657334327697754, "learning_rate": 9.270135818375118e-06, "loss": 0.5065, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.178661823272705, "learning_rate": 9.269591921984125e-06, "loss": 0.6177, "step": 3083 }, { "epoch": 0.2, "grad_norm": 1.3271374702453613, "learning_rate": 9.269047838980285e-06, "loss": 0.5567, "step": 3084 }, { "epoch": 0.2, "grad_norm": 1.1564418077468872, "learning_rate": 9.26850356938738e-06, "loss": 0.5568, "step": 3085 }, { "epoch": 0.2, "grad_norm": 1.1748090982437134, "learning_rate": 9.267959113229194e-06, "loss": 0.5499, "step": 3086 }, { "epoch": 0.2, "grad_norm": 1.2156586647033691, "learning_rate": 9.267414470529528e-06, "loss": 0.5641, "step": 3087 }, { "epoch": 0.2, "grad_norm": 1.1295690536499023, "learning_rate": 9.266869641312186e-06, "loss": 0.5243, "step": 3088 }, { "epoch": 0.2, "grad_norm": 1.2382522821426392, "learning_rate": 9.26632462560098e-06, "loss": 0.5903, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.1940385103225708, "learning_rate": 9.265779423419732e-06, "loss": 0.5939, "step": 3090 }, { "epoch": 0.2, "grad_norm": 1.1233556270599365, "learning_rate": 9.265234034792272e-06, "loss": 0.5633, "step": 3091 }, { "epoch": 0.2, "grad_norm": 1.1994106769561768, "learning_rate": 9.264688459742435e-06, "loss": 0.5744, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.2474284172058105, "learning_rate": 9.26414269829407e-06, "loss": 0.5723, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.2211426496505737, "learning_rate": 9.263596750471028e-06, "loss": 0.5543, "step": 3094 }, { "epoch": 0.2, "grad_norm": 1.1078652143478394, "learning_rate": 9.263050616297174e-06, "loss": 0.542, "step": 3095 }, { "epoch": 0.2, "grad_norm": 1.101150631904602, "learning_rate": 9.262504295796374e-06, "loss": 0.5655, "step": 3096 }, { "epoch": 0.2, "grad_norm": 1.1936688423156738, "learning_rate": 9.261957788992511e-06, "loss": 0.5397, "step": 3097 }, { "epoch": 0.2, "grad_norm": 1.2352887392044067, "learning_rate": 9.261411095909467e-06, "loss": 0.5783, "step": 3098 }, { "epoch": 0.2, "grad_norm": 1.1880064010620117, "learning_rate": 9.26086421657114e-06, "loss": 0.5874, "step": 3099 }, { "epoch": 0.2, "grad_norm": 1.1404420137405396, "learning_rate": 9.260317151001432e-06, "loss": 0.5854, "step": 3100 }, { "epoch": 0.2, "grad_norm": 1.1913459300994873, "learning_rate": 9.259769899224252e-06, "loss": 0.5643, "step": 3101 }, { "epoch": 0.2, "grad_norm": 1.194149374961853, "learning_rate": 9.259222461263519e-06, "loss": 0.6152, "step": 3102 }, { "epoch": 0.2, "grad_norm": 1.1938841342926025, "learning_rate": 9.258674837143162e-06, "loss": 0.6055, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.170621633529663, "learning_rate": 9.258127026887115e-06, "loss": 0.5703, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.307644248008728, "learning_rate": 9.257579030519324e-06, "loss": 0.581, "step": 3105 }, { "epoch": 0.2, "grad_norm": 1.2012418508529663, "learning_rate": 9.257030848063737e-06, "loss": 0.5889, "step": 3106 }, { "epoch": 0.2, "grad_norm": 1.2299768924713135, "learning_rate": 9.256482479544314e-06, "loss": 0.5622, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.2062751054763794, "learning_rate": 9.255933924985024e-06, "loss": 0.5588, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.1604408025741577, "learning_rate": 9.255385184409844e-06, "loss": 0.5653, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.2625670433044434, "learning_rate": 9.254836257842755e-06, "loss": 0.5715, "step": 3110 }, { "epoch": 0.2, "grad_norm": 1.15908944606781, "learning_rate": 9.254287145307752e-06, "loss": 0.5899, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.2927672863006592, "learning_rate": 9.253737846828833e-06, "loss": 0.5843, "step": 3112 }, { "epoch": 0.2, "grad_norm": 1.1416044235229492, "learning_rate": 9.253188362430008e-06, "loss": 0.5246, "step": 3113 }, { "epoch": 0.2, "grad_norm": 1.0435566902160645, "learning_rate": 9.252638692135293e-06, "loss": 0.5526, "step": 3114 }, { "epoch": 0.2, "grad_norm": 1.074360728263855, "learning_rate": 9.252088835968713e-06, "loss": 0.5674, "step": 3115 }, { "epoch": 0.2, "grad_norm": 1.2316385507583618, "learning_rate": 9.2515387939543e-06, "loss": 0.5621, "step": 3116 }, { "epoch": 0.2, "grad_norm": 1.2789126634597778, "learning_rate": 9.250988566116095e-06, "loss": 0.6105, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.1440402269363403, "learning_rate": 9.250438152478149e-06, "loss": 0.5373, "step": 3118 }, { "epoch": 0.2, "grad_norm": 1.0654728412628174, "learning_rate": 9.249887553064515e-06, "loss": 0.5832, "step": 3119 }, { "epoch": 0.2, "grad_norm": 1.109975814819336, "learning_rate": 9.249336767899263e-06, "loss": 0.5913, "step": 3120 }, { "epoch": 0.2, "grad_norm": 1.2574878931045532, "learning_rate": 9.248785797006466e-06, "loss": 0.6005, "step": 3121 }, { "epoch": 0.2, "grad_norm": 1.1314074993133545, "learning_rate": 9.2482346404102e-06, "loss": 0.5985, "step": 3122 }, { "epoch": 0.2, "grad_norm": 1.1990970373153687, "learning_rate": 9.24768329813456e-06, "loss": 0.5291, "step": 3123 }, { "epoch": 0.2, "grad_norm": 1.2057380676269531, "learning_rate": 9.247131770203643e-06, "loss": 0.5804, "step": 3124 }, { "epoch": 0.2, "grad_norm": 1.2116010189056396, "learning_rate": 9.246580056641555e-06, "loss": 0.6156, "step": 3125 }, { "epoch": 0.2, "grad_norm": 1.1164865493774414, "learning_rate": 9.246028157472406e-06, "loss": 0.547, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.1481071710586548, "learning_rate": 9.245476072720322e-06, "loss": 0.5851, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.2408658266067505, "learning_rate": 9.244923802409435e-06, "loss": 0.6123, "step": 3128 }, { "epoch": 0.2, "grad_norm": 1.1489121913909912, "learning_rate": 9.244371346563879e-06, "loss": 0.5568, "step": 3129 }, { "epoch": 0.2, "grad_norm": 1.2815707921981812, "learning_rate": 9.2438187052078e-06, "loss": 0.6012, "step": 3130 }, { "epoch": 0.2, "grad_norm": 1.2624331712722778, "learning_rate": 9.243265878365358e-06, "loss": 0.5699, "step": 3131 }, { "epoch": 0.2, "grad_norm": 1.1364437341690063, "learning_rate": 9.24271286606071e-06, "loss": 0.5688, "step": 3132 }, { "epoch": 0.2, "grad_norm": 1.204816222190857, "learning_rate": 9.242159668318029e-06, "loss": 0.5245, "step": 3133 }, { "epoch": 0.2, "grad_norm": 1.1092941761016846, "learning_rate": 9.241606285161495e-06, "loss": 0.538, "step": 3134 }, { "epoch": 0.2, "grad_norm": 1.2426033020019531, "learning_rate": 9.241052716615294e-06, "loss": 0.53, "step": 3135 }, { "epoch": 0.2, "grad_norm": 1.1925318241119385, "learning_rate": 9.24049896270362e-06, "loss": 0.575, "step": 3136 }, { "epoch": 0.2, "grad_norm": 1.081073522567749, "learning_rate": 9.239945023450676e-06, "loss": 0.4936, "step": 3137 }, { "epoch": 0.2, "grad_norm": 1.1169618368148804, "learning_rate": 9.239390898880675e-06, "loss": 0.5511, "step": 3138 }, { "epoch": 0.2, "grad_norm": 1.295778512954712, "learning_rate": 9.238836589017835e-06, "loss": 0.5588, "step": 3139 }, { "epoch": 0.2, "grad_norm": 1.123201608657837, "learning_rate": 9.238282093886386e-06, "loss": 0.6088, "step": 3140 }, { "epoch": 0.2, "grad_norm": 1.1997036933898926, "learning_rate": 9.23772741351056e-06, "loss": 0.6084, "step": 3141 }, { "epoch": 0.2, "grad_norm": 1.241383671760559, "learning_rate": 9.237172547914604e-06, "loss": 0.5777, "step": 3142 }, { "epoch": 0.2, "grad_norm": 1.3041492700576782, "learning_rate": 9.236617497122765e-06, "loss": 0.6131, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.1316124200820923, "learning_rate": 9.236062261159308e-06, "loss": 0.5259, "step": 3144 }, { "epoch": 0.2, "grad_norm": 1.1110666990280151, "learning_rate": 9.235506840048498e-06, "loss": 0.5714, "step": 3145 }, { "epoch": 0.2, "grad_norm": 1.197737216949463, "learning_rate": 9.234951233814612e-06, "loss": 0.5749, "step": 3146 }, { "epoch": 0.2, "grad_norm": 1.2221674919128418, "learning_rate": 9.234395442481933e-06, "loss": 0.542, "step": 3147 }, { "epoch": 0.2, "grad_norm": 1.293383240699768, "learning_rate": 9.233839466074756e-06, "loss": 0.5884, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.2041559219360352, "learning_rate": 9.233283304617378e-06, "loss": 0.5411, "step": 3149 }, { "epoch": 0.2, "grad_norm": 1.1516176462173462, "learning_rate": 9.232726958134108e-06, "loss": 0.5708, "step": 3150 }, { "epoch": 0.2, "grad_norm": 1.1027235984802246, "learning_rate": 9.232170426649265e-06, "loss": 0.558, "step": 3151 }, { "epoch": 0.2, "grad_norm": 1.1835819482803345, "learning_rate": 9.231613710187172e-06, "loss": 0.56, "step": 3152 }, { "epoch": 0.2, "grad_norm": 1.1754817962646484, "learning_rate": 9.23105680877216e-06, "loss": 0.6244, "step": 3153 }, { "epoch": 0.2, "grad_norm": 1.1090794801712036, "learning_rate": 9.230499722428571e-06, "loss": 0.5719, "step": 3154 }, { "epoch": 0.2, "grad_norm": 1.3134269714355469, "learning_rate": 9.229942451180755e-06, "loss": 0.5831, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.2459783554077148, "learning_rate": 9.229384995053067e-06, "loss": 0.6108, "step": 3156 }, { "epoch": 0.2, "grad_norm": 1.2601451873779297, "learning_rate": 9.228827354069873e-06, "loss": 0.5877, "step": 3157 }, { "epoch": 0.2, "grad_norm": 1.2725110054016113, "learning_rate": 9.228269528255546e-06, "loss": 0.6376, "step": 3158 }, { "epoch": 0.2, "grad_norm": 1.2314767837524414, "learning_rate": 9.227711517634468e-06, "loss": 0.5508, "step": 3159 }, { "epoch": 0.2, "grad_norm": 1.2973912954330444, "learning_rate": 9.227153322231027e-06, "loss": 0.5546, "step": 3160 }, { "epoch": 0.2, "grad_norm": 1.1948485374450684, "learning_rate": 9.22659494206962e-06, "loss": 0.5778, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.1647751331329346, "learning_rate": 9.226036377174654e-06, "loss": 0.5463, "step": 3162 }, { "epoch": 0.2, "grad_norm": 1.1405631303787231, "learning_rate": 9.22547762757054e-06, "loss": 0.5918, "step": 3163 }, { "epoch": 0.2, "grad_norm": 1.1585414409637451, "learning_rate": 9.2249186932817e-06, "loss": 0.5609, "step": 3164 }, { "epoch": 0.2, "grad_norm": 1.1947954893112183, "learning_rate": 9.224359574332564e-06, "loss": 0.6413, "step": 3165 }, { "epoch": 0.2, "grad_norm": 1.1894886493682861, "learning_rate": 9.223800270747571e-06, "loss": 0.6158, "step": 3166 }, { "epoch": 0.2, "grad_norm": 1.2034473419189453, "learning_rate": 9.223240782551168e-06, "loss": 0.5535, "step": 3167 }, { "epoch": 0.2, "grad_norm": 1.1641324758529663, "learning_rate": 9.222681109767803e-06, "loss": 0.5371, "step": 3168 }, { "epoch": 0.2, "grad_norm": 1.1291395425796509, "learning_rate": 9.222121252421942e-06, "loss": 0.5604, "step": 3169 }, { "epoch": 0.2, "grad_norm": 1.1652181148529053, "learning_rate": 9.221561210538057e-06, "loss": 0.625, "step": 3170 }, { "epoch": 0.2, "grad_norm": 1.0239900350570679, "learning_rate": 9.22100098414062e-06, "loss": 0.551, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.0644463300704956, "learning_rate": 9.220440573254123e-06, "loss": 0.6105, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.259662389755249, "learning_rate": 9.219879977903055e-06, "loss": 0.619, "step": 3173 }, { "epoch": 0.2, "grad_norm": 1.1433606147766113, "learning_rate": 9.219319198111923e-06, "loss": 0.5971, "step": 3174 }, { "epoch": 0.2, "grad_norm": 1.1493934392929077, "learning_rate": 9.218758233905232e-06, "loss": 0.5494, "step": 3175 }, { "epoch": 0.21, "grad_norm": 1.100106120109558, "learning_rate": 9.218197085307505e-06, "loss": 0.5804, "step": 3176 }, { "epoch": 0.21, "grad_norm": 1.209498405456543, "learning_rate": 9.217635752343267e-06, "loss": 0.5704, "step": 3177 }, { "epoch": 0.21, "grad_norm": 1.161909580230713, "learning_rate": 9.217074235037051e-06, "loss": 0.5792, "step": 3178 }, { "epoch": 0.21, "grad_norm": 1.1800705194473267, "learning_rate": 9.2165125334134e-06, "loss": 0.5905, "step": 3179 }, { "epoch": 0.21, "grad_norm": 1.100710391998291, "learning_rate": 9.215950647496865e-06, "loss": 0.5571, "step": 3180 }, { "epoch": 0.21, "grad_norm": 1.1454949378967285, "learning_rate": 9.215388577312004e-06, "loss": 0.5417, "step": 3181 }, { "epoch": 0.21, "grad_norm": 1.1847013235092163, "learning_rate": 9.214826322883386e-06, "loss": 0.5925, "step": 3182 }, { "epoch": 0.21, "grad_norm": 1.1535452604293823, "learning_rate": 9.214263884235581e-06, "loss": 0.537, "step": 3183 }, { "epoch": 0.21, "grad_norm": 1.2236788272857666, "learning_rate": 9.213701261393177e-06, "loss": 0.6402, "step": 3184 }, { "epoch": 0.21, "grad_norm": 1.0738210678100586, "learning_rate": 9.213138454380762e-06, "loss": 0.5642, "step": 3185 }, { "epoch": 0.21, "grad_norm": 1.2511769533157349, "learning_rate": 9.212575463222934e-06, "loss": 0.579, "step": 3186 }, { "epoch": 0.21, "grad_norm": 1.182493805885315, "learning_rate": 9.212012287944302e-06, "loss": 0.5597, "step": 3187 }, { "epoch": 0.21, "grad_norm": 1.3198593854904175, "learning_rate": 9.211448928569481e-06, "loss": 0.6225, "step": 3188 }, { "epoch": 0.21, "grad_norm": 1.1257680654525757, "learning_rate": 9.210885385123092e-06, "loss": 0.5523, "step": 3189 }, { "epoch": 0.21, "grad_norm": 1.3358969688415527, "learning_rate": 9.210321657629768e-06, "loss": 0.6062, "step": 3190 }, { "epoch": 0.21, "grad_norm": 1.2041857242584229, "learning_rate": 9.209757746114145e-06, "loss": 0.5928, "step": 3191 }, { "epoch": 0.21, "grad_norm": 1.1918138265609741, "learning_rate": 9.209193650600874e-06, "loss": 0.5845, "step": 3192 }, { "epoch": 0.21, "grad_norm": 1.2765218019485474, "learning_rate": 9.208629371114609e-06, "loss": 0.548, "step": 3193 }, { "epoch": 0.21, "grad_norm": 1.1384878158569336, "learning_rate": 9.20806490768001e-06, "loss": 0.5292, "step": 3194 }, { "epoch": 0.21, "grad_norm": 1.185971975326538, "learning_rate": 9.207500260321754e-06, "loss": 0.5841, "step": 3195 }, { "epoch": 0.21, "grad_norm": 1.2333409786224365, "learning_rate": 9.206935429064515e-06, "loss": 0.5749, "step": 3196 }, { "epoch": 0.21, "grad_norm": 1.1328415870666504, "learning_rate": 9.206370413932982e-06, "loss": 0.5902, "step": 3197 }, { "epoch": 0.21, "grad_norm": 1.1446661949157715, "learning_rate": 9.20580521495185e-06, "loss": 0.5321, "step": 3198 }, { "epoch": 0.21, "grad_norm": 1.1032977104187012, "learning_rate": 9.205239832145826e-06, "loss": 0.595, "step": 3199 }, { "epoch": 0.21, "grad_norm": 1.1538269519805908, "learning_rate": 9.204674265539617e-06, "loss": 0.5522, "step": 3200 }, { "epoch": 0.21, "grad_norm": 1.3509466648101807, "learning_rate": 9.204108515157945e-06, "loss": 0.5979, "step": 3201 }, { "epoch": 0.21, "grad_norm": 1.1877223253250122, "learning_rate": 9.203542581025536e-06, "loss": 0.5769, "step": 3202 }, { "epoch": 0.21, "grad_norm": 1.2417141199111938, "learning_rate": 9.202976463167125e-06, "loss": 0.5957, "step": 3203 }, { "epoch": 0.21, "grad_norm": 1.2153123617172241, "learning_rate": 9.202410161607458e-06, "loss": 0.5531, "step": 3204 }, { "epoch": 0.21, "grad_norm": 1.1909741163253784, "learning_rate": 9.201843676371285e-06, "loss": 0.5823, "step": 3205 }, { "epoch": 0.21, "grad_norm": 1.1616787910461426, "learning_rate": 9.201277007483365e-06, "loss": 0.5231, "step": 3206 }, { "epoch": 0.21, "grad_norm": 1.1793344020843506, "learning_rate": 9.200710154968468e-06, "loss": 0.566, "step": 3207 }, { "epoch": 0.21, "grad_norm": 1.1466854810714722, "learning_rate": 9.200143118851367e-06, "loss": 0.4995, "step": 3208 }, { "epoch": 0.21, "grad_norm": 1.1719036102294922, "learning_rate": 9.199575899156847e-06, "loss": 0.5886, "step": 3209 }, { "epoch": 0.21, "grad_norm": 1.1630918979644775, "learning_rate": 9.199008495909701e-06, "loss": 0.5797, "step": 3210 }, { "epoch": 0.21, "grad_norm": 1.1732006072998047, "learning_rate": 9.198440909134726e-06, "loss": 0.6111, "step": 3211 }, { "epoch": 0.21, "grad_norm": 1.168148159980774, "learning_rate": 9.197873138856728e-06, "loss": 0.5451, "step": 3212 }, { "epoch": 0.21, "grad_norm": 1.1410408020019531, "learning_rate": 9.197305185100529e-06, "loss": 0.6522, "step": 3213 }, { "epoch": 0.21, "grad_norm": 1.2131584882736206, "learning_rate": 9.196737047890949e-06, "loss": 0.6002, "step": 3214 }, { "epoch": 0.21, "grad_norm": 1.1001938581466675, "learning_rate": 9.196168727252821e-06, "loss": 0.6092, "step": 3215 }, { "epoch": 0.21, "grad_norm": 1.2977383136749268, "learning_rate": 9.195600223210983e-06, "loss": 0.6203, "step": 3216 }, { "epoch": 0.21, "grad_norm": 1.2146589756011963, "learning_rate": 9.195031535790285e-06, "loss": 0.5832, "step": 3217 }, { "epoch": 0.21, "grad_norm": 1.2407214641571045, "learning_rate": 9.194462665015581e-06, "loss": 0.542, "step": 3218 }, { "epoch": 0.21, "grad_norm": 1.1806490421295166, "learning_rate": 9.193893610911737e-06, "loss": 0.5884, "step": 3219 }, { "epoch": 0.21, "grad_norm": 1.1083087921142578, "learning_rate": 9.193324373503623e-06, "loss": 0.5361, "step": 3220 }, { "epoch": 0.21, "grad_norm": 1.1304301023483276, "learning_rate": 9.19275495281612e-06, "loss": 0.5884, "step": 3221 }, { "epoch": 0.21, "grad_norm": 1.16396164894104, "learning_rate": 9.192185348874115e-06, "loss": 0.5824, "step": 3222 }, { "epoch": 0.21, "grad_norm": 1.2041023969650269, "learning_rate": 9.191615561702504e-06, "loss": 0.639, "step": 3223 }, { "epoch": 0.21, "grad_norm": 1.0971475839614868, "learning_rate": 9.191045591326191e-06, "loss": 0.6003, "step": 3224 }, { "epoch": 0.21, "grad_norm": 1.1641459465026855, "learning_rate": 9.19047543777009e-06, "loss": 0.5763, "step": 3225 }, { "epoch": 0.21, "grad_norm": 1.0357643365859985, "learning_rate": 9.189905101059118e-06, "loss": 0.5264, "step": 3226 }, { "epoch": 0.21, "grad_norm": 1.2561705112457275, "learning_rate": 9.189334581218203e-06, "loss": 0.6339, "step": 3227 }, { "epoch": 0.21, "grad_norm": 1.324174165725708, "learning_rate": 9.188763878272284e-06, "loss": 0.613, "step": 3228 }, { "epoch": 0.21, "grad_norm": 1.2038441896438599, "learning_rate": 9.188192992246301e-06, "loss": 0.5487, "step": 3229 }, { "epoch": 0.21, "grad_norm": 1.27217698097229, "learning_rate": 9.187621923165211e-06, "loss": 0.5813, "step": 3230 }, { "epoch": 0.21, "grad_norm": 1.277828335762024, "learning_rate": 9.187050671053969e-06, "loss": 0.575, "step": 3231 }, { "epoch": 0.21, "grad_norm": 1.1712366342544556, "learning_rate": 9.186479235937545e-06, "loss": 0.5608, "step": 3232 }, { "epoch": 0.21, "grad_norm": 1.307588815689087, "learning_rate": 9.185907617840914e-06, "loss": 0.5862, "step": 3233 }, { "epoch": 0.21, "grad_norm": 1.2550424337387085, "learning_rate": 9.185335816789062e-06, "loss": 0.5683, "step": 3234 }, { "epoch": 0.21, "grad_norm": 1.3388923406600952, "learning_rate": 9.184763832806979e-06, "loss": 0.6015, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.1435297727584839, "learning_rate": 9.184191665919668e-06, "loss": 0.6168, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.3004353046417236, "learning_rate": 9.183619316152132e-06, "loss": 0.5397, "step": 3237 }, { "epoch": 0.21, "grad_norm": 1.2038958072662354, "learning_rate": 9.18304678352939e-06, "loss": 0.5865, "step": 3238 }, { "epoch": 0.21, "grad_norm": 1.2178086042404175, "learning_rate": 9.182474068076468e-06, "loss": 0.5285, "step": 3239 }, { "epoch": 0.21, "grad_norm": 1.280238389968872, "learning_rate": 9.181901169818392e-06, "loss": 0.5693, "step": 3240 }, { "epoch": 0.21, "grad_norm": 1.1244760751724243, "learning_rate": 9.181328088780208e-06, "loss": 0.5764, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.1170039176940918, "learning_rate": 9.18075482498696e-06, "loss": 0.5964, "step": 3242 }, { "epoch": 0.21, "grad_norm": 1.1689203977584839, "learning_rate": 9.180181378463705e-06, "loss": 0.5694, "step": 3243 }, { "epoch": 0.21, "grad_norm": 1.0447330474853516, "learning_rate": 9.179607749235506e-06, "loss": 0.5381, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.1564273834228516, "learning_rate": 9.179033937327437e-06, "loss": 0.5755, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.139615774154663, "learning_rate": 9.178459942764577e-06, "loss": 0.5156, "step": 3246 }, { "epoch": 0.21, "grad_norm": 1.1093624830245972, "learning_rate": 9.177885765572013e-06, "loss": 0.5766, "step": 3247 }, { "epoch": 0.21, "grad_norm": 1.1217540502548218, "learning_rate": 9.17731140577484e-06, "loss": 0.5683, "step": 3248 }, { "epoch": 0.21, "grad_norm": 1.1779512166976929, "learning_rate": 9.176736863398164e-06, "loss": 0.5456, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.0808111429214478, "learning_rate": 9.176162138467098e-06, "loss": 0.5458, "step": 3250 }, { "epoch": 0.21, "grad_norm": 1.102232813835144, "learning_rate": 9.175587231006757e-06, "loss": 0.5163, "step": 3251 }, { "epoch": 0.21, "grad_norm": 1.1798466444015503, "learning_rate": 9.175012141042272e-06, "loss": 0.5895, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.0974167585372925, "learning_rate": 9.174436868598777e-06, "loss": 0.5582, "step": 3253 }, { "epoch": 0.21, "grad_norm": 1.1735717058181763, "learning_rate": 9.173861413701419e-06, "loss": 0.5174, "step": 3254 }, { "epoch": 0.21, "grad_norm": 1.18678617477417, "learning_rate": 9.173285776375344e-06, "loss": 0.5581, "step": 3255 }, { "epoch": 0.21, "grad_norm": 1.2357646226882935, "learning_rate": 9.172709956645718e-06, "loss": 0.5819, "step": 3256 }, { "epoch": 0.21, "grad_norm": 1.1665030717849731, "learning_rate": 9.172133954537705e-06, "loss": 0.5815, "step": 3257 }, { "epoch": 0.21, "grad_norm": 1.1748780012130737, "learning_rate": 9.17155777007648e-06, "loss": 0.5778, "step": 3258 }, { "epoch": 0.21, "grad_norm": 1.105106234550476, "learning_rate": 9.170981403287228e-06, "loss": 0.5354, "step": 3259 }, { "epoch": 0.21, "grad_norm": 1.1766029596328735, "learning_rate": 9.170404854195142e-06, "loss": 0.5894, "step": 3260 }, { "epoch": 0.21, "grad_norm": 1.123658299446106, "learning_rate": 9.169828122825417e-06, "loss": 0.5696, "step": 3261 }, { "epoch": 0.21, "grad_norm": 1.2234811782836914, "learning_rate": 9.169251209203263e-06, "loss": 0.6379, "step": 3262 }, { "epoch": 0.21, "grad_norm": 1.1442899703979492, "learning_rate": 9.168674113353898e-06, "loss": 0.5765, "step": 3263 }, { "epoch": 0.21, "grad_norm": 1.0744469165802002, "learning_rate": 9.16809683530254e-06, "loss": 0.5396, "step": 3264 }, { "epoch": 0.21, "grad_norm": 1.1709498167037964, "learning_rate": 9.167519375074424e-06, "loss": 0.5514, "step": 3265 }, { "epoch": 0.21, "grad_norm": 1.1984477043151855, "learning_rate": 9.16694173269479e-06, "loss": 0.6055, "step": 3266 }, { "epoch": 0.21, "grad_norm": 1.1771339178085327, "learning_rate": 9.166363908188882e-06, "loss": 0.6003, "step": 3267 }, { "epoch": 0.21, "grad_norm": 1.0426770448684692, "learning_rate": 9.165785901581956e-06, "loss": 0.5532, "step": 3268 }, { "epoch": 0.21, "grad_norm": 1.2711806297302246, "learning_rate": 9.165207712899277e-06, "loss": 0.6151, "step": 3269 }, { "epoch": 0.21, "grad_norm": 1.1356583833694458, "learning_rate": 9.164629342166118e-06, "loss": 0.5692, "step": 3270 }, { "epoch": 0.21, "grad_norm": 1.1993221044540405, "learning_rate": 9.164050789407752e-06, "loss": 0.5927, "step": 3271 }, { "epoch": 0.21, "grad_norm": 1.094504952430725, "learning_rate": 9.163472054649471e-06, "loss": 0.5484, "step": 3272 }, { "epoch": 0.21, "grad_norm": 1.1745754480361938, "learning_rate": 9.162893137916568e-06, "loss": 0.6013, "step": 3273 }, { "epoch": 0.21, "grad_norm": 1.1832115650177002, "learning_rate": 9.162314039234346e-06, "loss": 0.5885, "step": 3274 }, { "epoch": 0.21, "grad_norm": 1.2729113101959229, "learning_rate": 9.161734758628117e-06, "loss": 0.6155, "step": 3275 }, { "epoch": 0.21, "grad_norm": 1.157814621925354, "learning_rate": 9.1611552961232e-06, "loss": 0.5876, "step": 3276 }, { "epoch": 0.21, "grad_norm": 1.1581918001174927, "learning_rate": 9.160575651744919e-06, "loss": 0.5393, "step": 3277 }, { "epoch": 0.21, "grad_norm": 1.1105176210403442, "learning_rate": 9.15999582551861e-06, "loss": 0.5879, "step": 3278 }, { "epoch": 0.21, "grad_norm": 1.2293319702148438, "learning_rate": 9.159415817469617e-06, "loss": 0.5432, "step": 3279 }, { "epoch": 0.21, "grad_norm": 1.084760069847107, "learning_rate": 9.158835627623293e-06, "loss": 0.5492, "step": 3280 }, { "epoch": 0.21, "grad_norm": 1.1881705522537231, "learning_rate": 9.15825525600499e-06, "loss": 0.5541, "step": 3281 }, { "epoch": 0.21, "grad_norm": 1.1204715967178345, "learning_rate": 9.15767470264008e-06, "loss": 0.5871, "step": 3282 }, { "epoch": 0.21, "grad_norm": 1.1220088005065918, "learning_rate": 9.157093967553935e-06, "loss": 0.5815, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.1698895692825317, "learning_rate": 9.156513050771938e-06, "loss": 0.5707, "step": 3284 }, { "epoch": 0.21, "grad_norm": 1.2799221277236938, "learning_rate": 9.155931952319481e-06, "loss": 0.5768, "step": 3285 }, { "epoch": 0.21, "grad_norm": 1.2647472620010376, "learning_rate": 9.15535067222196e-06, "loss": 0.5709, "step": 3286 }, { "epoch": 0.21, "grad_norm": 1.2229424715042114, "learning_rate": 9.154769210504782e-06, "loss": 0.5773, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.1196752786636353, "learning_rate": 9.15418756719336e-06, "loss": 0.5241, "step": 3288 }, { "epoch": 0.21, "grad_norm": 1.2154498100280762, "learning_rate": 9.153605742313119e-06, "loss": 0.5695, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.216018795967102, "learning_rate": 9.153023735889485e-06, "loss": 0.5845, "step": 3290 }, { "epoch": 0.21, "grad_norm": 1.0899336338043213, "learning_rate": 9.152441547947902e-06, "loss": 0.5161, "step": 3291 }, { "epoch": 0.21, "grad_norm": 1.2453924417495728, "learning_rate": 9.15185917851381e-06, "loss": 0.5555, "step": 3292 }, { "epoch": 0.21, "grad_norm": 1.176214337348938, "learning_rate": 9.151276627612667e-06, "loss": 0.6119, "step": 3293 }, { "epoch": 0.21, "grad_norm": 1.0532245635986328, "learning_rate": 9.150693895269931e-06, "loss": 0.5546, "step": 3294 }, { "epoch": 0.21, "grad_norm": 1.2824114561080933, "learning_rate": 9.150110981511076e-06, "loss": 0.5531, "step": 3295 }, { "epoch": 0.21, "grad_norm": 1.1433846950531006, "learning_rate": 9.149527886361576e-06, "loss": 0.5933, "step": 3296 }, { "epoch": 0.21, "grad_norm": 1.1207185983657837, "learning_rate": 9.148944609846917e-06, "loss": 0.517, "step": 3297 }, { "epoch": 0.21, "grad_norm": 1.197812557220459, "learning_rate": 9.148361151992595e-06, "loss": 0.5393, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.1928043365478516, "learning_rate": 9.14777751282411e-06, "loss": 0.5933, "step": 3299 }, { "epoch": 0.21, "grad_norm": 1.1917940378189087, "learning_rate": 9.147193692366971e-06, "loss": 0.5983, "step": 3300 }, { "epoch": 0.21, "grad_norm": 1.2299195528030396, "learning_rate": 9.146609690646697e-06, "loss": 0.5449, "step": 3301 }, { "epoch": 0.21, "grad_norm": 1.1880069971084595, "learning_rate": 9.14602550768881e-06, "loss": 0.5395, "step": 3302 }, { "epoch": 0.21, "grad_norm": 1.1321510076522827, "learning_rate": 9.145441143518845e-06, "loss": 0.5545, "step": 3303 }, { "epoch": 0.21, "grad_norm": 1.1026431322097778, "learning_rate": 9.144856598162344e-06, "loss": 0.5347, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.2499175071716309, "learning_rate": 9.144271871644854e-06, "loss": 0.555, "step": 3305 }, { "epoch": 0.21, "grad_norm": 1.131995439529419, "learning_rate": 9.143686963991933e-06, "loss": 0.5585, "step": 3306 }, { "epoch": 0.21, "grad_norm": 1.1314640045166016, "learning_rate": 9.143101875229146e-06, "loss": 0.5689, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.0898692607879639, "learning_rate": 9.142516605382065e-06, "loss": 0.5555, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.0747958421707153, "learning_rate": 9.141931154476271e-06, "loss": 0.478, "step": 3309 }, { "epoch": 0.21, "grad_norm": 1.2155238389968872, "learning_rate": 9.141345522537352e-06, "loss": 0.5507, "step": 3310 }, { "epoch": 0.21, "grad_norm": 1.1278891563415527, "learning_rate": 9.140759709590908e-06, "loss": 0.5793, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.112082600593567, "learning_rate": 9.140173715662537e-06, "loss": 0.5556, "step": 3312 }, { "epoch": 0.21, "grad_norm": 1.31443452835083, "learning_rate": 9.139587540777857e-06, "loss": 0.5678, "step": 3313 }, { "epoch": 0.21, "grad_norm": 1.1061967611312866, "learning_rate": 9.139001184962485e-06, "loss": 0.5583, "step": 3314 }, { "epoch": 0.21, "grad_norm": 1.0963104963302612, "learning_rate": 9.138414648242048e-06, "loss": 0.5903, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.1866562366485596, "learning_rate": 9.137827930642187e-06, "loss": 0.604, "step": 3316 }, { "epoch": 0.21, "grad_norm": 1.2357805967330933, "learning_rate": 9.137241032188541e-06, "loss": 0.563, "step": 3317 }, { "epoch": 0.21, "grad_norm": 1.2346036434173584, "learning_rate": 9.136653952906765e-06, "loss": 0.6219, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.2067711353302002, "learning_rate": 9.136066692822516e-06, "loss": 0.5987, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.154349684715271, "learning_rate": 9.135479251961465e-06, "loss": 0.5245, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.2286444902420044, "learning_rate": 9.134891630349283e-06, "loss": 0.5556, "step": 3321 }, { "epoch": 0.21, "grad_norm": 1.1267977952957153, "learning_rate": 9.134303828011658e-06, "loss": 0.5431, "step": 3322 }, { "epoch": 0.21, "grad_norm": 1.0789239406585693, "learning_rate": 9.133715844974277e-06, "loss": 0.5482, "step": 3323 }, { "epoch": 0.21, "grad_norm": 1.215700387954712, "learning_rate": 9.133127681262846e-06, "loss": 0.6178, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.1596362590789795, "learning_rate": 9.132539336903063e-06, "loss": 0.5732, "step": 3325 }, { "epoch": 0.21, "grad_norm": 1.304458498954773, "learning_rate": 9.13195081192065e-06, "loss": 0.5969, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.1533606052398682, "learning_rate": 9.131362106341329e-06, "loss": 0.5401, "step": 3327 }, { "epoch": 0.21, "grad_norm": 1.1508228778839111, "learning_rate": 9.130773220190826e-06, "loss": 0.6031, "step": 3328 }, { "epoch": 0.21, "grad_norm": 1.26691472530365, "learning_rate": 9.130184153494885e-06, "loss": 0.6066, "step": 3329 }, { "epoch": 0.21, "grad_norm": 1.1456574201583862, "learning_rate": 9.129594906279251e-06, "loss": 0.5811, "step": 3330 }, { "epoch": 0.22, "grad_norm": 1.2812446355819702, "learning_rate": 9.129005478569677e-06, "loss": 0.5642, "step": 3331 }, { "epoch": 0.22, "grad_norm": 1.2566907405853271, "learning_rate": 9.128415870391927e-06, "loss": 0.5592, "step": 3332 }, { "epoch": 0.22, "grad_norm": 1.2304984331130981, "learning_rate": 9.127826081771772e-06, "loss": 0.6033, "step": 3333 }, { "epoch": 0.22, "grad_norm": 1.4201078414916992, "learning_rate": 9.127236112734986e-06, "loss": 0.6115, "step": 3334 }, { "epoch": 0.22, "grad_norm": 1.0733801126480103, "learning_rate": 9.12664596330736e-06, "loss": 0.5528, "step": 3335 }, { "epoch": 0.22, "grad_norm": 1.1158831119537354, "learning_rate": 9.126055633514685e-06, "loss": 0.5652, "step": 3336 }, { "epoch": 0.22, "grad_norm": 1.2250906229019165, "learning_rate": 9.125465123382765e-06, "loss": 0.5858, "step": 3337 }, { "epoch": 0.22, "grad_norm": 1.2382965087890625, "learning_rate": 9.124874432937408e-06, "loss": 0.5714, "step": 3338 }, { "epoch": 0.22, "grad_norm": 1.1782480478286743, "learning_rate": 9.124283562204431e-06, "loss": 0.595, "step": 3339 }, { "epoch": 0.22, "grad_norm": 1.1643818616867065, "learning_rate": 9.12369251120966e-06, "loss": 0.5288, "step": 3340 }, { "epoch": 0.22, "grad_norm": 1.3040131330490112, "learning_rate": 9.123101279978928e-06, "loss": 0.6512, "step": 3341 }, { "epoch": 0.22, "grad_norm": 1.2353237867355347, "learning_rate": 9.122509868538078e-06, "loss": 0.5899, "step": 3342 }, { "epoch": 0.22, "grad_norm": 1.1473020315170288, "learning_rate": 9.121918276912957e-06, "loss": 0.5271, "step": 3343 }, { "epoch": 0.22, "grad_norm": 1.4314864873886108, "learning_rate": 9.121326505129424e-06, "loss": 0.5458, "step": 3344 }, { "epoch": 0.22, "grad_norm": 1.214882254600525, "learning_rate": 9.120734553213342e-06, "loss": 0.6186, "step": 3345 }, { "epoch": 0.22, "grad_norm": 1.3162132501602173, "learning_rate": 9.120142421190585e-06, "loss": 0.597, "step": 3346 }, { "epoch": 0.22, "grad_norm": 1.3084020614624023, "learning_rate": 9.119550109087032e-06, "loss": 0.5487, "step": 3347 }, { "epoch": 0.22, "grad_norm": 1.144492506980896, "learning_rate": 9.118957616928572e-06, "loss": 0.5243, "step": 3348 }, { "epoch": 0.22, "grad_norm": 1.2273097038269043, "learning_rate": 9.118364944741104e-06, "loss": 0.5685, "step": 3349 }, { "epoch": 0.22, "grad_norm": 1.34392249584198, "learning_rate": 9.117772092550528e-06, "loss": 0.5849, "step": 3350 }, { "epoch": 0.22, "grad_norm": 1.3906391859054565, "learning_rate": 9.117179060382757e-06, "loss": 0.6129, "step": 3351 }, { "epoch": 0.22, "grad_norm": 1.4761146306991577, "learning_rate": 9.116585848263712e-06, "loss": 0.55, "step": 3352 }, { "epoch": 0.22, "grad_norm": 1.1331119537353516, "learning_rate": 9.115992456219323e-06, "loss": 0.5518, "step": 3353 }, { "epoch": 0.22, "grad_norm": 1.4146333932876587, "learning_rate": 9.115398884275521e-06, "loss": 0.6523, "step": 3354 }, { "epoch": 0.22, "grad_norm": 1.1455721855163574, "learning_rate": 9.114805132458252e-06, "loss": 0.5775, "step": 3355 }, { "epoch": 0.22, "grad_norm": 1.100079894065857, "learning_rate": 9.114211200793466e-06, "loss": 0.5959, "step": 3356 }, { "epoch": 0.22, "grad_norm": 1.14383864402771, "learning_rate": 9.113617089307126e-06, "loss": 0.6198, "step": 3357 }, { "epoch": 0.22, "grad_norm": 1.3530747890472412, "learning_rate": 9.113022798025192e-06, "loss": 0.5511, "step": 3358 }, { "epoch": 0.22, "grad_norm": 1.3405157327651978, "learning_rate": 9.112428326973646e-06, "loss": 0.5663, "step": 3359 }, { "epoch": 0.22, "grad_norm": 1.2868609428405762, "learning_rate": 9.111833676178468e-06, "loss": 0.5855, "step": 3360 }, { "epoch": 0.22, "grad_norm": 1.2326527833938599, "learning_rate": 9.111238845665649e-06, "loss": 0.5872, "step": 3361 }, { "epoch": 0.22, "grad_norm": 1.2323466539382935, "learning_rate": 9.110643835461186e-06, "loss": 0.5463, "step": 3362 }, { "epoch": 0.22, "grad_norm": 1.2432693243026733, "learning_rate": 9.110048645591088e-06, "loss": 0.6233, "step": 3363 }, { "epoch": 0.22, "grad_norm": 1.197370171546936, "learning_rate": 9.109453276081366e-06, "loss": 0.5893, "step": 3364 }, { "epoch": 0.22, "grad_norm": 1.1533045768737793, "learning_rate": 9.108857726958045e-06, "loss": 0.5316, "step": 3365 }, { "epoch": 0.22, "grad_norm": 1.0999869108200073, "learning_rate": 9.108261998247155e-06, "loss": 0.5763, "step": 3366 }, { "epoch": 0.22, "grad_norm": 1.243642807006836, "learning_rate": 9.10766608997473e-06, "loss": 0.6091, "step": 3367 }, { "epoch": 0.22, "grad_norm": 1.2616595029830933, "learning_rate": 9.10707000216682e-06, "loss": 0.5701, "step": 3368 }, { "epoch": 0.22, "grad_norm": 1.0943193435668945, "learning_rate": 9.106473734849476e-06, "loss": 0.5418, "step": 3369 }, { "epoch": 0.22, "grad_norm": 1.139095425605774, "learning_rate": 9.105877288048759e-06, "loss": 0.5418, "step": 3370 }, { "epoch": 0.22, "grad_norm": 1.2123115062713623, "learning_rate": 9.105280661790739e-06, "loss": 0.5758, "step": 3371 }, { "epoch": 0.22, "grad_norm": 1.145715355873108, "learning_rate": 9.104683856101493e-06, "loss": 0.5919, "step": 3372 }, { "epoch": 0.22, "grad_norm": 1.3174158334732056, "learning_rate": 9.104086871007107e-06, "loss": 0.5657, "step": 3373 }, { "epoch": 0.22, "grad_norm": 1.1441009044647217, "learning_rate": 9.103489706533673e-06, "loss": 0.5978, "step": 3374 }, { "epoch": 0.22, "grad_norm": 1.2659504413604736, "learning_rate": 9.10289236270729e-06, "loss": 0.5435, "step": 3375 }, { "epoch": 0.22, "grad_norm": 1.0986052751541138, "learning_rate": 9.102294839554068e-06, "loss": 0.5604, "step": 3376 }, { "epoch": 0.22, "grad_norm": 1.1844512224197388, "learning_rate": 9.101697137100122e-06, "loss": 0.5464, "step": 3377 }, { "epoch": 0.22, "grad_norm": 1.1731034517288208, "learning_rate": 9.101099255371578e-06, "loss": 0.5472, "step": 3378 }, { "epoch": 0.22, "grad_norm": 1.2437139749526978, "learning_rate": 9.100501194394564e-06, "loss": 0.572, "step": 3379 }, { "epoch": 0.22, "grad_norm": 1.1982256174087524, "learning_rate": 9.099902954195224e-06, "loss": 0.576, "step": 3380 }, { "epoch": 0.22, "grad_norm": 1.1495634317398071, "learning_rate": 9.099304534799703e-06, "loss": 0.5571, "step": 3381 }, { "epoch": 0.22, "grad_norm": 1.2260524034500122, "learning_rate": 9.098705936234158e-06, "loss": 0.6148, "step": 3382 }, { "epoch": 0.22, "grad_norm": 1.063228726387024, "learning_rate": 9.098107158524751e-06, "loss": 0.5368, "step": 3383 }, { "epoch": 0.22, "grad_norm": 1.0588420629501343, "learning_rate": 9.097508201697653e-06, "loss": 0.5418, "step": 3384 }, { "epoch": 0.22, "grad_norm": 1.181414246559143, "learning_rate": 9.096909065779043e-06, "loss": 0.564, "step": 3385 }, { "epoch": 0.22, "grad_norm": 1.116650938987732, "learning_rate": 9.09630975079511e-06, "loss": 0.5769, "step": 3386 }, { "epoch": 0.22, "grad_norm": 1.2070859670639038, "learning_rate": 9.095710256772043e-06, "loss": 0.5854, "step": 3387 }, { "epoch": 0.22, "grad_norm": 1.3032935857772827, "learning_rate": 9.09511058373605e-06, "loss": 0.5996, "step": 3388 }, { "epoch": 0.22, "grad_norm": 1.1567517518997192, "learning_rate": 9.094510731713338e-06, "loss": 0.5222, "step": 3389 }, { "epoch": 0.22, "grad_norm": 1.214563250541687, "learning_rate": 9.093910700730127e-06, "loss": 0.5708, "step": 3390 }, { "epoch": 0.22, "grad_norm": 1.1360586881637573, "learning_rate": 9.093310490812642e-06, "loss": 0.5448, "step": 3391 }, { "epoch": 0.22, "grad_norm": 1.3381099700927734, "learning_rate": 9.092710101987115e-06, "loss": 0.6143, "step": 3392 }, { "epoch": 0.22, "grad_norm": 1.079342007637024, "learning_rate": 9.092109534279787e-06, "loss": 0.5776, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.1479531526565552, "learning_rate": 9.091508787716912e-06, "loss": 0.5444, "step": 3394 }, { "epoch": 0.22, "grad_norm": 1.0570132732391357, "learning_rate": 9.090907862324744e-06, "loss": 0.6017, "step": 3395 }, { "epoch": 0.22, "grad_norm": 1.1697176694869995, "learning_rate": 9.090306758129548e-06, "loss": 0.582, "step": 3396 }, { "epoch": 0.22, "grad_norm": 1.2798981666564941, "learning_rate": 9.089705475157595e-06, "loss": 0.6295, "step": 3397 }, { "epoch": 0.22, "grad_norm": 1.1468079090118408, "learning_rate": 9.089104013435167e-06, "loss": 0.6196, "step": 3398 }, { "epoch": 0.22, "grad_norm": 1.196268081665039, "learning_rate": 9.088502372988556e-06, "loss": 0.594, "step": 3399 }, { "epoch": 0.22, "grad_norm": 1.233042597770691, "learning_rate": 9.087900553844053e-06, "loss": 0.5265, "step": 3400 }, { "epoch": 0.22, "grad_norm": 1.2190793752670288, "learning_rate": 9.087298556027964e-06, "loss": 0.5949, "step": 3401 }, { "epoch": 0.22, "grad_norm": 1.2151635885238647, "learning_rate": 9.086696379566598e-06, "loss": 0.5531, "step": 3402 }, { "epoch": 0.22, "grad_norm": 1.2370960712432861, "learning_rate": 9.086094024486279e-06, "loss": 0.6178, "step": 3403 }, { "epoch": 0.22, "grad_norm": 1.1574475765228271, "learning_rate": 9.085491490813333e-06, "loss": 0.4864, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.2684288024902344, "learning_rate": 9.084888778574095e-06, "loss": 0.525, "step": 3405 }, { "epoch": 0.22, "grad_norm": 1.1145532131195068, "learning_rate": 9.084285887794906e-06, "loss": 0.5583, "step": 3406 }, { "epoch": 0.22, "grad_norm": 1.2321999073028564, "learning_rate": 9.083682818502118e-06, "loss": 0.5688, "step": 3407 }, { "epoch": 0.22, "grad_norm": 1.1309480667114258, "learning_rate": 9.083079570722093e-06, "loss": 0.524, "step": 3408 }, { "epoch": 0.22, "grad_norm": 1.1626149415969849, "learning_rate": 9.082476144481192e-06, "loss": 0.5973, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.1604880094528198, "learning_rate": 9.081872539805792e-06, "loss": 0.5918, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.2047713994979858, "learning_rate": 9.081268756722274e-06, "loss": 0.6059, "step": 3411 }, { "epoch": 0.22, "grad_norm": 1.1648578643798828, "learning_rate": 9.080664795257031e-06, "loss": 0.5699, "step": 3412 }, { "epoch": 0.22, "grad_norm": 1.0909039974212646, "learning_rate": 9.080060655436456e-06, "loss": 0.6191, "step": 3413 }, { "epoch": 0.22, "grad_norm": 1.1153491735458374, "learning_rate": 9.079456337286956e-06, "loss": 0.5576, "step": 3414 }, { "epoch": 0.22, "grad_norm": 1.2430965900421143, "learning_rate": 9.078851840834946e-06, "loss": 0.5888, "step": 3415 }, { "epoch": 0.22, "grad_norm": 1.1192364692687988, "learning_rate": 9.078247166106845e-06, "loss": 0.5273, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.233163833618164, "learning_rate": 9.077642313129083e-06, "loss": 0.622, "step": 3417 }, { "epoch": 0.22, "grad_norm": 1.125943899154663, "learning_rate": 9.077037281928095e-06, "loss": 0.569, "step": 3418 }, { "epoch": 0.22, "grad_norm": 1.0006173849105835, "learning_rate": 9.076432072530326e-06, "loss": 0.5016, "step": 3419 }, { "epoch": 0.22, "grad_norm": 1.276964545249939, "learning_rate": 9.07582668496223e-06, "loss": 0.5531, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.1483653783798218, "learning_rate": 9.075221119250265e-06, "loss": 0.5748, "step": 3421 }, { "epoch": 0.22, "grad_norm": 1.2136517763137817, "learning_rate": 9.074615375420898e-06, "loss": 0.5458, "step": 3422 }, { "epoch": 0.22, "grad_norm": 1.1598082780838013, "learning_rate": 9.074009453500608e-06, "loss": 0.6177, "step": 3423 }, { "epoch": 0.22, "grad_norm": 1.1010122299194336, "learning_rate": 9.073403353515874e-06, "loss": 0.5983, "step": 3424 }, { "epoch": 0.22, "grad_norm": 1.276406168937683, "learning_rate": 9.072797075493188e-06, "loss": 0.5361, "step": 3425 }, { "epoch": 0.22, "grad_norm": 1.1980358362197876, "learning_rate": 9.072190619459052e-06, "loss": 0.5592, "step": 3426 }, { "epoch": 0.22, "grad_norm": 1.2218012809753418, "learning_rate": 9.071583985439969e-06, "loss": 0.5478, "step": 3427 }, { "epoch": 0.22, "grad_norm": 1.2117353677749634, "learning_rate": 9.070977173462455e-06, "loss": 0.58, "step": 3428 }, { "epoch": 0.22, "grad_norm": 1.096798300743103, "learning_rate": 9.070370183553032e-06, "loss": 0.5479, "step": 3429 }, { "epoch": 0.22, "grad_norm": 1.1876566410064697, "learning_rate": 9.06976301573823e-06, "loss": 0.5593, "step": 3430 }, { "epoch": 0.22, "grad_norm": 1.1665502786636353, "learning_rate": 9.069155670044588e-06, "loss": 0.5844, "step": 3431 }, { "epoch": 0.22, "grad_norm": 1.327223300933838, "learning_rate": 9.068548146498649e-06, "loss": 0.5306, "step": 3432 }, { "epoch": 0.22, "grad_norm": 1.2365503311157227, "learning_rate": 9.067940445126965e-06, "loss": 0.6001, "step": 3433 }, { "epoch": 0.22, "grad_norm": 1.2807732820510864, "learning_rate": 9.067332565956104e-06, "loss": 0.6144, "step": 3434 }, { "epoch": 0.22, "grad_norm": 1.126753807067871, "learning_rate": 9.066724509012628e-06, "loss": 0.586, "step": 3435 }, { "epoch": 0.22, "grad_norm": 1.1225206851959229, "learning_rate": 9.066116274323116e-06, "loss": 0.5376, "step": 3436 }, { "epoch": 0.22, "grad_norm": 1.305524230003357, "learning_rate": 9.065507861914153e-06, "loss": 0.541, "step": 3437 }, { "epoch": 0.22, "grad_norm": 1.1232614517211914, "learning_rate": 9.064899271812332e-06, "loss": 0.5843, "step": 3438 }, { "epoch": 0.22, "grad_norm": 1.1551799774169922, "learning_rate": 9.064290504044248e-06, "loss": 0.5628, "step": 3439 }, { "epoch": 0.22, "grad_norm": 1.1264866590499878, "learning_rate": 9.063681558636515e-06, "loss": 0.5948, "step": 3440 }, { "epoch": 0.22, "grad_norm": 1.2718827724456787, "learning_rate": 9.063072435615743e-06, "loss": 0.5629, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.2053484916687012, "learning_rate": 9.062463135008558e-06, "loss": 0.6164, "step": 3442 }, { "epoch": 0.22, "grad_norm": 1.2854721546173096, "learning_rate": 9.061853656841594e-06, "loss": 0.5215, "step": 3443 }, { "epoch": 0.22, "grad_norm": 1.1746028661727905, "learning_rate": 9.061244001141483e-06, "loss": 0.587, "step": 3444 }, { "epoch": 0.22, "grad_norm": 1.2118793725967407, "learning_rate": 9.060634167934877e-06, "loss": 0.6237, "step": 3445 }, { "epoch": 0.22, "grad_norm": 1.2440414428710938, "learning_rate": 9.060024157248426e-06, "loss": 0.546, "step": 3446 }, { "epoch": 0.22, "grad_norm": 1.181841492652893, "learning_rate": 9.059413969108796e-06, "loss": 0.5937, "step": 3447 }, { "epoch": 0.22, "grad_norm": 1.1936818361282349, "learning_rate": 9.058803603542654e-06, "loss": 0.5232, "step": 3448 }, { "epoch": 0.22, "grad_norm": 1.1008098125457764, "learning_rate": 9.05819306057668e-06, "loss": 0.5812, "step": 3449 }, { "epoch": 0.22, "grad_norm": 1.1117585897445679, "learning_rate": 9.057582340237555e-06, "loss": 0.5721, "step": 3450 }, { "epoch": 0.22, "grad_norm": 1.2226662635803223, "learning_rate": 9.056971442551974e-06, "loss": 0.5921, "step": 3451 }, { "epoch": 0.22, "grad_norm": 1.2109718322753906, "learning_rate": 9.056360367546641e-06, "loss": 0.5861, "step": 3452 }, { "epoch": 0.22, "grad_norm": 1.178336262702942, "learning_rate": 9.05574911524826e-06, "loss": 0.6074, "step": 3453 }, { "epoch": 0.22, "grad_norm": 1.1676745414733887, "learning_rate": 9.05513768568355e-06, "loss": 0.5848, "step": 3454 }, { "epoch": 0.22, "grad_norm": 1.1722843647003174, "learning_rate": 9.054526078879236e-06, "loss": 0.5612, "step": 3455 }, { "epoch": 0.22, "grad_norm": 1.1343125104904175, "learning_rate": 9.053914294862045e-06, "loss": 0.5775, "step": 3456 }, { "epoch": 0.22, "grad_norm": 1.3552374839782715, "learning_rate": 9.05330233365872e-06, "loss": 0.5852, "step": 3457 }, { "epoch": 0.22, "grad_norm": 1.3249403238296509, "learning_rate": 9.052690195296007e-06, "loss": 0.5431, "step": 3458 }, { "epoch": 0.22, "grad_norm": 1.244281530380249, "learning_rate": 9.052077879800666e-06, "loss": 0.5347, "step": 3459 }, { "epoch": 0.22, "grad_norm": 1.2351208925247192, "learning_rate": 9.05146538719945e-06, "loss": 0.5473, "step": 3460 }, { "epoch": 0.22, "grad_norm": 1.1178334951400757, "learning_rate": 9.050852717519137e-06, "loss": 0.5794, "step": 3461 }, { "epoch": 0.22, "grad_norm": 1.3295626640319824, "learning_rate": 9.050239870786504e-06, "loss": 0.5691, "step": 3462 }, { "epoch": 0.22, "grad_norm": 1.2470630407333374, "learning_rate": 9.049626847028334e-06, "loss": 0.5834, "step": 3463 }, { "epoch": 0.22, "grad_norm": 1.2941230535507202, "learning_rate": 9.049013646271424e-06, "loss": 0.5646, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.246274471282959, "learning_rate": 9.048400268542575e-06, "loss": 0.5649, "step": 3465 }, { "epoch": 0.22, "grad_norm": 1.151948094367981, "learning_rate": 9.047786713868592e-06, "loss": 0.5391, "step": 3466 }, { "epoch": 0.22, "grad_norm": 1.1849476099014282, "learning_rate": 9.047172982276298e-06, "loss": 0.5736, "step": 3467 }, { "epoch": 0.22, "grad_norm": 1.1938238143920898, "learning_rate": 9.046559073792513e-06, "loss": 0.518, "step": 3468 }, { "epoch": 0.22, "grad_norm": 1.3007392883300781, "learning_rate": 9.045944988444072e-06, "loss": 0.5945, "step": 3469 }, { "epoch": 0.22, "grad_norm": 1.217965006828308, "learning_rate": 9.045330726257816e-06, "loss": 0.5967, "step": 3470 }, { "epoch": 0.22, "grad_norm": 1.4109313488006592, "learning_rate": 9.044716287260589e-06, "loss": 0.572, "step": 3471 }, { "epoch": 0.22, "grad_norm": 1.0701655149459839, "learning_rate": 9.04410167147925e-06, "loss": 0.5655, "step": 3472 }, { "epoch": 0.22, "grad_norm": 1.0858001708984375, "learning_rate": 9.043486878940659e-06, "loss": 0.5627, "step": 3473 }, { "epoch": 0.22, "grad_norm": 1.222851276397705, "learning_rate": 9.042871909671691e-06, "loss": 0.5795, "step": 3474 }, { "epoch": 0.22, "grad_norm": 1.185890555381775, "learning_rate": 9.042256763699221e-06, "loss": 0.5416, "step": 3475 }, { "epoch": 0.22, "grad_norm": 1.0999581813812256, "learning_rate": 9.041641441050137e-06, "loss": 0.5591, "step": 3476 }, { "epoch": 0.22, "grad_norm": 1.0968348979949951, "learning_rate": 9.041025941751333e-06, "loss": 0.5469, "step": 3477 }, { "epoch": 0.22, "grad_norm": 1.0454052686691284, "learning_rate": 9.040410265829713e-06, "loss": 0.5178, "step": 3478 }, { "epoch": 0.22, "grad_norm": 1.170596718788147, "learning_rate": 9.039794413312183e-06, "loss": 0.5372, "step": 3479 }, { "epoch": 0.22, "grad_norm": 1.061955451965332, "learning_rate": 9.039178384225662e-06, "loss": 0.5701, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.145329475402832, "learning_rate": 9.038562178597076e-06, "loss": 0.5725, "step": 3481 }, { "epoch": 0.22, "grad_norm": 1.1272106170654297, "learning_rate": 9.037945796453356e-06, "loss": 0.5616, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.1520143747329712, "learning_rate": 9.037329237821442e-06, "loss": 0.5199, "step": 3483 }, { "epoch": 0.22, "grad_norm": 1.2267402410507202, "learning_rate": 9.036712502728287e-06, "loss": 0.5535, "step": 3484 }, { "epoch": 0.22, "grad_norm": 1.106839895248413, "learning_rate": 9.03609559120084e-06, "loss": 0.5716, "step": 3485 }, { "epoch": 0.23, "grad_norm": 1.1474692821502686, "learning_rate": 9.035478503266069e-06, "loss": 0.581, "step": 3486 }, { "epoch": 0.23, "grad_norm": 1.2483395338058472, "learning_rate": 9.034861238950944e-06, "loss": 0.5954, "step": 3487 }, { "epoch": 0.23, "grad_norm": 1.3217617273330688, "learning_rate": 9.034243798282445e-06, "loss": 0.6055, "step": 3488 }, { "epoch": 0.23, "grad_norm": 1.1817748546600342, "learning_rate": 9.033626181287559e-06, "loss": 0.5506, "step": 3489 }, { "epoch": 0.23, "grad_norm": 1.169158935546875, "learning_rate": 9.033008387993279e-06, "loss": 0.5978, "step": 3490 }, { "epoch": 0.23, "grad_norm": 1.0614428520202637, "learning_rate": 9.032390418426606e-06, "loss": 0.5084, "step": 3491 }, { "epoch": 0.23, "grad_norm": 1.1962677240371704, "learning_rate": 9.031772272614554e-06, "loss": 0.5618, "step": 3492 }, { "epoch": 0.23, "grad_norm": 1.131287693977356, "learning_rate": 9.031153950584137e-06, "loss": 0.5534, "step": 3493 }, { "epoch": 0.23, "grad_norm": 1.2028374671936035, "learning_rate": 9.03053545236238e-06, "loss": 0.5794, "step": 3494 }, { "epoch": 0.23, "grad_norm": 1.1194919347763062, "learning_rate": 9.029916777976318e-06, "loss": 0.5555, "step": 3495 }, { "epoch": 0.23, "grad_norm": 1.1688036918640137, "learning_rate": 9.029297927452991e-06, "loss": 0.5555, "step": 3496 }, { "epoch": 0.23, "grad_norm": 1.1513686180114746, "learning_rate": 9.028678900819448e-06, "loss": 0.5719, "step": 3497 }, { "epoch": 0.23, "grad_norm": 1.0844961404800415, "learning_rate": 9.028059698102744e-06, "loss": 0.5101, "step": 3498 }, { "epoch": 0.23, "grad_norm": 1.090301275253296, "learning_rate": 9.027440319329943e-06, "loss": 0.5336, "step": 3499 }, { "epoch": 0.23, "grad_norm": 1.1969722509384155, "learning_rate": 9.026820764528117e-06, "loss": 0.5468, "step": 3500 }, { "epoch": 0.23, "grad_norm": 1.2589716911315918, "learning_rate": 9.026201033724345e-06, "loss": 0.5788, "step": 3501 }, { "epoch": 0.23, "grad_norm": 1.1138368844985962, "learning_rate": 9.025581126945712e-06, "loss": 0.5226, "step": 3502 }, { "epoch": 0.23, "grad_norm": 1.1967614889144897, "learning_rate": 9.024961044219315e-06, "loss": 0.5913, "step": 3503 }, { "epoch": 0.23, "grad_norm": 1.214508056640625, "learning_rate": 9.024340785572256e-06, "loss": 0.5672, "step": 3504 }, { "epoch": 0.23, "grad_norm": 1.2487741708755493, "learning_rate": 9.023720351031644e-06, "loss": 0.5703, "step": 3505 }, { "epoch": 0.23, "grad_norm": 1.1983561515808105, "learning_rate": 9.023099740624597e-06, "loss": 0.6082, "step": 3506 }, { "epoch": 0.23, "grad_norm": 1.1520761251449585, "learning_rate": 9.022478954378238e-06, "loss": 0.581, "step": 3507 }, { "epoch": 0.23, "grad_norm": 1.2419987916946411, "learning_rate": 9.021857992319704e-06, "loss": 0.5653, "step": 3508 }, { "epoch": 0.23, "grad_norm": 1.2357594966888428, "learning_rate": 9.021236854476132e-06, "loss": 0.6306, "step": 3509 }, { "epoch": 0.23, "grad_norm": 1.1355832815170288, "learning_rate": 9.020615540874673e-06, "loss": 0.5373, "step": 3510 }, { "epoch": 0.23, "grad_norm": 1.120291829109192, "learning_rate": 9.019994051542482e-06, "loss": 0.548, "step": 3511 }, { "epoch": 0.23, "grad_norm": 1.1646859645843506, "learning_rate": 9.019372386506725e-06, "loss": 0.5518, "step": 3512 }, { "epoch": 0.23, "grad_norm": 1.0891982316970825, "learning_rate": 9.018750545794567e-06, "loss": 0.5129, "step": 3513 }, { "epoch": 0.23, "grad_norm": 1.226209044456482, "learning_rate": 9.018128529433194e-06, "loss": 0.5978, "step": 3514 }, { "epoch": 0.23, "grad_norm": 1.0954651832580566, "learning_rate": 9.017506337449789e-06, "loss": 0.5571, "step": 3515 }, { "epoch": 0.23, "grad_norm": 1.2471798658370972, "learning_rate": 9.016883969871548e-06, "loss": 0.6227, "step": 3516 }, { "epoch": 0.23, "grad_norm": 1.3448925018310547, "learning_rate": 9.016261426725672e-06, "loss": 0.5192, "step": 3517 }, { "epoch": 0.23, "grad_norm": 1.282010555267334, "learning_rate": 9.01563870803937e-06, "loss": 0.5699, "step": 3518 }, { "epoch": 0.23, "grad_norm": 1.1687705516815186, "learning_rate": 9.015015813839863e-06, "loss": 0.6374, "step": 3519 }, { "epoch": 0.23, "grad_norm": 1.141439437866211, "learning_rate": 9.014392744154371e-06, "loss": 0.5976, "step": 3520 }, { "epoch": 0.23, "grad_norm": 1.2699223756790161, "learning_rate": 9.01376949901013e-06, "loss": 0.5931, "step": 3521 }, { "epoch": 0.23, "grad_norm": 1.3704016208648682, "learning_rate": 9.013146078434381e-06, "loss": 0.595, "step": 3522 }, { "epoch": 0.23, "grad_norm": 1.1960071325302124, "learning_rate": 9.01252248245437e-06, "loss": 0.5608, "step": 3523 }, { "epoch": 0.23, "grad_norm": 1.3327513933181763, "learning_rate": 9.011898711097355e-06, "loss": 0.5563, "step": 3524 }, { "epoch": 0.23, "grad_norm": 1.1884218454360962, "learning_rate": 9.011274764390596e-06, "loss": 0.5782, "step": 3525 }, { "epoch": 0.23, "grad_norm": 1.1540131568908691, "learning_rate": 9.010650642361367e-06, "loss": 0.5444, "step": 3526 }, { "epoch": 0.23, "grad_norm": 1.1874252557754517, "learning_rate": 9.010026345036948e-06, "loss": 0.5911, "step": 3527 }, { "epoch": 0.23, "grad_norm": 1.2431286573410034, "learning_rate": 9.009401872444622e-06, "loss": 0.5354, "step": 3528 }, { "epoch": 0.23, "grad_norm": 1.3050546646118164, "learning_rate": 9.008777224611685e-06, "loss": 0.6007, "step": 3529 }, { "epoch": 0.23, "grad_norm": 1.3186612129211426, "learning_rate": 9.008152401565437e-06, "loss": 0.569, "step": 3530 }, { "epoch": 0.23, "grad_norm": 1.4094308614730835, "learning_rate": 9.00752740333319e-06, "loss": 0.5069, "step": 3531 }, { "epoch": 0.23, "grad_norm": 1.3078339099884033, "learning_rate": 9.006902229942258e-06, "loss": 0.5558, "step": 3532 }, { "epoch": 0.23, "grad_norm": 1.1545495986938477, "learning_rate": 9.006276881419969e-06, "loss": 0.5678, "step": 3533 }, { "epoch": 0.23, "grad_norm": 1.2754132747650146, "learning_rate": 9.005651357793654e-06, "loss": 0.5806, "step": 3534 }, { "epoch": 0.23, "grad_norm": 1.2578418254852295, "learning_rate": 9.005025659090652e-06, "loss": 0.6123, "step": 3535 }, { "epoch": 0.23, "grad_norm": 1.4223941564559937, "learning_rate": 9.004399785338314e-06, "loss": 0.575, "step": 3536 }, { "epoch": 0.23, "grad_norm": 1.450255036354065, "learning_rate": 9.00377373656399e-06, "loss": 0.5809, "step": 3537 }, { "epoch": 0.23, "grad_norm": 1.1413644552230835, "learning_rate": 9.003147512795048e-06, "loss": 0.555, "step": 3538 }, { "epoch": 0.23, "grad_norm": 1.1315866708755493, "learning_rate": 9.002521114058854e-06, "loss": 0.5514, "step": 3539 }, { "epoch": 0.23, "grad_norm": 1.1216843128204346, "learning_rate": 9.00189454038279e-06, "loss": 0.5893, "step": 3540 }, { "epoch": 0.23, "grad_norm": 1.258879542350769, "learning_rate": 9.001267791794242e-06, "loss": 0.5948, "step": 3541 }, { "epoch": 0.23, "grad_norm": 1.3284655809402466, "learning_rate": 9.0006408683206e-06, "loss": 0.5397, "step": 3542 }, { "epoch": 0.23, "grad_norm": 1.2025237083435059, "learning_rate": 9.000013769989269e-06, "loss": 0.5853, "step": 3543 }, { "epoch": 0.23, "grad_norm": 1.1885625123977661, "learning_rate": 8.999386496827655e-06, "loss": 0.5773, "step": 3544 }, { "epoch": 0.23, "grad_norm": 1.2323484420776367, "learning_rate": 8.998759048863176e-06, "loss": 0.548, "step": 3545 }, { "epoch": 0.23, "grad_norm": 1.22560453414917, "learning_rate": 8.998131426123257e-06, "loss": 0.5518, "step": 3546 }, { "epoch": 0.23, "grad_norm": 1.2379326820373535, "learning_rate": 8.997503628635327e-06, "loss": 0.5553, "step": 3547 }, { "epoch": 0.23, "grad_norm": 1.223279356956482, "learning_rate": 8.99687565642683e-06, "loss": 0.5937, "step": 3548 }, { "epoch": 0.23, "grad_norm": 1.1335886716842651, "learning_rate": 8.996247509525206e-06, "loss": 0.5749, "step": 3549 }, { "epoch": 0.23, "grad_norm": 1.257720708847046, "learning_rate": 8.995619187957916e-06, "loss": 0.5536, "step": 3550 }, { "epoch": 0.23, "grad_norm": 1.1830952167510986, "learning_rate": 8.994990691752422e-06, "loss": 0.5614, "step": 3551 }, { "epoch": 0.23, "grad_norm": 1.1146128177642822, "learning_rate": 8.994362020936189e-06, "loss": 0.5532, "step": 3552 }, { "epoch": 0.23, "grad_norm": 1.2165217399597168, "learning_rate": 8.9937331755367e-06, "loss": 0.5547, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.1419366598129272, "learning_rate": 8.993104155581436e-06, "loss": 0.5965, "step": 3554 }, { "epoch": 0.23, "grad_norm": 1.1350699663162231, "learning_rate": 8.992474961097892e-06, "loss": 0.5644, "step": 3555 }, { "epoch": 0.23, "grad_norm": 1.2163097858428955, "learning_rate": 8.991845592113568e-06, "loss": 0.5549, "step": 3556 }, { "epoch": 0.23, "grad_norm": 1.2125465869903564, "learning_rate": 8.991216048655973e-06, "loss": 0.569, "step": 3557 }, { "epoch": 0.23, "grad_norm": 1.2566794157028198, "learning_rate": 8.990586330752622e-06, "loss": 0.6086, "step": 3558 }, { "epoch": 0.23, "grad_norm": 1.206714391708374, "learning_rate": 8.989956438431038e-06, "loss": 0.6254, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.1535134315490723, "learning_rate": 8.989326371718754e-06, "loss": 0.5541, "step": 3560 }, { "epoch": 0.23, "grad_norm": 1.1112862825393677, "learning_rate": 8.988696130643306e-06, "loss": 0.5373, "step": 3561 }, { "epoch": 0.23, "grad_norm": 1.4029438495635986, "learning_rate": 8.98806571523224e-06, "loss": 0.5278, "step": 3562 }, { "epoch": 0.23, "grad_norm": 1.1606348752975464, "learning_rate": 8.98743512551311e-06, "loss": 0.5737, "step": 3563 }, { "epoch": 0.23, "grad_norm": 1.2434008121490479, "learning_rate": 8.986804361513482e-06, "loss": 0.6114, "step": 3564 }, { "epoch": 0.23, "grad_norm": 1.2233682870864868, "learning_rate": 8.986173423260917e-06, "loss": 0.5665, "step": 3565 }, { "epoch": 0.23, "grad_norm": 1.0907272100448608, "learning_rate": 8.985542310782999e-06, "loss": 0.593, "step": 3566 }, { "epoch": 0.23, "grad_norm": 1.1673197746276855, "learning_rate": 8.98491102410731e-06, "loss": 0.5455, "step": 3567 }, { "epoch": 0.23, "grad_norm": 1.173221230506897, "learning_rate": 8.98427956326144e-06, "loss": 0.6045, "step": 3568 }, { "epoch": 0.23, "grad_norm": 1.1657785177230835, "learning_rate": 8.983647928272989e-06, "loss": 0.5948, "step": 3569 }, { "epoch": 0.23, "grad_norm": 1.154973030090332, "learning_rate": 8.983016119169567e-06, "loss": 0.5447, "step": 3570 }, { "epoch": 0.23, "grad_norm": 1.3244966268539429, "learning_rate": 8.982384135978785e-06, "loss": 0.5787, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.203478455543518, "learning_rate": 8.981751978728268e-06, "loss": 0.6062, "step": 3572 }, { "epoch": 0.23, "grad_norm": 1.173987627029419, "learning_rate": 8.981119647445645e-06, "loss": 0.6078, "step": 3573 }, { "epoch": 0.23, "grad_norm": 1.1474155187606812, "learning_rate": 8.980487142158554e-06, "loss": 0.6155, "step": 3574 }, { "epoch": 0.23, "grad_norm": 1.1383572816848755, "learning_rate": 8.97985446289464e-06, "loss": 0.5583, "step": 3575 }, { "epoch": 0.23, "grad_norm": 1.4140400886535645, "learning_rate": 8.979221609681553e-06, "loss": 0.553, "step": 3576 }, { "epoch": 0.23, "grad_norm": 1.2073116302490234, "learning_rate": 8.97858858254696e-06, "loss": 0.597, "step": 3577 }, { "epoch": 0.23, "grad_norm": 1.3070075511932373, "learning_rate": 8.977955381518523e-06, "loss": 0.5275, "step": 3578 }, { "epoch": 0.23, "grad_norm": 1.1114305257797241, "learning_rate": 8.97732200662392e-06, "loss": 0.5499, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.1589181423187256, "learning_rate": 8.976688457890831e-06, "loss": 0.5406, "step": 3580 }, { "epoch": 0.23, "grad_norm": 1.3169620037078857, "learning_rate": 8.976054735346953e-06, "loss": 0.5715, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.3041657209396362, "learning_rate": 8.975420839019979e-06, "loss": 0.5552, "step": 3582 }, { "epoch": 0.23, "grad_norm": 1.1001777648925781, "learning_rate": 8.974786768937618e-06, "loss": 0.543, "step": 3583 }, { "epoch": 0.23, "grad_norm": 1.3236500024795532, "learning_rate": 8.974152525127583e-06, "loss": 0.5993, "step": 3584 }, { "epoch": 0.23, "grad_norm": 1.2523540258407593, "learning_rate": 8.973518107617593e-06, "loss": 0.5806, "step": 3585 }, { "epoch": 0.23, "grad_norm": 1.083095908164978, "learning_rate": 8.972883516435378e-06, "loss": 0.5829, "step": 3586 }, { "epoch": 0.23, "grad_norm": 1.122644305229187, "learning_rate": 8.972248751608677e-06, "loss": 0.6132, "step": 3587 }, { "epoch": 0.23, "grad_norm": 1.1764448881149292, "learning_rate": 8.971613813165229e-06, "loss": 0.565, "step": 3588 }, { "epoch": 0.23, "grad_norm": 1.429345726966858, "learning_rate": 8.970978701132788e-06, "loss": 0.5998, "step": 3589 }, { "epoch": 0.23, "grad_norm": 1.175063967704773, "learning_rate": 8.970343415539116e-06, "loss": 0.6234, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.172057032585144, "learning_rate": 8.969707956411974e-06, "loss": 0.5569, "step": 3591 }, { "epoch": 0.23, "grad_norm": 1.2307522296905518, "learning_rate": 8.96907232377914e-06, "loss": 0.5643, "step": 3592 }, { "epoch": 0.23, "grad_norm": 1.1561157703399658, "learning_rate": 8.968436517668398e-06, "loss": 0.569, "step": 3593 }, { "epoch": 0.23, "grad_norm": 1.15704345703125, "learning_rate": 8.967800538107532e-06, "loss": 0.5817, "step": 3594 }, { "epoch": 0.23, "grad_norm": 1.2329152822494507, "learning_rate": 8.96716438512434e-06, "loss": 0.5519, "step": 3595 }, { "epoch": 0.23, "grad_norm": 1.0748273134231567, "learning_rate": 8.96652805874663e-06, "loss": 0.5017, "step": 3596 }, { "epoch": 0.23, "grad_norm": 1.0824171304702759, "learning_rate": 8.965891559002213e-06, "loss": 0.5966, "step": 3597 }, { "epoch": 0.23, "grad_norm": 1.2352533340454102, "learning_rate": 8.965254885918906e-06, "loss": 0.5562, "step": 3598 }, { "epoch": 0.23, "grad_norm": 1.2018530368804932, "learning_rate": 8.964618039524539e-06, "loss": 0.5599, "step": 3599 }, { "epoch": 0.23, "grad_norm": 1.2562072277069092, "learning_rate": 8.963981019846948e-06, "loss": 0.5353, "step": 3600 }, { "epoch": 0.23, "grad_norm": 1.211974024772644, "learning_rate": 8.963343826913972e-06, "loss": 0.5394, "step": 3601 }, { "epoch": 0.23, "grad_norm": 1.1788076162338257, "learning_rate": 8.962706460753465e-06, "loss": 0.575, "step": 3602 }, { "epoch": 0.23, "grad_norm": 1.191218614578247, "learning_rate": 8.962068921393281e-06, "loss": 0.5745, "step": 3603 }, { "epoch": 0.23, "grad_norm": 1.2498750686645508, "learning_rate": 8.961431208861286e-06, "loss": 0.5735, "step": 3604 }, { "epoch": 0.23, "grad_norm": 1.1784536838531494, "learning_rate": 8.960793323185355e-06, "loss": 0.5725, "step": 3605 }, { "epoch": 0.23, "grad_norm": 1.00620698928833, "learning_rate": 8.960155264393367e-06, "loss": 0.551, "step": 3606 }, { "epoch": 0.23, "grad_norm": 1.1142609119415283, "learning_rate": 8.959517032513208e-06, "loss": 0.5763, "step": 3607 }, { "epoch": 0.23, "grad_norm": 1.1551005840301514, "learning_rate": 8.958878627572778e-06, "loss": 0.5901, "step": 3608 }, { "epoch": 0.23, "grad_norm": 1.2924224138259888, "learning_rate": 8.958240049599974e-06, "loss": 0.5359, "step": 3609 }, { "epoch": 0.23, "grad_norm": 1.2885478734970093, "learning_rate": 8.95760129862271e-06, "loss": 0.6312, "step": 3610 }, { "epoch": 0.23, "grad_norm": 1.1290613412857056, "learning_rate": 8.956962374668907e-06, "loss": 0.6168, "step": 3611 }, { "epoch": 0.23, "grad_norm": 1.1752595901489258, "learning_rate": 8.956323277766485e-06, "loss": 0.5939, "step": 3612 }, { "epoch": 0.23, "grad_norm": 1.1832892894744873, "learning_rate": 8.955684007943381e-06, "loss": 0.5885, "step": 3613 }, { "epoch": 0.23, "grad_norm": 1.2935229539871216, "learning_rate": 8.955044565227536e-06, "loss": 0.6001, "step": 3614 }, { "epoch": 0.23, "grad_norm": 1.19123113155365, "learning_rate": 8.954404949646895e-06, "loss": 0.5972, "step": 3615 }, { "epoch": 0.23, "grad_norm": 1.4873714447021484, "learning_rate": 8.953765161229418e-06, "loss": 0.5909, "step": 3616 }, { "epoch": 0.23, "grad_norm": 1.131589412689209, "learning_rate": 8.953125200003065e-06, "loss": 0.596, "step": 3617 }, { "epoch": 0.23, "grad_norm": 1.123477816581726, "learning_rate": 8.952485065995812e-06, "loss": 0.5919, "step": 3618 }, { "epoch": 0.23, "grad_norm": 1.0880684852600098, "learning_rate": 8.95184475923563e-06, "loss": 0.4962, "step": 3619 }, { "epoch": 0.23, "grad_norm": 1.1447484493255615, "learning_rate": 8.951204279750511e-06, "loss": 0.6024, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.0558826923370361, "learning_rate": 8.950563627568448e-06, "loss": 0.592, "step": 3621 }, { "epoch": 0.23, "grad_norm": 1.150473713874817, "learning_rate": 8.94992280271744e-06, "loss": 0.552, "step": 3622 }, { "epoch": 0.23, "grad_norm": 1.2161632776260376, "learning_rate": 8.949281805225499e-06, "loss": 0.5845, "step": 3623 }, { "epoch": 0.23, "grad_norm": 1.0710017681121826, "learning_rate": 8.948640635120637e-06, "loss": 0.5715, "step": 3624 }, { "epoch": 0.23, "grad_norm": 1.1324775218963623, "learning_rate": 8.947999292430883e-06, "loss": 0.5446, "step": 3625 }, { "epoch": 0.23, "grad_norm": 1.1235034465789795, "learning_rate": 8.947357777184265e-06, "loss": 0.517, "step": 3626 }, { "epoch": 0.23, "grad_norm": 1.2386642694473267, "learning_rate": 8.946716089408823e-06, "loss": 0.5905, "step": 3627 }, { "epoch": 0.23, "grad_norm": 1.1750990152359009, "learning_rate": 8.946074229132603e-06, "loss": 0.5801, "step": 3628 }, { "epoch": 0.23, "grad_norm": 1.1552140712738037, "learning_rate": 8.94543219638366e-06, "loss": 0.5318, "step": 3629 }, { "epoch": 0.23, "grad_norm": 1.2315870523452759, "learning_rate": 8.944789991190055e-06, "loss": 0.6121, "step": 3630 }, { "epoch": 0.23, "grad_norm": 1.0345118045806885, "learning_rate": 8.944147613579857e-06, "loss": 0.5606, "step": 3631 }, { "epoch": 0.23, "grad_norm": 1.0884124040603638, "learning_rate": 8.943505063581143e-06, "loss": 0.6091, "step": 3632 }, { "epoch": 0.23, "grad_norm": 1.2105847597122192, "learning_rate": 8.942862341221996e-06, "loss": 0.5798, "step": 3633 }, { "epoch": 0.23, "grad_norm": 1.2305690050125122, "learning_rate": 8.94221944653051e-06, "loss": 0.6022, "step": 3634 }, { "epoch": 0.23, "grad_norm": 1.1311358213424683, "learning_rate": 8.941576379534783e-06, "loss": 0.5682, "step": 3635 }, { "epoch": 0.23, "grad_norm": 1.2559062242507935, "learning_rate": 8.940933140262921e-06, "loss": 0.5499, "step": 3636 }, { "epoch": 0.23, "grad_norm": 1.164430856704712, "learning_rate": 8.94028972874304e-06, "loss": 0.6104, "step": 3637 }, { "epoch": 0.23, "grad_norm": 1.1806937456130981, "learning_rate": 8.939646145003261e-06, "loss": 0.6152, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.1610180139541626, "learning_rate": 8.939002389071714e-06, "loss": 0.5021, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.1318366527557373, "learning_rate": 8.938358460976534e-06, "loss": 0.5768, "step": 3640 }, { "epoch": 0.24, "grad_norm": 1.2416166067123413, "learning_rate": 8.937714360745868e-06, "loss": 0.6167, "step": 3641 }, { "epoch": 0.24, "grad_norm": 1.1013250350952148, "learning_rate": 8.937070088407865e-06, "loss": 0.5485, "step": 3642 }, { "epoch": 0.24, "grad_norm": 1.117684245109558, "learning_rate": 8.936425643990688e-06, "loss": 0.5735, "step": 3643 }, { "epoch": 0.24, "grad_norm": 1.4274342060089111, "learning_rate": 8.9357810275225e-06, "loss": 0.5471, "step": 3644 }, { "epoch": 0.24, "grad_norm": 1.2999622821807861, "learning_rate": 8.935136239031482e-06, "loss": 0.6103, "step": 3645 }, { "epoch": 0.24, "grad_norm": 1.3370048999786377, "learning_rate": 8.934491278545807e-06, "loss": 0.5605, "step": 3646 }, { "epoch": 0.24, "grad_norm": 1.241560935974121, "learning_rate": 8.933846146093673e-06, "loss": 0.6335, "step": 3647 }, { "epoch": 0.24, "grad_norm": 1.1942795515060425, "learning_rate": 8.933200841703271e-06, "loss": 0.5829, "step": 3648 }, { "epoch": 0.24, "grad_norm": 1.1648398637771606, "learning_rate": 8.932555365402809e-06, "loss": 0.5864, "step": 3649 }, { "epoch": 0.24, "grad_norm": 1.1047593355178833, "learning_rate": 8.931909717220497e-06, "loss": 0.5393, "step": 3650 }, { "epoch": 0.24, "grad_norm": 1.1636347770690918, "learning_rate": 8.931263897184556e-06, "loss": 0.5594, "step": 3651 }, { "epoch": 0.24, "grad_norm": 1.1959476470947266, "learning_rate": 8.930617905323214e-06, "loss": 0.539, "step": 3652 }, { "epoch": 0.24, "grad_norm": 1.1285877227783203, "learning_rate": 8.929971741664703e-06, "loss": 0.6182, "step": 3653 }, { "epoch": 0.24, "grad_norm": 1.203005313873291, "learning_rate": 8.929325406237269e-06, "loss": 0.5856, "step": 3654 }, { "epoch": 0.24, "grad_norm": 1.155884027481079, "learning_rate": 8.928678899069157e-06, "loss": 0.5503, "step": 3655 }, { "epoch": 0.24, "grad_norm": 1.1607710123062134, "learning_rate": 8.928032220188628e-06, "loss": 0.6294, "step": 3656 }, { "epoch": 0.24, "grad_norm": 1.2157918214797974, "learning_rate": 8.927385369623944e-06, "loss": 0.5843, "step": 3657 }, { "epoch": 0.24, "grad_norm": 1.13748300075531, "learning_rate": 8.926738347403378e-06, "loss": 0.5867, "step": 3658 }, { "epoch": 0.24, "grad_norm": 1.188309907913208, "learning_rate": 8.926091153555213e-06, "loss": 0.5814, "step": 3659 }, { "epoch": 0.24, "grad_norm": 1.2380237579345703, "learning_rate": 8.92544378810773e-06, "loss": 0.6091, "step": 3660 }, { "epoch": 0.24, "grad_norm": 1.0962141752243042, "learning_rate": 8.924796251089228e-06, "loss": 0.534, "step": 3661 }, { "epoch": 0.24, "grad_norm": 1.1462260484695435, "learning_rate": 8.924148542528006e-06, "loss": 0.5591, "step": 3662 }, { "epoch": 0.24, "grad_norm": 1.2190889120101929, "learning_rate": 8.923500662452378e-06, "loss": 0.5523, "step": 3663 }, { "epoch": 0.24, "grad_norm": 1.1725716590881348, "learning_rate": 8.922852610890657e-06, "loss": 0.5816, "step": 3664 }, { "epoch": 0.24, "grad_norm": 1.0822045803070068, "learning_rate": 8.92220438787117e-06, "loss": 0.5293, "step": 3665 }, { "epoch": 0.24, "grad_norm": 1.326545000076294, "learning_rate": 8.921555993422251e-06, "loss": 0.6156, "step": 3666 }, { "epoch": 0.24, "grad_norm": 1.2176969051361084, "learning_rate": 8.920907427572234e-06, "loss": 0.5717, "step": 3667 }, { "epoch": 0.24, "grad_norm": 1.1383134126663208, "learning_rate": 8.92025869034947e-06, "loss": 0.5873, "step": 3668 }, { "epoch": 0.24, "grad_norm": 1.0400621891021729, "learning_rate": 8.919609781782314e-06, "loss": 0.5471, "step": 3669 }, { "epoch": 0.24, "grad_norm": 1.1902179718017578, "learning_rate": 8.918960701899128e-06, "loss": 0.6428, "step": 3670 }, { "epoch": 0.24, "grad_norm": 1.2452044486999512, "learning_rate": 8.918311450728279e-06, "loss": 0.5406, "step": 3671 }, { "epoch": 0.24, "grad_norm": 1.2583129405975342, "learning_rate": 8.917662028298148e-06, "loss": 0.5729, "step": 3672 }, { "epoch": 0.24, "grad_norm": 1.1320600509643555, "learning_rate": 8.917012434637116e-06, "loss": 0.5935, "step": 3673 }, { "epoch": 0.24, "grad_norm": 1.1155604124069214, "learning_rate": 8.916362669773576e-06, "loss": 0.5739, "step": 3674 }, { "epoch": 0.24, "grad_norm": 1.2545690536499023, "learning_rate": 8.915712733735929e-06, "loss": 0.5396, "step": 3675 }, { "epoch": 0.24, "grad_norm": 1.26823091506958, "learning_rate": 8.91506262655258e-06, "loss": 0.5846, "step": 3676 }, { "epoch": 0.24, "grad_norm": 1.2280396223068237, "learning_rate": 8.914412348251947e-06, "loss": 0.5896, "step": 3677 }, { "epoch": 0.24, "grad_norm": 1.1130391359329224, "learning_rate": 8.913761898862447e-06, "loss": 0.5451, "step": 3678 }, { "epoch": 0.24, "grad_norm": 1.1924635171890259, "learning_rate": 8.913111278412513e-06, "loss": 0.5741, "step": 3679 }, { "epoch": 0.24, "grad_norm": 1.2069346904754639, "learning_rate": 8.912460486930582e-06, "loss": 0.5229, "step": 3680 }, { "epoch": 0.24, "grad_norm": 1.1997737884521484, "learning_rate": 8.911809524445094e-06, "loss": 0.5271, "step": 3681 }, { "epoch": 0.24, "grad_norm": 1.2128442525863647, "learning_rate": 8.911158390984508e-06, "loss": 0.5647, "step": 3682 }, { "epoch": 0.24, "grad_norm": 1.1530414819717407, "learning_rate": 8.910507086577279e-06, "loss": 0.5694, "step": 3683 }, { "epoch": 0.24, "grad_norm": 1.179091215133667, "learning_rate": 8.909855611251871e-06, "loss": 0.5736, "step": 3684 }, { "epoch": 0.24, "grad_norm": 1.1391832828521729, "learning_rate": 8.909203965036766e-06, "loss": 0.5541, "step": 3685 }, { "epoch": 0.24, "grad_norm": 1.2178925275802612, "learning_rate": 8.90855214796044e-06, "loss": 0.6197, "step": 3686 }, { "epoch": 0.24, "grad_norm": 1.1603444814682007, "learning_rate": 8.907900160051383e-06, "loss": 0.5969, "step": 3687 }, { "epoch": 0.24, "grad_norm": 1.1560927629470825, "learning_rate": 8.907248001338095e-06, "loss": 0.5585, "step": 3688 }, { "epoch": 0.24, "grad_norm": 1.2175583839416504, "learning_rate": 8.906595671849076e-06, "loss": 0.5634, "step": 3689 }, { "epoch": 0.24, "grad_norm": 1.1725958585739136, "learning_rate": 8.905943171612839e-06, "loss": 0.5772, "step": 3690 }, { "epoch": 0.24, "grad_norm": 1.2243541479110718, "learning_rate": 8.905290500657904e-06, "loss": 0.571, "step": 3691 }, { "epoch": 0.24, "grad_norm": 1.1505094766616821, "learning_rate": 8.904637659012798e-06, "loss": 0.6246, "step": 3692 }, { "epoch": 0.24, "grad_norm": 1.1413060426712036, "learning_rate": 8.903984646706051e-06, "loss": 0.5666, "step": 3693 }, { "epoch": 0.24, "grad_norm": 1.1441680192947388, "learning_rate": 8.90333146376621e-06, "loss": 0.5958, "step": 3694 }, { "epoch": 0.24, "grad_norm": 1.0355912446975708, "learning_rate": 8.902678110221821e-06, "loss": 0.5314, "step": 3695 }, { "epoch": 0.24, "grad_norm": 1.3392319679260254, "learning_rate": 8.90202458610144e-06, "loss": 0.5817, "step": 3696 }, { "epoch": 0.24, "grad_norm": 1.2474868297576904, "learning_rate": 8.901370891433634e-06, "loss": 0.5648, "step": 3697 }, { "epoch": 0.24, "grad_norm": 1.0892480611801147, "learning_rate": 8.90071702624697e-06, "loss": 0.523, "step": 3698 }, { "epoch": 0.24, "grad_norm": 1.14325749874115, "learning_rate": 8.900062990570028e-06, "loss": 0.5627, "step": 3699 }, { "epoch": 0.24, "grad_norm": 1.1678993701934814, "learning_rate": 8.899408784431399e-06, "loss": 0.5183, "step": 3700 }, { "epoch": 0.24, "grad_norm": 1.1280611753463745, "learning_rate": 8.898754407859669e-06, "loss": 0.5757, "step": 3701 }, { "epoch": 0.24, "grad_norm": 1.0926711559295654, "learning_rate": 8.898099860883443e-06, "loss": 0.5623, "step": 3702 }, { "epoch": 0.24, "grad_norm": 1.2780802249908447, "learning_rate": 8.897445143531329e-06, "loss": 0.5754, "step": 3703 }, { "epoch": 0.24, "grad_norm": 1.1524566411972046, "learning_rate": 8.896790255831945e-06, "loss": 0.569, "step": 3704 }, { "epoch": 0.24, "grad_norm": 1.1244641542434692, "learning_rate": 8.896135197813911e-06, "loss": 0.5454, "step": 3705 }, { "epoch": 0.24, "grad_norm": 1.128949761390686, "learning_rate": 8.895479969505862e-06, "loss": 0.547, "step": 3706 }, { "epoch": 0.24, "grad_norm": 1.242915153503418, "learning_rate": 8.894824570936433e-06, "loss": 0.5913, "step": 3707 }, { "epoch": 0.24, "grad_norm": 1.29800546169281, "learning_rate": 8.89416900213427e-06, "loss": 0.5554, "step": 3708 }, { "epoch": 0.24, "grad_norm": 1.310650110244751, "learning_rate": 8.893513263128026e-06, "loss": 0.5802, "step": 3709 }, { "epoch": 0.24, "grad_norm": 1.1733821630477905, "learning_rate": 8.892857353946364e-06, "loss": 0.56, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.1225123405456543, "learning_rate": 8.892201274617952e-06, "loss": 0.5536, "step": 3711 }, { "epoch": 0.24, "grad_norm": 1.229833960533142, "learning_rate": 8.891545025171464e-06, "loss": 0.6048, "step": 3712 }, { "epoch": 0.24, "grad_norm": 1.1719186305999756, "learning_rate": 8.890888605635581e-06, "loss": 0.5903, "step": 3713 }, { "epoch": 0.24, "grad_norm": 1.2556406259536743, "learning_rate": 8.890232016038998e-06, "loss": 0.5459, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.0829287767410278, "learning_rate": 8.889575256410409e-06, "loss": 0.5766, "step": 3715 }, { "epoch": 0.24, "grad_norm": 1.1196049451828003, "learning_rate": 8.888918326778521e-06, "loss": 0.5735, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.0769399404525757, "learning_rate": 8.888261227172049e-06, "loss": 0.5535, "step": 3717 }, { "epoch": 0.24, "grad_norm": 1.2978698015213013, "learning_rate": 8.887603957619706e-06, "loss": 0.5842, "step": 3718 }, { "epoch": 0.24, "grad_norm": 1.3224635124206543, "learning_rate": 8.886946518150227e-06, "loss": 0.5957, "step": 3719 }, { "epoch": 0.24, "grad_norm": 1.2302753925323486, "learning_rate": 8.886288908792344e-06, "loss": 0.5961, "step": 3720 }, { "epoch": 0.24, "grad_norm": 1.153314471244812, "learning_rate": 8.8856311295748e-06, "loss": 0.5115, "step": 3721 }, { "epoch": 0.24, "grad_norm": 1.097861647605896, "learning_rate": 8.884973180526345e-06, "loss": 0.5334, "step": 3722 }, { "epoch": 0.24, "grad_norm": 1.0919723510742188, "learning_rate": 8.884315061675733e-06, "loss": 0.5582, "step": 3723 }, { "epoch": 0.24, "grad_norm": 1.278662919998169, "learning_rate": 8.883656773051733e-06, "loss": 0.5875, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.3179088830947876, "learning_rate": 8.882998314683115e-06, "loss": 0.5986, "step": 3725 }, { "epoch": 0.24, "grad_norm": 1.2185444831848145, "learning_rate": 8.88233968659866e-06, "loss": 0.5671, "step": 3726 }, { "epoch": 0.24, "grad_norm": 1.1786731481552124, "learning_rate": 8.881680888827152e-06, "loss": 0.559, "step": 3727 }, { "epoch": 0.24, "grad_norm": 1.1040403842926025, "learning_rate": 8.881021921397388e-06, "loss": 0.5538, "step": 3728 }, { "epoch": 0.24, "grad_norm": 1.2052663564682007, "learning_rate": 8.88036278433817e-06, "loss": 0.5901, "step": 3729 }, { "epoch": 0.24, "grad_norm": 1.1851383447647095, "learning_rate": 8.879703477678305e-06, "loss": 0.5949, "step": 3730 }, { "epoch": 0.24, "grad_norm": 1.217989206314087, "learning_rate": 8.87904400144661e-06, "loss": 0.576, "step": 3731 }, { "epoch": 0.24, "grad_norm": 1.0999910831451416, "learning_rate": 8.878384355671912e-06, "loss": 0.5676, "step": 3732 }, { "epoch": 0.24, "grad_norm": 1.1522632837295532, "learning_rate": 8.87772454038304e-06, "loss": 0.5567, "step": 3733 }, { "epoch": 0.24, "grad_norm": 1.1563597917556763, "learning_rate": 8.87706455560883e-06, "loss": 0.5847, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.177238941192627, "learning_rate": 8.876404401378134e-06, "loss": 0.5527, "step": 3735 }, { "epoch": 0.24, "grad_norm": 1.1969016790390015, "learning_rate": 8.875744077719802e-06, "loss": 0.5451, "step": 3736 }, { "epoch": 0.24, "grad_norm": 1.0865689516067505, "learning_rate": 8.875083584662695e-06, "loss": 0.5625, "step": 3737 }, { "epoch": 0.24, "grad_norm": 1.1690431833267212, "learning_rate": 8.874422922235684e-06, "loss": 0.5856, "step": 3738 }, { "epoch": 0.24, "grad_norm": 1.2771482467651367, "learning_rate": 8.873762090467644e-06, "loss": 0.5363, "step": 3739 }, { "epoch": 0.24, "grad_norm": 1.1851136684417725, "learning_rate": 8.873101089387456e-06, "loss": 0.5967, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.2689841985702515, "learning_rate": 8.872439919024012e-06, "loss": 0.5982, "step": 3741 }, { "epoch": 0.24, "grad_norm": 1.1395779848098755, "learning_rate": 8.87177857940621e-06, "loss": 0.5336, "step": 3742 }, { "epoch": 0.24, "grad_norm": 1.1253215074539185, "learning_rate": 8.871117070562956e-06, "loss": 0.5559, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.2471671104431152, "learning_rate": 8.870455392523163e-06, "loss": 0.56, "step": 3744 }, { "epoch": 0.24, "grad_norm": 1.2431570291519165, "learning_rate": 8.869793545315751e-06, "loss": 0.5407, "step": 3745 }, { "epoch": 0.24, "grad_norm": 1.1098649501800537, "learning_rate": 8.869131528969646e-06, "loss": 0.5487, "step": 3746 }, { "epoch": 0.24, "grad_norm": 1.1449291706085205, "learning_rate": 8.868469343513783e-06, "loss": 0.5515, "step": 3747 }, { "epoch": 0.24, "grad_norm": 1.1472293138504028, "learning_rate": 8.867806988977109e-06, "loss": 0.588, "step": 3748 }, { "epoch": 0.24, "grad_norm": 1.2142646312713623, "learning_rate": 8.867144465388569e-06, "loss": 0.5576, "step": 3749 }, { "epoch": 0.24, "grad_norm": 1.1335290670394897, "learning_rate": 8.866481772777123e-06, "loss": 0.5498, "step": 3750 }, { "epoch": 0.24, "grad_norm": 1.2611980438232422, "learning_rate": 8.865818911171734e-06, "loss": 0.5997, "step": 3751 }, { "epoch": 0.24, "grad_norm": 1.2695029973983765, "learning_rate": 8.865155880601373e-06, "loss": 0.5535, "step": 3752 }, { "epoch": 0.24, "grad_norm": 1.1583974361419678, "learning_rate": 8.86449268109502e-06, "loss": 0.5938, "step": 3753 }, { "epoch": 0.24, "grad_norm": 1.406121015548706, "learning_rate": 8.863829312681664e-06, "loss": 0.6115, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.0624927282333374, "learning_rate": 8.863165775390299e-06, "loss": 0.559, "step": 3755 }, { "epoch": 0.24, "grad_norm": 1.155735731124878, "learning_rate": 8.862502069249921e-06, "loss": 0.5728, "step": 3756 }, { "epoch": 0.24, "grad_norm": 1.0774658918380737, "learning_rate": 8.861838194289547e-06, "loss": 0.5391, "step": 3757 }, { "epoch": 0.24, "grad_norm": 1.135269045829773, "learning_rate": 8.861174150538188e-06, "loss": 0.5179, "step": 3758 }, { "epoch": 0.24, "grad_norm": 1.1499838829040527, "learning_rate": 8.860509938024868e-06, "loss": 0.5477, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.1048429012298584, "learning_rate": 8.85984555677862e-06, "loss": 0.5469, "step": 3760 }, { "epoch": 0.24, "grad_norm": 1.053767442703247, "learning_rate": 8.85918100682848e-06, "loss": 0.5463, "step": 3761 }, { "epoch": 0.24, "grad_norm": 1.1878845691680908, "learning_rate": 8.858516288203494e-06, "loss": 0.5332, "step": 3762 }, { "epoch": 0.24, "grad_norm": 1.1347562074661255, "learning_rate": 8.857851400932718e-06, "loss": 0.5622, "step": 3763 }, { "epoch": 0.24, "grad_norm": 1.143642544746399, "learning_rate": 8.85718634504521e-06, "loss": 0.5815, "step": 3764 }, { "epoch": 0.24, "grad_norm": 1.1492985486984253, "learning_rate": 8.856521120570039e-06, "loss": 0.5423, "step": 3765 }, { "epoch": 0.24, "grad_norm": 1.0876567363739014, "learning_rate": 8.85585572753628e-06, "loss": 0.5542, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.1860299110412598, "learning_rate": 8.855190165973015e-06, "loss": 0.5531, "step": 3767 }, { "epoch": 0.24, "grad_norm": 1.1237399578094482, "learning_rate": 8.854524435909335e-06, "loss": 0.5471, "step": 3768 }, { "epoch": 0.24, "grad_norm": 1.161790370941162, "learning_rate": 8.853858537374336e-06, "loss": 0.5393, "step": 3769 }, { "epoch": 0.24, "grad_norm": 1.1287895441055298, "learning_rate": 8.853192470397123e-06, "loss": 0.567, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.2172651290893555, "learning_rate": 8.85252623500681e-06, "loss": 0.5627, "step": 3771 }, { "epoch": 0.24, "grad_norm": 1.1256166696548462, "learning_rate": 8.851859831232515e-06, "loss": 0.5685, "step": 3772 }, { "epoch": 0.24, "grad_norm": 1.2361315488815308, "learning_rate": 8.851193259103362e-06, "loss": 0.5851, "step": 3773 }, { "epoch": 0.24, "grad_norm": 1.2749395370483398, "learning_rate": 8.85052651864849e-06, "loss": 0.585, "step": 3774 }, { "epoch": 0.24, "grad_norm": 1.288921594619751, "learning_rate": 8.849859609897038e-06, "loss": 0.6217, "step": 3775 }, { "epoch": 0.24, "grad_norm": 1.1979711055755615, "learning_rate": 8.849192532878154e-06, "loss": 0.5583, "step": 3776 }, { "epoch": 0.24, "grad_norm": 1.2600170373916626, "learning_rate": 8.848525287620996e-06, "loss": 0.5759, "step": 3777 }, { "epoch": 0.24, "grad_norm": 1.2137260437011719, "learning_rate": 8.847857874154728e-06, "loss": 0.5228, "step": 3778 }, { "epoch": 0.24, "grad_norm": 1.1031126976013184, "learning_rate": 8.84719029250852e-06, "loss": 0.536, "step": 3779 }, { "epoch": 0.24, "grad_norm": 1.2345696687698364, "learning_rate": 8.84652254271155e-06, "loss": 0.6116, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.1240125894546509, "learning_rate": 8.845854624793003e-06, "loss": 0.4896, "step": 3781 }, { "epoch": 0.24, "grad_norm": 1.1307817697525024, "learning_rate": 8.845186538782074e-06, "loss": 0.5061, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.0968127250671387, "learning_rate": 8.84451828470796e-06, "loss": 0.5474, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.1852039098739624, "learning_rate": 8.843849862599874e-06, "loss": 0.5421, "step": 3784 }, { "epoch": 0.24, "grad_norm": 1.247031331062317, "learning_rate": 8.843181272487025e-06, "loss": 0.6034, "step": 3785 }, { "epoch": 0.24, "grad_norm": 1.305012822151184, "learning_rate": 8.842512514398638e-06, "loss": 0.5747, "step": 3786 }, { "epoch": 0.24, "grad_norm": 1.2310391664505005, "learning_rate": 8.841843588363945e-06, "loss": 0.56, "step": 3787 }, { "epoch": 0.24, "grad_norm": 1.1269429922103882, "learning_rate": 8.841174494412179e-06, "loss": 0.5128, "step": 3788 }, { "epoch": 0.24, "grad_norm": 1.1093957424163818, "learning_rate": 8.840505232572587e-06, "loss": 0.5858, "step": 3789 }, { "epoch": 0.24, "grad_norm": 1.2227650880813599, "learning_rate": 8.83983580287442e-06, "loss": 0.6092, "step": 3790 }, { "epoch": 0.24, "grad_norm": 1.267534852027893, "learning_rate": 8.839166205346936e-06, "loss": 0.6195, "step": 3791 }, { "epoch": 0.24, "grad_norm": 1.4025734663009644, "learning_rate": 8.838496440019404e-06, "loss": 0.5669, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.1942362785339355, "learning_rate": 8.837826506921096e-06, "loss": 0.5824, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.2925504446029663, "learning_rate": 8.837156406081292e-06, "loss": 0.5965, "step": 3794 }, { "epoch": 0.24, "grad_norm": 1.2738656997680664, "learning_rate": 8.836486137529281e-06, "loss": 0.5696, "step": 3795 }, { "epoch": 0.25, "grad_norm": 1.246833086013794, "learning_rate": 8.835815701294361e-06, "loss": 0.596, "step": 3796 }, { "epoch": 0.25, "grad_norm": 1.1966159343719482, "learning_rate": 8.835145097405832e-06, "loss": 0.5851, "step": 3797 }, { "epoch": 0.25, "grad_norm": 1.2024352550506592, "learning_rate": 8.834474325893006e-06, "loss": 0.5546, "step": 3798 }, { "epoch": 0.25, "grad_norm": 1.3124104738235474, "learning_rate": 8.8338033867852e-06, "loss": 0.5627, "step": 3799 }, { "epoch": 0.25, "grad_norm": 1.1051928997039795, "learning_rate": 8.833132280111738e-06, "loss": 0.5602, "step": 3800 }, { "epoch": 0.25, "grad_norm": 1.1339484453201294, "learning_rate": 8.832461005901955e-06, "loss": 0.5767, "step": 3801 }, { "epoch": 0.25, "grad_norm": 1.2298306226730347, "learning_rate": 8.83178956418519e-06, "loss": 0.5425, "step": 3802 }, { "epoch": 0.25, "grad_norm": 1.1476213932037354, "learning_rate": 8.831117954990789e-06, "loss": 0.5401, "step": 3803 }, { "epoch": 0.25, "grad_norm": 1.1883363723754883, "learning_rate": 8.830446178348107e-06, "loss": 0.5439, "step": 3804 }, { "epoch": 0.25, "grad_norm": 1.3154352903366089, "learning_rate": 8.829774234286503e-06, "loss": 0.6087, "step": 3805 }, { "epoch": 0.25, "grad_norm": 1.206964135169983, "learning_rate": 8.829102122835351e-06, "loss": 0.558, "step": 3806 }, { "epoch": 0.25, "grad_norm": 1.1436753273010254, "learning_rate": 8.828429844024023e-06, "loss": 0.5549, "step": 3807 }, { "epoch": 0.25, "grad_norm": 1.2463828325271606, "learning_rate": 8.827757397881904e-06, "loss": 0.5547, "step": 3808 }, { "epoch": 0.25, "grad_norm": 1.35785710811615, "learning_rate": 8.827084784438385e-06, "loss": 0.5366, "step": 3809 }, { "epoch": 0.25, "grad_norm": 1.1996538639068604, "learning_rate": 8.826412003722864e-06, "loss": 0.6052, "step": 3810 }, { "epoch": 0.25, "grad_norm": 1.0693471431732178, "learning_rate": 8.825739055764745e-06, "loss": 0.4848, "step": 3811 }, { "epoch": 0.25, "grad_norm": 1.1936756372451782, "learning_rate": 8.825065940593444e-06, "loss": 0.5312, "step": 3812 }, { "epoch": 0.25, "grad_norm": 1.1088217496871948, "learning_rate": 8.824392658238381e-06, "loss": 0.5437, "step": 3813 }, { "epoch": 0.25, "grad_norm": 1.15010666847229, "learning_rate": 8.82371920872898e-06, "loss": 0.5471, "step": 3814 }, { "epoch": 0.25, "grad_norm": 1.1952766180038452, "learning_rate": 8.823045592094678e-06, "loss": 0.6407, "step": 3815 }, { "epoch": 0.25, "grad_norm": 1.236138939857483, "learning_rate": 8.822371808364919e-06, "loss": 0.594, "step": 3816 }, { "epoch": 0.25, "grad_norm": 1.1879215240478516, "learning_rate": 8.821697857569147e-06, "loss": 0.6033, "step": 3817 }, { "epoch": 0.25, "grad_norm": 1.0657751560211182, "learning_rate": 8.821023739736823e-06, "loss": 0.537, "step": 3818 }, { "epoch": 0.25, "grad_norm": 1.2368104457855225, "learning_rate": 8.82034945489741e-06, "loss": 0.5912, "step": 3819 }, { "epoch": 0.25, "grad_norm": 1.1514555215835571, "learning_rate": 8.819675003080379e-06, "loss": 0.5522, "step": 3820 }, { "epoch": 0.25, "grad_norm": 1.1072707176208496, "learning_rate": 8.819000384315208e-06, "loss": 0.57, "step": 3821 }, { "epoch": 0.25, "grad_norm": 1.1184895038604736, "learning_rate": 8.818325598631382e-06, "loss": 0.5855, "step": 3822 }, { "epoch": 0.25, "grad_norm": 1.1455302238464355, "learning_rate": 8.817650646058397e-06, "loss": 0.5562, "step": 3823 }, { "epoch": 0.25, "grad_norm": 1.2044910192489624, "learning_rate": 8.816975526625752e-06, "loss": 0.5105, "step": 3824 }, { "epoch": 0.25, "grad_norm": 1.235472321510315, "learning_rate": 8.816300240362955e-06, "loss": 0.5225, "step": 3825 }, { "epoch": 0.25, "grad_norm": 1.1095367670059204, "learning_rate": 8.815624787299521e-06, "loss": 0.5431, "step": 3826 }, { "epoch": 0.25, "grad_norm": 1.1495935916900635, "learning_rate": 8.81494916746497e-06, "loss": 0.5726, "step": 3827 }, { "epoch": 0.25, "grad_norm": 1.1658462285995483, "learning_rate": 8.814273380888837e-06, "loss": 0.5721, "step": 3828 }, { "epoch": 0.25, "grad_norm": 1.1308469772338867, "learning_rate": 8.813597427600651e-06, "loss": 0.5823, "step": 3829 }, { "epoch": 0.25, "grad_norm": 1.2120193243026733, "learning_rate": 8.812921307629963e-06, "loss": 0.5171, "step": 3830 }, { "epoch": 0.25, "grad_norm": 1.1229000091552734, "learning_rate": 8.812245021006322e-06, "loss": 0.5751, "step": 3831 }, { "epoch": 0.25, "grad_norm": 1.11387038230896, "learning_rate": 8.811568567759288e-06, "loss": 0.5418, "step": 3832 }, { "epoch": 0.25, "grad_norm": 1.1190427541732788, "learning_rate": 8.810891947918425e-06, "loss": 0.5564, "step": 3833 }, { "epoch": 0.25, "grad_norm": 1.2935011386871338, "learning_rate": 8.810215161513306e-06, "loss": 0.5857, "step": 3834 }, { "epoch": 0.25, "grad_norm": 1.1949732303619385, "learning_rate": 8.809538208573513e-06, "loss": 0.6187, "step": 3835 }, { "epoch": 0.25, "grad_norm": 1.2282383441925049, "learning_rate": 8.808861089128635e-06, "loss": 0.6123, "step": 3836 }, { "epoch": 0.25, "grad_norm": 1.050137996673584, "learning_rate": 8.808183803208266e-06, "loss": 0.5123, "step": 3837 }, { "epoch": 0.25, "grad_norm": 1.1127707958221436, "learning_rate": 8.807506350842007e-06, "loss": 0.599, "step": 3838 }, { "epoch": 0.25, "grad_norm": 1.2171379327774048, "learning_rate": 8.80682873205947e-06, "loss": 0.5547, "step": 3839 }, { "epoch": 0.25, "grad_norm": 1.1734760999679565, "learning_rate": 8.806150946890268e-06, "loss": 0.5686, "step": 3840 }, { "epoch": 0.25, "grad_norm": 1.2284412384033203, "learning_rate": 8.805472995364031e-06, "loss": 0.6159, "step": 3841 }, { "epoch": 0.25, "grad_norm": 1.1064790487289429, "learning_rate": 8.804794877510388e-06, "loss": 0.5697, "step": 3842 }, { "epoch": 0.25, "grad_norm": 1.1786929368972778, "learning_rate": 8.804116593358976e-06, "loss": 0.5514, "step": 3843 }, { "epoch": 0.25, "grad_norm": 1.1515309810638428, "learning_rate": 8.803438142939442e-06, "loss": 0.5271, "step": 3844 }, { "epoch": 0.25, "grad_norm": 1.1066868305206299, "learning_rate": 8.80275952628144e-06, "loss": 0.5354, "step": 3845 }, { "epoch": 0.25, "grad_norm": 1.1577852964401245, "learning_rate": 8.802080743414631e-06, "loss": 0.585, "step": 3846 }, { "epoch": 0.25, "grad_norm": 1.184975504875183, "learning_rate": 8.801401794368682e-06, "loss": 0.5801, "step": 3847 }, { "epoch": 0.25, "grad_norm": 1.161376953125, "learning_rate": 8.800722679173269e-06, "loss": 0.5377, "step": 3848 }, { "epoch": 0.25, "grad_norm": 1.1200250387191772, "learning_rate": 8.800043397858073e-06, "loss": 0.5347, "step": 3849 }, { "epoch": 0.25, "grad_norm": 1.119250774383545, "learning_rate": 8.799363950452785e-06, "loss": 0.5079, "step": 3850 }, { "epoch": 0.25, "grad_norm": 1.2462012767791748, "learning_rate": 8.7986843369871e-06, "loss": 0.59, "step": 3851 }, { "epoch": 0.25, "grad_norm": 1.1053295135498047, "learning_rate": 8.798004557490725e-06, "loss": 0.5508, "step": 3852 }, { "epoch": 0.25, "grad_norm": 1.3766720294952393, "learning_rate": 8.797324611993368e-06, "loss": 0.6052, "step": 3853 }, { "epoch": 0.25, "grad_norm": 1.2338051795959473, "learning_rate": 8.796644500524749e-06, "loss": 0.5261, "step": 3854 }, { "epoch": 0.25, "grad_norm": 1.256898283958435, "learning_rate": 8.795964223114597e-06, "loss": 0.5544, "step": 3855 }, { "epoch": 0.25, "grad_norm": 1.0391314029693604, "learning_rate": 8.79528377979264e-06, "loss": 0.5619, "step": 3856 }, { "epoch": 0.25, "grad_norm": 1.1429882049560547, "learning_rate": 8.794603170588623e-06, "loss": 0.5535, "step": 3857 }, { "epoch": 0.25, "grad_norm": 1.1372395753860474, "learning_rate": 8.79392239553229e-06, "loss": 0.5773, "step": 3858 }, { "epoch": 0.25, "grad_norm": 1.2785508632659912, "learning_rate": 8.793241454653398e-06, "loss": 0.5538, "step": 3859 }, { "epoch": 0.25, "grad_norm": 1.1247706413269043, "learning_rate": 8.79256034798171e-06, "loss": 0.5345, "step": 3860 }, { "epoch": 0.25, "grad_norm": 1.1539069414138794, "learning_rate": 8.791879075546991e-06, "loss": 0.5799, "step": 3861 }, { "epoch": 0.25, "grad_norm": 1.113779902458191, "learning_rate": 8.791197637379025e-06, "loss": 0.5778, "step": 3862 }, { "epoch": 0.25, "grad_norm": 1.331918716430664, "learning_rate": 8.790516033507589e-06, "loss": 0.5476, "step": 3863 }, { "epoch": 0.25, "grad_norm": 1.1991815567016602, "learning_rate": 8.789834263962478e-06, "loss": 0.6171, "step": 3864 }, { "epoch": 0.25, "grad_norm": 1.22999906539917, "learning_rate": 8.789152328773486e-06, "loss": 0.6168, "step": 3865 }, { "epoch": 0.25, "grad_norm": 1.1235896348953247, "learning_rate": 8.788470227970426e-06, "loss": 0.5843, "step": 3866 }, { "epoch": 0.25, "grad_norm": 1.0840622186660767, "learning_rate": 8.787787961583105e-06, "loss": 0.5328, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.058383584022522, "learning_rate": 8.787105529641345e-06, "loss": 0.5456, "step": 3868 }, { "epoch": 0.25, "grad_norm": 1.1261732578277588, "learning_rate": 8.786422932174974e-06, "loss": 0.6, "step": 3869 }, { "epoch": 0.25, "grad_norm": 1.1455038785934448, "learning_rate": 8.785740169213824e-06, "loss": 0.5815, "step": 3870 }, { "epoch": 0.25, "grad_norm": 1.2587275505065918, "learning_rate": 8.78505724078774e-06, "loss": 0.5297, "step": 3871 }, { "epoch": 0.25, "grad_norm": 1.0909910202026367, "learning_rate": 8.78437414692657e-06, "loss": 0.5532, "step": 3872 }, { "epoch": 0.25, "grad_norm": 1.1458746194839478, "learning_rate": 8.783690887660167e-06, "loss": 0.5535, "step": 3873 }, { "epoch": 0.25, "grad_norm": 1.1265259981155396, "learning_rate": 8.783007463018398e-06, "loss": 0.6045, "step": 3874 }, { "epoch": 0.25, "grad_norm": 1.2241809368133545, "learning_rate": 8.782323873031136e-06, "loss": 0.5924, "step": 3875 }, { "epoch": 0.25, "grad_norm": 1.066403865814209, "learning_rate": 8.781640117728254e-06, "loss": 0.5947, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.1355177164077759, "learning_rate": 8.78095619713964e-06, "loss": 0.5616, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.0872398614883423, "learning_rate": 8.780272111295186e-06, "loss": 0.5684, "step": 3878 }, { "epoch": 0.25, "grad_norm": 1.1235121488571167, "learning_rate": 8.779587860224788e-06, "loss": 0.5846, "step": 3879 }, { "epoch": 0.25, "grad_norm": 1.2177579402923584, "learning_rate": 8.77890344395836e-06, "loss": 0.5865, "step": 3880 }, { "epoch": 0.25, "grad_norm": 1.1464802026748657, "learning_rate": 8.778218862525811e-06, "loss": 0.5696, "step": 3881 }, { "epoch": 0.25, "grad_norm": 1.1953555345535278, "learning_rate": 8.777534115957062e-06, "loss": 0.5976, "step": 3882 }, { "epoch": 0.25, "grad_norm": 1.167892336845398, "learning_rate": 8.776849204282044e-06, "loss": 0.5809, "step": 3883 }, { "epoch": 0.25, "grad_norm": 1.1696326732635498, "learning_rate": 8.776164127530692e-06, "loss": 0.5517, "step": 3884 }, { "epoch": 0.25, "grad_norm": 1.1144976615905762, "learning_rate": 8.775478885732948e-06, "loss": 0.5721, "step": 3885 }, { "epoch": 0.25, "grad_norm": 1.147372841835022, "learning_rate": 8.774793478918764e-06, "loss": 0.5738, "step": 3886 }, { "epoch": 0.25, "grad_norm": 1.3463071584701538, "learning_rate": 8.774107907118094e-06, "loss": 0.5442, "step": 3887 }, { "epoch": 0.25, "grad_norm": 1.1522005796432495, "learning_rate": 8.773422170360907e-06, "loss": 0.5527, "step": 3888 }, { "epoch": 0.25, "grad_norm": 1.167242169380188, "learning_rate": 8.77273626867717e-06, "loss": 0.5685, "step": 3889 }, { "epoch": 0.25, "grad_norm": 1.151157021522522, "learning_rate": 8.772050202096867e-06, "loss": 0.5269, "step": 3890 }, { "epoch": 0.25, "grad_norm": 1.1818888187408447, "learning_rate": 8.77136397064998e-06, "loss": 0.5733, "step": 3891 }, { "epoch": 0.25, "grad_norm": 1.0197139978408813, "learning_rate": 8.770677574366507e-06, "loss": 0.5391, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.1437205076217651, "learning_rate": 8.769991013276441e-06, "loss": 0.5722, "step": 3893 }, { "epoch": 0.25, "grad_norm": 1.10793936252594, "learning_rate": 8.769304287409797e-06, "loss": 0.5833, "step": 3894 }, { "epoch": 0.25, "grad_norm": 1.0713831186294556, "learning_rate": 8.768617396796588e-06, "loss": 0.5904, "step": 3895 }, { "epoch": 0.25, "grad_norm": 1.2002016305923462, "learning_rate": 8.767930341466835e-06, "loss": 0.6038, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.229069709777832, "learning_rate": 8.767243121450568e-06, "loss": 0.5722, "step": 3897 }, { "epoch": 0.25, "grad_norm": 1.2691651582717896, "learning_rate": 8.766555736777825e-06, "loss": 0.5983, "step": 3898 }, { "epoch": 0.25, "grad_norm": 1.1854223012924194, "learning_rate": 8.765868187478648e-06, "loss": 0.5283, "step": 3899 }, { "epoch": 0.25, "grad_norm": 1.183544397354126, "learning_rate": 8.765180473583087e-06, "loss": 0.5891, "step": 3900 }, { "epoch": 0.25, "grad_norm": 1.0949819087982178, "learning_rate": 8.764492595121204e-06, "loss": 0.5894, "step": 3901 }, { "epoch": 0.25, "grad_norm": 1.137098789215088, "learning_rate": 8.763804552123061e-06, "loss": 0.609, "step": 3902 }, { "epoch": 0.25, "grad_norm": 1.0976711511611938, "learning_rate": 8.763116344618732e-06, "loss": 0.5797, "step": 3903 }, { "epoch": 0.25, "grad_norm": 1.1159535646438599, "learning_rate": 8.762427972638297e-06, "loss": 0.546, "step": 3904 }, { "epoch": 0.25, "grad_norm": 1.0985345840454102, "learning_rate": 8.761739436211842e-06, "loss": 0.5117, "step": 3905 }, { "epoch": 0.25, "grad_norm": 1.2705931663513184, "learning_rate": 8.761050735369463e-06, "loss": 0.5253, "step": 3906 }, { "epoch": 0.25, "grad_norm": 1.1112093925476074, "learning_rate": 8.760361870141258e-06, "loss": 0.5115, "step": 3907 }, { "epoch": 0.25, "grad_norm": 1.3269422054290771, "learning_rate": 8.759672840557338e-06, "loss": 0.5832, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.1393476724624634, "learning_rate": 8.758983646647819e-06, "loss": 0.5942, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.1501203775405884, "learning_rate": 8.758294288442824e-06, "loss": 0.5176, "step": 3910 }, { "epoch": 0.25, "grad_norm": 1.150799036026001, "learning_rate": 8.75760476597248e-06, "loss": 0.5805, "step": 3911 }, { "epoch": 0.25, "grad_norm": 1.076883316040039, "learning_rate": 8.756915079266927e-06, "loss": 0.5339, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.1021970510482788, "learning_rate": 8.75622522835631e-06, "loss": 0.5494, "step": 3913 }, { "epoch": 0.25, "grad_norm": 1.0621211528778076, "learning_rate": 8.755535213270779e-06, "loss": 0.5031, "step": 3914 }, { "epoch": 0.25, "grad_norm": 1.3043012619018555, "learning_rate": 8.754845034040492e-06, "loss": 0.5576, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.2732048034667969, "learning_rate": 8.754154690695616e-06, "loss": 0.6184, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.0867642164230347, "learning_rate": 8.753464183266325e-06, "loss": 0.5267, "step": 3917 }, { "epoch": 0.25, "grad_norm": 1.219251036643982, "learning_rate": 8.752773511782799e-06, "loss": 0.5659, "step": 3918 }, { "epoch": 0.25, "grad_norm": 1.1931946277618408, "learning_rate": 8.752082676275225e-06, "loss": 0.5272, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.1583251953125, "learning_rate": 8.751391676773798e-06, "loss": 0.5525, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.1975009441375732, "learning_rate": 8.75070051330872e-06, "loss": 0.6185, "step": 3921 }, { "epoch": 0.25, "grad_norm": 1.1782164573669434, "learning_rate": 8.750009185910197e-06, "loss": 0.5874, "step": 3922 }, { "epoch": 0.25, "grad_norm": 1.1905077695846558, "learning_rate": 8.74931769460845e-06, "loss": 0.5377, "step": 3923 }, { "epoch": 0.25, "grad_norm": 1.139430284500122, "learning_rate": 8.748626039433701e-06, "loss": 0.5879, "step": 3924 }, { "epoch": 0.25, "grad_norm": 1.1085751056671143, "learning_rate": 8.747934220416177e-06, "loss": 0.5449, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.1371442079544067, "learning_rate": 8.74724223758612e-06, "loss": 0.5897, "step": 3926 }, { "epoch": 0.25, "grad_norm": 1.149074673652649, "learning_rate": 8.746550090973771e-06, "loss": 0.5606, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.3077040910720825, "learning_rate": 8.745857780609384e-06, "loss": 0.5743, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.1648486852645874, "learning_rate": 8.745165306523217e-06, "loss": 0.5623, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.1565190553665161, "learning_rate": 8.74447266874554e-06, "loss": 0.6, "step": 3930 }, { "epoch": 0.25, "grad_norm": 1.1167066097259521, "learning_rate": 8.743779867306622e-06, "loss": 0.5484, "step": 3931 }, { "epoch": 0.25, "grad_norm": 1.1147667169570923, "learning_rate": 8.743086902236743e-06, "loss": 0.5152, "step": 3932 }, { "epoch": 0.25, "grad_norm": 1.02787184715271, "learning_rate": 8.742393773566195e-06, "loss": 0.5322, "step": 3933 }, { "epoch": 0.25, "grad_norm": 1.2673314809799194, "learning_rate": 8.741700481325271e-06, "loss": 0.6162, "step": 3934 }, { "epoch": 0.25, "grad_norm": 1.0544095039367676, "learning_rate": 8.741007025544273e-06, "loss": 0.5569, "step": 3935 }, { "epoch": 0.25, "grad_norm": 1.2303673028945923, "learning_rate": 8.740313406253509e-06, "loss": 0.5808, "step": 3936 }, { "epoch": 0.25, "grad_norm": 1.2233651876449585, "learning_rate": 8.739619623483296e-06, "loss": 0.561, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.0571507215499878, "learning_rate": 8.738925677263957e-06, "loss": 0.5535, "step": 3938 }, { "epoch": 0.25, "grad_norm": 1.0999078750610352, "learning_rate": 8.738231567625823e-06, "loss": 0.5598, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.1346397399902344, "learning_rate": 8.737537294599235e-06, "loss": 0.5873, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.0973371267318726, "learning_rate": 8.736842858214532e-06, "loss": 0.5464, "step": 3941 }, { "epoch": 0.25, "grad_norm": 1.2561131715774536, "learning_rate": 8.736148258502069e-06, "loss": 0.5724, "step": 3942 }, { "epoch": 0.25, "grad_norm": 1.1768475770950317, "learning_rate": 8.735453495492205e-06, "loss": 0.6051, "step": 3943 }, { "epoch": 0.25, "grad_norm": 1.0398838520050049, "learning_rate": 8.734758569215307e-06, "loss": 0.4948, "step": 3944 }, { "epoch": 0.25, "grad_norm": 1.1592611074447632, "learning_rate": 8.734063479701747e-06, "loss": 0.5765, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.2598648071289062, "learning_rate": 8.733368226981907e-06, "loss": 0.5668, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.092626690864563, "learning_rate": 8.732672811086174e-06, "loss": 0.5051, "step": 3947 }, { "epoch": 0.25, "grad_norm": 1.1075295209884644, "learning_rate": 8.731977232044943e-06, "loss": 0.5434, "step": 3948 }, { "epoch": 0.25, "grad_norm": 1.158132553100586, "learning_rate": 8.731281489888614e-06, "loss": 0.5208, "step": 3949 }, { "epoch": 0.25, "grad_norm": 1.119278907775879, "learning_rate": 8.7305855846476e-06, "loss": 0.5192, "step": 3950 }, { "epoch": 0.26, "grad_norm": 1.0816078186035156, "learning_rate": 8.729889516352314e-06, "loss": 0.5157, "step": 3951 }, { "epoch": 0.26, "grad_norm": 1.1026076078414917, "learning_rate": 8.729193285033182e-06, "loss": 0.5563, "step": 3952 }, { "epoch": 0.26, "grad_norm": 1.2375439405441284, "learning_rate": 8.728496890720632e-06, "loss": 0.5898, "step": 3953 }, { "epoch": 0.26, "grad_norm": 1.1888214349746704, "learning_rate": 8.727800333445102e-06, "loss": 0.5641, "step": 3954 }, { "epoch": 0.26, "grad_norm": 1.1764353513717651, "learning_rate": 8.727103613237037e-06, "loss": 0.5799, "step": 3955 }, { "epoch": 0.26, "grad_norm": 1.1783572435379028, "learning_rate": 8.726406730126891e-06, "loss": 0.6116, "step": 3956 }, { "epoch": 0.26, "grad_norm": 1.230478048324585, "learning_rate": 8.72570968414512e-06, "loss": 0.5632, "step": 3957 }, { "epoch": 0.26, "grad_norm": 1.1593225002288818, "learning_rate": 8.725012475322193e-06, "loss": 0.5651, "step": 3958 }, { "epoch": 0.26, "grad_norm": 1.1913244724273682, "learning_rate": 8.72431510368858e-06, "loss": 0.5925, "step": 3959 }, { "epoch": 0.26, "grad_norm": 1.2045539617538452, "learning_rate": 8.723617569274761e-06, "loss": 0.535, "step": 3960 }, { "epoch": 0.26, "grad_norm": 1.2458654642105103, "learning_rate": 8.722919872111226e-06, "loss": 0.6062, "step": 3961 }, { "epoch": 0.26, "grad_norm": 1.0896722078323364, "learning_rate": 8.72222201222847e-06, "loss": 0.537, "step": 3962 }, { "epoch": 0.26, "grad_norm": 1.2354884147644043, "learning_rate": 8.721523989656991e-06, "loss": 0.5519, "step": 3963 }, { "epoch": 0.26, "grad_norm": 1.1689494848251343, "learning_rate": 8.720825804427301e-06, "loss": 0.5667, "step": 3964 }, { "epoch": 0.26, "grad_norm": 1.1703341007232666, "learning_rate": 8.720127456569915e-06, "loss": 0.5706, "step": 3965 }, { "epoch": 0.26, "grad_norm": 1.1180808544158936, "learning_rate": 8.719428946115358e-06, "loss": 0.591, "step": 3966 }, { "epoch": 0.26, "grad_norm": 1.1028120517730713, "learning_rate": 8.718730273094156e-06, "loss": 0.5477, "step": 3967 }, { "epoch": 0.26, "grad_norm": 1.1620845794677734, "learning_rate": 8.71803143753685e-06, "loss": 0.5817, "step": 3968 }, { "epoch": 0.26, "grad_norm": 1.3255565166473389, "learning_rate": 8.71733243947398e-06, "loss": 0.5799, "step": 3969 }, { "epoch": 0.26, "grad_norm": 1.1034804582595825, "learning_rate": 8.7166332789361e-06, "loss": 0.5937, "step": 3970 }, { "epoch": 0.26, "grad_norm": 1.0146950483322144, "learning_rate": 8.71593395595377e-06, "loss": 0.5954, "step": 3971 }, { "epoch": 0.26, "grad_norm": 1.0946775674819946, "learning_rate": 8.715234470557553e-06, "loss": 0.5754, "step": 3972 }, { "epoch": 0.26, "grad_norm": 1.0833420753479004, "learning_rate": 8.714534822778022e-06, "loss": 0.5564, "step": 3973 }, { "epoch": 0.26, "grad_norm": 1.183117151260376, "learning_rate": 8.71383501264576e-06, "loss": 0.519, "step": 3974 }, { "epoch": 0.26, "grad_norm": 1.1931922435760498, "learning_rate": 8.71313504019135e-06, "loss": 0.5722, "step": 3975 }, { "epoch": 0.26, "grad_norm": 1.2663898468017578, "learning_rate": 8.712434905445387e-06, "loss": 0.5786, "step": 3976 }, { "epoch": 0.26, "grad_norm": 1.1611545085906982, "learning_rate": 8.711734608438474e-06, "loss": 0.5843, "step": 3977 }, { "epoch": 0.26, "grad_norm": 1.1011699438095093, "learning_rate": 8.711034149201216e-06, "loss": 0.5955, "step": 3978 }, { "epoch": 0.26, "grad_norm": 1.1878786087036133, "learning_rate": 8.71033352776423e-06, "loss": 0.5982, "step": 3979 }, { "epoch": 0.26, "grad_norm": 1.3090639114379883, "learning_rate": 8.709632744158139e-06, "loss": 0.5516, "step": 3980 }, { "epoch": 0.26, "grad_norm": 1.3602348566055298, "learning_rate": 8.708931798413572e-06, "loss": 0.5827, "step": 3981 }, { "epoch": 0.26, "grad_norm": 1.235556721687317, "learning_rate": 8.708230690561165e-06, "loss": 0.5418, "step": 3982 }, { "epoch": 0.26, "grad_norm": 1.1587507724761963, "learning_rate": 8.707529420631563e-06, "loss": 0.5243, "step": 3983 }, { "epoch": 0.26, "grad_norm": 1.2375825643539429, "learning_rate": 8.706827988655413e-06, "loss": 0.5599, "step": 3984 }, { "epoch": 0.26, "grad_norm": 1.3059241771697998, "learning_rate": 8.706126394663376e-06, "loss": 0.5805, "step": 3985 }, { "epoch": 0.26, "grad_norm": 1.2526406049728394, "learning_rate": 8.705424638686117e-06, "loss": 0.5946, "step": 3986 }, { "epoch": 0.26, "grad_norm": 1.202717900276184, "learning_rate": 8.704722720754308e-06, "loss": 0.531, "step": 3987 }, { "epoch": 0.26, "grad_norm": 1.2838563919067383, "learning_rate": 8.704020640898626e-06, "loss": 0.5587, "step": 3988 }, { "epoch": 0.26, "grad_norm": 1.2155884504318237, "learning_rate": 8.703318399149758e-06, "loss": 0.5295, "step": 3989 }, { "epoch": 0.26, "grad_norm": 1.269466757774353, "learning_rate": 8.702615995538399e-06, "loss": 0.6033, "step": 3990 }, { "epoch": 0.26, "grad_norm": 1.21634840965271, "learning_rate": 8.701913430095246e-06, "loss": 0.6018, "step": 3991 }, { "epoch": 0.26, "grad_norm": 1.2231887578964233, "learning_rate": 8.701210702851009e-06, "loss": 0.5812, "step": 3992 }, { "epoch": 0.26, "grad_norm": 1.2534953355789185, "learning_rate": 8.700507813836401e-06, "loss": 0.5714, "step": 3993 }, { "epoch": 0.26, "grad_norm": 1.1297953128814697, "learning_rate": 8.699804763082145e-06, "loss": 0.498, "step": 3994 }, { "epoch": 0.26, "grad_norm": 1.1403988599777222, "learning_rate": 8.699101550618967e-06, "loss": 0.5263, "step": 3995 }, { "epoch": 0.26, "grad_norm": 1.4593912363052368, "learning_rate": 8.698398176477604e-06, "loss": 0.6061, "step": 3996 }, { "epoch": 0.26, "grad_norm": 1.3044729232788086, "learning_rate": 8.6976946406888e-06, "loss": 0.5694, "step": 3997 }, { "epoch": 0.26, "grad_norm": 1.0546660423278809, "learning_rate": 8.696990943283301e-06, "loss": 0.5054, "step": 3998 }, { "epoch": 0.26, "grad_norm": 1.2549679279327393, "learning_rate": 8.696287084291868e-06, "loss": 0.6013, "step": 3999 }, { "epoch": 0.26, "grad_norm": 1.253477692604065, "learning_rate": 8.695583063745264e-06, "loss": 0.5713, "step": 4000 }, { "epoch": 0.26, "grad_norm": 1.1471192836761475, "learning_rate": 8.694878881674257e-06, "loss": 0.529, "step": 4001 }, { "epoch": 0.26, "grad_norm": 1.1212998628616333, "learning_rate": 8.69417453810963e-06, "loss": 0.575, "step": 4002 }, { "epoch": 0.26, "grad_norm": 1.1685744524002075, "learning_rate": 8.693470033082161e-06, "loss": 0.5212, "step": 4003 }, { "epoch": 0.26, "grad_norm": 1.2485170364379883, "learning_rate": 8.692765366622648e-06, "loss": 0.5854, "step": 4004 }, { "epoch": 0.26, "grad_norm": 1.1666810512542725, "learning_rate": 8.69206053876189e-06, "loss": 0.5438, "step": 4005 }, { "epoch": 0.26, "grad_norm": 1.2313896417617798, "learning_rate": 8.69135554953069e-06, "loss": 0.5394, "step": 4006 }, { "epoch": 0.26, "grad_norm": 1.3410488367080688, "learning_rate": 8.690650398959861e-06, "loss": 0.5998, "step": 4007 }, { "epoch": 0.26, "grad_norm": 1.3140232563018799, "learning_rate": 8.689945087080228e-06, "loss": 0.5985, "step": 4008 }, { "epoch": 0.26, "grad_norm": 1.3081482648849487, "learning_rate": 8.689239613922614e-06, "loss": 0.6071, "step": 4009 }, { "epoch": 0.26, "grad_norm": 1.1021801233291626, "learning_rate": 8.688533979517855e-06, "loss": 0.5492, "step": 4010 }, { "epoch": 0.26, "grad_norm": 1.2316725254058838, "learning_rate": 8.687828183896792e-06, "loss": 0.6438, "step": 4011 }, { "epoch": 0.26, "grad_norm": 1.0814305543899536, "learning_rate": 8.687122227090274e-06, "loss": 0.5504, "step": 4012 }, { "epoch": 0.26, "grad_norm": 1.1909000873565674, "learning_rate": 8.686416109129156e-06, "loss": 0.5234, "step": 4013 }, { "epoch": 0.26, "grad_norm": 1.267873764038086, "learning_rate": 8.685709830044302e-06, "loss": 0.6143, "step": 4014 }, { "epoch": 0.26, "grad_norm": 1.168190836906433, "learning_rate": 8.68500338986658e-06, "loss": 0.535, "step": 4015 }, { "epoch": 0.26, "grad_norm": 1.091970682144165, "learning_rate": 8.684296788626868e-06, "loss": 0.5623, "step": 4016 }, { "epoch": 0.26, "grad_norm": 1.1039013862609863, "learning_rate": 8.683590026356049e-06, "loss": 0.5219, "step": 4017 }, { "epoch": 0.26, "grad_norm": 1.2526590824127197, "learning_rate": 8.682883103085012e-06, "loss": 0.5948, "step": 4018 }, { "epoch": 0.26, "grad_norm": 1.3704410791397095, "learning_rate": 8.682176018844658e-06, "loss": 0.5925, "step": 4019 }, { "epoch": 0.26, "grad_norm": 1.1870555877685547, "learning_rate": 8.681468773665888e-06, "loss": 0.5378, "step": 4020 }, { "epoch": 0.26, "grad_norm": 1.0536774396896362, "learning_rate": 8.68076136757962e-06, "loss": 0.5387, "step": 4021 }, { "epoch": 0.26, "grad_norm": 1.1891387701034546, "learning_rate": 8.680053800616766e-06, "loss": 0.6, "step": 4022 }, { "epoch": 0.26, "grad_norm": 1.1306531429290771, "learning_rate": 8.679346072808256e-06, "loss": 0.545, "step": 4023 }, { "epoch": 0.26, "grad_norm": 1.2393906116485596, "learning_rate": 8.678638184185023e-06, "loss": 0.53, "step": 4024 }, { "epoch": 0.26, "grad_norm": 1.1934822797775269, "learning_rate": 8.677930134778005e-06, "loss": 0.5439, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.2473472356796265, "learning_rate": 8.677221924618151e-06, "loss": 0.645, "step": 4026 }, { "epoch": 0.26, "grad_norm": 1.2801735401153564, "learning_rate": 8.676513553736415e-06, "loss": 0.6055, "step": 4027 }, { "epoch": 0.26, "grad_norm": 1.1188350915908813, "learning_rate": 8.675805022163755e-06, "loss": 0.5678, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.186638593673706, "learning_rate": 8.67509632993114e-06, "loss": 0.6045, "step": 4029 }, { "epoch": 0.26, "grad_norm": 1.1646413803100586, "learning_rate": 8.674387477069548e-06, "loss": 0.5382, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.2715028524398804, "learning_rate": 8.67367846360996e-06, "loss": 0.5824, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.221542239189148, "learning_rate": 8.672969289583363e-06, "loss": 0.5604, "step": 4032 }, { "epoch": 0.26, "grad_norm": 1.0802674293518066, "learning_rate": 8.672259955020757e-06, "loss": 0.5294, "step": 4033 }, { "epoch": 0.26, "grad_norm": 1.2144628763198853, "learning_rate": 8.67155045995314e-06, "loss": 0.5408, "step": 4034 }, { "epoch": 0.26, "grad_norm": 1.1287832260131836, "learning_rate": 8.670840804411526e-06, "loss": 0.5387, "step": 4035 }, { "epoch": 0.26, "grad_norm": 1.0180028676986694, "learning_rate": 8.670130988426933e-06, "loss": 0.5228, "step": 4036 }, { "epoch": 0.26, "grad_norm": 1.0867184400558472, "learning_rate": 8.669421012030383e-06, "loss": 0.5516, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.193159818649292, "learning_rate": 8.668710875252907e-06, "loss": 0.5379, "step": 4038 }, { "epoch": 0.26, "grad_norm": 1.1406049728393555, "learning_rate": 8.668000578125544e-06, "loss": 0.5758, "step": 4039 }, { "epoch": 0.26, "grad_norm": 1.1814074516296387, "learning_rate": 8.667290120679339e-06, "loss": 0.5451, "step": 4040 }, { "epoch": 0.26, "grad_norm": 1.2732384204864502, "learning_rate": 8.666579502945347e-06, "loss": 0.6368, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.1717220544815063, "learning_rate": 8.665868724954622e-06, "loss": 0.568, "step": 4042 }, { "epoch": 0.26, "grad_norm": 1.2386481761932373, "learning_rate": 8.665157786738234e-06, "loss": 0.6186, "step": 4043 }, { "epoch": 0.26, "grad_norm": 1.236586093902588, "learning_rate": 8.664446688327256e-06, "loss": 0.6033, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.1548552513122559, "learning_rate": 8.663735429752766e-06, "loss": 0.5459, "step": 4045 }, { "epoch": 0.26, "grad_norm": 1.0848174095153809, "learning_rate": 8.663024011045856e-06, "loss": 0.5513, "step": 4046 }, { "epoch": 0.26, "grad_norm": 1.0998961925506592, "learning_rate": 8.662312432237614e-06, "loss": 0.5317, "step": 4047 }, { "epoch": 0.26, "grad_norm": 1.1679342985153198, "learning_rate": 8.661600693359146e-06, "loss": 0.6205, "step": 4048 }, { "epoch": 0.26, "grad_norm": 1.14592444896698, "learning_rate": 8.66088879444156e-06, "loss": 0.5275, "step": 4049 }, { "epoch": 0.26, "grad_norm": 1.2288994789123535, "learning_rate": 8.660176735515969e-06, "loss": 0.5495, "step": 4050 }, { "epoch": 0.26, "grad_norm": 1.1553324460983276, "learning_rate": 8.659464516613494e-06, "loss": 0.5563, "step": 4051 }, { "epoch": 0.26, "grad_norm": 1.1011103391647339, "learning_rate": 8.65875213776527e-06, "loss": 0.6187, "step": 4052 }, { "epoch": 0.26, "grad_norm": 1.4219763278961182, "learning_rate": 8.658039599002426e-06, "loss": 0.5612, "step": 4053 }, { "epoch": 0.26, "grad_norm": 1.265527367591858, "learning_rate": 8.657326900356113e-06, "loss": 0.5599, "step": 4054 }, { "epoch": 0.26, "grad_norm": 1.0416659116744995, "learning_rate": 8.656614041857474e-06, "loss": 0.5702, "step": 4055 }, { "epoch": 0.26, "grad_norm": 1.190657138824463, "learning_rate": 8.655901023537672e-06, "loss": 0.5652, "step": 4056 }, { "epoch": 0.26, "grad_norm": 1.1651955842971802, "learning_rate": 8.655187845427866e-06, "loss": 0.5758, "step": 4057 }, { "epoch": 0.26, "grad_norm": 1.273526906967163, "learning_rate": 8.654474507559232e-06, "loss": 0.5341, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.1717042922973633, "learning_rate": 8.653761009962944e-06, "loss": 0.5887, "step": 4059 }, { "epoch": 0.26, "grad_norm": 1.4188287258148193, "learning_rate": 8.653047352670191e-06, "loss": 0.5452, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.126776933670044, "learning_rate": 8.652333535712161e-06, "loss": 0.5795, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.1295825242996216, "learning_rate": 8.651619559120057e-06, "loss": 0.5708, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.12552809715271, "learning_rate": 8.650905422925085e-06, "loss": 0.6083, "step": 4063 }, { "epoch": 0.26, "grad_norm": 1.2464150190353394, "learning_rate": 8.650191127158454e-06, "loss": 0.5697, "step": 4064 }, { "epoch": 0.26, "grad_norm": 1.1759668588638306, "learning_rate": 8.649476671851387e-06, "loss": 0.5692, "step": 4065 }, { "epoch": 0.26, "grad_norm": 1.1920251846313477, "learning_rate": 8.64876205703511e-06, "loss": 0.5498, "step": 4066 }, { "epoch": 0.26, "grad_norm": 1.1495798826217651, "learning_rate": 8.648047282740858e-06, "loss": 0.5149, "step": 4067 }, { "epoch": 0.26, "grad_norm": 1.3035469055175781, "learning_rate": 8.64733234899987e-06, "loss": 0.6059, "step": 4068 }, { "epoch": 0.26, "grad_norm": 1.139523983001709, "learning_rate": 8.646617255843397e-06, "loss": 0.5812, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.162172555923462, "learning_rate": 8.645902003302692e-06, "loss": 0.5307, "step": 4070 }, { "epoch": 0.26, "grad_norm": 1.2265089750289917, "learning_rate": 8.645186591409015e-06, "loss": 0.5432, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.1374565362930298, "learning_rate": 8.64447102019364e-06, "loss": 0.5725, "step": 4072 }, { "epoch": 0.26, "grad_norm": 1.2020412683486938, "learning_rate": 8.643755289687837e-06, "loss": 0.5518, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.0822380781173706, "learning_rate": 8.643039399922893e-06, "loss": 0.603, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.083225131034851, "learning_rate": 8.642323350930095e-06, "loss": 0.5331, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.061043620109558, "learning_rate": 8.641607142740742e-06, "loss": 0.5303, "step": 4076 }, { "epoch": 0.26, "grad_norm": 1.1149680614471436, "learning_rate": 8.640890775386135e-06, "loss": 0.5401, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.3002843856811523, "learning_rate": 8.640174248897585e-06, "loss": 0.5738, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.1517008543014526, "learning_rate": 8.639457563306412e-06, "loss": 0.5611, "step": 4079 }, { "epoch": 0.26, "grad_norm": 1.1328048706054688, "learning_rate": 8.638740718643937e-06, "loss": 0.5326, "step": 4080 }, { "epoch": 0.26, "grad_norm": 1.1610215902328491, "learning_rate": 8.638023714941495e-06, "loss": 0.5639, "step": 4081 }, { "epoch": 0.26, "grad_norm": 1.2583478689193726, "learning_rate": 8.637306552230422e-06, "loss": 0.5478, "step": 4082 }, { "epoch": 0.26, "grad_norm": 1.1313501596450806, "learning_rate": 8.636589230542064e-06, "loss": 0.5233, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.1374956369400024, "learning_rate": 8.635871749907774e-06, "loss": 0.5983, "step": 4084 }, { "epoch": 0.26, "grad_norm": 1.341949462890625, "learning_rate": 8.63515411035891e-06, "loss": 0.5949, "step": 4085 }, { "epoch": 0.26, "grad_norm": 1.1835198402404785, "learning_rate": 8.634436311926837e-06, "loss": 0.5494, "step": 4086 }, { "epoch": 0.26, "grad_norm": 1.265688419342041, "learning_rate": 8.633718354642931e-06, "loss": 0.5458, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.197568416595459, "learning_rate": 8.633000238538571e-06, "loss": 0.5881, "step": 4088 }, { "epoch": 0.26, "grad_norm": 1.1652406454086304, "learning_rate": 8.632281963645144e-06, "loss": 0.5275, "step": 4089 }, { "epoch": 0.26, "grad_norm": 1.1303117275238037, "learning_rate": 8.631563529994045e-06, "loss": 0.5746, "step": 4090 }, { "epoch": 0.26, "grad_norm": 1.3563423156738281, "learning_rate": 8.63084493761667e-06, "loss": 0.6138, "step": 4091 }, { "epoch": 0.26, "grad_norm": 1.2315207719802856, "learning_rate": 8.630126186544434e-06, "loss": 0.5423, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.2095768451690674, "learning_rate": 8.629407276808748e-06, "loss": 0.6175, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.1703165769577026, "learning_rate": 8.628688208441034e-06, "loss": 0.542, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.2058080434799194, "learning_rate": 8.62796898147272e-06, "loss": 0.5718, "step": 4095 }, { "epoch": 0.26, "grad_norm": 1.1680141687393188, "learning_rate": 8.62724959593524e-06, "loss": 0.568, "step": 4096 }, { "epoch": 0.26, "grad_norm": 1.0718812942504883, "learning_rate": 8.626530051860041e-06, "loss": 0.5322, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.3402010202407837, "learning_rate": 8.62581034927857e-06, "loss": 0.5618, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.1805596351623535, "learning_rate": 8.62509048822228e-06, "loss": 0.5763, "step": 4099 }, { "epoch": 0.26, "grad_norm": 1.2077693939208984, "learning_rate": 8.624370468722642e-06, "loss": 0.5583, "step": 4100 }, { "epoch": 0.26, "grad_norm": 1.1123378276824951, "learning_rate": 8.62365029081112e-06, "loss": 0.5739, "step": 4101 }, { "epoch": 0.26, "grad_norm": 1.2137763500213623, "learning_rate": 8.622929954519193e-06, "loss": 0.5395, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.0867700576782227, "learning_rate": 8.622209459878344e-06, "loss": 0.5649, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.1300021409988403, "learning_rate": 8.621488806920066e-06, "loss": 0.5439, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.1251184940338135, "learning_rate": 8.620767995675856e-06, "loss": 0.5622, "step": 4105 }, { "epoch": 0.27, "grad_norm": 1.10682213306427, "learning_rate": 8.620047026177219e-06, "loss": 0.5921, "step": 4106 }, { "epoch": 0.27, "grad_norm": 1.0870078802108765, "learning_rate": 8.619325898455664e-06, "loss": 0.507, "step": 4107 }, { "epoch": 0.27, "grad_norm": 1.1540567874908447, "learning_rate": 8.618604612542713e-06, "loss": 0.5594, "step": 4108 }, { "epoch": 0.27, "grad_norm": 1.1292386054992676, "learning_rate": 8.617883168469892e-06, "loss": 0.5375, "step": 4109 }, { "epoch": 0.27, "grad_norm": 1.146409034729004, "learning_rate": 8.617161566268731e-06, "loss": 0.532, "step": 4110 }, { "epoch": 0.27, "grad_norm": 1.047579288482666, "learning_rate": 8.616439805970771e-06, "loss": 0.5944, "step": 4111 }, { "epoch": 0.27, "grad_norm": 1.1624406576156616, "learning_rate": 8.615717887607556e-06, "loss": 0.5845, "step": 4112 }, { "epoch": 0.27, "grad_norm": 1.063037633895874, "learning_rate": 8.614995811210643e-06, "loss": 0.5494, "step": 4113 }, { "epoch": 0.27, "grad_norm": 1.1873623132705688, "learning_rate": 8.614273576811588e-06, "loss": 0.567, "step": 4114 }, { "epoch": 0.27, "grad_norm": 1.08259117603302, "learning_rate": 8.61355118444196e-06, "loss": 0.557, "step": 4115 }, { "epoch": 0.27, "grad_norm": 1.3232357501983643, "learning_rate": 8.612828634133335e-06, "loss": 0.5839, "step": 4116 }, { "epoch": 0.27, "grad_norm": 1.3898420333862305, "learning_rate": 8.612105925917289e-06, "loss": 0.6025, "step": 4117 }, { "epoch": 0.27, "grad_norm": 1.1931068897247314, "learning_rate": 8.611383059825414e-06, "loss": 0.5628, "step": 4118 }, { "epoch": 0.27, "grad_norm": 1.2866133451461792, "learning_rate": 8.610660035889302e-06, "loss": 0.5612, "step": 4119 }, { "epoch": 0.27, "grad_norm": 1.0937907695770264, "learning_rate": 8.609936854140557e-06, "loss": 0.528, "step": 4120 }, { "epoch": 0.27, "grad_norm": 1.3171077966690063, "learning_rate": 8.609213514610784e-06, "loss": 0.5837, "step": 4121 }, { "epoch": 0.27, "grad_norm": 1.0627778768539429, "learning_rate": 8.608490017331602e-06, "loss": 0.522, "step": 4122 }, { "epoch": 0.27, "grad_norm": 1.165324330329895, "learning_rate": 8.60776636233463e-06, "loss": 0.5728, "step": 4123 }, { "epoch": 0.27, "grad_norm": 1.216237187385559, "learning_rate": 8.607042549651498e-06, "loss": 0.5743, "step": 4124 }, { "epoch": 0.27, "grad_norm": 1.2311025857925415, "learning_rate": 8.606318579313842e-06, "loss": 0.5498, "step": 4125 }, { "epoch": 0.27, "grad_norm": 1.1677300930023193, "learning_rate": 8.605594451353308e-06, "loss": 0.5773, "step": 4126 }, { "epoch": 0.27, "grad_norm": 1.143378734588623, "learning_rate": 8.60487016580154e-06, "loss": 0.5793, "step": 4127 }, { "epoch": 0.27, "grad_norm": 1.1804776191711426, "learning_rate": 8.6041457226902e-06, "loss": 0.5923, "step": 4128 }, { "epoch": 0.27, "grad_norm": 1.0673918724060059, "learning_rate": 8.60342112205095e-06, "loss": 0.5399, "step": 4129 }, { "epoch": 0.27, "grad_norm": 1.0951565504074097, "learning_rate": 8.602696363915457e-06, "loss": 0.6167, "step": 4130 }, { "epoch": 0.27, "grad_norm": 1.1373772621154785, "learning_rate": 8.601971448315403e-06, "loss": 0.5565, "step": 4131 }, { "epoch": 0.27, "grad_norm": 1.140039086341858, "learning_rate": 8.601246375282468e-06, "loss": 0.5436, "step": 4132 }, { "epoch": 0.27, "grad_norm": 1.203288197517395, "learning_rate": 8.600521144848347e-06, "loss": 0.5601, "step": 4133 }, { "epoch": 0.27, "grad_norm": 1.030093789100647, "learning_rate": 8.599795757044736e-06, "loss": 0.5314, "step": 4134 }, { "epoch": 0.27, "grad_norm": 1.2925359010696411, "learning_rate": 8.59907021190334e-06, "loss": 0.6044, "step": 4135 }, { "epoch": 0.27, "grad_norm": 1.2018682956695557, "learning_rate": 8.598344509455871e-06, "loss": 0.5832, "step": 4136 }, { "epoch": 0.27, "grad_norm": 1.1010342836380005, "learning_rate": 8.597618649734047e-06, "loss": 0.5845, "step": 4137 }, { "epoch": 0.27, "grad_norm": 1.1938951015472412, "learning_rate": 8.596892632769594e-06, "loss": 0.5579, "step": 4138 }, { "epoch": 0.27, "grad_norm": 1.3666713237762451, "learning_rate": 8.596166458594244e-06, "loss": 0.62, "step": 4139 }, { "epoch": 0.27, "grad_norm": 1.1132317781448364, "learning_rate": 8.595440127239738e-06, "loss": 0.5313, "step": 4140 }, { "epoch": 0.27, "grad_norm": 1.1912319660186768, "learning_rate": 8.594713638737818e-06, "loss": 0.5253, "step": 4141 }, { "epoch": 0.27, "grad_norm": 1.141101598739624, "learning_rate": 8.59398699312024e-06, "loss": 0.5849, "step": 4142 }, { "epoch": 0.27, "grad_norm": 1.1704363822937012, "learning_rate": 8.593260190418765e-06, "loss": 0.578, "step": 4143 }, { "epoch": 0.27, "grad_norm": 1.1967074871063232, "learning_rate": 8.592533230665154e-06, "loss": 0.569, "step": 4144 }, { "epoch": 0.27, "grad_norm": 1.1468156576156616, "learning_rate": 8.591806113891187e-06, "loss": 0.6015, "step": 4145 }, { "epoch": 0.27, "grad_norm": 1.1384599208831787, "learning_rate": 8.591078840128642e-06, "loss": 0.5517, "step": 4146 }, { "epoch": 0.27, "grad_norm": 1.1105879545211792, "learning_rate": 8.590351409409305e-06, "loss": 0.5386, "step": 4147 }, { "epoch": 0.27, "grad_norm": 1.1625585556030273, "learning_rate": 8.589623821764971e-06, "loss": 0.5018, "step": 4148 }, { "epoch": 0.27, "grad_norm": 1.1445064544677734, "learning_rate": 8.58889607722744e-06, "loss": 0.5178, "step": 4149 }, { "epoch": 0.27, "grad_norm": 1.14942467212677, "learning_rate": 8.588168175828523e-06, "loss": 0.5779, "step": 4150 }, { "epoch": 0.27, "grad_norm": 1.04599928855896, "learning_rate": 8.587440117600033e-06, "loss": 0.5266, "step": 4151 }, { "epoch": 0.27, "grad_norm": 1.2027709484100342, "learning_rate": 8.58671190257379e-06, "loss": 0.5747, "step": 4152 }, { "epoch": 0.27, "grad_norm": 1.2231178283691406, "learning_rate": 8.585983530781623e-06, "loss": 0.6265, "step": 4153 }, { "epoch": 0.27, "grad_norm": 1.0983679294586182, "learning_rate": 8.58525500225537e-06, "loss": 0.5397, "step": 4154 }, { "epoch": 0.27, "grad_norm": 1.170596718788147, "learning_rate": 8.584526317026868e-06, "loss": 0.6039, "step": 4155 }, { "epoch": 0.27, "grad_norm": 1.044090986251831, "learning_rate": 8.58379747512797e-06, "loss": 0.5678, "step": 4156 }, { "epoch": 0.27, "grad_norm": 1.190543293952942, "learning_rate": 8.583068476590533e-06, "loss": 0.5712, "step": 4157 }, { "epoch": 0.27, "grad_norm": 1.3256070613861084, "learning_rate": 8.582339321446414e-06, "loss": 0.5759, "step": 4158 }, { "epoch": 0.27, "grad_norm": 1.2041881084442139, "learning_rate": 8.581610009727487e-06, "loss": 0.5739, "step": 4159 }, { "epoch": 0.27, "grad_norm": 1.1380332708358765, "learning_rate": 8.580880541465628e-06, "loss": 0.5284, "step": 4160 }, { "epoch": 0.27, "grad_norm": 1.1309577226638794, "learning_rate": 8.58015091669272e-06, "loss": 0.4975, "step": 4161 }, { "epoch": 0.27, "grad_norm": 1.1061887741088867, "learning_rate": 8.57942113544065e-06, "loss": 0.5433, "step": 4162 }, { "epoch": 0.27, "grad_norm": 1.0939445495605469, "learning_rate": 8.578691197741317e-06, "loss": 0.5764, "step": 4163 }, { "epoch": 0.27, "grad_norm": 1.2220900058746338, "learning_rate": 8.577961103626627e-06, "loss": 0.5824, "step": 4164 }, { "epoch": 0.27, "grad_norm": 1.0435189008712769, "learning_rate": 8.577230853128488e-06, "loss": 0.5663, "step": 4165 }, { "epoch": 0.27, "grad_norm": 1.158994197845459, "learning_rate": 8.576500446278817e-06, "loss": 0.5559, "step": 4166 }, { "epoch": 0.27, "grad_norm": 1.2098287343978882, "learning_rate": 8.57576988310954e-06, "loss": 0.565, "step": 4167 }, { "epoch": 0.27, "grad_norm": 1.160544514656067, "learning_rate": 8.575039163652585e-06, "loss": 0.5697, "step": 4168 }, { "epoch": 0.27, "grad_norm": 1.0961545705795288, "learning_rate": 8.574308287939894e-06, "loss": 0.5404, "step": 4169 }, { "epoch": 0.27, "grad_norm": 1.2574129104614258, "learning_rate": 8.573577256003408e-06, "loss": 0.582, "step": 4170 }, { "epoch": 0.27, "grad_norm": 1.1174907684326172, "learning_rate": 8.572846067875082e-06, "loss": 0.5631, "step": 4171 }, { "epoch": 0.27, "grad_norm": 1.1355701684951782, "learning_rate": 8.572114723586872e-06, "loss": 0.5594, "step": 4172 }, { "epoch": 0.27, "grad_norm": 1.2124391794204712, "learning_rate": 8.571383223170743e-06, "loss": 0.5876, "step": 4173 }, { "epoch": 0.27, "grad_norm": 1.1282618045806885, "learning_rate": 8.570651566658667e-06, "loss": 0.5959, "step": 4174 }, { "epoch": 0.27, "grad_norm": 1.029502272605896, "learning_rate": 8.569919754082624e-06, "loss": 0.5455, "step": 4175 }, { "epoch": 0.27, "grad_norm": 1.086235523223877, "learning_rate": 8.5691877854746e-06, "loss": 0.5246, "step": 4176 }, { "epoch": 0.27, "grad_norm": 1.2109081745147705, "learning_rate": 8.568455660866584e-06, "loss": 0.585, "step": 4177 }, { "epoch": 0.27, "grad_norm": 1.1536568403244019, "learning_rate": 8.56772338029058e-06, "loss": 0.5621, "step": 4178 }, { "epoch": 0.27, "grad_norm": 1.1477149724960327, "learning_rate": 8.566990943778591e-06, "loss": 0.5667, "step": 4179 }, { "epoch": 0.27, "grad_norm": 1.229463815689087, "learning_rate": 8.566258351362632e-06, "loss": 0.586, "step": 4180 }, { "epoch": 0.27, "grad_norm": 1.0541479587554932, "learning_rate": 8.56552560307472e-06, "loss": 0.5814, "step": 4181 }, { "epoch": 0.27, "grad_norm": 1.1898106336593628, "learning_rate": 8.564792698946885e-06, "loss": 0.594, "step": 4182 }, { "epoch": 0.27, "grad_norm": 1.117129921913147, "learning_rate": 8.564059639011155e-06, "loss": 0.5708, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.154457449913025, "learning_rate": 8.563326423299577e-06, "loss": 0.5764, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.1735270023345947, "learning_rate": 8.562593051844192e-06, "loss": 0.5366, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.1804835796356201, "learning_rate": 8.561859524677059e-06, "loss": 0.6075, "step": 4186 }, { "epoch": 0.27, "grad_norm": 1.1272636651992798, "learning_rate": 8.561125841830234e-06, "loss": 0.5778, "step": 4187 }, { "epoch": 0.27, "grad_norm": 1.1946022510528564, "learning_rate": 8.560392003335787e-06, "loss": 0.5341, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.2823998928070068, "learning_rate": 8.55965800922579e-06, "loss": 0.6015, "step": 4189 }, { "epoch": 0.27, "grad_norm": 1.2416104078292847, "learning_rate": 8.558923859532328e-06, "loss": 0.5063, "step": 4190 }, { "epoch": 0.27, "grad_norm": 1.1700599193572998, "learning_rate": 8.558189554287483e-06, "loss": 0.5519, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.251531958580017, "learning_rate": 8.557455093523357e-06, "loss": 0.5599, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.114341139793396, "learning_rate": 8.556720477272044e-06, "loss": 0.5419, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.2104015350341797, "learning_rate": 8.555985705565656e-06, "loss": 0.6007, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.1275825500488281, "learning_rate": 8.555250778436308e-06, "loss": 0.4983, "step": 4195 }, { "epoch": 0.27, "grad_norm": 1.1647285223007202, "learning_rate": 8.554515695916122e-06, "loss": 0.559, "step": 4196 }, { "epoch": 0.27, "grad_norm": 1.201856255531311, "learning_rate": 8.553780458037225e-06, "loss": 0.5649, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.2920342683792114, "learning_rate": 8.553045064831752e-06, "loss": 0.5654, "step": 4198 }, { "epoch": 0.27, "grad_norm": 1.1583582162857056, "learning_rate": 8.552309516331846e-06, "loss": 0.5916, "step": 4199 }, { "epoch": 0.27, "grad_norm": 1.4117417335510254, "learning_rate": 8.551573812569657e-06, "loss": 0.5769, "step": 4200 }, { "epoch": 0.27, "grad_norm": 1.1877937316894531, "learning_rate": 8.55083795357734e-06, "loss": 0.5479, "step": 4201 }, { "epoch": 0.27, "grad_norm": 1.4274661540985107, "learning_rate": 8.550101939387056e-06, "loss": 0.6005, "step": 4202 }, { "epoch": 0.27, "grad_norm": 1.2900099754333496, "learning_rate": 8.549365770030977e-06, "loss": 0.5236, "step": 4203 }, { "epoch": 0.27, "grad_norm": 0.9774498343467712, "learning_rate": 8.548629445541278e-06, "loss": 0.5468, "step": 4204 }, { "epoch": 0.27, "grad_norm": 1.2065293788909912, "learning_rate": 8.54789296595014e-06, "loss": 0.5551, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.1830766201019287, "learning_rate": 8.547156331289756e-06, "loss": 0.5761, "step": 4206 }, { "epoch": 0.27, "grad_norm": 1.3191159963607788, "learning_rate": 8.54641954159232e-06, "loss": 0.5932, "step": 4207 }, { "epoch": 0.27, "grad_norm": 1.3631751537322998, "learning_rate": 8.545682596890034e-06, "loss": 0.5408, "step": 4208 }, { "epoch": 0.27, "grad_norm": 1.233719825744629, "learning_rate": 8.544945497215111e-06, "loss": 0.5865, "step": 4209 }, { "epoch": 0.27, "grad_norm": 1.0857144594192505, "learning_rate": 8.544208242599767e-06, "loss": 0.561, "step": 4210 }, { "epoch": 0.27, "grad_norm": 1.2324649095535278, "learning_rate": 8.543470833076224e-06, "loss": 0.5505, "step": 4211 }, { "epoch": 0.27, "grad_norm": 1.2088866233825684, "learning_rate": 8.542733268676714e-06, "loss": 0.5665, "step": 4212 }, { "epoch": 0.27, "grad_norm": 1.3031480312347412, "learning_rate": 8.541995549433473e-06, "loss": 0.5962, "step": 4213 }, { "epoch": 0.27, "grad_norm": 1.1042176485061646, "learning_rate": 8.541257675378745e-06, "loss": 0.5168, "step": 4214 }, { "epoch": 0.27, "grad_norm": 1.2042129039764404, "learning_rate": 8.540519646544781e-06, "loss": 0.5664, "step": 4215 }, { "epoch": 0.27, "grad_norm": 1.1464290618896484, "learning_rate": 8.539781462963837e-06, "loss": 0.55, "step": 4216 }, { "epoch": 0.27, "grad_norm": 1.1003296375274658, "learning_rate": 8.539043124668178e-06, "loss": 0.5363, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.1638538837432861, "learning_rate": 8.538304631690074e-06, "loss": 0.6097, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.267133116722107, "learning_rate": 8.537565984061804e-06, "loss": 0.5942, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.1100050210952759, "learning_rate": 8.536827181815654e-06, "loss": 0.6042, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.075927734375, "learning_rate": 8.536088224983911e-06, "loss": 0.5112, "step": 4221 }, { "epoch": 0.27, "grad_norm": 1.135105013847351, "learning_rate": 8.535349113598875e-06, "loss": 0.5236, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.392430067062378, "learning_rate": 8.534609847692851e-06, "loss": 0.63, "step": 4223 }, { "epoch": 0.27, "grad_norm": 1.4041635990142822, "learning_rate": 8.53387042729815e-06, "loss": 0.5808, "step": 4224 }, { "epoch": 0.27, "grad_norm": 1.2112529277801514, "learning_rate": 8.53313085244709e-06, "loss": 0.5524, "step": 4225 }, { "epoch": 0.27, "grad_norm": 1.105129599571228, "learning_rate": 8.532391123171996e-06, "loss": 0.5547, "step": 4226 }, { "epoch": 0.27, "grad_norm": 1.4488410949707031, "learning_rate": 8.531651239505199e-06, "loss": 0.5412, "step": 4227 }, { "epoch": 0.27, "grad_norm": 1.274394154548645, "learning_rate": 8.53091120147904e-06, "loss": 0.5622, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.2932010889053345, "learning_rate": 8.530171009125861e-06, "loss": 0.6035, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.1916698217391968, "learning_rate": 8.529430662478014e-06, "loss": 0.5515, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.2585529088974, "learning_rate": 8.528690161567861e-06, "loss": 0.5585, "step": 4231 }, { "epoch": 0.27, "grad_norm": 1.158145785331726, "learning_rate": 8.527949506427764e-06, "loss": 0.5402, "step": 4232 }, { "epoch": 0.27, "grad_norm": 1.1641933917999268, "learning_rate": 8.527208697090096e-06, "loss": 0.5302, "step": 4233 }, { "epoch": 0.27, "grad_norm": 1.1817654371261597, "learning_rate": 8.526467733587238e-06, "loss": 0.5452, "step": 4234 }, { "epoch": 0.27, "grad_norm": 1.322879672050476, "learning_rate": 8.525726615951571e-06, "loss": 0.5872, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.2544771432876587, "learning_rate": 8.524985344215494e-06, "loss": 0.5724, "step": 4236 }, { "epoch": 0.27, "grad_norm": 1.347136378288269, "learning_rate": 8.524243918411399e-06, "loss": 0.5488, "step": 4237 }, { "epoch": 0.27, "grad_norm": 1.125471591949463, "learning_rate": 8.523502338571696e-06, "loss": 0.6027, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.114038109779358, "learning_rate": 8.522760604728795e-06, "loss": 0.601, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.0700150728225708, "learning_rate": 8.522018716915119e-06, "loss": 0.5373, "step": 4240 }, { "epoch": 0.27, "grad_norm": 1.1649420261383057, "learning_rate": 8.52127667516309e-06, "loss": 0.5546, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.4677202701568604, "learning_rate": 8.520534479505144e-06, "loss": 0.6066, "step": 4242 }, { "epoch": 0.27, "grad_norm": 1.196492075920105, "learning_rate": 8.519792129973718e-06, "loss": 0.5645, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.1979260444641113, "learning_rate": 8.51904962660126e-06, "loss": 0.5166, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.1559909582138062, "learning_rate": 8.518306969420222e-06, "loss": 0.5464, "step": 4245 }, { "epoch": 0.27, "grad_norm": 1.2009435892105103, "learning_rate": 8.517564158463064e-06, "loss": 0.5838, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.0302141904830933, "learning_rate": 8.51682119376225e-06, "loss": 0.5316, "step": 4247 }, { "epoch": 0.27, "grad_norm": 1.0889039039611816, "learning_rate": 8.516078075350256e-06, "loss": 0.5488, "step": 4248 }, { "epoch": 0.27, "grad_norm": 1.2005246877670288, "learning_rate": 8.515334803259563e-06, "loss": 0.627, "step": 4249 }, { "epoch": 0.27, "grad_norm": 1.1789216995239258, "learning_rate": 8.514591377522654e-06, "loss": 0.5869, "step": 4250 }, { "epoch": 0.27, "grad_norm": 1.2853927612304688, "learning_rate": 8.513847798172023e-06, "loss": 0.591, "step": 4251 }, { "epoch": 0.27, "grad_norm": 1.192060112953186, "learning_rate": 8.513104065240172e-06, "loss": 0.4829, "step": 4252 }, { "epoch": 0.27, "grad_norm": 1.2901992797851562, "learning_rate": 8.512360178759606e-06, "loss": 0.585, "step": 4253 }, { "epoch": 0.27, "grad_norm": 1.133162021636963, "learning_rate": 8.511616138762839e-06, "loss": 0.5757, "step": 4254 }, { "epoch": 0.27, "grad_norm": 1.3850504159927368, "learning_rate": 8.510871945282388e-06, "loss": 0.5985, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.3033725023269653, "learning_rate": 8.510127598350786e-06, "loss": 0.6114, "step": 4256 }, { "epoch": 0.27, "grad_norm": 1.1111271381378174, "learning_rate": 8.50938309800056e-06, "loss": 0.5913, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.1221566200256348, "learning_rate": 8.508638444264255e-06, "loss": 0.5696, "step": 4258 }, { "epoch": 0.27, "grad_norm": 1.083627462387085, "learning_rate": 8.507893637174415e-06, "loss": 0.5056, "step": 4259 }, { "epoch": 0.27, "grad_norm": 1.0777406692504883, "learning_rate": 8.507148676763595e-06, "loss": 0.5592, "step": 4260 }, { "epoch": 0.28, "grad_norm": 1.1038761138916016, "learning_rate": 8.506403563064354e-06, "loss": 0.5768, "step": 4261 }, { "epoch": 0.28, "grad_norm": 1.1386860609054565, "learning_rate": 8.50565829610926e-06, "loss": 0.5796, "step": 4262 }, { "epoch": 0.28, "grad_norm": 1.1241108179092407, "learning_rate": 8.504912875930889e-06, "loss": 0.5771, "step": 4263 }, { "epoch": 0.28, "grad_norm": 1.1600590944290161, "learning_rate": 8.504167302561816e-06, "loss": 0.5403, "step": 4264 }, { "epoch": 0.28, "grad_norm": 1.1469686031341553, "learning_rate": 8.503421576034634e-06, "loss": 0.5755, "step": 4265 }, { "epoch": 0.28, "grad_norm": 1.1084126234054565, "learning_rate": 8.502675696381933e-06, "loss": 0.5705, "step": 4266 }, { "epoch": 0.28, "grad_norm": 1.0971648693084717, "learning_rate": 8.501929663636313e-06, "loss": 0.5412, "step": 4267 }, { "epoch": 0.28, "grad_norm": 1.1881049871444702, "learning_rate": 8.501183477830382e-06, "loss": 0.535, "step": 4268 }, { "epoch": 0.28, "grad_norm": 1.2106819152832031, "learning_rate": 8.500437138996755e-06, "loss": 0.5945, "step": 4269 }, { "epoch": 0.28, "grad_norm": 1.1380070447921753, "learning_rate": 8.499690647168053e-06, "loss": 0.588, "step": 4270 }, { "epoch": 0.28, "grad_norm": 1.1879159212112427, "learning_rate": 8.498944002376901e-06, "loss": 0.5621, "step": 4271 }, { "epoch": 0.28, "grad_norm": 1.2517751455307007, "learning_rate": 8.498197204655936e-06, "loss": 0.5699, "step": 4272 }, { "epoch": 0.28, "grad_norm": 1.1819813251495361, "learning_rate": 8.497450254037793e-06, "loss": 0.5649, "step": 4273 }, { "epoch": 0.28, "grad_norm": 1.3392301797866821, "learning_rate": 8.496703150555125e-06, "loss": 0.6004, "step": 4274 }, { "epoch": 0.28, "grad_norm": 1.1815348863601685, "learning_rate": 8.495955894240585e-06, "loss": 0.5658, "step": 4275 }, { "epoch": 0.28, "grad_norm": 1.2456910610198975, "learning_rate": 8.495208485126831e-06, "loss": 0.5586, "step": 4276 }, { "epoch": 0.28, "grad_norm": 1.1682339906692505, "learning_rate": 8.494460923246533e-06, "loss": 0.5567, "step": 4277 }, { "epoch": 0.28, "grad_norm": 1.1759122610092163, "learning_rate": 8.493713208632364e-06, "loss": 0.577, "step": 4278 }, { "epoch": 0.28, "grad_norm": 1.1016336679458618, "learning_rate": 8.492965341317004e-06, "loss": 0.5706, "step": 4279 }, { "epoch": 0.28, "grad_norm": 1.1534262895584106, "learning_rate": 8.49221732133314e-06, "loss": 0.5872, "step": 4280 }, { "epoch": 0.28, "grad_norm": 1.0983301401138306, "learning_rate": 8.491469148713466e-06, "loss": 0.5708, "step": 4281 }, { "epoch": 0.28, "grad_norm": 1.1007918119430542, "learning_rate": 8.490720823490688e-06, "loss": 0.5486, "step": 4282 }, { "epoch": 0.28, "grad_norm": 1.2816460132598877, "learning_rate": 8.489972345697505e-06, "loss": 0.6057, "step": 4283 }, { "epoch": 0.28, "grad_norm": 1.0512797832489014, "learning_rate": 8.489223715366637e-06, "loss": 0.5313, "step": 4284 }, { "epoch": 0.28, "grad_norm": 1.088608741760254, "learning_rate": 8.488474932530802e-06, "loss": 0.5562, "step": 4285 }, { "epoch": 0.28, "grad_norm": 1.2544653415679932, "learning_rate": 8.487725997222728e-06, "loss": 0.5927, "step": 4286 }, { "epoch": 0.28, "grad_norm": 1.0900789499282837, "learning_rate": 8.486976909475148e-06, "loss": 0.5474, "step": 4287 }, { "epoch": 0.28, "grad_norm": 1.2076746225357056, "learning_rate": 8.486227669320806e-06, "loss": 0.5867, "step": 4288 }, { "epoch": 0.28, "grad_norm": 1.0855516195297241, "learning_rate": 8.485478276792446e-06, "loss": 0.5592, "step": 4289 }, { "epoch": 0.28, "grad_norm": 1.0884630680084229, "learning_rate": 8.484728731922825e-06, "loss": 0.5005, "step": 4290 }, { "epoch": 0.28, "grad_norm": 1.0791174173355103, "learning_rate": 8.483979034744698e-06, "loss": 0.5632, "step": 4291 }, { "epoch": 0.28, "grad_norm": 1.1905046701431274, "learning_rate": 8.483229185290839e-06, "loss": 0.5412, "step": 4292 }, { "epoch": 0.28, "grad_norm": 1.3147436380386353, "learning_rate": 8.482479183594018e-06, "loss": 0.561, "step": 4293 }, { "epoch": 0.28, "grad_norm": 1.2090667486190796, "learning_rate": 8.481729029687019e-06, "loss": 0.5575, "step": 4294 }, { "epoch": 0.28, "grad_norm": 1.2520208358764648, "learning_rate": 8.480978723602624e-06, "loss": 0.5463, "step": 4295 }, { "epoch": 0.28, "grad_norm": 1.092302680015564, "learning_rate": 8.480228265373632e-06, "loss": 0.5075, "step": 4296 }, { "epoch": 0.28, "grad_norm": 1.2751803398132324, "learning_rate": 8.479477655032841e-06, "loss": 0.577, "step": 4297 }, { "epoch": 0.28, "grad_norm": 1.1383048295974731, "learning_rate": 8.478726892613059e-06, "loss": 0.558, "step": 4298 }, { "epoch": 0.28, "grad_norm": 1.0667139291763306, "learning_rate": 8.477975978147099e-06, "loss": 0.5486, "step": 4299 }, { "epoch": 0.28, "grad_norm": 1.0980005264282227, "learning_rate": 8.477224911667783e-06, "loss": 0.5757, "step": 4300 }, { "epoch": 0.28, "grad_norm": 1.1661360263824463, "learning_rate": 8.476473693207938e-06, "loss": 0.5849, "step": 4301 }, { "epoch": 0.28, "grad_norm": 1.2865229845046997, "learning_rate": 8.475722322800396e-06, "loss": 0.5771, "step": 4302 }, { "epoch": 0.28, "grad_norm": 1.3061492443084717, "learning_rate": 8.474970800478001e-06, "loss": 0.5938, "step": 4303 }, { "epoch": 0.28, "grad_norm": 1.1361536979675293, "learning_rate": 8.474219126273596e-06, "loss": 0.5522, "step": 4304 }, { "epoch": 0.28, "grad_norm": 1.2726367712020874, "learning_rate": 8.473467300220038e-06, "loss": 0.57, "step": 4305 }, { "epoch": 0.28, "grad_norm": 1.1091747283935547, "learning_rate": 8.472715322350184e-06, "loss": 0.537, "step": 4306 }, { "epoch": 0.28, "grad_norm": 1.1843326091766357, "learning_rate": 8.471963192696904e-06, "loss": 0.5794, "step": 4307 }, { "epoch": 0.28, "grad_norm": 1.3328258991241455, "learning_rate": 8.471210911293071e-06, "loss": 0.5598, "step": 4308 }, { "epoch": 0.28, "grad_norm": 1.0570119619369507, "learning_rate": 8.470458478171564e-06, "loss": 0.5029, "step": 4309 }, { "epoch": 0.28, "grad_norm": 1.2150993347167969, "learning_rate": 8.469705893365272e-06, "loss": 0.5858, "step": 4310 }, { "epoch": 0.28, "grad_norm": 1.2564247846603394, "learning_rate": 8.468953156907086e-06, "loss": 0.5919, "step": 4311 }, { "epoch": 0.28, "grad_norm": 1.1546332836151123, "learning_rate": 8.468200268829909e-06, "loss": 0.5786, "step": 4312 }, { "epoch": 0.28, "grad_norm": 1.2521638870239258, "learning_rate": 8.467447229166646e-06, "loss": 0.5502, "step": 4313 }, { "epoch": 0.28, "grad_norm": 1.124377727508545, "learning_rate": 8.46669403795021e-06, "loss": 0.5835, "step": 4314 }, { "epoch": 0.28, "grad_norm": 1.1688916683197021, "learning_rate": 8.465940695213522e-06, "loss": 0.5739, "step": 4315 }, { "epoch": 0.28, "grad_norm": 1.2858625650405884, "learning_rate": 8.465187200989508e-06, "loss": 0.5799, "step": 4316 }, { "epoch": 0.28, "grad_norm": 1.323425531387329, "learning_rate": 8.464433555311102e-06, "loss": 0.5986, "step": 4317 }, { "epoch": 0.28, "grad_norm": 1.2242186069488525, "learning_rate": 8.463679758211244e-06, "loss": 0.6211, "step": 4318 }, { "epoch": 0.28, "grad_norm": 1.1445696353912354, "learning_rate": 8.46292580972288e-06, "loss": 0.499, "step": 4319 }, { "epoch": 0.28, "grad_norm": 1.2355971336364746, "learning_rate": 8.462171709878964e-06, "loss": 0.5622, "step": 4320 }, { "epoch": 0.28, "grad_norm": 1.3140207529067993, "learning_rate": 8.461417458712454e-06, "loss": 0.5604, "step": 4321 }, { "epoch": 0.28, "grad_norm": 1.260040283203125, "learning_rate": 8.460663056256319e-06, "loss": 0.534, "step": 4322 }, { "epoch": 0.28, "grad_norm": 1.1110544204711914, "learning_rate": 8.459908502543528e-06, "loss": 0.5865, "step": 4323 }, { "epoch": 0.28, "grad_norm": 1.137648582458496, "learning_rate": 8.459153797607065e-06, "loss": 0.535, "step": 4324 }, { "epoch": 0.28, "grad_norm": 1.057915210723877, "learning_rate": 8.458398941479914e-06, "loss": 0.5435, "step": 4325 }, { "epoch": 0.28, "grad_norm": 1.0767076015472412, "learning_rate": 8.457643934195068e-06, "loss": 0.5354, "step": 4326 }, { "epoch": 0.28, "grad_norm": 1.035732388496399, "learning_rate": 8.456888775785526e-06, "loss": 0.5609, "step": 4327 }, { "epoch": 0.28, "grad_norm": 1.0975306034088135, "learning_rate": 8.456133466284296e-06, "loss": 0.5546, "step": 4328 }, { "epoch": 0.28, "grad_norm": 1.206925392150879, "learning_rate": 8.455378005724386e-06, "loss": 0.6041, "step": 4329 }, { "epoch": 0.28, "grad_norm": 1.1324533224105835, "learning_rate": 8.45462239413882e-06, "loss": 0.5167, "step": 4330 }, { "epoch": 0.28, "grad_norm": 1.1734058856964111, "learning_rate": 8.453866631560623e-06, "loss": 0.6264, "step": 4331 }, { "epoch": 0.28, "grad_norm": 1.101871371269226, "learning_rate": 8.453110718022826e-06, "loss": 0.5264, "step": 4332 }, { "epoch": 0.28, "grad_norm": 1.2062329053878784, "learning_rate": 8.452354653558469e-06, "loss": 0.5565, "step": 4333 }, { "epoch": 0.28, "grad_norm": 1.3369157314300537, "learning_rate": 8.451598438200596e-06, "loss": 0.5647, "step": 4334 }, { "epoch": 0.28, "grad_norm": 1.21406090259552, "learning_rate": 8.450842071982263e-06, "loss": 0.5521, "step": 4335 }, { "epoch": 0.28, "grad_norm": 1.1214505434036255, "learning_rate": 8.450085554936525e-06, "loss": 0.5834, "step": 4336 }, { "epoch": 0.28, "grad_norm": 1.2424190044403076, "learning_rate": 8.449328887096449e-06, "loss": 0.5469, "step": 4337 }, { "epoch": 0.28, "grad_norm": 1.1182324886322021, "learning_rate": 8.448572068495105e-06, "loss": 0.5522, "step": 4338 }, { "epoch": 0.28, "grad_norm": 1.2267358303070068, "learning_rate": 8.447815099165574e-06, "loss": 0.65, "step": 4339 }, { "epoch": 0.28, "grad_norm": 1.104353427886963, "learning_rate": 8.447057979140942e-06, "loss": 0.5161, "step": 4340 }, { "epoch": 0.28, "grad_norm": 1.0873545408248901, "learning_rate": 8.446300708454299e-06, "loss": 0.5483, "step": 4341 }, { "epoch": 0.28, "grad_norm": 1.1730316877365112, "learning_rate": 8.445543287138741e-06, "loss": 0.5525, "step": 4342 }, { "epoch": 0.28, "grad_norm": 1.152064561843872, "learning_rate": 8.44478571522738e-06, "loss": 0.553, "step": 4343 }, { "epoch": 0.28, "grad_norm": 1.2390698194503784, "learning_rate": 8.444027992753319e-06, "loss": 0.5622, "step": 4344 }, { "epoch": 0.28, "grad_norm": 1.3274407386779785, "learning_rate": 8.443270119749683e-06, "loss": 0.5458, "step": 4345 }, { "epoch": 0.28, "grad_norm": 1.3268872499465942, "learning_rate": 8.442512096249593e-06, "loss": 0.6026, "step": 4346 }, { "epoch": 0.28, "grad_norm": 1.34464430809021, "learning_rate": 8.441753922286179e-06, "loss": 0.6121, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.1599137783050537, "learning_rate": 8.440995597892582e-06, "loss": 0.5548, "step": 4348 }, { "epoch": 0.28, "grad_norm": 1.2684557437896729, "learning_rate": 8.440237123101947e-06, "loss": 0.5739, "step": 4349 }, { "epoch": 0.28, "grad_norm": 1.2260032892227173, "learning_rate": 8.439478497947421e-06, "loss": 0.5651, "step": 4350 }, { "epoch": 0.28, "grad_norm": 1.2376829385757446, "learning_rate": 8.438719722462166e-06, "loss": 0.5076, "step": 4351 }, { "epoch": 0.28, "grad_norm": 1.3199927806854248, "learning_rate": 8.437960796679342e-06, "loss": 0.5958, "step": 4352 }, { "epoch": 0.28, "grad_norm": 1.1822702884674072, "learning_rate": 8.437201720632124e-06, "loss": 0.5261, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.1296483278274536, "learning_rate": 8.436442494353684e-06, "loss": 0.5908, "step": 4354 }, { "epoch": 0.28, "grad_norm": 1.1572638750076294, "learning_rate": 8.435683117877211e-06, "loss": 0.5777, "step": 4355 }, { "epoch": 0.28, "grad_norm": 1.2329332828521729, "learning_rate": 8.434923591235891e-06, "loss": 0.5671, "step": 4356 }, { "epoch": 0.28, "grad_norm": 1.3155925273895264, "learning_rate": 8.434163914462925e-06, "loss": 0.5944, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.244549036026001, "learning_rate": 8.433404087591515e-06, "loss": 0.5557, "step": 4358 }, { "epoch": 0.28, "grad_norm": 1.1641713380813599, "learning_rate": 8.432644110654868e-06, "loss": 0.5941, "step": 4359 }, { "epoch": 0.28, "grad_norm": 1.2004499435424805, "learning_rate": 8.431883983686206e-06, "loss": 0.6035, "step": 4360 }, { "epoch": 0.28, "grad_norm": 1.1156126260757446, "learning_rate": 8.431123706718747e-06, "loss": 0.5535, "step": 4361 }, { "epoch": 0.28, "grad_norm": 1.2035784721374512, "learning_rate": 8.430363279785725e-06, "loss": 0.5976, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.3024228811264038, "learning_rate": 8.429602702920375e-06, "loss": 0.6141, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.2412341833114624, "learning_rate": 8.428841976155937e-06, "loss": 0.5936, "step": 4364 }, { "epoch": 0.28, "grad_norm": 1.1470942497253418, "learning_rate": 8.428081099525663e-06, "loss": 0.5422, "step": 4365 }, { "epoch": 0.28, "grad_norm": 1.234218955039978, "learning_rate": 8.42732007306281e-06, "loss": 0.5335, "step": 4366 }, { "epoch": 0.28, "grad_norm": 1.0921976566314697, "learning_rate": 8.42655889680064e-06, "loss": 0.5294, "step": 4367 }, { "epoch": 0.28, "grad_norm": 1.1665053367614746, "learning_rate": 8.42579757077242e-06, "loss": 0.5962, "step": 4368 }, { "epoch": 0.28, "grad_norm": 1.0597567558288574, "learning_rate": 8.425036095011428e-06, "loss": 0.5394, "step": 4369 }, { "epoch": 0.28, "grad_norm": 1.3308526277542114, "learning_rate": 8.424274469550943e-06, "loss": 0.6072, "step": 4370 }, { "epoch": 0.28, "grad_norm": 1.2705678939819336, "learning_rate": 8.423512694424256e-06, "loss": 0.5971, "step": 4371 }, { "epoch": 0.28, "grad_norm": 1.067089319229126, "learning_rate": 8.422750769664663e-06, "loss": 0.538, "step": 4372 }, { "epoch": 0.28, "grad_norm": 1.2676701545715332, "learning_rate": 8.421988695305464e-06, "loss": 0.5592, "step": 4373 }, { "epoch": 0.28, "grad_norm": 1.075002670288086, "learning_rate": 8.421226471379969e-06, "loss": 0.5212, "step": 4374 }, { "epoch": 0.28, "grad_norm": 1.1590514183044434, "learning_rate": 8.42046409792149e-06, "loss": 0.543, "step": 4375 }, { "epoch": 0.28, "grad_norm": 1.199355125427246, "learning_rate": 8.419701574963352e-06, "loss": 0.559, "step": 4376 }, { "epoch": 0.28, "grad_norm": 1.269083023071289, "learning_rate": 8.41893890253888e-06, "loss": 0.5883, "step": 4377 }, { "epoch": 0.28, "grad_norm": 1.127142071723938, "learning_rate": 8.41817608068141e-06, "loss": 0.5464, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.2091821432113647, "learning_rate": 8.417413109424282e-06, "loss": 0.53, "step": 4379 }, { "epoch": 0.28, "grad_norm": 1.3352535963058472, "learning_rate": 8.416649988800844e-06, "loss": 0.6075, "step": 4380 }, { "epoch": 0.28, "grad_norm": 1.1442055702209473, "learning_rate": 8.415886718844452e-06, "loss": 0.565, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.2790042161941528, "learning_rate": 8.415123299588462e-06, "loss": 0.5565, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.2350032329559326, "learning_rate": 8.414359731066246e-06, "loss": 0.6044, "step": 4383 }, { "epoch": 0.28, "grad_norm": 1.0662792921066284, "learning_rate": 8.413596013311175e-06, "loss": 0.5162, "step": 4384 }, { "epoch": 0.28, "grad_norm": 1.1549326181411743, "learning_rate": 8.412832146356627e-06, "loss": 0.539, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.1714472770690918, "learning_rate": 8.412068130235992e-06, "loss": 0.5888, "step": 4386 }, { "epoch": 0.28, "grad_norm": 1.1207756996154785, "learning_rate": 8.411303964982665e-06, "loss": 0.5429, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.1319615840911865, "learning_rate": 8.410539650630039e-06, "loss": 0.5349, "step": 4388 }, { "epoch": 0.28, "grad_norm": 1.2691510915756226, "learning_rate": 8.409775187211527e-06, "loss": 0.5571, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.1095499992370605, "learning_rate": 8.409010574760538e-06, "loss": 0.5618, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.1187206506729126, "learning_rate": 8.408245813310492e-06, "loss": 0.6098, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.1772844791412354, "learning_rate": 8.407480902894815e-06, "loss": 0.5322, "step": 4392 }, { "epoch": 0.28, "grad_norm": 1.1859307289123535, "learning_rate": 8.40671584354694e-06, "loss": 0.5182, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.2080026865005493, "learning_rate": 8.405950635300305e-06, "loss": 0.5669, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.256892204284668, "learning_rate": 8.405185278188354e-06, "loss": 0.5962, "step": 4395 }, { "epoch": 0.28, "grad_norm": 1.204559087753296, "learning_rate": 8.40441977224454e-06, "loss": 0.5774, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.0735137462615967, "learning_rate": 8.403654117502323e-06, "loss": 0.5653, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.1939719915390015, "learning_rate": 8.402888313995164e-06, "loss": 0.5174, "step": 4398 }, { "epoch": 0.28, "grad_norm": 1.1059821844100952, "learning_rate": 8.402122361756537e-06, "loss": 0.5462, "step": 4399 }, { "epoch": 0.28, "grad_norm": 1.0513750314712524, "learning_rate": 8.401356260819918e-06, "loss": 0.5158, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.0931636095046997, "learning_rate": 8.400590011218794e-06, "loss": 0.5312, "step": 4401 }, { "epoch": 0.28, "grad_norm": 1.165984034538269, "learning_rate": 8.399823612986655e-06, "loss": 0.5149, "step": 4402 }, { "epoch": 0.28, "grad_norm": 1.1042741537094116, "learning_rate": 8.399057066156995e-06, "loss": 0.5352, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.2810529470443726, "learning_rate": 8.398290370763323e-06, "loss": 0.5343, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.1556878089904785, "learning_rate": 8.397523526839145e-06, "loss": 0.5514, "step": 4405 }, { "epoch": 0.28, "grad_norm": 1.0579122304916382, "learning_rate": 8.396756534417981e-06, "loss": 0.5507, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.199345588684082, "learning_rate": 8.395989393533351e-06, "loss": 0.5574, "step": 4407 }, { "epoch": 0.28, "grad_norm": 1.1110341548919678, "learning_rate": 8.395222104218785e-06, "loss": 0.5621, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.287221908569336, "learning_rate": 8.394454666507824e-06, "loss": 0.5544, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.0302618741989136, "learning_rate": 8.393687080434005e-06, "loss": 0.5321, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.1254916191101074, "learning_rate": 8.392919346030881e-06, "loss": 0.6039, "step": 4411 }, { "epoch": 0.28, "grad_norm": 1.1024731397628784, "learning_rate": 8.392151463332004e-06, "loss": 0.5423, "step": 4412 }, { "epoch": 0.28, "grad_norm": 1.0191887617111206, "learning_rate": 8.39138343237094e-06, "loss": 0.6144, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.1529531478881836, "learning_rate": 8.390615253181257e-06, "loss": 0.5432, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.1179159879684448, "learning_rate": 8.389846925796528e-06, "loss": 0.5503, "step": 4415 }, { "epoch": 0.29, "grad_norm": 1.0404047966003418, "learning_rate": 8.389078450250337e-06, "loss": 0.5197, "step": 4416 }, { "epoch": 0.29, "grad_norm": 1.1222012042999268, "learning_rate": 8.388309826576268e-06, "loss": 0.5562, "step": 4417 }, { "epoch": 0.29, "grad_norm": 1.3624321222305298, "learning_rate": 8.387541054807922e-06, "loss": 0.5929, "step": 4418 }, { "epoch": 0.29, "grad_norm": 1.1558488607406616, "learning_rate": 8.386772134978894e-06, "loss": 0.5513, "step": 4419 }, { "epoch": 0.29, "grad_norm": 1.140915036201477, "learning_rate": 8.386003067122794e-06, "loss": 0.5649, "step": 4420 }, { "epoch": 0.29, "grad_norm": 1.12167227268219, "learning_rate": 8.385233851273237e-06, "loss": 0.6022, "step": 4421 }, { "epoch": 0.29, "grad_norm": 1.1364848613739014, "learning_rate": 8.384464487463843e-06, "loss": 0.5341, "step": 4422 }, { "epoch": 0.29, "grad_norm": 1.262194275856018, "learning_rate": 8.383694975728236e-06, "loss": 0.584, "step": 4423 }, { "epoch": 0.29, "grad_norm": 1.1342270374298096, "learning_rate": 8.382925316100054e-06, "loss": 0.5583, "step": 4424 }, { "epoch": 0.29, "grad_norm": 1.15078866481781, "learning_rate": 8.382155508612933e-06, "loss": 0.5569, "step": 4425 }, { "epoch": 0.29, "grad_norm": 1.1346032619476318, "learning_rate": 8.38138555330052e-06, "loss": 0.6171, "step": 4426 }, { "epoch": 0.29, "grad_norm": 1.1611682176589966, "learning_rate": 8.380615450196472e-06, "loss": 0.5452, "step": 4427 }, { "epoch": 0.29, "grad_norm": 1.0971094369888306, "learning_rate": 8.379845199334442e-06, "loss": 0.5654, "step": 4428 }, { "epoch": 0.29, "grad_norm": 1.2180039882659912, "learning_rate": 8.379074800748099e-06, "loss": 0.5601, "step": 4429 }, { "epoch": 0.29, "grad_norm": 1.071210503578186, "learning_rate": 8.378304254471115e-06, "loss": 0.5638, "step": 4430 }, { "epoch": 0.29, "grad_norm": 1.094608187675476, "learning_rate": 8.377533560537168e-06, "loss": 0.5433, "step": 4431 }, { "epoch": 0.29, "grad_norm": 1.1805886030197144, "learning_rate": 8.376762718979943e-06, "loss": 0.5824, "step": 4432 }, { "epoch": 0.29, "grad_norm": 1.1520401239395142, "learning_rate": 8.375991729833131e-06, "loss": 0.5516, "step": 4433 }, { "epoch": 0.29, "grad_norm": 1.24687922000885, "learning_rate": 8.37522059313043e-06, "loss": 0.5326, "step": 4434 }, { "epoch": 0.29, "grad_norm": 1.1365751028060913, "learning_rate": 8.374449308905548e-06, "loss": 0.5097, "step": 4435 }, { "epoch": 0.29, "grad_norm": 1.0771852731704712, "learning_rate": 8.373677877192192e-06, "loss": 0.5389, "step": 4436 }, { "epoch": 0.29, "grad_norm": 1.1513439416885376, "learning_rate": 8.372906298024079e-06, "loss": 0.5526, "step": 4437 }, { "epoch": 0.29, "grad_norm": 1.0978751182556152, "learning_rate": 8.372134571434934e-06, "loss": 0.5903, "step": 4438 }, { "epoch": 0.29, "grad_norm": 1.1906909942626953, "learning_rate": 8.371362697458488e-06, "loss": 0.581, "step": 4439 }, { "epoch": 0.29, "grad_norm": 1.1115784645080566, "learning_rate": 8.370590676128477e-06, "loss": 0.5417, "step": 4440 }, { "epoch": 0.29, "grad_norm": 1.0927066802978516, "learning_rate": 8.369818507478642e-06, "loss": 0.5737, "step": 4441 }, { "epoch": 0.29, "grad_norm": 1.173009991645813, "learning_rate": 8.369046191542735e-06, "loss": 0.5435, "step": 4442 }, { "epoch": 0.29, "grad_norm": 1.0506014823913574, "learning_rate": 8.368273728354512e-06, "loss": 0.5291, "step": 4443 }, { "epoch": 0.29, "grad_norm": 1.7239065170288086, "learning_rate": 8.367501117947734e-06, "loss": 0.5466, "step": 4444 }, { "epoch": 0.29, "grad_norm": 1.1475955247879028, "learning_rate": 8.366728360356172e-06, "loss": 0.5732, "step": 4445 }, { "epoch": 0.29, "grad_norm": 1.2189234495162964, "learning_rate": 8.365955455613597e-06, "loss": 0.5868, "step": 4446 }, { "epoch": 0.29, "grad_norm": 1.0517569780349731, "learning_rate": 8.365182403753796e-06, "loss": 0.5476, "step": 4447 }, { "epoch": 0.29, "grad_norm": 1.2524584531784058, "learning_rate": 8.364409204810553e-06, "loss": 0.543, "step": 4448 }, { "epoch": 0.29, "grad_norm": 1.281139850616455, "learning_rate": 8.363635858817664e-06, "loss": 0.5678, "step": 4449 }, { "epoch": 0.29, "grad_norm": 1.225807547569275, "learning_rate": 8.362862365808929e-06, "loss": 0.5943, "step": 4450 }, { "epoch": 0.29, "grad_norm": 1.0739809274673462, "learning_rate": 8.362088725818158e-06, "loss": 0.5274, "step": 4451 }, { "epoch": 0.29, "grad_norm": 1.0905756950378418, "learning_rate": 8.361314938879161e-06, "loss": 0.5754, "step": 4452 }, { "epoch": 0.29, "grad_norm": 1.2090665102005005, "learning_rate": 8.360541005025761e-06, "loss": 0.5612, "step": 4453 }, { "epoch": 0.29, "grad_norm": 1.2407820224761963, "learning_rate": 8.359766924291786e-06, "loss": 0.5828, "step": 4454 }, { "epoch": 0.29, "grad_norm": 1.288584589958191, "learning_rate": 8.358992696711066e-06, "loss": 0.5696, "step": 4455 }, { "epoch": 0.29, "grad_norm": 1.0515015125274658, "learning_rate": 8.35821832231744e-06, "loss": 0.5095, "step": 4456 }, { "epoch": 0.29, "grad_norm": 1.0102336406707764, "learning_rate": 8.357443801144758e-06, "loss": 0.5603, "step": 4457 }, { "epoch": 0.29, "grad_norm": 1.2405822277069092, "learning_rate": 8.356669133226867e-06, "loss": 0.543, "step": 4458 }, { "epoch": 0.29, "grad_norm": 1.151465654373169, "learning_rate": 8.35589431859763e-06, "loss": 0.5477, "step": 4459 }, { "epoch": 0.29, "grad_norm": 1.162619709968567, "learning_rate": 8.35511935729091e-06, "loss": 0.5692, "step": 4460 }, { "epoch": 0.29, "grad_norm": 1.1677298545837402, "learning_rate": 8.35434424934058e-06, "loss": 0.5624, "step": 4461 }, { "epoch": 0.29, "grad_norm": 1.2157788276672363, "learning_rate": 8.353568994780518e-06, "loss": 0.5945, "step": 4462 }, { "epoch": 0.29, "grad_norm": 1.0793201923370361, "learning_rate": 8.352793593644606e-06, "loss": 0.5497, "step": 4463 }, { "epoch": 0.29, "grad_norm": 1.363779902458191, "learning_rate": 8.352018045966736e-06, "loss": 0.5683, "step": 4464 }, { "epoch": 0.29, "grad_norm": 1.2570534944534302, "learning_rate": 8.351242351780807e-06, "loss": 0.5996, "step": 4465 }, { "epoch": 0.29, "grad_norm": 1.0812482833862305, "learning_rate": 8.35046651112072e-06, "loss": 0.5486, "step": 4466 }, { "epoch": 0.29, "grad_norm": 1.2885262966156006, "learning_rate": 8.349690524020389e-06, "loss": 0.6134, "step": 4467 }, { "epoch": 0.29, "grad_norm": 1.2046712636947632, "learning_rate": 8.348914390513724e-06, "loss": 0.5483, "step": 4468 }, { "epoch": 0.29, "grad_norm": 1.184145450592041, "learning_rate": 8.348138110634652e-06, "loss": 0.5412, "step": 4469 }, { "epoch": 0.29, "grad_norm": 1.306910753250122, "learning_rate": 8.347361684417103e-06, "loss": 0.5318, "step": 4470 }, { "epoch": 0.29, "grad_norm": 1.164894700050354, "learning_rate": 8.346585111895009e-06, "loss": 0.5723, "step": 4471 }, { "epoch": 0.29, "grad_norm": 1.1065046787261963, "learning_rate": 8.345808393102315e-06, "loss": 0.5963, "step": 4472 }, { "epoch": 0.29, "grad_norm": 1.1343938112258911, "learning_rate": 8.345031528072969e-06, "loss": 0.5271, "step": 4473 }, { "epoch": 0.29, "grad_norm": 1.1011266708374023, "learning_rate": 8.344254516840925e-06, "loss": 0.5541, "step": 4474 }, { "epoch": 0.29, "grad_norm": 1.0815871953964233, "learning_rate": 8.343477359440145e-06, "loss": 0.5823, "step": 4475 }, { "epoch": 0.29, "grad_norm": 1.1652356386184692, "learning_rate": 8.342700055904594e-06, "loss": 0.5553, "step": 4476 }, { "epoch": 0.29, "grad_norm": 1.1344659328460693, "learning_rate": 8.34192260626825e-06, "loss": 0.5709, "step": 4477 }, { "epoch": 0.29, "grad_norm": 1.1491665840148926, "learning_rate": 8.341145010565088e-06, "loss": 0.5305, "step": 4478 }, { "epoch": 0.29, "grad_norm": 1.2059564590454102, "learning_rate": 8.3403672688291e-06, "loss": 0.548, "step": 4479 }, { "epoch": 0.29, "grad_norm": 1.2134772539138794, "learning_rate": 8.339589381094277e-06, "loss": 0.6122, "step": 4480 }, { "epoch": 0.29, "grad_norm": 1.138485312461853, "learning_rate": 8.33881134739462e-06, "loss": 0.5764, "step": 4481 }, { "epoch": 0.29, "grad_norm": 1.0709683895111084, "learning_rate": 8.33803316776413e-06, "loss": 0.5841, "step": 4482 }, { "epoch": 0.29, "grad_norm": 1.2227375507354736, "learning_rate": 8.337254842236825e-06, "loss": 0.594, "step": 4483 }, { "epoch": 0.29, "grad_norm": 1.2005945444107056, "learning_rate": 8.33647637084672e-06, "loss": 0.5418, "step": 4484 }, { "epoch": 0.29, "grad_norm": 1.1192705631256104, "learning_rate": 8.33569775362784e-06, "loss": 0.5194, "step": 4485 }, { "epoch": 0.29, "grad_norm": 1.0666401386260986, "learning_rate": 8.334918990614218e-06, "loss": 0.5273, "step": 4486 }, { "epoch": 0.29, "grad_norm": 1.154405951499939, "learning_rate": 8.334140081839893e-06, "loss": 0.5743, "step": 4487 }, { "epoch": 0.29, "grad_norm": 1.2743196487426758, "learning_rate": 8.333361027338904e-06, "loss": 0.6022, "step": 4488 }, { "epoch": 0.29, "grad_norm": 1.0885275602340698, "learning_rate": 8.332581827145307e-06, "loss": 0.4985, "step": 4489 }, { "epoch": 0.29, "grad_norm": 1.2443546056747437, "learning_rate": 8.331802481293156e-06, "loss": 0.5796, "step": 4490 }, { "epoch": 0.29, "grad_norm": 1.1041762828826904, "learning_rate": 8.331022989816516e-06, "loss": 0.5272, "step": 4491 }, { "epoch": 0.29, "grad_norm": 1.1775476932525635, "learning_rate": 8.330243352749454e-06, "loss": 0.5454, "step": 4492 }, { "epoch": 0.29, "grad_norm": 1.2699558734893799, "learning_rate": 8.329463570126046e-06, "loss": 0.5788, "step": 4493 }, { "epoch": 0.29, "grad_norm": 1.2599940299987793, "learning_rate": 8.328683641980378e-06, "loss": 0.5951, "step": 4494 }, { "epoch": 0.29, "grad_norm": 1.1077044010162354, "learning_rate": 8.327903568346534e-06, "loss": 0.5375, "step": 4495 }, { "epoch": 0.29, "grad_norm": 1.1015515327453613, "learning_rate": 8.327123349258613e-06, "loss": 0.5226, "step": 4496 }, { "epoch": 0.29, "grad_norm": 1.0671303272247314, "learning_rate": 8.326342984750711e-06, "loss": 0.5957, "step": 4497 }, { "epoch": 0.29, "grad_norm": 1.112524151802063, "learning_rate": 8.325562474856943e-06, "loss": 0.5531, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.146830439567566, "learning_rate": 8.324781819611418e-06, "loss": 0.6456, "step": 4499 }, { "epoch": 0.29, "grad_norm": 1.1539188623428345, "learning_rate": 8.324001019048256e-06, "loss": 0.5957, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.086916208267212, "learning_rate": 8.323220073201588e-06, "loss": 0.5636, "step": 4501 }, { "epoch": 0.29, "grad_norm": 1.0727019309997559, "learning_rate": 8.322438982105545e-06, "loss": 0.5561, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.141391396522522, "learning_rate": 8.321657745794264e-06, "loss": 0.5166, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.248253583908081, "learning_rate": 8.320876364301894e-06, "loss": 0.5832, "step": 4504 }, { "epoch": 0.29, "grad_norm": 1.1957063674926758, "learning_rate": 8.320094837662586e-06, "loss": 0.5563, "step": 4505 }, { "epoch": 0.29, "grad_norm": 1.1157881021499634, "learning_rate": 8.319313165910497e-06, "loss": 0.5517, "step": 4506 }, { "epoch": 0.29, "grad_norm": 1.1712217330932617, "learning_rate": 8.318531349079797e-06, "loss": 0.5795, "step": 4507 }, { "epoch": 0.29, "grad_norm": 1.0857014656066895, "learning_rate": 8.31774938720465e-06, "loss": 0.5641, "step": 4508 }, { "epoch": 0.29, "grad_norm": 1.1502070426940918, "learning_rate": 8.31696728031924e-06, "loss": 0.5445, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.0423694849014282, "learning_rate": 8.316185028457746e-06, "loss": 0.5462, "step": 4510 }, { "epoch": 0.29, "grad_norm": 1.3215546607971191, "learning_rate": 8.315402631654362e-06, "loss": 0.6234, "step": 4511 }, { "epoch": 0.29, "grad_norm": 1.1760042905807495, "learning_rate": 8.31462008994328e-06, "loss": 0.5691, "step": 4512 }, { "epoch": 0.29, "grad_norm": 1.207904577255249, "learning_rate": 8.313837403358708e-06, "loss": 0.5581, "step": 4513 }, { "epoch": 0.29, "grad_norm": 1.2181651592254639, "learning_rate": 8.313054571934853e-06, "loss": 0.5869, "step": 4514 }, { "epoch": 0.29, "grad_norm": 1.0935386419296265, "learning_rate": 8.312271595705931e-06, "loss": 0.5343, "step": 4515 }, { "epoch": 0.29, "grad_norm": 1.168544054031372, "learning_rate": 8.311488474706161e-06, "loss": 0.5022, "step": 4516 }, { "epoch": 0.29, "grad_norm": 1.1623481512069702, "learning_rate": 8.310705208969776e-06, "loss": 0.5942, "step": 4517 }, { "epoch": 0.29, "grad_norm": 1.0585906505584717, "learning_rate": 8.309921798531006e-06, "loss": 0.5609, "step": 4518 }, { "epoch": 0.29, "grad_norm": 1.12926185131073, "learning_rate": 8.309138243424095e-06, "loss": 0.5214, "step": 4519 }, { "epoch": 0.29, "grad_norm": 1.1634422540664673, "learning_rate": 8.30835454368329e-06, "loss": 0.5399, "step": 4520 }, { "epoch": 0.29, "grad_norm": 1.2057167291641235, "learning_rate": 8.307570699342842e-06, "loss": 0.5519, "step": 4521 }, { "epoch": 0.29, "grad_norm": 1.1942968368530273, "learning_rate": 8.306786710437014e-06, "loss": 0.5461, "step": 4522 }, { "epoch": 0.29, "grad_norm": 1.2316491603851318, "learning_rate": 8.306002577000067e-06, "loss": 0.556, "step": 4523 }, { "epoch": 0.29, "grad_norm": 1.2066751718521118, "learning_rate": 8.30521829906628e-06, "loss": 0.5458, "step": 4524 }, { "epoch": 0.29, "grad_norm": 1.0805027484893799, "learning_rate": 8.304433876669929e-06, "loss": 0.5457, "step": 4525 }, { "epoch": 0.29, "grad_norm": 1.3530185222625732, "learning_rate": 8.303649309845296e-06, "loss": 0.5627, "step": 4526 }, { "epoch": 0.29, "grad_norm": 1.181649088859558, "learning_rate": 8.30286459862668e-06, "loss": 0.551, "step": 4527 }, { "epoch": 0.29, "grad_norm": 1.164999008178711, "learning_rate": 8.30207974304837e-06, "loss": 0.6154, "step": 4528 }, { "epoch": 0.29, "grad_norm": 1.2358076572418213, "learning_rate": 8.301294743144675e-06, "loss": 0.5406, "step": 4529 }, { "epoch": 0.29, "grad_norm": 1.0610888004302979, "learning_rate": 8.300509598949903e-06, "loss": 0.5144, "step": 4530 }, { "epoch": 0.29, "grad_norm": 1.2118734121322632, "learning_rate": 8.299724310498374e-06, "loss": 0.5257, "step": 4531 }, { "epoch": 0.29, "grad_norm": 1.090264081954956, "learning_rate": 8.298938877824408e-06, "loss": 0.5682, "step": 4532 }, { "epoch": 0.29, "grad_norm": 1.2285828590393066, "learning_rate": 8.298153300962334e-06, "loss": 0.5973, "step": 4533 }, { "epoch": 0.29, "grad_norm": 1.2083477973937988, "learning_rate": 8.297367579946489e-06, "loss": 0.5275, "step": 4534 }, { "epoch": 0.29, "grad_norm": 1.23689603805542, "learning_rate": 8.296581714811213e-06, "loss": 0.5745, "step": 4535 }, { "epoch": 0.29, "grad_norm": 1.1202524900436401, "learning_rate": 8.295795705590858e-06, "loss": 0.5305, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.1521756649017334, "learning_rate": 8.295009552319777e-06, "loss": 0.5831, "step": 4537 }, { "epoch": 0.29, "grad_norm": 1.1230437755584717, "learning_rate": 8.294223255032327e-06, "loss": 0.5012, "step": 4538 }, { "epoch": 0.29, "grad_norm": 1.190630316734314, "learning_rate": 8.293436813762878e-06, "loss": 0.6093, "step": 4539 }, { "epoch": 0.29, "grad_norm": 1.2098593711853027, "learning_rate": 8.292650228545804e-06, "loss": 0.599, "step": 4540 }, { "epoch": 0.29, "grad_norm": 1.0294145345687866, "learning_rate": 8.291863499415484e-06, "loss": 0.4874, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.1688506603240967, "learning_rate": 8.291076626406303e-06, "loss": 0.5424, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.1578352451324463, "learning_rate": 8.290289609552653e-06, "loss": 0.5638, "step": 4543 }, { "epoch": 0.29, "grad_norm": 1.2042649984359741, "learning_rate": 8.289502448888934e-06, "loss": 0.5779, "step": 4544 }, { "epoch": 0.29, "grad_norm": 1.2210220098495483, "learning_rate": 8.28871514444955e-06, "loss": 0.5429, "step": 4545 }, { "epoch": 0.29, "grad_norm": 1.1854115724563599, "learning_rate": 8.287927696268912e-06, "loss": 0.538, "step": 4546 }, { "epoch": 0.29, "grad_norm": 1.1690969467163086, "learning_rate": 8.287140104381437e-06, "loss": 0.5417, "step": 4547 }, { "epoch": 0.29, "grad_norm": 1.0789892673492432, "learning_rate": 8.28635236882155e-06, "loss": 0.5318, "step": 4548 }, { "epoch": 0.29, "grad_norm": 1.2144392728805542, "learning_rate": 8.285564489623679e-06, "loss": 0.5481, "step": 4549 }, { "epoch": 0.29, "grad_norm": 1.257716178894043, "learning_rate": 8.284776466822261e-06, "loss": 0.5994, "step": 4550 }, { "epoch": 0.29, "grad_norm": 1.283042311668396, "learning_rate": 8.283988300451739e-06, "loss": 0.5891, "step": 4551 }, { "epoch": 0.29, "grad_norm": 1.1253180503845215, "learning_rate": 8.28319999054656e-06, "loss": 0.5701, "step": 4552 }, { "epoch": 0.29, "grad_norm": 1.0402154922485352, "learning_rate": 8.282411537141182e-06, "loss": 0.5386, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.1567602157592773, "learning_rate": 8.281622940270064e-06, "loss": 0.5357, "step": 4554 }, { "epoch": 0.29, "grad_norm": 1.0811679363250732, "learning_rate": 8.280834199967672e-06, "loss": 0.5471, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.047499656677246, "learning_rate": 8.280045316268484e-06, "loss": 0.5056, "step": 4556 }, { "epoch": 0.29, "grad_norm": 1.196110486984253, "learning_rate": 8.279256289206976e-06, "loss": 0.5299, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.4127299785614014, "learning_rate": 8.278467118817638e-06, "loss": 0.5588, "step": 4558 }, { "epoch": 0.29, "grad_norm": 1.2151973247528076, "learning_rate": 8.27767780513496e-06, "loss": 0.5905, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.2914358377456665, "learning_rate": 8.276888348193441e-06, "loss": 0.6269, "step": 4560 }, { "epoch": 0.29, "grad_norm": 1.234668254852295, "learning_rate": 8.276098748027587e-06, "loss": 0.5547, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.1185295581817627, "learning_rate": 8.275309004671912e-06, "loss": 0.5377, "step": 4562 }, { "epoch": 0.29, "grad_norm": 1.067530870437622, "learning_rate": 8.274519118160927e-06, "loss": 0.5632, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.1090073585510254, "learning_rate": 8.27372908852916e-06, "loss": 0.5679, "step": 4564 }, { "epoch": 0.29, "grad_norm": 1.1729700565338135, "learning_rate": 8.272938915811144e-06, "loss": 0.5317, "step": 4565 }, { "epoch": 0.29, "grad_norm": 1.142424464225769, "learning_rate": 8.272148600041409e-06, "loss": 0.6113, "step": 4566 }, { "epoch": 0.29, "grad_norm": 1.1402894258499146, "learning_rate": 8.271358141254503e-06, "loss": 0.5617, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.2415225505828857, "learning_rate": 8.270567539484972e-06, "loss": 0.6046, "step": 4568 }, { "epoch": 0.29, "grad_norm": 1.2211530208587646, "learning_rate": 8.269776794767373e-06, "loss": 0.5748, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.3067878484725952, "learning_rate": 8.268985907136264e-06, "loss": 0.5674, "step": 4570 }, { "epoch": 0.3, "grad_norm": 1.193326711654663, "learning_rate": 8.268194876626219e-06, "loss": 0.5742, "step": 4571 }, { "epoch": 0.3, "grad_norm": 1.0365511178970337, "learning_rate": 8.267403703271805e-06, "loss": 0.5113, "step": 4572 }, { "epoch": 0.3, "grad_norm": 1.1528751850128174, "learning_rate": 8.266612387107605e-06, "loss": 0.5834, "step": 4573 }, { "epoch": 0.3, "grad_norm": 1.1416103839874268, "learning_rate": 8.265820928168207e-06, "loss": 0.5609, "step": 4574 }, { "epoch": 0.3, "grad_norm": 1.28289794921875, "learning_rate": 8.265029326488201e-06, "loss": 0.5944, "step": 4575 }, { "epoch": 0.3, "grad_norm": 1.0822961330413818, "learning_rate": 8.264237582102187e-06, "loss": 0.5052, "step": 4576 }, { "epoch": 0.3, "grad_norm": 1.406314492225647, "learning_rate": 8.263445695044771e-06, "loss": 0.5706, "step": 4577 }, { "epoch": 0.3, "grad_norm": 1.3127353191375732, "learning_rate": 8.262653665350565e-06, "loss": 0.5764, "step": 4578 }, { "epoch": 0.3, "grad_norm": 1.1653861999511719, "learning_rate": 8.261861493054182e-06, "loss": 0.6127, "step": 4579 }, { "epoch": 0.3, "grad_norm": 1.152790904045105, "learning_rate": 8.261069178190251e-06, "loss": 0.5067, "step": 4580 }, { "epoch": 0.3, "grad_norm": 1.1036198139190674, "learning_rate": 8.260276720793399e-06, "loss": 0.5399, "step": 4581 }, { "epoch": 0.3, "grad_norm": 1.1265487670898438, "learning_rate": 8.259484120898262e-06, "loss": 0.5734, "step": 4582 }, { "epoch": 0.3, "grad_norm": 1.23056960105896, "learning_rate": 8.258691378539485e-06, "loss": 0.5145, "step": 4583 }, { "epoch": 0.3, "grad_norm": 1.1870856285095215, "learning_rate": 8.257898493751716e-06, "loss": 0.5743, "step": 4584 }, { "epoch": 0.3, "grad_norm": 1.204835295677185, "learning_rate": 8.257105466569608e-06, "loss": 0.5818, "step": 4585 }, { "epoch": 0.3, "grad_norm": 1.248800277709961, "learning_rate": 8.256312297027824e-06, "loss": 0.6028, "step": 4586 }, { "epoch": 0.3, "grad_norm": 1.1397472620010376, "learning_rate": 8.25551898516103e-06, "loss": 0.5653, "step": 4587 }, { "epoch": 0.3, "grad_norm": 1.1667463779449463, "learning_rate": 8.254725531003902e-06, "loss": 0.5364, "step": 4588 }, { "epoch": 0.3, "grad_norm": 1.2003967761993408, "learning_rate": 8.253931934591118e-06, "loss": 0.589, "step": 4589 }, { "epoch": 0.3, "grad_norm": 1.2578719854354858, "learning_rate": 8.253138195957364e-06, "loss": 0.5319, "step": 4590 }, { "epoch": 0.3, "grad_norm": 1.4208194017410278, "learning_rate": 8.252344315137333e-06, "loss": 0.56, "step": 4591 }, { "epoch": 0.3, "grad_norm": 1.2462248802185059, "learning_rate": 8.251550292165725e-06, "loss": 0.5272, "step": 4592 }, { "epoch": 0.3, "grad_norm": 1.2490955591201782, "learning_rate": 8.25075612707724e-06, "loss": 0.515, "step": 4593 }, { "epoch": 0.3, "grad_norm": 1.1970361471176147, "learning_rate": 8.24996181990659e-06, "loss": 0.5147, "step": 4594 }, { "epoch": 0.3, "grad_norm": 1.1935627460479736, "learning_rate": 8.249167370688498e-06, "loss": 0.577, "step": 4595 }, { "epoch": 0.3, "grad_norm": 1.1582973003387451, "learning_rate": 8.248372779457683e-06, "loss": 0.5523, "step": 4596 }, { "epoch": 0.3, "grad_norm": 1.3436214923858643, "learning_rate": 8.247578046248872e-06, "loss": 0.5496, "step": 4597 }, { "epoch": 0.3, "grad_norm": 1.2255866527557373, "learning_rate": 8.246783171096805e-06, "loss": 0.5842, "step": 4598 }, { "epoch": 0.3, "grad_norm": 1.1621055603027344, "learning_rate": 8.245988154036222e-06, "loss": 0.5706, "step": 4599 }, { "epoch": 0.3, "grad_norm": 1.053869605064392, "learning_rate": 8.245192995101872e-06, "loss": 0.5115, "step": 4600 }, { "epoch": 0.3, "grad_norm": 1.1440906524658203, "learning_rate": 8.244397694328508e-06, "loss": 0.5174, "step": 4601 }, { "epoch": 0.3, "grad_norm": 1.2696075439453125, "learning_rate": 8.243602251750893e-06, "loss": 0.5955, "step": 4602 }, { "epoch": 0.3, "grad_norm": 1.2190139293670654, "learning_rate": 8.242806667403791e-06, "loss": 0.5714, "step": 4603 }, { "epoch": 0.3, "grad_norm": 1.0690147876739502, "learning_rate": 8.242010941321976e-06, "loss": 0.5355, "step": 4604 }, { "epoch": 0.3, "grad_norm": 1.1981568336486816, "learning_rate": 8.241215073540228e-06, "loss": 0.5791, "step": 4605 }, { "epoch": 0.3, "grad_norm": 1.1561431884765625, "learning_rate": 8.240419064093332e-06, "loss": 0.5517, "step": 4606 }, { "epoch": 0.3, "grad_norm": 1.1099801063537598, "learning_rate": 8.239622913016077e-06, "loss": 0.5621, "step": 4607 }, { "epoch": 0.3, "grad_norm": 1.2125641107559204, "learning_rate": 8.238826620343264e-06, "loss": 0.6048, "step": 4608 }, { "epoch": 0.3, "grad_norm": 1.2112172842025757, "learning_rate": 8.238030186109696e-06, "loss": 0.5943, "step": 4609 }, { "epoch": 0.3, "grad_norm": 1.1206166744232178, "learning_rate": 8.237233610350183e-06, "loss": 0.5358, "step": 4610 }, { "epoch": 0.3, "grad_norm": 1.1241809129714966, "learning_rate": 8.23643689309954e-06, "loss": 0.559, "step": 4611 }, { "epoch": 0.3, "grad_norm": 1.1586787700653076, "learning_rate": 8.235640034392593e-06, "loss": 0.5856, "step": 4612 }, { "epoch": 0.3, "grad_norm": 1.108552098274231, "learning_rate": 8.234843034264165e-06, "loss": 0.5603, "step": 4613 }, { "epoch": 0.3, "grad_norm": 1.131235122680664, "learning_rate": 8.234045892749097e-06, "loss": 0.5519, "step": 4614 }, { "epoch": 0.3, "grad_norm": 1.3100807666778564, "learning_rate": 8.233248609882225e-06, "loss": 0.5202, "step": 4615 }, { "epoch": 0.3, "grad_norm": 1.010246753692627, "learning_rate": 8.232451185698398e-06, "loss": 0.5005, "step": 4616 }, { "epoch": 0.3, "grad_norm": 1.210525631904602, "learning_rate": 8.23165362023247e-06, "loss": 0.591, "step": 4617 }, { "epoch": 0.3, "grad_norm": 1.1200566291809082, "learning_rate": 8.2308559135193e-06, "loss": 0.563, "step": 4618 }, { "epoch": 0.3, "grad_norm": 1.2386291027069092, "learning_rate": 8.230058065593753e-06, "loss": 0.6303, "step": 4619 }, { "epoch": 0.3, "grad_norm": 1.148564100265503, "learning_rate": 8.229260076490702e-06, "loss": 0.5876, "step": 4620 }, { "epoch": 0.3, "grad_norm": 1.1157658100128174, "learning_rate": 8.228461946245026e-06, "loss": 0.5183, "step": 4621 }, { "epoch": 0.3, "grad_norm": 1.0608738660812378, "learning_rate": 8.227663674891607e-06, "loss": 0.5275, "step": 4622 }, { "epoch": 0.3, "grad_norm": 1.1374874114990234, "learning_rate": 8.226865262465336e-06, "loss": 0.5693, "step": 4623 }, { "epoch": 0.3, "grad_norm": 1.2754145860671997, "learning_rate": 8.22606670900111e-06, "loss": 0.5744, "step": 4624 }, { "epoch": 0.3, "grad_norm": 1.2182146310806274, "learning_rate": 8.225268014533832e-06, "loss": 0.5746, "step": 4625 }, { "epoch": 0.3, "grad_norm": 1.1129134893417358, "learning_rate": 8.224469179098409e-06, "loss": 0.5173, "step": 4626 }, { "epoch": 0.3, "grad_norm": 1.1650114059448242, "learning_rate": 8.223670202729759e-06, "loss": 0.5753, "step": 4627 }, { "epoch": 0.3, "grad_norm": 1.069318175315857, "learning_rate": 8.2228710854628e-06, "loss": 0.5759, "step": 4628 }, { "epoch": 0.3, "grad_norm": 1.2067476511001587, "learning_rate": 8.222071827332463e-06, "loss": 0.5794, "step": 4629 }, { "epoch": 0.3, "grad_norm": 1.2448790073394775, "learning_rate": 8.22127242837368e-06, "loss": 0.608, "step": 4630 }, { "epoch": 0.3, "grad_norm": 1.2709656953811646, "learning_rate": 8.220472888621386e-06, "loss": 0.5676, "step": 4631 }, { "epoch": 0.3, "grad_norm": 1.2820338010787964, "learning_rate": 8.219673208110533e-06, "loss": 0.5803, "step": 4632 }, { "epoch": 0.3, "grad_norm": 1.1775102615356445, "learning_rate": 8.218873386876071e-06, "loss": 0.5398, "step": 4633 }, { "epoch": 0.3, "grad_norm": 1.1891427040100098, "learning_rate": 8.21807342495296e-06, "loss": 0.6117, "step": 4634 }, { "epoch": 0.3, "grad_norm": 1.1154675483703613, "learning_rate": 8.21727332237616e-06, "loss": 0.5595, "step": 4635 }, { "epoch": 0.3, "grad_norm": 1.1941343545913696, "learning_rate": 8.216473079180644e-06, "loss": 0.5748, "step": 4636 }, { "epoch": 0.3, "grad_norm": 1.02490234375, "learning_rate": 8.21567269540139e-06, "loss": 0.5286, "step": 4637 }, { "epoch": 0.3, "grad_norm": 1.3881347179412842, "learning_rate": 8.214872171073377e-06, "loss": 0.5558, "step": 4638 }, { "epoch": 0.3, "grad_norm": 1.1232655048370361, "learning_rate": 8.214071506231598e-06, "loss": 0.5559, "step": 4639 }, { "epoch": 0.3, "grad_norm": 1.2071049213409424, "learning_rate": 8.213270700911044e-06, "loss": 0.5516, "step": 4640 }, { "epoch": 0.3, "grad_norm": 1.1561723947525024, "learning_rate": 8.21246975514672e-06, "loss": 0.5475, "step": 4641 }, { "epoch": 0.3, "grad_norm": 1.2812962532043457, "learning_rate": 8.211668668973629e-06, "loss": 0.5568, "step": 4642 }, { "epoch": 0.3, "grad_norm": 1.138037919998169, "learning_rate": 8.210867442426787e-06, "loss": 0.5542, "step": 4643 }, { "epoch": 0.3, "grad_norm": 1.2152416706085205, "learning_rate": 8.210066075541215e-06, "loss": 0.5674, "step": 4644 }, { "epoch": 0.3, "grad_norm": 1.2717140913009644, "learning_rate": 8.209264568351936e-06, "loss": 0.532, "step": 4645 }, { "epoch": 0.3, "grad_norm": 1.172376275062561, "learning_rate": 8.208462920893982e-06, "loss": 0.5724, "step": 4646 }, { "epoch": 0.3, "grad_norm": 1.2705049514770508, "learning_rate": 8.207661133202393e-06, "loss": 0.5824, "step": 4647 }, { "epoch": 0.3, "grad_norm": 1.1452248096466064, "learning_rate": 8.20685920531221e-06, "loss": 0.5828, "step": 4648 }, { "epoch": 0.3, "grad_norm": 1.157062292098999, "learning_rate": 8.206057137258486e-06, "loss": 0.5862, "step": 4649 }, { "epoch": 0.3, "grad_norm": 1.217957854270935, "learning_rate": 8.205254929076276e-06, "loss": 0.5779, "step": 4650 }, { "epoch": 0.3, "grad_norm": 1.2422736883163452, "learning_rate": 8.204452580800644e-06, "loss": 0.6021, "step": 4651 }, { "epoch": 0.3, "grad_norm": 1.2215067148208618, "learning_rate": 8.203650092466656e-06, "loss": 0.5752, "step": 4652 }, { "epoch": 0.3, "grad_norm": 1.2421292066574097, "learning_rate": 8.202847464109388e-06, "loss": 0.6378, "step": 4653 }, { "epoch": 0.3, "grad_norm": 1.223987102508545, "learning_rate": 8.20204469576392e-06, "loss": 0.5894, "step": 4654 }, { "epoch": 0.3, "grad_norm": 1.1516979932785034, "learning_rate": 8.20124178746534e-06, "loss": 0.5492, "step": 4655 }, { "epoch": 0.3, "grad_norm": 1.1444668769836426, "learning_rate": 8.200438739248744e-06, "loss": 0.5304, "step": 4656 }, { "epoch": 0.3, "grad_norm": 1.2535308599472046, "learning_rate": 8.199635551149225e-06, "loss": 0.5402, "step": 4657 }, { "epoch": 0.3, "grad_norm": 1.091137409210205, "learning_rate": 8.198832223201892e-06, "loss": 0.6064, "step": 4658 }, { "epoch": 0.3, "grad_norm": 1.1739733219146729, "learning_rate": 8.198028755441855e-06, "loss": 0.5399, "step": 4659 }, { "epoch": 0.3, "grad_norm": 1.1637754440307617, "learning_rate": 8.197225147904233e-06, "loss": 0.5327, "step": 4660 }, { "epoch": 0.3, "grad_norm": 1.250483512878418, "learning_rate": 8.196421400624147e-06, "loss": 0.6324, "step": 4661 }, { "epoch": 0.3, "grad_norm": 1.1159433126449585, "learning_rate": 8.195617513636732e-06, "loss": 0.5353, "step": 4662 }, { "epoch": 0.3, "grad_norm": 1.1518477201461792, "learning_rate": 8.194813486977116e-06, "loss": 0.6163, "step": 4663 }, { "epoch": 0.3, "grad_norm": 1.1162686347961426, "learning_rate": 8.194009320680448e-06, "loss": 0.5441, "step": 4664 }, { "epoch": 0.3, "grad_norm": 1.1306248903274536, "learning_rate": 8.19320501478187e-06, "loss": 0.5194, "step": 4665 }, { "epoch": 0.3, "grad_norm": 1.189937949180603, "learning_rate": 8.192400569316543e-06, "loss": 0.5694, "step": 4666 }, { "epoch": 0.3, "grad_norm": 1.1435658931732178, "learning_rate": 8.191595984319623e-06, "loss": 0.5033, "step": 4667 }, { "epoch": 0.3, "grad_norm": 1.1385897397994995, "learning_rate": 8.190791259826276e-06, "loss": 0.5349, "step": 4668 }, { "epoch": 0.3, "grad_norm": 1.2038731575012207, "learning_rate": 8.189986395871676e-06, "loss": 0.5464, "step": 4669 }, { "epoch": 0.3, "grad_norm": 1.1263988018035889, "learning_rate": 8.189181392491001e-06, "loss": 0.5839, "step": 4670 }, { "epoch": 0.3, "grad_norm": 1.0798659324645996, "learning_rate": 8.188376249719433e-06, "loss": 0.5373, "step": 4671 }, { "epoch": 0.3, "grad_norm": 1.1596952676773071, "learning_rate": 8.187570967592169e-06, "loss": 0.5537, "step": 4672 }, { "epoch": 0.3, "grad_norm": 1.1300950050354004, "learning_rate": 8.1867655461444e-06, "loss": 0.5515, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.114730954170227, "learning_rate": 8.185959985411331e-06, "loss": 0.6152, "step": 4674 }, { "epoch": 0.3, "grad_norm": 1.1525579690933228, "learning_rate": 8.185154285428172e-06, "loss": 0.5408, "step": 4675 }, { "epoch": 0.3, "grad_norm": 1.1660763025283813, "learning_rate": 8.184348446230137e-06, "loss": 0.5963, "step": 4676 }, { "epoch": 0.3, "grad_norm": 1.0925673246383667, "learning_rate": 8.183542467852445e-06, "loss": 0.5349, "step": 4677 }, { "epoch": 0.3, "grad_norm": 1.2303006649017334, "learning_rate": 8.182736350330327e-06, "loss": 0.6018, "step": 4678 }, { "epoch": 0.3, "grad_norm": 1.1342778205871582, "learning_rate": 8.181930093699016e-06, "loss": 0.587, "step": 4679 }, { "epoch": 0.3, "grad_norm": 1.161067247390747, "learning_rate": 8.181123697993748e-06, "loss": 0.547, "step": 4680 }, { "epoch": 0.3, "grad_norm": 1.2563939094543457, "learning_rate": 8.180317163249771e-06, "loss": 0.6112, "step": 4681 }, { "epoch": 0.3, "grad_norm": 1.1589431762695312, "learning_rate": 8.179510489502337e-06, "loss": 0.5974, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.0581014156341553, "learning_rate": 8.1787036767867e-06, "loss": 0.5531, "step": 4683 }, { "epoch": 0.3, "grad_norm": 1.2351536750793457, "learning_rate": 8.17789672513813e-06, "loss": 0.5554, "step": 4684 }, { "epoch": 0.3, "grad_norm": 1.038382649421692, "learning_rate": 8.177089634591893e-06, "loss": 0.5154, "step": 4685 }, { "epoch": 0.3, "grad_norm": 1.3369332551956177, "learning_rate": 8.176282405183263e-06, "loss": 0.5679, "step": 4686 }, { "epoch": 0.3, "grad_norm": 1.046993374824524, "learning_rate": 8.175475036947524e-06, "loss": 0.5123, "step": 4687 }, { "epoch": 0.3, "grad_norm": 1.2117737531661987, "learning_rate": 8.174667529919966e-06, "loss": 0.5383, "step": 4688 }, { "epoch": 0.3, "grad_norm": 1.2640533447265625, "learning_rate": 8.17385988413588e-06, "loss": 0.5802, "step": 4689 }, { "epoch": 0.3, "grad_norm": 1.065331220626831, "learning_rate": 8.173052099630565e-06, "loss": 0.5316, "step": 4690 }, { "epoch": 0.3, "grad_norm": 1.0737777948379517, "learning_rate": 8.172244176439333e-06, "loss": 0.5123, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.1435551643371582, "learning_rate": 8.171436114597489e-06, "loss": 0.5682, "step": 4692 }, { "epoch": 0.3, "grad_norm": 1.0600639581680298, "learning_rate": 8.170627914140357e-06, "loss": 0.5707, "step": 4693 }, { "epoch": 0.3, "grad_norm": 1.1936761140823364, "learning_rate": 8.16981957510326e-06, "loss": 0.5736, "step": 4694 }, { "epoch": 0.3, "grad_norm": 1.2905770540237427, "learning_rate": 8.169011097521525e-06, "loss": 0.5791, "step": 4695 }, { "epoch": 0.3, "grad_norm": 1.2331393957138062, "learning_rate": 8.168202481430494e-06, "loss": 0.5717, "step": 4696 }, { "epoch": 0.3, "grad_norm": 1.1017723083496094, "learning_rate": 8.167393726865504e-06, "loss": 0.5418, "step": 4697 }, { "epoch": 0.3, "grad_norm": 1.1917537450790405, "learning_rate": 8.166584833861909e-06, "loss": 0.5533, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.2931568622589111, "learning_rate": 8.165775802455057e-06, "loss": 0.6076, "step": 4699 }, { "epoch": 0.3, "grad_norm": 1.1881548166275024, "learning_rate": 8.164966632680316e-06, "loss": 0.5858, "step": 4700 }, { "epoch": 0.3, "grad_norm": 1.2850698232650757, "learning_rate": 8.164157324573047e-06, "loss": 0.6189, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.2530097961425781, "learning_rate": 8.163347878168625e-06, "loss": 0.528, "step": 4702 }, { "epoch": 0.3, "grad_norm": 1.2273836135864258, "learning_rate": 8.162538293502431e-06, "loss": 0.5621, "step": 4703 }, { "epoch": 0.3, "grad_norm": 1.0761053562164307, "learning_rate": 8.161728570609845e-06, "loss": 0.5353, "step": 4704 }, { "epoch": 0.3, "grad_norm": 1.2308977842330933, "learning_rate": 8.160918709526262e-06, "loss": 0.5714, "step": 4705 }, { "epoch": 0.3, "grad_norm": 1.161710262298584, "learning_rate": 8.160108710287078e-06, "loss": 0.5424, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.1442722082138062, "learning_rate": 8.159298572927694e-06, "loss": 0.5549, "step": 4707 }, { "epoch": 0.3, "grad_norm": 1.1107795238494873, "learning_rate": 8.158488297483523e-06, "loss": 0.5593, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.121049165725708, "learning_rate": 8.157677883989976e-06, "loss": 0.5536, "step": 4709 }, { "epoch": 0.3, "grad_norm": 1.3802584409713745, "learning_rate": 8.156867332482474e-06, "loss": 0.5978, "step": 4710 }, { "epoch": 0.3, "grad_norm": 1.1794406175613403, "learning_rate": 8.156056642996447e-06, "loss": 0.5535, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.213496208190918, "learning_rate": 8.155245815567328e-06, "loss": 0.5587, "step": 4712 }, { "epoch": 0.3, "grad_norm": 1.2632566690444946, "learning_rate": 8.154434850230553e-06, "loss": 0.5932, "step": 4713 }, { "epoch": 0.3, "grad_norm": 1.1838246583938599, "learning_rate": 8.153623747021569e-06, "loss": 0.5638, "step": 4714 }, { "epoch": 0.3, "grad_norm": 1.177177906036377, "learning_rate": 8.15281250597583e-06, "loss": 0.5378, "step": 4715 }, { "epoch": 0.3, "grad_norm": 1.1573357582092285, "learning_rate": 8.152001127128788e-06, "loss": 0.5705, "step": 4716 }, { "epoch": 0.3, "grad_norm": 1.160605788230896, "learning_rate": 8.151189610515911e-06, "loss": 0.5599, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.1021414995193481, "learning_rate": 8.150377956172666e-06, "loss": 0.5397, "step": 4718 }, { "epoch": 0.3, "grad_norm": 1.1454122066497803, "learning_rate": 8.149566164134529e-06, "loss": 0.5866, "step": 4719 }, { "epoch": 0.3, "grad_norm": 1.2193049192428589, "learning_rate": 8.14875423443698e-06, "loss": 0.5596, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.3936996459960938, "learning_rate": 8.147942167115508e-06, "loss": 0.5666, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.227713704109192, "learning_rate": 8.147129962205606e-06, "loss": 0.5619, "step": 4722 }, { "epoch": 0.3, "grad_norm": 1.2147077322006226, "learning_rate": 8.146317619742772e-06, "loss": 0.5851, "step": 4723 }, { "epoch": 0.3, "grad_norm": 1.1582871675491333, "learning_rate": 8.145505139762512e-06, "loss": 0.5707, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.1050046682357788, "learning_rate": 8.144692522300339e-06, "loss": 0.6119, "step": 4725 }, { "epoch": 0.31, "grad_norm": 1.0621004104614258, "learning_rate": 8.14387976739177e-06, "loss": 0.5567, "step": 4726 }, { "epoch": 0.31, "grad_norm": 1.1747993230819702, "learning_rate": 8.143066875072325e-06, "loss": 0.4981, "step": 4727 }, { "epoch": 0.31, "grad_norm": 1.3628517389297485, "learning_rate": 8.142253845377538e-06, "loss": 0.5557, "step": 4728 }, { "epoch": 0.31, "grad_norm": 1.2293494939804077, "learning_rate": 8.141440678342942e-06, "loss": 0.5537, "step": 4729 }, { "epoch": 0.31, "grad_norm": 1.2539228200912476, "learning_rate": 8.14062737400408e-06, "loss": 0.6103, "step": 4730 }, { "epoch": 0.31, "grad_norm": 1.0771300792694092, "learning_rate": 8.139813932396496e-06, "loss": 0.5474, "step": 4731 }, { "epoch": 0.31, "grad_norm": 1.15011465549469, "learning_rate": 8.13900035355575e-06, "loss": 0.5684, "step": 4732 }, { "epoch": 0.31, "grad_norm": 1.23301100730896, "learning_rate": 8.138186637517394e-06, "loss": 0.5386, "step": 4733 }, { "epoch": 0.31, "grad_norm": 1.1550657749176025, "learning_rate": 8.137372784316998e-06, "loss": 0.5199, "step": 4734 }, { "epoch": 0.31, "grad_norm": 1.1814695596694946, "learning_rate": 8.13655879399013e-06, "loss": 0.5645, "step": 4735 }, { "epoch": 0.31, "grad_norm": 1.21467125415802, "learning_rate": 8.135744666572372e-06, "loss": 0.5822, "step": 4736 }, { "epoch": 0.31, "grad_norm": 1.156336784362793, "learning_rate": 8.134930402099304e-06, "loss": 0.5277, "step": 4737 }, { "epoch": 0.31, "grad_norm": 1.1420670747756958, "learning_rate": 8.134116000606517e-06, "loss": 0.5838, "step": 4738 }, { "epoch": 0.31, "grad_norm": 1.118360161781311, "learning_rate": 8.133301462129605e-06, "loss": 0.5088, "step": 4739 }, { "epoch": 0.31, "grad_norm": 1.2108335494995117, "learning_rate": 8.13248678670417e-06, "loss": 0.551, "step": 4740 }, { "epoch": 0.31, "grad_norm": 1.2277374267578125, "learning_rate": 8.13167197436582e-06, "loss": 0.5835, "step": 4741 }, { "epoch": 0.31, "grad_norm": 1.180535078048706, "learning_rate": 8.130857025150168e-06, "loss": 0.5734, "step": 4742 }, { "epoch": 0.31, "grad_norm": 1.138743281364441, "learning_rate": 8.130041939092832e-06, "loss": 0.5481, "step": 4743 }, { "epoch": 0.31, "grad_norm": 1.1648602485656738, "learning_rate": 8.129226716229438e-06, "loss": 0.5902, "step": 4744 }, { "epoch": 0.31, "grad_norm": 1.0406643152236938, "learning_rate": 8.128411356595621e-06, "loss": 0.5553, "step": 4745 }, { "epoch": 0.31, "grad_norm": 1.0722829103469849, "learning_rate": 8.127595860227013e-06, "loss": 0.5361, "step": 4746 }, { "epoch": 0.31, "grad_norm": 1.1839747428894043, "learning_rate": 8.12678022715926e-06, "loss": 0.5398, "step": 4747 }, { "epoch": 0.31, "grad_norm": 1.122078537940979, "learning_rate": 8.125964457428008e-06, "loss": 0.5518, "step": 4748 }, { "epoch": 0.31, "grad_norm": 1.1687086820602417, "learning_rate": 8.125148551068916e-06, "loss": 0.5863, "step": 4749 }, { "epoch": 0.31, "grad_norm": 1.3018648624420166, "learning_rate": 8.124332508117645e-06, "loss": 0.6215, "step": 4750 }, { "epoch": 0.31, "grad_norm": 1.2535998821258545, "learning_rate": 8.123516328609859e-06, "loss": 0.5773, "step": 4751 }, { "epoch": 0.31, "grad_norm": 1.1420323848724365, "learning_rate": 8.122700012581235e-06, "loss": 0.5947, "step": 4752 }, { "epoch": 0.31, "grad_norm": 1.2381410598754883, "learning_rate": 8.121883560067449e-06, "loss": 0.5398, "step": 4753 }, { "epoch": 0.31, "grad_norm": 1.0683287382125854, "learning_rate": 8.121066971104188e-06, "loss": 0.5699, "step": 4754 }, { "epoch": 0.31, "grad_norm": 1.2420501708984375, "learning_rate": 8.120250245727142e-06, "loss": 0.5806, "step": 4755 }, { "epoch": 0.31, "grad_norm": 1.1848307847976685, "learning_rate": 8.119433383972008e-06, "loss": 0.5895, "step": 4756 }, { "epoch": 0.31, "grad_norm": 1.1975256204605103, "learning_rate": 8.118616385874486e-06, "loss": 0.5561, "step": 4757 }, { "epoch": 0.31, "grad_norm": 1.2398018836975098, "learning_rate": 8.117799251470292e-06, "loss": 0.5805, "step": 4758 }, { "epoch": 0.31, "grad_norm": 1.107002854347229, "learning_rate": 8.116981980795135e-06, "loss": 0.5595, "step": 4759 }, { "epoch": 0.31, "grad_norm": 1.0663529634475708, "learning_rate": 8.116164573884739e-06, "loss": 0.5162, "step": 4760 }, { "epoch": 0.31, "grad_norm": 1.2042031288146973, "learning_rate": 8.115347030774827e-06, "loss": 0.5809, "step": 4761 }, { "epoch": 0.31, "grad_norm": 1.3441112041473389, "learning_rate": 8.114529351501137e-06, "loss": 0.6115, "step": 4762 }, { "epoch": 0.31, "grad_norm": 1.1703108549118042, "learning_rate": 8.113711536099402e-06, "loss": 0.5642, "step": 4763 }, { "epoch": 0.31, "grad_norm": 1.128806471824646, "learning_rate": 8.11289358460537e-06, "loss": 0.5463, "step": 4764 }, { "epoch": 0.31, "grad_norm": 1.137448787689209, "learning_rate": 8.112075497054791e-06, "loss": 0.5865, "step": 4765 }, { "epoch": 0.31, "grad_norm": 1.1397027969360352, "learning_rate": 8.111257273483422e-06, "loss": 0.5949, "step": 4766 }, { "epoch": 0.31, "grad_norm": 1.2060493230819702, "learning_rate": 8.110438913927024e-06, "loss": 0.6344, "step": 4767 }, { "epoch": 0.31, "grad_norm": 1.1780316829681396, "learning_rate": 8.109620418421365e-06, "loss": 0.6006, "step": 4768 }, { "epoch": 0.31, "grad_norm": 1.1582057476043701, "learning_rate": 8.108801787002222e-06, "loss": 0.5318, "step": 4769 }, { "epoch": 0.31, "grad_norm": 1.1595065593719482, "learning_rate": 8.107983019705374e-06, "loss": 0.6025, "step": 4770 }, { "epoch": 0.31, "grad_norm": 1.180004358291626, "learning_rate": 8.107164116566607e-06, "loss": 0.6311, "step": 4771 }, { "epoch": 0.31, "grad_norm": 1.1321882009506226, "learning_rate": 8.10634507762171e-06, "loss": 0.5175, "step": 4772 }, { "epoch": 0.31, "grad_norm": 1.2108877897262573, "learning_rate": 8.105525902906487e-06, "loss": 0.5772, "step": 4773 }, { "epoch": 0.31, "grad_norm": 1.1063463687896729, "learning_rate": 8.104706592456738e-06, "loss": 0.5381, "step": 4774 }, { "epoch": 0.31, "grad_norm": 1.0883692502975464, "learning_rate": 8.103887146308274e-06, "loss": 0.5778, "step": 4775 }, { "epoch": 0.31, "grad_norm": 1.219451665878296, "learning_rate": 8.103067564496912e-06, "loss": 0.5593, "step": 4776 }, { "epoch": 0.31, "grad_norm": 1.0959181785583496, "learning_rate": 8.102247847058472e-06, "loss": 0.5298, "step": 4777 }, { "epoch": 0.31, "grad_norm": 1.0675091743469238, "learning_rate": 8.101427994028784e-06, "loss": 0.5467, "step": 4778 }, { "epoch": 0.31, "grad_norm": 1.1265064477920532, "learning_rate": 8.100608005443678e-06, "loss": 0.5552, "step": 4779 }, { "epoch": 0.31, "grad_norm": 1.1886732578277588, "learning_rate": 8.099787881338996e-06, "loss": 0.561, "step": 4780 }, { "epoch": 0.31, "grad_norm": 1.253777027130127, "learning_rate": 8.098967621750584e-06, "loss": 0.5382, "step": 4781 }, { "epoch": 0.31, "grad_norm": 1.2415269613265991, "learning_rate": 8.098147226714294e-06, "loss": 0.5167, "step": 4782 }, { "epoch": 0.31, "grad_norm": 1.1924042701721191, "learning_rate": 8.09732669626598e-06, "loss": 0.5056, "step": 4783 }, { "epoch": 0.31, "grad_norm": 1.1123046875, "learning_rate": 8.096506030441508e-06, "loss": 0.5409, "step": 4784 }, { "epoch": 0.31, "grad_norm": 1.122882604598999, "learning_rate": 8.095685229276747e-06, "loss": 0.5504, "step": 4785 }, { "epoch": 0.31, "grad_norm": 1.1917279958724976, "learning_rate": 8.094864292807571e-06, "loss": 0.6075, "step": 4786 }, { "epoch": 0.31, "grad_norm": 1.140260100364685, "learning_rate": 8.09404322106986e-06, "loss": 0.5028, "step": 4787 }, { "epoch": 0.31, "grad_norm": 1.0596709251403809, "learning_rate": 8.093222014099505e-06, "loss": 0.5323, "step": 4788 }, { "epoch": 0.31, "grad_norm": 1.2026252746582031, "learning_rate": 8.092400671932396e-06, "loss": 0.564, "step": 4789 }, { "epoch": 0.31, "grad_norm": 1.2219605445861816, "learning_rate": 8.091579194604432e-06, "loss": 0.5575, "step": 4790 }, { "epoch": 0.31, "grad_norm": 1.0941299200057983, "learning_rate": 8.090757582151519e-06, "loss": 0.559, "step": 4791 }, { "epoch": 0.31, "grad_norm": 1.2252895832061768, "learning_rate": 8.089935834609563e-06, "loss": 0.5792, "step": 4792 }, { "epoch": 0.31, "grad_norm": 1.0858091115951538, "learning_rate": 8.089113952014488e-06, "loss": 0.5716, "step": 4793 }, { "epoch": 0.31, "grad_norm": 1.223053216934204, "learning_rate": 8.088291934402208e-06, "loss": 0.6172, "step": 4794 }, { "epoch": 0.31, "grad_norm": 1.065733551979065, "learning_rate": 8.087469781808658e-06, "loss": 0.5392, "step": 4795 }, { "epoch": 0.31, "grad_norm": 1.117041826248169, "learning_rate": 8.086647494269768e-06, "loss": 0.5349, "step": 4796 }, { "epoch": 0.31, "grad_norm": 1.1256568431854248, "learning_rate": 8.085825071821482e-06, "loss": 0.5316, "step": 4797 }, { "epoch": 0.31, "grad_norm": 1.0919804573059082, "learning_rate": 8.085002514499742e-06, "loss": 0.5739, "step": 4798 }, { "epoch": 0.31, "grad_norm": 1.2111748456954956, "learning_rate": 8.0841798223405e-06, "loss": 0.5942, "step": 4799 }, { "epoch": 0.31, "grad_norm": 1.1312488317489624, "learning_rate": 8.083356995379718e-06, "loss": 0.5654, "step": 4800 }, { "epoch": 0.31, "grad_norm": 1.1893064975738525, "learning_rate": 8.082534033653357e-06, "loss": 0.6113, "step": 4801 }, { "epoch": 0.31, "grad_norm": 1.2963674068450928, "learning_rate": 8.081710937197385e-06, "loss": 0.5493, "step": 4802 }, { "epoch": 0.31, "grad_norm": 1.140413522720337, "learning_rate": 8.08088770604778e-06, "loss": 0.5368, "step": 4803 }, { "epoch": 0.31, "grad_norm": 1.1556423902511597, "learning_rate": 8.080064340240522e-06, "loss": 0.5213, "step": 4804 }, { "epoch": 0.31, "grad_norm": 1.195914626121521, "learning_rate": 8.079240839811595e-06, "loss": 0.5269, "step": 4805 }, { "epoch": 0.31, "grad_norm": 1.283606767654419, "learning_rate": 8.078417204797e-06, "loss": 0.5586, "step": 4806 }, { "epoch": 0.31, "grad_norm": 1.261691927909851, "learning_rate": 8.07759343523273e-06, "loss": 0.6009, "step": 4807 }, { "epoch": 0.31, "grad_norm": 1.1294625997543335, "learning_rate": 8.076769531154789e-06, "loss": 0.5494, "step": 4808 }, { "epoch": 0.31, "grad_norm": 1.1760510206222534, "learning_rate": 8.075945492599193e-06, "loss": 0.5271, "step": 4809 }, { "epoch": 0.31, "grad_norm": 1.9821515083312988, "learning_rate": 8.075121319601954e-06, "loss": 0.5683, "step": 4810 }, { "epoch": 0.31, "grad_norm": 1.2302148342132568, "learning_rate": 8.074297012199097e-06, "loss": 0.5618, "step": 4811 }, { "epoch": 0.31, "grad_norm": 1.2202064990997314, "learning_rate": 8.073472570426649e-06, "loss": 0.5764, "step": 4812 }, { "epoch": 0.31, "grad_norm": 1.2150570154190063, "learning_rate": 8.072647994320643e-06, "loss": 0.5184, "step": 4813 }, { "epoch": 0.31, "grad_norm": 1.1386945247650146, "learning_rate": 8.071823283917123e-06, "loss": 0.5501, "step": 4814 }, { "epoch": 0.31, "grad_norm": 1.2028141021728516, "learning_rate": 8.070998439252132e-06, "loss": 0.4946, "step": 4815 }, { "epoch": 0.31, "grad_norm": 1.1564229726791382, "learning_rate": 8.070173460361721e-06, "loss": 0.5707, "step": 4816 }, { "epoch": 0.31, "grad_norm": 1.2219865322113037, "learning_rate": 8.069348347281952e-06, "loss": 0.5775, "step": 4817 }, { "epoch": 0.31, "grad_norm": 1.2082653045654297, "learning_rate": 8.068523100048884e-06, "loss": 0.5681, "step": 4818 }, { "epoch": 0.31, "grad_norm": 1.1581029891967773, "learning_rate": 8.067697718698588e-06, "loss": 0.5234, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.0729800462722778, "learning_rate": 8.06687220326714e-06, "loss": 0.4952, "step": 4820 }, { "epoch": 0.31, "grad_norm": 1.055768370628357, "learning_rate": 8.066046553790622e-06, "loss": 0.5525, "step": 4821 }, { "epoch": 0.31, "grad_norm": 1.0957218408584595, "learning_rate": 8.065220770305118e-06, "loss": 0.5268, "step": 4822 }, { "epoch": 0.31, "grad_norm": 1.2633432149887085, "learning_rate": 8.064394852846723e-06, "loss": 0.5413, "step": 4823 }, { "epoch": 0.31, "grad_norm": 1.1718250513076782, "learning_rate": 8.063568801451534e-06, "loss": 0.581, "step": 4824 }, { "epoch": 0.31, "grad_norm": 1.2012674808502197, "learning_rate": 8.062742616155657e-06, "loss": 0.5538, "step": 4825 }, { "epoch": 0.31, "grad_norm": 1.2017333507537842, "learning_rate": 8.061916296995205e-06, "loss": 0.6451, "step": 4826 }, { "epoch": 0.31, "grad_norm": 1.1330372095108032, "learning_rate": 8.06108984400629e-06, "loss": 0.5389, "step": 4827 }, { "epoch": 0.31, "grad_norm": 1.2716891765594482, "learning_rate": 8.060263257225033e-06, "loss": 0.6051, "step": 4828 }, { "epoch": 0.31, "grad_norm": 1.1120624542236328, "learning_rate": 8.059436536687568e-06, "loss": 0.5406, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.332822322845459, "learning_rate": 8.058609682430023e-06, "loss": 0.5766, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.2066810131072998, "learning_rate": 8.057782694488541e-06, "loss": 0.5183, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.1130647659301758, "learning_rate": 8.056955572899268e-06, "loss": 0.5529, "step": 4832 }, { "epoch": 0.31, "grad_norm": 1.0586270093917847, "learning_rate": 8.056128317698352e-06, "loss": 0.5296, "step": 4833 }, { "epoch": 0.31, "grad_norm": 1.192774772644043, "learning_rate": 8.055300928921954e-06, "loss": 0.5812, "step": 4834 }, { "epoch": 0.31, "grad_norm": 1.1783690452575684, "learning_rate": 8.054473406606233e-06, "loss": 0.5444, "step": 4835 }, { "epoch": 0.31, "grad_norm": 1.0683788061141968, "learning_rate": 8.053645750787362e-06, "loss": 0.4612, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.202007532119751, "learning_rate": 8.052817961501512e-06, "loss": 0.5515, "step": 4837 }, { "epoch": 0.31, "grad_norm": 1.1979700326919556, "learning_rate": 8.051990038784867e-06, "loss": 0.5877, "step": 4838 }, { "epoch": 0.31, "grad_norm": 1.1926647424697876, "learning_rate": 8.05116198267361e-06, "loss": 0.5252, "step": 4839 }, { "epoch": 0.31, "grad_norm": 1.1058015823364258, "learning_rate": 8.050333793203936e-06, "loss": 0.5652, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.2380850315093994, "learning_rate": 8.049505470412043e-06, "loss": 0.5984, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.2741628885269165, "learning_rate": 8.048677014334132e-06, "loss": 0.5334, "step": 4842 }, { "epoch": 0.31, "grad_norm": 1.1875132322311401, "learning_rate": 8.047848425006414e-06, "loss": 0.5513, "step": 4843 }, { "epoch": 0.31, "grad_norm": 1.1437066793441772, "learning_rate": 8.047019702465106e-06, "loss": 0.5336, "step": 4844 }, { "epoch": 0.31, "grad_norm": 1.1397351026535034, "learning_rate": 8.046190846746427e-06, "loss": 0.5386, "step": 4845 }, { "epoch": 0.31, "grad_norm": 1.3223615884780884, "learning_rate": 8.045361857886606e-06, "loss": 0.5716, "step": 4846 }, { "epoch": 0.31, "grad_norm": 1.2260327339172363, "learning_rate": 8.044532735921876e-06, "loss": 0.556, "step": 4847 }, { "epoch": 0.31, "grad_norm": 1.2076138257980347, "learning_rate": 8.043703480888474e-06, "loss": 0.5771, "step": 4848 }, { "epoch": 0.31, "grad_norm": 1.1914371252059937, "learning_rate": 8.042874092822647e-06, "loss": 0.5611, "step": 4849 }, { "epoch": 0.31, "grad_norm": 2.4361963272094727, "learning_rate": 8.042044571760643e-06, "loss": 0.512, "step": 4850 }, { "epoch": 0.31, "grad_norm": 1.2666715383529663, "learning_rate": 8.04121491773872e-06, "loss": 0.578, "step": 4851 }, { "epoch": 0.31, "grad_norm": 1.1806942224502563, "learning_rate": 8.04038513079314e-06, "loss": 0.5348, "step": 4852 }, { "epoch": 0.31, "grad_norm": 1.2145602703094482, "learning_rate": 8.039555210960168e-06, "loss": 0.6036, "step": 4853 }, { "epoch": 0.31, "grad_norm": 1.25577974319458, "learning_rate": 8.038725158276082e-06, "loss": 0.6261, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.1162503957748413, "learning_rate": 8.03789497277716e-06, "loss": 0.4792, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.1551904678344727, "learning_rate": 8.037064654499685e-06, "loss": 0.5969, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.1014868021011353, "learning_rate": 8.036234203479949e-06, "loss": 0.5781, "step": 4857 }, { "epoch": 0.31, "grad_norm": 1.2672700881958008, "learning_rate": 8.03540361975425e-06, "loss": 0.5791, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.1872702836990356, "learning_rate": 8.034572903358891e-06, "loss": 0.568, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.2190617322921753, "learning_rate": 8.03374205433018e-06, "loss": 0.5824, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.1659135818481445, "learning_rate": 8.03291107270443e-06, "loss": 0.6068, "step": 4861 }, { "epoch": 0.31, "grad_norm": 1.0982389450073242, "learning_rate": 8.032079958517962e-06, "loss": 0.544, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.0621376037597656, "learning_rate": 8.031248711807102e-06, "loss": 0.5234, "step": 4863 }, { "epoch": 0.31, "grad_norm": 1.1210061311721802, "learning_rate": 8.030417332608183e-06, "loss": 0.5576, "step": 4864 }, { "epoch": 0.31, "grad_norm": 1.0925287008285522, "learning_rate": 8.02958582095754e-06, "loss": 0.5477, "step": 4865 }, { "epoch": 0.31, "grad_norm": 1.1799594163894653, "learning_rate": 8.028754176891516e-06, "loss": 0.5565, "step": 4866 }, { "epoch": 0.31, "grad_norm": 1.1037461757659912, "learning_rate": 8.027922400446462e-06, "loss": 0.555, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.0671534538269043, "learning_rate": 8.027090491658733e-06, "loss": 0.5521, "step": 4868 }, { "epoch": 0.31, "grad_norm": 1.147395372390747, "learning_rate": 8.026258450564688e-06, "loss": 0.5394, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.1377174854278564, "learning_rate": 8.025426277200694e-06, "loss": 0.5835, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.033140778541565, "learning_rate": 8.024593971603124e-06, "loss": 0.5703, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.2941197156906128, "learning_rate": 8.023761533808352e-06, "loss": 0.5378, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.2263635396957397, "learning_rate": 8.022928963852767e-06, "loss": 0.5448, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.389503836631775, "learning_rate": 8.022096261772755e-06, "loss": 0.576, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.1535240411758423, "learning_rate": 8.021263427604715e-06, "loss": 0.5651, "step": 4875 }, { "epoch": 0.31, "grad_norm": 1.1397316455841064, "learning_rate": 8.020430461385044e-06, "loss": 0.5638, "step": 4876 }, { "epoch": 0.31, "grad_norm": 1.1533654928207397, "learning_rate": 8.01959736315015e-06, "loss": 0.5687, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.0962615013122559, "learning_rate": 8.018764132936446e-06, "loss": 0.6107, "step": 4878 }, { "epoch": 0.31, "grad_norm": 1.0840588808059692, "learning_rate": 8.017930770780351e-06, "loss": 0.6172, "step": 4879 }, { "epoch": 0.31, "grad_norm": 1.2126954793930054, "learning_rate": 8.017097276718288e-06, "loss": 0.5575, "step": 4880 }, { "epoch": 0.32, "grad_norm": 1.1619877815246582, "learning_rate": 8.016263650786688e-06, "loss": 0.5505, "step": 4881 }, { "epoch": 0.32, "grad_norm": 1.2275164127349854, "learning_rate": 8.015429893021985e-06, "loss": 0.5486, "step": 4882 }, { "epoch": 0.32, "grad_norm": 1.2118995189666748, "learning_rate": 8.014596003460621e-06, "loss": 0.5765, "step": 4883 }, { "epoch": 0.32, "grad_norm": 1.2506895065307617, "learning_rate": 8.013761982139044e-06, "loss": 0.5995, "step": 4884 }, { "epoch": 0.32, "grad_norm": 1.1567293405532837, "learning_rate": 8.012927829093706e-06, "loss": 0.5795, "step": 4885 }, { "epoch": 0.32, "grad_norm": 0.999817430973053, "learning_rate": 8.012093544361067e-06, "loss": 0.5406, "step": 4886 }, { "epoch": 0.32, "grad_norm": 1.0591460466384888, "learning_rate": 8.011259127977588e-06, "loss": 0.5358, "step": 4887 }, { "epoch": 0.32, "grad_norm": 1.1313190460205078, "learning_rate": 8.010424579979745e-06, "loss": 0.5456, "step": 4888 }, { "epoch": 0.32, "grad_norm": 1.1606119871139526, "learning_rate": 8.009589900404009e-06, "loss": 0.5315, "step": 4889 }, { "epoch": 0.32, "grad_norm": 1.1304410696029663, "learning_rate": 8.008755089286863e-06, "loss": 0.5765, "step": 4890 }, { "epoch": 0.32, "grad_norm": 1.177607774734497, "learning_rate": 8.007920146664796e-06, "loss": 0.5531, "step": 4891 }, { "epoch": 0.32, "grad_norm": 1.226080298423767, "learning_rate": 8.007085072574299e-06, "loss": 0.5957, "step": 4892 }, { "epoch": 0.32, "grad_norm": 1.0918089151382446, "learning_rate": 8.00624986705187e-06, "loss": 0.5464, "step": 4893 }, { "epoch": 0.32, "grad_norm": 1.1380081176757812, "learning_rate": 8.00541453013402e-06, "loss": 0.5771, "step": 4894 }, { "epoch": 0.32, "grad_norm": 1.132552146911621, "learning_rate": 8.004579061857251e-06, "loss": 0.5775, "step": 4895 }, { "epoch": 0.32, "grad_norm": 1.1094505786895752, "learning_rate": 8.003743462258084e-06, "loss": 0.5366, "step": 4896 }, { "epoch": 0.32, "grad_norm": 1.1384755373001099, "learning_rate": 8.002907731373041e-06, "loss": 0.5113, "step": 4897 }, { "epoch": 0.32, "grad_norm": 1.1014859676361084, "learning_rate": 8.002071869238648e-06, "loss": 0.5478, "step": 4898 }, { "epoch": 0.32, "grad_norm": 1.0807825326919556, "learning_rate": 8.00123587589144e-06, "loss": 0.5759, "step": 4899 }, { "epoch": 0.32, "grad_norm": 1.1907901763916016, "learning_rate": 8.000399751367953e-06, "loss": 0.5585, "step": 4900 }, { "epoch": 0.32, "grad_norm": 1.2030014991760254, "learning_rate": 7.999563495704736e-06, "loss": 0.5879, "step": 4901 }, { "epoch": 0.32, "grad_norm": 1.1148550510406494, "learning_rate": 7.998727108938338e-06, "loss": 0.5144, "step": 4902 }, { "epoch": 0.32, "grad_norm": 1.0967353582382202, "learning_rate": 7.997890591105313e-06, "loss": 0.5451, "step": 4903 }, { "epoch": 0.32, "grad_norm": 1.1089638471603394, "learning_rate": 7.997053942242225e-06, "loss": 0.5344, "step": 4904 }, { "epoch": 0.32, "grad_norm": 1.059515118598938, "learning_rate": 7.996217162385643e-06, "loss": 0.5604, "step": 4905 }, { "epoch": 0.32, "grad_norm": 1.0908249616622925, "learning_rate": 7.995380251572138e-06, "loss": 0.5358, "step": 4906 }, { "epoch": 0.32, "grad_norm": 1.1224219799041748, "learning_rate": 7.994543209838293e-06, "loss": 0.5206, "step": 4907 }, { "epoch": 0.32, "grad_norm": 1.2063870429992676, "learning_rate": 7.993706037220687e-06, "loss": 0.4974, "step": 4908 }, { "epoch": 0.32, "grad_norm": 1.1069802045822144, "learning_rate": 7.992868733755916e-06, "loss": 0.55, "step": 4909 }, { "epoch": 0.32, "grad_norm": 1.1804029941558838, "learning_rate": 7.992031299480574e-06, "loss": 0.5385, "step": 4910 }, { "epoch": 0.32, "grad_norm": 1.2047052383422852, "learning_rate": 7.991193734431264e-06, "loss": 0.5407, "step": 4911 }, { "epoch": 0.32, "grad_norm": 1.2069098949432373, "learning_rate": 7.990356038644591e-06, "loss": 0.5713, "step": 4912 }, { "epoch": 0.32, "grad_norm": 1.1242607831954956, "learning_rate": 7.989518212157172e-06, "loss": 0.5471, "step": 4913 }, { "epoch": 0.32, "grad_norm": 1.1531654596328735, "learning_rate": 7.988680255005625e-06, "loss": 0.5153, "step": 4914 }, { "epoch": 0.32, "grad_norm": 1.2055079936981201, "learning_rate": 7.987842167226576e-06, "loss": 0.509, "step": 4915 }, { "epoch": 0.32, "grad_norm": 1.0919629335403442, "learning_rate": 7.987003948856653e-06, "loss": 0.549, "step": 4916 }, { "epoch": 0.32, "grad_norm": 1.2705968618392944, "learning_rate": 7.986165599932495e-06, "loss": 0.55, "step": 4917 }, { "epoch": 0.32, "grad_norm": 1.1714781522750854, "learning_rate": 7.985327120490742e-06, "loss": 0.5238, "step": 4918 }, { "epoch": 0.32, "grad_norm": 1.172209620475769, "learning_rate": 7.984488510568042e-06, "loss": 0.5553, "step": 4919 }, { "epoch": 0.32, "grad_norm": 1.2323743104934692, "learning_rate": 7.983649770201053e-06, "loss": 0.5819, "step": 4920 }, { "epoch": 0.32, "grad_norm": 1.4483691453933716, "learning_rate": 7.982810899426428e-06, "loss": 0.6009, "step": 4921 }, { "epoch": 0.32, "grad_norm": 1.1657919883728027, "learning_rate": 7.981971898280834e-06, "loss": 0.549, "step": 4922 }, { "epoch": 0.32, "grad_norm": 1.3344067335128784, "learning_rate": 7.981132766800943e-06, "loss": 0.5852, "step": 4923 }, { "epoch": 0.32, "grad_norm": 1.1642366647720337, "learning_rate": 7.980293505023428e-06, "loss": 0.5256, "step": 4924 }, { "epoch": 0.32, "grad_norm": 1.1348544359207153, "learning_rate": 7.979454112984977e-06, "loss": 0.5121, "step": 4925 }, { "epoch": 0.32, "grad_norm": 1.1321874856948853, "learning_rate": 7.978614590722272e-06, "loss": 0.577, "step": 4926 }, { "epoch": 0.32, "grad_norm": 1.2028374671936035, "learning_rate": 7.977774938272006e-06, "loss": 0.5552, "step": 4927 }, { "epoch": 0.32, "grad_norm": 1.1321789026260376, "learning_rate": 7.976935155670884e-06, "loss": 0.6051, "step": 4928 }, { "epoch": 0.32, "grad_norm": 1.3577159643173218, "learning_rate": 7.976095242955603e-06, "loss": 0.5468, "step": 4929 }, { "epoch": 0.32, "grad_norm": 1.1604117155075073, "learning_rate": 7.975255200162881e-06, "loss": 0.5263, "step": 4930 }, { "epoch": 0.32, "grad_norm": 1.0038323402404785, "learning_rate": 7.974415027329427e-06, "loss": 0.5466, "step": 4931 }, { "epoch": 0.32, "grad_norm": 1.160973310470581, "learning_rate": 7.97357472449197e-06, "loss": 0.5658, "step": 4932 }, { "epoch": 0.32, "grad_norm": 1.152825117111206, "learning_rate": 7.972734291687232e-06, "loss": 0.514, "step": 4933 }, { "epoch": 0.32, "grad_norm": 1.1678345203399658, "learning_rate": 7.971893728951946e-06, "loss": 0.6181, "step": 4934 }, { "epoch": 0.32, "grad_norm": 1.127058982849121, "learning_rate": 7.971053036322855e-06, "loss": 0.5815, "step": 4935 }, { "epoch": 0.32, "grad_norm": 1.0445972681045532, "learning_rate": 7.970212213836701e-06, "loss": 0.5191, "step": 4936 }, { "epoch": 0.32, "grad_norm": 1.142807126045227, "learning_rate": 7.969371261530233e-06, "loss": 0.5917, "step": 4937 }, { "epoch": 0.32, "grad_norm": 1.340813398361206, "learning_rate": 7.96853017944021e-06, "loss": 0.6021, "step": 4938 }, { "epoch": 0.32, "grad_norm": 1.1065980195999146, "learning_rate": 7.96768896760339e-06, "loss": 0.5859, "step": 4939 }, { "epoch": 0.32, "grad_norm": 1.090348720550537, "learning_rate": 7.966847626056544e-06, "loss": 0.5605, "step": 4940 }, { "epoch": 0.32, "grad_norm": 1.2201941013336182, "learning_rate": 7.966006154836441e-06, "loss": 0.5294, "step": 4941 }, { "epoch": 0.32, "grad_norm": 1.3785343170166016, "learning_rate": 7.965164553979862e-06, "loss": 0.5991, "step": 4942 }, { "epoch": 0.32, "grad_norm": 1.2378592491149902, "learning_rate": 7.964322823523591e-06, "loss": 0.5743, "step": 4943 }, { "epoch": 0.32, "grad_norm": 1.0985076427459717, "learning_rate": 7.963480963504417e-06, "loss": 0.5335, "step": 4944 }, { "epoch": 0.32, "grad_norm": 1.2219483852386475, "learning_rate": 7.962638973959137e-06, "loss": 0.5667, "step": 4945 }, { "epoch": 0.32, "grad_norm": 1.1110230684280396, "learning_rate": 7.961796854924551e-06, "loss": 0.5525, "step": 4946 }, { "epoch": 0.32, "grad_norm": 1.2001789808273315, "learning_rate": 7.960954606437463e-06, "loss": 0.4856, "step": 4947 }, { "epoch": 0.32, "grad_norm": 1.195831060409546, "learning_rate": 7.960112228534691e-06, "loss": 0.5639, "step": 4948 }, { "epoch": 0.32, "grad_norm": 1.189626693725586, "learning_rate": 7.959269721253051e-06, "loss": 0.5638, "step": 4949 }, { "epoch": 0.32, "grad_norm": 1.1513673067092896, "learning_rate": 7.958427084629366e-06, "loss": 0.575, "step": 4950 }, { "epoch": 0.32, "grad_norm": 1.2848176956176758, "learning_rate": 7.957584318700466e-06, "loss": 0.5592, "step": 4951 }, { "epoch": 0.32, "grad_norm": 1.228643536567688, "learning_rate": 7.956741423503187e-06, "loss": 0.5595, "step": 4952 }, { "epoch": 0.32, "grad_norm": 1.150312900543213, "learning_rate": 7.955898399074368e-06, "loss": 0.5701, "step": 4953 }, { "epoch": 0.32, "grad_norm": 1.437445044517517, "learning_rate": 7.955055245450856e-06, "loss": 0.5856, "step": 4954 }, { "epoch": 0.32, "grad_norm": 1.2773414850234985, "learning_rate": 7.954211962669503e-06, "loss": 0.5148, "step": 4955 }, { "epoch": 0.32, "grad_norm": 1.2260830402374268, "learning_rate": 7.953368550767168e-06, "loss": 0.5959, "step": 4956 }, { "epoch": 0.32, "grad_norm": 1.1019260883331299, "learning_rate": 7.952525009780712e-06, "loss": 0.5373, "step": 4957 }, { "epoch": 0.32, "grad_norm": 1.2215869426727295, "learning_rate": 7.951681339747007e-06, "loss": 0.5565, "step": 4958 }, { "epoch": 0.32, "grad_norm": 1.1426169872283936, "learning_rate": 7.950837540702926e-06, "loss": 0.539, "step": 4959 }, { "epoch": 0.32, "grad_norm": 1.3175153732299805, "learning_rate": 7.949993612685349e-06, "loss": 0.5611, "step": 4960 }, { "epoch": 0.32, "grad_norm": 1.1986539363861084, "learning_rate": 7.949149555731163e-06, "loss": 0.5798, "step": 4961 }, { "epoch": 0.32, "grad_norm": 1.1088930368423462, "learning_rate": 7.948305369877257e-06, "loss": 0.528, "step": 4962 }, { "epoch": 0.32, "grad_norm": 1.1746039390563965, "learning_rate": 7.94746105516053e-06, "loss": 0.5818, "step": 4963 }, { "epoch": 0.32, "grad_norm": 1.107744574546814, "learning_rate": 7.946616611617888e-06, "loss": 0.5385, "step": 4964 }, { "epoch": 0.32, "grad_norm": 1.1218992471694946, "learning_rate": 7.945772039286233e-06, "loss": 0.5907, "step": 4965 }, { "epoch": 0.32, "grad_norm": 1.1710606813430786, "learning_rate": 7.944927338202483e-06, "loss": 0.5735, "step": 4966 }, { "epoch": 0.32, "grad_norm": 1.2412360906600952, "learning_rate": 7.944082508403559e-06, "loss": 0.5712, "step": 4967 }, { "epoch": 0.32, "grad_norm": 1.313011884689331, "learning_rate": 7.943237549926383e-06, "loss": 0.5944, "step": 4968 }, { "epoch": 0.32, "grad_norm": 1.2353023290634155, "learning_rate": 7.942392462807886e-06, "loss": 0.5715, "step": 4969 }, { "epoch": 0.32, "grad_norm": 1.241748571395874, "learning_rate": 7.941547247085008e-06, "loss": 0.5457, "step": 4970 }, { "epoch": 0.32, "grad_norm": 1.1786152124404907, "learning_rate": 7.940701902794688e-06, "loss": 0.5899, "step": 4971 }, { "epoch": 0.32, "grad_norm": 1.0859856605529785, "learning_rate": 7.939856429973876e-06, "loss": 0.516, "step": 4972 }, { "epoch": 0.32, "grad_norm": 1.1592457294464111, "learning_rate": 7.939010828659524e-06, "loss": 0.5465, "step": 4973 }, { "epoch": 0.32, "grad_norm": 1.1742234230041504, "learning_rate": 7.93816509888859e-06, "loss": 0.559, "step": 4974 }, { "epoch": 0.32, "grad_norm": 1.1679177284240723, "learning_rate": 7.937319240698042e-06, "loss": 0.5752, "step": 4975 }, { "epoch": 0.32, "grad_norm": 1.1372004747390747, "learning_rate": 7.936473254124848e-06, "loss": 0.5535, "step": 4976 }, { "epoch": 0.32, "grad_norm": 1.145331621170044, "learning_rate": 7.935627139205983e-06, "loss": 0.5059, "step": 4977 }, { "epoch": 0.32, "grad_norm": 1.0866734981536865, "learning_rate": 7.934780895978432e-06, "loss": 0.5554, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.1291296482086182, "learning_rate": 7.933934524479177e-06, "loss": 0.5058, "step": 4979 }, { "epoch": 0.32, "grad_norm": 1.1944531202316284, "learning_rate": 7.933088024745216e-06, "loss": 0.5949, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.2096883058547974, "learning_rate": 7.932241396813542e-06, "loss": 0.5262, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.2512545585632324, "learning_rate": 7.931394640721166e-06, "loss": 0.5997, "step": 4982 }, { "epoch": 0.32, "grad_norm": 1.0692495107650757, "learning_rate": 7.93054775650509e-06, "loss": 0.5142, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.1460082530975342, "learning_rate": 7.929700744202336e-06, "loss": 0.559, "step": 4984 }, { "epoch": 0.32, "grad_norm": 1.1832143068313599, "learning_rate": 7.928853603849918e-06, "loss": 0.5843, "step": 4985 }, { "epoch": 0.32, "grad_norm": 1.2703447341918945, "learning_rate": 7.928006335484865e-06, "loss": 0.57, "step": 4986 }, { "epoch": 0.32, "grad_norm": 1.1585217714309692, "learning_rate": 7.927158939144212e-06, "loss": 0.5536, "step": 4987 }, { "epoch": 0.32, "grad_norm": 1.170399785041809, "learning_rate": 7.926311414864992e-06, "loss": 0.5916, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.3470450639724731, "learning_rate": 7.92546376268425e-06, "loss": 0.5756, "step": 4989 }, { "epoch": 0.32, "grad_norm": 1.2981290817260742, "learning_rate": 7.924615982639037e-06, "loss": 0.6222, "step": 4990 }, { "epoch": 0.32, "grad_norm": 1.2397503852844238, "learning_rate": 7.923768074766403e-06, "loss": 0.5707, "step": 4991 }, { "epoch": 0.32, "grad_norm": 1.2008939981460571, "learning_rate": 7.92292003910341e-06, "loss": 0.542, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.1951539516448975, "learning_rate": 7.922071875687125e-06, "loss": 0.5714, "step": 4993 }, { "epoch": 0.32, "grad_norm": 1.228193759918213, "learning_rate": 7.921223584554616e-06, "loss": 0.5575, "step": 4994 }, { "epoch": 0.32, "grad_norm": 1.1522843837738037, "learning_rate": 7.92037516574296e-06, "loss": 0.5108, "step": 4995 }, { "epoch": 0.32, "grad_norm": 1.1852306127548218, "learning_rate": 7.919526619289243e-06, "loss": 0.5935, "step": 4996 }, { "epoch": 0.32, "grad_norm": 1.2359068393707275, "learning_rate": 7.918677945230545e-06, "loss": 0.5132, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.0950919389724731, "learning_rate": 7.917829143603969e-06, "loss": 0.5602, "step": 4998 }, { "epoch": 0.32, "grad_norm": 1.1500312089920044, "learning_rate": 7.916980214446608e-06, "loss": 0.5273, "step": 4999 }, { "epoch": 0.32, "grad_norm": 1.1378073692321777, "learning_rate": 7.916131157795569e-06, "loss": 0.5777, "step": 5000 }, { "epoch": 0.32, "grad_norm": 1.1841806173324585, "learning_rate": 7.915281973687958e-06, "loss": 0.6043, "step": 5001 }, { "epoch": 0.32, "grad_norm": 1.1498222351074219, "learning_rate": 7.914432662160894e-06, "loss": 0.5405, "step": 5002 }, { "epoch": 0.32, "grad_norm": 1.0829683542251587, "learning_rate": 7.913583223251498e-06, "loss": 0.5622, "step": 5003 }, { "epoch": 0.32, "grad_norm": 1.2997440099716187, "learning_rate": 7.912733656996897e-06, "loss": 0.5823, "step": 5004 }, { "epoch": 0.32, "grad_norm": 1.1382665634155273, "learning_rate": 7.911883963434224e-06, "loss": 0.5504, "step": 5005 }, { "epoch": 0.32, "grad_norm": 1.2714178562164307, "learning_rate": 7.911034142600613e-06, "loss": 0.534, "step": 5006 }, { "epoch": 0.32, "grad_norm": 1.34064781665802, "learning_rate": 7.910184194533212e-06, "loss": 0.5818, "step": 5007 }, { "epoch": 0.32, "grad_norm": 1.1587709188461304, "learning_rate": 7.909334119269168e-06, "loss": 0.5791, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.1422158479690552, "learning_rate": 7.908483916845636e-06, "loss": 0.5719, "step": 5009 }, { "epoch": 0.32, "grad_norm": 1.1269519329071045, "learning_rate": 7.907633587299775e-06, "loss": 0.5908, "step": 5010 }, { "epoch": 0.32, "grad_norm": 1.32268488407135, "learning_rate": 7.906783130668753e-06, "loss": 0.5339, "step": 5011 }, { "epoch": 0.32, "grad_norm": 1.1794226169586182, "learning_rate": 7.905932546989738e-06, "loss": 0.556, "step": 5012 }, { "epoch": 0.32, "grad_norm": 1.1198139190673828, "learning_rate": 7.905081836299911e-06, "loss": 0.5456, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.2941033840179443, "learning_rate": 7.904230998636451e-06, "loss": 0.5698, "step": 5014 }, { "epoch": 0.32, "grad_norm": 1.1710541248321533, "learning_rate": 7.903380034036547e-06, "loss": 0.5814, "step": 5015 }, { "epoch": 0.32, "grad_norm": 1.03315269947052, "learning_rate": 7.902528942537394e-06, "loss": 0.5546, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.1995911598205566, "learning_rate": 7.901677724176188e-06, "loss": 0.5606, "step": 5017 }, { "epoch": 0.32, "grad_norm": 1.1415154933929443, "learning_rate": 7.900826378990134e-06, "loss": 0.5883, "step": 5018 }, { "epoch": 0.32, "grad_norm": 1.1534783840179443, "learning_rate": 7.899974907016445e-06, "loss": 0.5486, "step": 5019 }, { "epoch": 0.32, "grad_norm": 1.0676370859146118, "learning_rate": 7.899123308292335e-06, "loss": 0.5456, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.170638918876648, "learning_rate": 7.898271582855025e-06, "loss": 0.5216, "step": 5021 }, { "epoch": 0.32, "grad_norm": 1.3977515697479248, "learning_rate": 7.897419730741741e-06, "loss": 0.5676, "step": 5022 }, { "epoch": 0.32, "grad_norm": 1.4042943716049194, "learning_rate": 7.896567751989716e-06, "loss": 0.5825, "step": 5023 }, { "epoch": 0.32, "grad_norm": 1.152015209197998, "learning_rate": 7.895715646636188e-06, "loss": 0.5549, "step": 5024 }, { "epoch": 0.32, "grad_norm": 1.19979989528656, "learning_rate": 7.894863414718402e-06, "loss": 0.5096, "step": 5025 }, { "epoch": 0.32, "grad_norm": 1.119577169418335, "learning_rate": 7.894011056273602e-06, "loss": 0.5577, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.061992883682251, "learning_rate": 7.893158571339045e-06, "loss": 0.5587, "step": 5027 }, { "epoch": 0.32, "grad_norm": 1.1192338466644287, "learning_rate": 7.892305959951996e-06, "loss": 0.6023, "step": 5028 }, { "epoch": 0.32, "grad_norm": 1.1309703588485718, "learning_rate": 7.891453222149712e-06, "loss": 0.5222, "step": 5029 }, { "epoch": 0.32, "grad_norm": 1.2265772819519043, "learning_rate": 7.890600357969466e-06, "loss": 0.5675, "step": 5030 }, { "epoch": 0.32, "grad_norm": 1.2315254211425781, "learning_rate": 7.88974736744854e-06, "loss": 0.5587, "step": 5031 }, { "epoch": 0.32, "grad_norm": 1.163399577140808, "learning_rate": 7.88889425062421e-06, "loss": 0.525, "step": 5032 }, { "epoch": 0.32, "grad_norm": 1.111648440361023, "learning_rate": 7.888041007533767e-06, "loss": 0.5905, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.2203259468078613, "learning_rate": 7.887187638214503e-06, "loss": 0.5608, "step": 5034 }, { "epoch": 0.33, "grad_norm": 1.3612428903579712, "learning_rate": 7.886334142703716e-06, "loss": 0.5949, "step": 5035 }, { "epoch": 0.33, "grad_norm": 1.2087748050689697, "learning_rate": 7.885480521038709e-06, "loss": 0.5235, "step": 5036 }, { "epoch": 0.33, "grad_norm": 1.1100337505340576, "learning_rate": 7.884626773256794e-06, "loss": 0.4743, "step": 5037 }, { "epoch": 0.33, "grad_norm": 1.1922575235366821, "learning_rate": 7.883772899395285e-06, "loss": 0.5875, "step": 5038 }, { "epoch": 0.33, "grad_norm": 1.1404733657836914, "learning_rate": 7.882918899491505e-06, "loss": 0.5266, "step": 5039 }, { "epoch": 0.33, "grad_norm": 1.0513113737106323, "learning_rate": 7.882064773582777e-06, "loss": 0.4883, "step": 5040 }, { "epoch": 0.33, "grad_norm": 1.1331409215927124, "learning_rate": 7.881210521706431e-06, "loss": 0.5683, "step": 5041 }, { "epoch": 0.33, "grad_norm": 1.1851820945739746, "learning_rate": 7.88035614389981e-06, "loss": 0.5464, "step": 5042 }, { "epoch": 0.33, "grad_norm": 1.2180583477020264, "learning_rate": 7.879501640200252e-06, "loss": 0.5238, "step": 5043 }, { "epoch": 0.33, "grad_norm": 1.0475536584854126, "learning_rate": 7.878647010645105e-06, "loss": 0.5443, "step": 5044 }, { "epoch": 0.33, "grad_norm": 1.1271823644638062, "learning_rate": 7.877792255271726e-06, "loss": 0.5709, "step": 5045 }, { "epoch": 0.33, "grad_norm": 1.0451933145523071, "learning_rate": 7.87693737411747e-06, "loss": 0.5194, "step": 5046 }, { "epoch": 0.33, "grad_norm": 1.0682401657104492, "learning_rate": 7.876082367219708e-06, "loss": 0.5363, "step": 5047 }, { "epoch": 0.33, "grad_norm": 1.157545804977417, "learning_rate": 7.875227234615803e-06, "loss": 0.5637, "step": 5048 }, { "epoch": 0.33, "grad_norm": 1.1269150972366333, "learning_rate": 7.874371976343134e-06, "loss": 0.5407, "step": 5049 }, { "epoch": 0.33, "grad_norm": 1.224502682685852, "learning_rate": 7.873516592439083e-06, "loss": 0.5812, "step": 5050 }, { "epoch": 0.33, "grad_norm": 1.0506807565689087, "learning_rate": 7.872661082941036e-06, "loss": 0.5174, "step": 5051 }, { "epoch": 0.33, "grad_norm": 1.1017299890518188, "learning_rate": 7.871805447886382e-06, "loss": 0.599, "step": 5052 }, { "epoch": 0.33, "grad_norm": 1.1747201681137085, "learning_rate": 7.870949687312524e-06, "loss": 0.6046, "step": 5053 }, { "epoch": 0.33, "grad_norm": 1.107767105102539, "learning_rate": 7.870093801256861e-06, "loss": 0.5349, "step": 5054 }, { "epoch": 0.33, "grad_norm": 1.177946925163269, "learning_rate": 7.869237789756803e-06, "loss": 0.5338, "step": 5055 }, { "epoch": 0.33, "grad_norm": 1.1806291341781616, "learning_rate": 7.868381652849764e-06, "loss": 0.5896, "step": 5056 }, { "epoch": 0.33, "grad_norm": 1.0960795879364014, "learning_rate": 7.867525390573162e-06, "loss": 0.5735, "step": 5057 }, { "epoch": 0.33, "grad_norm": 1.2193176746368408, "learning_rate": 7.866669002964426e-06, "loss": 0.5506, "step": 5058 }, { "epoch": 0.33, "grad_norm": 1.2534481287002563, "learning_rate": 7.865812490060984e-06, "loss": 0.5655, "step": 5059 }, { "epoch": 0.33, "grad_norm": 1.096072793006897, "learning_rate": 7.864955851900272e-06, "loss": 0.5629, "step": 5060 }, { "epoch": 0.33, "grad_norm": 1.141029953956604, "learning_rate": 7.864099088519729e-06, "loss": 0.5393, "step": 5061 }, { "epoch": 0.33, "grad_norm": 1.082260251045227, "learning_rate": 7.863242199956806e-06, "loss": 0.5493, "step": 5062 }, { "epoch": 0.33, "grad_norm": 1.2865337133407593, "learning_rate": 7.862385186248954e-06, "loss": 0.6072, "step": 5063 }, { "epoch": 0.33, "grad_norm": 1.2727187871932983, "learning_rate": 7.861528047433629e-06, "loss": 0.5441, "step": 5064 }, { "epoch": 0.33, "grad_norm": 1.2791892290115356, "learning_rate": 7.860670783548296e-06, "loss": 0.5242, "step": 5065 }, { "epoch": 0.33, "grad_norm": 1.0758508443832397, "learning_rate": 7.859813394630425e-06, "loss": 0.5329, "step": 5066 }, { "epoch": 0.33, "grad_norm": 1.1877052783966064, "learning_rate": 7.85895588071749e-06, "loss": 0.5634, "step": 5067 }, { "epoch": 0.33, "grad_norm": 1.2267918586730957, "learning_rate": 7.858098241846968e-06, "loss": 0.5547, "step": 5068 }, { "epoch": 0.33, "grad_norm": 1.059660792350769, "learning_rate": 7.857240478056345e-06, "loss": 0.5501, "step": 5069 }, { "epoch": 0.33, "grad_norm": 1.0545819997787476, "learning_rate": 7.856382589383113e-06, "loss": 0.5402, "step": 5070 }, { "epoch": 0.33, "grad_norm": 1.0654183626174927, "learning_rate": 7.855524575864768e-06, "loss": 0.549, "step": 5071 }, { "epoch": 0.33, "grad_norm": 1.1636347770690918, "learning_rate": 7.854666437538811e-06, "loss": 0.5235, "step": 5072 }, { "epoch": 0.33, "grad_norm": 1.1381102800369263, "learning_rate": 7.85380817444275e-06, "loss": 0.5483, "step": 5073 }, { "epoch": 0.33, "grad_norm": 1.1260610818862915, "learning_rate": 7.852949786614097e-06, "loss": 0.4993, "step": 5074 }, { "epoch": 0.33, "grad_norm": 1.0486385822296143, "learning_rate": 7.85209127409037e-06, "loss": 0.568, "step": 5075 }, { "epoch": 0.33, "grad_norm": 1.1200079917907715, "learning_rate": 7.851232636909089e-06, "loss": 0.5922, "step": 5076 }, { "epoch": 0.33, "grad_norm": 1.1294569969177246, "learning_rate": 7.850373875107786e-06, "loss": 0.5432, "step": 5077 }, { "epoch": 0.33, "grad_norm": 1.215813398361206, "learning_rate": 7.849514988723997e-06, "loss": 0.5209, "step": 5078 }, { "epoch": 0.33, "grad_norm": 1.2953956127166748, "learning_rate": 7.848655977795258e-06, "loss": 0.5598, "step": 5079 }, { "epoch": 0.33, "grad_norm": 1.1555309295654297, "learning_rate": 7.847796842359116e-06, "loss": 0.6056, "step": 5080 }, { "epoch": 0.33, "grad_norm": 1.1272735595703125, "learning_rate": 7.846937582453123e-06, "loss": 0.5548, "step": 5081 }, { "epoch": 0.33, "grad_norm": 1.12312912940979, "learning_rate": 7.846078198114832e-06, "loss": 0.5326, "step": 5082 }, { "epoch": 0.33, "grad_norm": 1.1507765054702759, "learning_rate": 7.845218689381806e-06, "loss": 0.5366, "step": 5083 }, { "epoch": 0.33, "grad_norm": 1.077966570854187, "learning_rate": 7.844359056291614e-06, "loss": 0.4972, "step": 5084 }, { "epoch": 0.33, "grad_norm": 1.17067551612854, "learning_rate": 7.843499298881824e-06, "loss": 0.5777, "step": 5085 }, { "epoch": 0.33, "grad_norm": 1.2625657320022583, "learning_rate": 7.842639417190017e-06, "loss": 0.5325, "step": 5086 }, { "epoch": 0.33, "grad_norm": 1.126343011856079, "learning_rate": 7.841779411253774e-06, "loss": 0.5185, "step": 5087 }, { "epoch": 0.33, "grad_norm": 1.2702370882034302, "learning_rate": 7.840919281110686e-06, "loss": 0.568, "step": 5088 }, { "epoch": 0.33, "grad_norm": 1.31534743309021, "learning_rate": 7.840059026798346e-06, "loss": 0.5622, "step": 5089 }, { "epoch": 0.33, "grad_norm": 1.1580873727798462, "learning_rate": 7.839198648354352e-06, "loss": 0.5557, "step": 5090 }, { "epoch": 0.33, "grad_norm": 1.2032802104949951, "learning_rate": 7.838338145816312e-06, "loss": 0.5884, "step": 5091 }, { "epoch": 0.33, "grad_norm": 1.084396481513977, "learning_rate": 7.837477519221831e-06, "loss": 0.536, "step": 5092 }, { "epoch": 0.33, "grad_norm": 1.434567928314209, "learning_rate": 7.836616768608533e-06, "loss": 0.5636, "step": 5093 }, { "epoch": 0.33, "grad_norm": 1.1994879245758057, "learning_rate": 7.835755894014032e-06, "loss": 0.5321, "step": 5094 }, { "epoch": 0.33, "grad_norm": 1.3045369386672974, "learning_rate": 7.834894895475958e-06, "loss": 0.5796, "step": 5095 }, { "epoch": 0.33, "grad_norm": 1.2256425619125366, "learning_rate": 7.834033773031944e-06, "loss": 0.5348, "step": 5096 }, { "epoch": 0.33, "grad_norm": 1.0628204345703125, "learning_rate": 7.833172526719623e-06, "loss": 0.5508, "step": 5097 }, { "epoch": 0.33, "grad_norm": 1.1585452556610107, "learning_rate": 7.83231115657664e-06, "loss": 0.566, "step": 5098 }, { "epoch": 0.33, "grad_norm": 1.07655668258667, "learning_rate": 7.831449662640647e-06, "loss": 0.5557, "step": 5099 }, { "epoch": 0.33, "grad_norm": 1.0890008211135864, "learning_rate": 7.830588044949292e-06, "loss": 0.5551, "step": 5100 }, { "epoch": 0.33, "grad_norm": 1.2347503900527954, "learning_rate": 7.829726303540237e-06, "loss": 0.5543, "step": 5101 }, { "epoch": 0.33, "grad_norm": 1.2694449424743652, "learning_rate": 7.828864438451147e-06, "loss": 0.5433, "step": 5102 }, { "epoch": 0.33, "grad_norm": 1.125844120979309, "learning_rate": 7.82800244971969e-06, "loss": 0.5818, "step": 5103 }, { "epoch": 0.33, "grad_norm": 1.2415742874145508, "learning_rate": 7.827140337383543e-06, "loss": 0.5424, "step": 5104 }, { "epoch": 0.33, "grad_norm": 1.147890329360962, "learning_rate": 7.826278101480386e-06, "loss": 0.5736, "step": 5105 }, { "epoch": 0.33, "grad_norm": 1.1361021995544434, "learning_rate": 7.825415742047903e-06, "loss": 0.5327, "step": 5106 }, { "epoch": 0.33, "grad_norm": 1.1293907165527344, "learning_rate": 7.82455325912379e-06, "loss": 0.5634, "step": 5107 }, { "epoch": 0.33, "grad_norm": 1.1574926376342773, "learning_rate": 7.823690652745741e-06, "loss": 0.543, "step": 5108 }, { "epoch": 0.33, "grad_norm": 1.1086362600326538, "learning_rate": 7.82282792295146e-06, "loss": 0.5174, "step": 5109 }, { "epoch": 0.33, "grad_norm": 1.0952142477035522, "learning_rate": 7.82196506977865e-06, "loss": 0.5087, "step": 5110 }, { "epoch": 0.33, "grad_norm": 1.2879297733306885, "learning_rate": 7.82110209326503e-06, "loss": 0.5428, "step": 5111 }, { "epoch": 0.33, "grad_norm": 1.3635361194610596, "learning_rate": 7.820238993448315e-06, "loss": 0.5471, "step": 5112 }, { "epoch": 0.33, "grad_norm": 1.0592621564865112, "learning_rate": 7.819375770366232e-06, "loss": 0.4818, "step": 5113 }, { "epoch": 0.33, "grad_norm": 1.1487445831298828, "learning_rate": 7.818512424056506e-06, "loss": 0.5344, "step": 5114 }, { "epoch": 0.33, "grad_norm": 1.1286321878433228, "learning_rate": 7.817648954556874e-06, "loss": 0.5325, "step": 5115 }, { "epoch": 0.33, "grad_norm": 1.195540428161621, "learning_rate": 7.816785361905078e-06, "loss": 0.5493, "step": 5116 }, { "epoch": 0.33, "grad_norm": 1.2105562686920166, "learning_rate": 7.815921646138858e-06, "loss": 0.5863, "step": 5117 }, { "epoch": 0.33, "grad_norm": 1.282595157623291, "learning_rate": 7.81505780729597e-06, "loss": 0.5777, "step": 5118 }, { "epoch": 0.33, "grad_norm": 1.3115586042404175, "learning_rate": 7.814193845414169e-06, "loss": 0.5695, "step": 5119 }, { "epoch": 0.33, "grad_norm": 1.1942055225372314, "learning_rate": 7.813329760531215e-06, "loss": 0.5851, "step": 5120 }, { "epoch": 0.33, "grad_norm": 1.2479885816574097, "learning_rate": 7.812465552684874e-06, "loss": 0.5761, "step": 5121 }, { "epoch": 0.33, "grad_norm": 1.2717609405517578, "learning_rate": 7.811601221912921e-06, "loss": 0.5585, "step": 5122 }, { "epoch": 0.33, "grad_norm": 1.255454421043396, "learning_rate": 7.810736768253133e-06, "loss": 0.5576, "step": 5123 }, { "epoch": 0.33, "grad_norm": 1.1239911317825317, "learning_rate": 7.809872191743292e-06, "loss": 0.5628, "step": 5124 }, { "epoch": 0.33, "grad_norm": 1.269710898399353, "learning_rate": 7.809007492421188e-06, "loss": 0.6036, "step": 5125 }, { "epoch": 0.33, "grad_norm": 1.1905585527420044, "learning_rate": 7.808142670324612e-06, "loss": 0.5219, "step": 5126 }, { "epoch": 0.33, "grad_norm": 1.2291979789733887, "learning_rate": 7.807277725491367e-06, "loss": 0.5908, "step": 5127 }, { "epoch": 0.33, "grad_norm": 1.1250534057617188, "learning_rate": 7.806412657959255e-06, "loss": 0.5148, "step": 5128 }, { "epoch": 0.33, "grad_norm": 1.2204301357269287, "learning_rate": 7.805547467766087e-06, "loss": 0.5873, "step": 5129 }, { "epoch": 0.33, "grad_norm": 1.0859451293945312, "learning_rate": 7.804682154949674e-06, "loss": 0.5189, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.1729536056518555, "learning_rate": 7.803816719547845e-06, "loss": 0.5906, "step": 5131 }, { "epoch": 0.33, "grad_norm": 1.3099662065505981, "learning_rate": 7.802951161598418e-06, "loss": 0.5409, "step": 5132 }, { "epoch": 0.33, "grad_norm": 1.2175813913345337, "learning_rate": 7.802085481139227e-06, "loss": 0.5312, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.1065171957015991, "learning_rate": 7.801219678208111e-06, "loss": 0.6155, "step": 5134 }, { "epoch": 0.33, "grad_norm": 1.2289918661117554, "learning_rate": 7.800353752842911e-06, "loss": 0.5609, "step": 5135 }, { "epoch": 0.33, "grad_norm": 1.1211332082748413, "learning_rate": 7.799487705081472e-06, "loss": 0.5599, "step": 5136 }, { "epoch": 0.33, "grad_norm": 1.2554138898849487, "learning_rate": 7.798621534961648e-06, "loss": 0.5797, "step": 5137 }, { "epoch": 0.33, "grad_norm": 1.1858456134796143, "learning_rate": 7.797755242521298e-06, "loss": 0.5427, "step": 5138 }, { "epoch": 0.33, "grad_norm": 1.2749555110931396, "learning_rate": 7.796888827798283e-06, "loss": 0.6166, "step": 5139 }, { "epoch": 0.33, "grad_norm": 1.2870393991470337, "learning_rate": 7.796022290830477e-06, "loss": 0.5611, "step": 5140 }, { "epoch": 0.33, "grad_norm": 1.1692824363708496, "learning_rate": 7.795155631655748e-06, "loss": 0.5984, "step": 5141 }, { "epoch": 0.33, "grad_norm": 1.254621148109436, "learning_rate": 7.794288850311978e-06, "loss": 0.5013, "step": 5142 }, { "epoch": 0.33, "grad_norm": 1.213228464126587, "learning_rate": 7.793421946837052e-06, "loss": 0.5837, "step": 5143 }, { "epoch": 0.33, "grad_norm": 1.2455217838287354, "learning_rate": 7.79255492126886e-06, "loss": 0.5786, "step": 5144 }, { "epoch": 0.33, "grad_norm": 1.0682522058486938, "learning_rate": 7.791687773645296e-06, "loss": 0.5608, "step": 5145 }, { "epoch": 0.33, "grad_norm": 1.1140522956848145, "learning_rate": 7.790820504004263e-06, "loss": 0.5636, "step": 5146 }, { "epoch": 0.33, "grad_norm": 1.065245270729065, "learning_rate": 7.789953112383665e-06, "loss": 0.4904, "step": 5147 }, { "epoch": 0.33, "grad_norm": 1.093039631843567, "learning_rate": 7.789085598821416e-06, "loss": 0.5415, "step": 5148 }, { "epoch": 0.33, "grad_norm": 1.059126615524292, "learning_rate": 7.788217963355432e-06, "loss": 0.5057, "step": 5149 }, { "epoch": 0.33, "grad_norm": 1.1702195405960083, "learning_rate": 7.787350206023633e-06, "loss": 0.5359, "step": 5150 }, { "epoch": 0.33, "grad_norm": 1.1672930717468262, "learning_rate": 7.78648232686395e-06, "loss": 0.5923, "step": 5151 }, { "epoch": 0.33, "grad_norm": 1.1064592599868774, "learning_rate": 7.785614325914313e-06, "loss": 0.4674, "step": 5152 }, { "epoch": 0.33, "grad_norm": 1.1897873878479004, "learning_rate": 7.784746203212658e-06, "loss": 0.5551, "step": 5153 }, { "epoch": 0.33, "grad_norm": 1.0981510877609253, "learning_rate": 7.783877958796934e-06, "loss": 0.5626, "step": 5154 }, { "epoch": 0.33, "grad_norm": 1.184319257736206, "learning_rate": 7.783009592705087e-06, "loss": 0.556, "step": 5155 }, { "epoch": 0.33, "grad_norm": 1.0912688970565796, "learning_rate": 7.78214110497507e-06, "loss": 0.5404, "step": 5156 }, { "epoch": 0.33, "grad_norm": 1.3341281414031982, "learning_rate": 7.781272495644844e-06, "loss": 0.5983, "step": 5157 }, { "epoch": 0.33, "grad_norm": 1.1089410781860352, "learning_rate": 7.78040376475237e-06, "loss": 0.5655, "step": 5158 }, { "epoch": 0.33, "grad_norm": 1.0568162202835083, "learning_rate": 7.779534912335625e-06, "loss": 0.5326, "step": 5159 }, { "epoch": 0.33, "grad_norm": 1.260148525238037, "learning_rate": 7.77866593843258e-06, "loss": 0.6118, "step": 5160 }, { "epoch": 0.33, "grad_norm": 1.3106818199157715, "learning_rate": 7.777796843081213e-06, "loss": 0.5682, "step": 5161 }, { "epoch": 0.33, "grad_norm": 1.2752857208251953, "learning_rate": 7.776927626319514e-06, "loss": 0.5592, "step": 5162 }, { "epoch": 0.33, "grad_norm": 1.2348328828811646, "learning_rate": 7.776058288185476e-06, "loss": 0.5884, "step": 5163 }, { "epoch": 0.33, "grad_norm": 1.220432996749878, "learning_rate": 7.77518882871709e-06, "loss": 0.5737, "step": 5164 }, { "epoch": 0.33, "grad_norm": 1.126645803451538, "learning_rate": 7.77431924795236e-06, "loss": 0.5276, "step": 5165 }, { "epoch": 0.33, "grad_norm": 1.2944118976593018, "learning_rate": 7.773449545929294e-06, "loss": 0.5595, "step": 5166 }, { "epoch": 0.33, "grad_norm": 1.1268830299377441, "learning_rate": 7.772579722685905e-06, "loss": 0.5876, "step": 5167 }, { "epoch": 0.33, "grad_norm": 1.2919403314590454, "learning_rate": 7.771709778260206e-06, "loss": 0.6112, "step": 5168 }, { "epoch": 0.33, "grad_norm": 1.3367233276367188, "learning_rate": 7.770839712690229e-06, "loss": 0.5221, "step": 5169 }, { "epoch": 0.33, "grad_norm": 1.2310446500778198, "learning_rate": 7.769969526013993e-06, "loss": 0.5741, "step": 5170 }, { "epoch": 0.33, "grad_norm": 1.1253018379211426, "learning_rate": 7.769099218269537e-06, "loss": 0.5649, "step": 5171 }, { "epoch": 0.33, "grad_norm": 1.2057076692581177, "learning_rate": 7.768228789494898e-06, "loss": 0.5579, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.1582069396972656, "learning_rate": 7.767358239728122e-06, "loss": 0.5438, "step": 5173 }, { "epoch": 0.33, "grad_norm": 1.072068691253662, "learning_rate": 7.766487569007254e-06, "loss": 0.5758, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.1208277940750122, "learning_rate": 7.765616777370357e-06, "loss": 0.577, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.0947788953781128, "learning_rate": 7.764745864855483e-06, "loss": 0.5402, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.2465643882751465, "learning_rate": 7.7638748315007e-06, "loss": 0.5297, "step": 5177 }, { "epoch": 0.33, "grad_norm": 1.1728841066360474, "learning_rate": 7.76300367734408e-06, "loss": 0.5397, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.2586702108383179, "learning_rate": 7.762132402423699e-06, "loss": 0.6114, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.1659660339355469, "learning_rate": 7.761261006777635e-06, "loss": 0.6083, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.273001790046692, "learning_rate": 7.760389490443978e-06, "loss": 0.5135, "step": 5181 }, { "epoch": 0.33, "grad_norm": 1.0793017148971558, "learning_rate": 7.759517853460817e-06, "loss": 0.5193, "step": 5182 }, { "epoch": 0.33, "grad_norm": 1.0357341766357422, "learning_rate": 7.758646095866252e-06, "loss": 0.5387, "step": 5183 }, { "epoch": 0.33, "grad_norm": 1.183027744293213, "learning_rate": 7.757774217698382e-06, "loss": 0.565, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.2845957279205322, "learning_rate": 7.756902218995318e-06, "loss": 0.615, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.2447563409805298, "learning_rate": 7.756030099795169e-06, "loss": 0.5924, "step": 5186 }, { "epoch": 0.33, "grad_norm": 1.147626519203186, "learning_rate": 7.755157860136056e-06, "loss": 0.5679, "step": 5187 }, { "epoch": 0.33, "grad_norm": 1.1965446472167969, "learning_rate": 7.7542855000561e-06, "loss": 0.5597, "step": 5188 }, { "epoch": 0.33, "grad_norm": 1.3176227807998657, "learning_rate": 7.75341301959343e-06, "loss": 0.6116, "step": 5189 }, { "epoch": 0.34, "grad_norm": 1.1891757249832153, "learning_rate": 7.752540418786184e-06, "loss": 0.5792, "step": 5190 }, { "epoch": 0.34, "grad_norm": 1.1371773481369019, "learning_rate": 7.751667697672496e-06, "loss": 0.5245, "step": 5191 }, { "epoch": 0.34, "grad_norm": 1.1719211339950562, "learning_rate": 7.750794856290513e-06, "loss": 0.5156, "step": 5192 }, { "epoch": 0.34, "grad_norm": 1.2878490686416626, "learning_rate": 7.749921894678385e-06, "loss": 0.5455, "step": 5193 }, { "epoch": 0.34, "grad_norm": 1.1128712892532349, "learning_rate": 7.749048812874265e-06, "loss": 0.6025, "step": 5194 }, { "epoch": 0.34, "grad_norm": 1.1915000677108765, "learning_rate": 7.748175610916313e-06, "loss": 0.5842, "step": 5195 }, { "epoch": 0.34, "grad_norm": 1.2352783679962158, "learning_rate": 7.747302288842695e-06, "loss": 0.5917, "step": 5196 }, { "epoch": 0.34, "grad_norm": 1.3078744411468506, "learning_rate": 7.746428846691585e-06, "loss": 0.5945, "step": 5197 }, { "epoch": 0.34, "grad_norm": 1.220716118812561, "learning_rate": 7.745555284501155e-06, "loss": 0.5287, "step": 5198 }, { "epoch": 0.34, "grad_norm": 1.2227251529693604, "learning_rate": 7.744681602309584e-06, "loss": 0.5608, "step": 5199 }, { "epoch": 0.34, "grad_norm": 1.0873260498046875, "learning_rate": 7.743807800155065e-06, "loss": 0.5324, "step": 5200 }, { "epoch": 0.34, "grad_norm": 1.1691257953643799, "learning_rate": 7.742933878075785e-06, "loss": 0.5442, "step": 5201 }, { "epoch": 0.34, "grad_norm": 1.164760708808899, "learning_rate": 7.742059836109944e-06, "loss": 0.5612, "step": 5202 }, { "epoch": 0.34, "grad_norm": 1.1625614166259766, "learning_rate": 7.74118567429574e-06, "loss": 0.5769, "step": 5203 }, { "epoch": 0.34, "grad_norm": 1.0682306289672852, "learning_rate": 7.740311392671382e-06, "loss": 0.5716, "step": 5204 }, { "epoch": 0.34, "grad_norm": 1.152363657951355, "learning_rate": 7.739436991275085e-06, "loss": 0.5171, "step": 5205 }, { "epoch": 0.34, "grad_norm": 1.1723383665084839, "learning_rate": 7.738562470145063e-06, "loss": 0.5829, "step": 5206 }, { "epoch": 0.34, "grad_norm": 1.2142202854156494, "learning_rate": 7.737687829319542e-06, "loss": 0.521, "step": 5207 }, { "epoch": 0.34, "grad_norm": 1.157274603843689, "learning_rate": 7.736813068836749e-06, "loss": 0.5313, "step": 5208 }, { "epoch": 0.34, "grad_norm": 1.1021475791931152, "learning_rate": 7.735938188734918e-06, "loss": 0.5173, "step": 5209 }, { "epoch": 0.34, "grad_norm": 1.1971075534820557, "learning_rate": 7.735063189052287e-06, "loss": 0.572, "step": 5210 }, { "epoch": 0.34, "grad_norm": 1.1567403078079224, "learning_rate": 7.734188069827102e-06, "loss": 0.5567, "step": 5211 }, { "epoch": 0.34, "grad_norm": 1.1520408391952515, "learning_rate": 7.73331283109761e-06, "loss": 0.5281, "step": 5212 }, { "epoch": 0.34, "grad_norm": 1.243229627609253, "learning_rate": 7.732437472902067e-06, "loss": 0.5441, "step": 5213 }, { "epoch": 0.34, "grad_norm": 1.1661432981491089, "learning_rate": 7.731561995278732e-06, "loss": 0.5773, "step": 5214 }, { "epoch": 0.34, "grad_norm": 1.4144525527954102, "learning_rate": 7.730686398265868e-06, "loss": 0.6032, "step": 5215 }, { "epoch": 0.34, "grad_norm": 1.2225571870803833, "learning_rate": 7.729810681901747e-06, "loss": 0.5265, "step": 5216 }, { "epoch": 0.34, "grad_norm": 1.128629207611084, "learning_rate": 7.728934846224645e-06, "loss": 0.5644, "step": 5217 }, { "epoch": 0.34, "grad_norm": 1.3020881414413452, "learning_rate": 7.728058891272841e-06, "loss": 0.5558, "step": 5218 }, { "epoch": 0.34, "grad_norm": 1.1470425128936768, "learning_rate": 7.727182817084622e-06, "loss": 0.5628, "step": 5219 }, { "epoch": 0.34, "grad_norm": 1.1413060426712036, "learning_rate": 7.726306623698278e-06, "loss": 0.519, "step": 5220 }, { "epoch": 0.34, "grad_norm": 1.1870614290237427, "learning_rate": 7.725430311152104e-06, "loss": 0.5301, "step": 5221 }, { "epoch": 0.34, "grad_norm": 1.1426341533660889, "learning_rate": 7.724553879484406e-06, "loss": 0.5002, "step": 5222 }, { "epoch": 0.34, "grad_norm": 1.174915075302124, "learning_rate": 7.723677328733484e-06, "loss": 0.5198, "step": 5223 }, { "epoch": 0.34, "grad_norm": 1.1525828838348389, "learning_rate": 7.722800658937655e-06, "loss": 0.5895, "step": 5224 }, { "epoch": 0.34, "grad_norm": 1.149767518043518, "learning_rate": 7.721923870135235e-06, "loss": 0.5888, "step": 5225 }, { "epoch": 0.34, "grad_norm": 1.1108005046844482, "learning_rate": 7.721046962364542e-06, "loss": 0.5668, "step": 5226 }, { "epoch": 0.34, "grad_norm": 1.0522185564041138, "learning_rate": 7.72016993566391e-06, "loss": 0.5816, "step": 5227 }, { "epoch": 0.34, "grad_norm": 1.1513668298721313, "learning_rate": 7.719292790071666e-06, "loss": 0.4936, "step": 5228 }, { "epoch": 0.34, "grad_norm": 1.2263849973678589, "learning_rate": 7.718415525626151e-06, "loss": 0.6159, "step": 5229 }, { "epoch": 0.34, "grad_norm": 1.1379835605621338, "learning_rate": 7.717538142365709e-06, "loss": 0.5734, "step": 5230 }, { "epoch": 0.34, "grad_norm": 1.0490171909332275, "learning_rate": 7.716660640328684e-06, "loss": 0.4984, "step": 5231 }, { "epoch": 0.34, "grad_norm": 1.1292047500610352, "learning_rate": 7.71578301955343e-06, "loss": 0.5652, "step": 5232 }, { "epoch": 0.34, "grad_norm": 1.1050353050231934, "learning_rate": 7.714905280078308e-06, "loss": 0.5119, "step": 5233 }, { "epoch": 0.34, "grad_norm": 1.0038163661956787, "learning_rate": 7.714027421941683e-06, "loss": 0.5176, "step": 5234 }, { "epoch": 0.34, "grad_norm": 1.0485284328460693, "learning_rate": 7.71314944518192e-06, "loss": 0.5276, "step": 5235 }, { "epoch": 0.34, "grad_norm": 1.2852141857147217, "learning_rate": 7.712271349837395e-06, "loss": 0.5969, "step": 5236 }, { "epoch": 0.34, "grad_norm": 1.1886060237884521, "learning_rate": 7.711393135946487e-06, "loss": 0.5517, "step": 5237 }, { "epoch": 0.34, "grad_norm": 1.1083241701126099, "learning_rate": 7.71051480354758e-06, "loss": 0.5217, "step": 5238 }, { "epoch": 0.34, "grad_norm": 1.1188125610351562, "learning_rate": 7.709636352679066e-06, "loss": 0.5034, "step": 5239 }, { "epoch": 0.34, "grad_norm": 1.1814112663269043, "learning_rate": 7.708757783379337e-06, "loss": 0.5752, "step": 5240 }, { "epoch": 0.34, "grad_norm": 1.1370328664779663, "learning_rate": 7.707879095686792e-06, "loss": 0.5297, "step": 5241 }, { "epoch": 0.34, "grad_norm": 1.1115479469299316, "learning_rate": 7.70700028963984e-06, "loss": 0.5526, "step": 5242 }, { "epoch": 0.34, "grad_norm": 1.1328001022338867, "learning_rate": 7.706121365276888e-06, "loss": 0.5522, "step": 5243 }, { "epoch": 0.34, "grad_norm": 1.1014554500579834, "learning_rate": 7.705242322636354e-06, "loss": 0.5048, "step": 5244 }, { "epoch": 0.34, "grad_norm": 1.2058738470077515, "learning_rate": 7.70436316175666e-06, "loss": 0.5617, "step": 5245 }, { "epoch": 0.34, "grad_norm": 1.152478575706482, "learning_rate": 7.703483882676228e-06, "loss": 0.5472, "step": 5246 }, { "epoch": 0.34, "grad_norm": 1.1447254419326782, "learning_rate": 7.702604485433488e-06, "loss": 0.532, "step": 5247 }, { "epoch": 0.34, "grad_norm": 1.2146507501602173, "learning_rate": 7.701724970066882e-06, "loss": 0.5512, "step": 5248 }, { "epoch": 0.34, "grad_norm": 1.1853328943252563, "learning_rate": 7.700845336614846e-06, "loss": 0.6021, "step": 5249 }, { "epoch": 0.34, "grad_norm": 1.209696650505066, "learning_rate": 7.69996558511583e-06, "loss": 0.55, "step": 5250 }, { "epoch": 0.34, "grad_norm": 1.1487953662872314, "learning_rate": 7.699085715608284e-06, "loss": 0.5753, "step": 5251 }, { "epoch": 0.34, "grad_norm": 1.1855895519256592, "learning_rate": 7.698205728130664e-06, "loss": 0.5836, "step": 5252 }, { "epoch": 0.34, "grad_norm": 1.2886747121810913, "learning_rate": 7.697325622721435e-06, "loss": 0.5398, "step": 5253 }, { "epoch": 0.34, "grad_norm": 1.0700310468673706, "learning_rate": 7.696445399419062e-06, "loss": 0.5646, "step": 5254 }, { "epoch": 0.34, "grad_norm": 1.1519582271575928, "learning_rate": 7.695565058262015e-06, "loss": 0.5565, "step": 5255 }, { "epoch": 0.34, "grad_norm": 1.100731611251831, "learning_rate": 7.694684599288775e-06, "loss": 0.5199, "step": 5256 }, { "epoch": 0.34, "grad_norm": 1.1366620063781738, "learning_rate": 7.693804022537826e-06, "loss": 0.5514, "step": 5257 }, { "epoch": 0.34, "grad_norm": 1.1318026781082153, "learning_rate": 7.69292332804765e-06, "loss": 0.5366, "step": 5258 }, { "epoch": 0.34, "grad_norm": 1.1540831327438354, "learning_rate": 7.692042515856745e-06, "loss": 0.5242, "step": 5259 }, { "epoch": 0.34, "grad_norm": 1.1917681694030762, "learning_rate": 7.691161586003607e-06, "loss": 0.5693, "step": 5260 }, { "epoch": 0.34, "grad_norm": 1.14195716381073, "learning_rate": 7.69028053852674e-06, "loss": 0.5497, "step": 5261 }, { "epoch": 0.34, "grad_norm": 1.1555476188659668, "learning_rate": 7.68939937346465e-06, "loss": 0.5699, "step": 5262 }, { "epoch": 0.34, "grad_norm": 1.1529300212860107, "learning_rate": 7.688518090855853e-06, "loss": 0.5445, "step": 5263 }, { "epoch": 0.34, "grad_norm": 1.0845869779586792, "learning_rate": 7.687636690738867e-06, "loss": 0.5348, "step": 5264 }, { "epoch": 0.34, "grad_norm": 1.1459673643112183, "learning_rate": 7.686755173152216e-06, "loss": 0.5278, "step": 5265 }, { "epoch": 0.34, "grad_norm": 1.3909952640533447, "learning_rate": 7.685873538134427e-06, "loss": 0.532, "step": 5266 }, { "epoch": 0.34, "grad_norm": 1.0564026832580566, "learning_rate": 7.684991785724036e-06, "loss": 0.5863, "step": 5267 }, { "epoch": 0.34, "grad_norm": 1.216622233390808, "learning_rate": 7.684109915959582e-06, "loss": 0.4989, "step": 5268 }, { "epoch": 0.34, "grad_norm": 1.1357160806655884, "learning_rate": 7.683227928879608e-06, "loss": 0.5591, "step": 5269 }, { "epoch": 0.34, "grad_norm": 1.0995501279830933, "learning_rate": 7.682345824522663e-06, "loss": 0.4943, "step": 5270 }, { "epoch": 0.34, "grad_norm": 1.1795320510864258, "learning_rate": 7.681463602927305e-06, "loss": 0.579, "step": 5271 }, { "epoch": 0.34, "grad_norm": 1.1692336797714233, "learning_rate": 7.680581264132088e-06, "loss": 0.557, "step": 5272 }, { "epoch": 0.34, "grad_norm": 1.0429553985595703, "learning_rate": 7.679698808175582e-06, "loss": 0.5425, "step": 5273 }, { "epoch": 0.34, "grad_norm": 1.0731685161590576, "learning_rate": 7.678816235096353e-06, "loss": 0.5295, "step": 5274 }, { "epoch": 0.34, "grad_norm": 1.271051287651062, "learning_rate": 7.67793354493298e-06, "loss": 0.5548, "step": 5275 }, { "epoch": 0.34, "grad_norm": 1.1028194427490234, "learning_rate": 7.67705073772404e-06, "loss": 0.5361, "step": 5276 }, { "epoch": 0.34, "grad_norm": 1.353411316871643, "learning_rate": 7.67616781350812e-06, "loss": 0.5886, "step": 5277 }, { "epoch": 0.34, "grad_norm": 1.1987680196762085, "learning_rate": 7.675284772323808e-06, "loss": 0.5395, "step": 5278 }, { "epoch": 0.34, "grad_norm": 1.1704400777816772, "learning_rate": 7.6744016142097e-06, "loss": 0.5329, "step": 5279 }, { "epoch": 0.34, "grad_norm": 1.1354113817214966, "learning_rate": 7.6735183392044e-06, "loss": 0.5231, "step": 5280 }, { "epoch": 0.34, "grad_norm": 1.1063940525054932, "learning_rate": 7.672634947346512e-06, "loss": 0.5054, "step": 5281 }, { "epoch": 0.34, "grad_norm": 1.096911072731018, "learning_rate": 7.671751438674644e-06, "loss": 0.4968, "step": 5282 }, { "epoch": 0.34, "grad_norm": 1.1291542053222656, "learning_rate": 7.670867813227415e-06, "loss": 0.5226, "step": 5283 }, { "epoch": 0.34, "grad_norm": 1.0952372550964355, "learning_rate": 7.669984071043442e-06, "loss": 0.5251, "step": 5284 }, { "epoch": 0.34, "grad_norm": 1.0634998083114624, "learning_rate": 7.669100212161356e-06, "loss": 0.5646, "step": 5285 }, { "epoch": 0.34, "grad_norm": 1.1296523809432983, "learning_rate": 7.668216236619786e-06, "loss": 0.5104, "step": 5286 }, { "epoch": 0.34, "grad_norm": 1.1469058990478516, "learning_rate": 7.667332144457369e-06, "loss": 0.5209, "step": 5287 }, { "epoch": 0.34, "grad_norm": 1.217813491821289, "learning_rate": 7.666447935712743e-06, "loss": 0.5444, "step": 5288 }, { "epoch": 0.34, "grad_norm": 1.2202595472335815, "learning_rate": 7.665563610424562e-06, "loss": 0.551, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.1932369470596313, "learning_rate": 7.664679168631468e-06, "loss": 0.5769, "step": 5290 }, { "epoch": 0.34, "grad_norm": 1.1389732360839844, "learning_rate": 7.663794610372124e-06, "loss": 0.5547, "step": 5291 }, { "epoch": 0.34, "grad_norm": 1.1729873418807983, "learning_rate": 7.662909935685193e-06, "loss": 0.573, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.1421481370925903, "learning_rate": 7.662025144609336e-06, "loss": 0.5295, "step": 5293 }, { "epoch": 0.34, "grad_norm": 1.0870554447174072, "learning_rate": 7.661140237183228e-06, "loss": 0.5595, "step": 5294 }, { "epoch": 0.34, "grad_norm": 1.0917867422103882, "learning_rate": 7.660255213445549e-06, "loss": 0.5031, "step": 5295 }, { "epoch": 0.34, "grad_norm": 1.0609972476959229, "learning_rate": 7.659370073434974e-06, "loss": 0.5329, "step": 5296 }, { "epoch": 0.34, "grad_norm": 1.1536729335784912, "learning_rate": 7.658484817190199e-06, "loss": 0.5843, "step": 5297 }, { "epoch": 0.34, "grad_norm": 1.2865275144577026, "learning_rate": 7.657599444749907e-06, "loss": 0.5569, "step": 5298 }, { "epoch": 0.34, "grad_norm": 1.2253623008728027, "learning_rate": 7.656713956152803e-06, "loss": 0.6099, "step": 5299 }, { "epoch": 0.34, "grad_norm": 1.2677398920059204, "learning_rate": 7.655828351437587e-06, "loss": 0.5929, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.231753945350647, "learning_rate": 7.654942630642965e-06, "loss": 0.5565, "step": 5301 }, { "epoch": 0.34, "grad_norm": 1.2104440927505493, "learning_rate": 7.65405679380765e-06, "loss": 0.547, "step": 5302 }, { "epoch": 0.34, "grad_norm": 1.2164151668548584, "learning_rate": 7.65317084097036e-06, "loss": 0.5511, "step": 5303 }, { "epoch": 0.34, "grad_norm": 1.1260902881622314, "learning_rate": 7.652284772169816e-06, "loss": 0.5658, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.2369928359985352, "learning_rate": 7.65139858744475e-06, "loss": 0.543, "step": 5305 }, { "epoch": 0.34, "grad_norm": 1.141698956489563, "learning_rate": 7.650512286833891e-06, "loss": 0.5813, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.1987355947494507, "learning_rate": 7.649625870375981e-06, "loss": 0.6015, "step": 5307 }, { "epoch": 0.34, "grad_norm": 1.205248475074768, "learning_rate": 7.648739338109759e-06, "loss": 0.5337, "step": 5308 }, { "epoch": 0.34, "grad_norm": 1.184255599975586, "learning_rate": 7.647852690073973e-06, "loss": 0.554, "step": 5309 }, { "epoch": 0.34, "grad_norm": 1.3334485292434692, "learning_rate": 7.64696592630738e-06, "loss": 0.5168, "step": 5310 }, { "epoch": 0.34, "grad_norm": 1.154405951499939, "learning_rate": 7.646079046848732e-06, "loss": 0.5274, "step": 5311 }, { "epoch": 0.34, "grad_norm": 1.1037330627441406, "learning_rate": 7.645192051736799e-06, "loss": 0.5476, "step": 5312 }, { "epoch": 0.34, "grad_norm": 1.195655345916748, "learning_rate": 7.644304941010345e-06, "loss": 0.617, "step": 5313 }, { "epoch": 0.34, "grad_norm": 1.1694222688674927, "learning_rate": 7.643417714708144e-06, "loss": 0.5243, "step": 5314 }, { "epoch": 0.34, "grad_norm": 1.0447254180908203, "learning_rate": 7.642530372868974e-06, "loss": 0.5345, "step": 5315 }, { "epoch": 0.34, "grad_norm": 1.149206519126892, "learning_rate": 7.641642915531622e-06, "loss": 0.5975, "step": 5316 }, { "epoch": 0.34, "grad_norm": 1.095477819442749, "learning_rate": 7.640755342734872e-06, "loss": 0.5408, "step": 5317 }, { "epoch": 0.34, "grad_norm": 1.1131772994995117, "learning_rate": 7.63986765451752e-06, "loss": 0.5198, "step": 5318 }, { "epoch": 0.34, "grad_norm": 1.2275123596191406, "learning_rate": 7.638979850918364e-06, "loss": 0.5335, "step": 5319 }, { "epoch": 0.34, "grad_norm": 1.0565706491470337, "learning_rate": 7.638091931976206e-06, "loss": 0.5424, "step": 5320 }, { "epoch": 0.34, "grad_norm": 1.2085338830947876, "learning_rate": 7.637203897729856e-06, "loss": 0.5471, "step": 5321 }, { "epoch": 0.34, "grad_norm": 1.2310467958450317, "learning_rate": 7.636315748218129e-06, "loss": 0.5711, "step": 5322 }, { "epoch": 0.34, "grad_norm": 1.0657742023468018, "learning_rate": 7.63542748347984e-06, "loss": 0.5095, "step": 5323 }, { "epoch": 0.34, "grad_norm": 1.033154845237732, "learning_rate": 7.634539103553818e-06, "loss": 0.5255, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.0965481996536255, "learning_rate": 7.633650608478887e-06, "loss": 0.5025, "step": 5325 }, { "epoch": 0.34, "grad_norm": 1.1149483919143677, "learning_rate": 7.632761998293884e-06, "loss": 0.5308, "step": 5326 }, { "epoch": 0.34, "grad_norm": 1.128082036972046, "learning_rate": 7.631873273037647e-06, "loss": 0.5496, "step": 5327 }, { "epoch": 0.34, "grad_norm": 1.1930826902389526, "learning_rate": 7.630984432749017e-06, "loss": 0.5526, "step": 5328 }, { "epoch": 0.34, "grad_norm": 1.170544981956482, "learning_rate": 7.630095477466848e-06, "loss": 0.6423, "step": 5329 }, { "epoch": 0.34, "grad_norm": 1.1774237155914307, "learning_rate": 7.629206407229992e-06, "loss": 0.558, "step": 5330 }, { "epoch": 0.34, "grad_norm": 1.1075693368911743, "learning_rate": 7.6283172220773065e-06, "loss": 0.5306, "step": 5331 }, { "epoch": 0.34, "grad_norm": 1.152161955833435, "learning_rate": 7.627427922047654e-06, "loss": 0.5664, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.1938809156417847, "learning_rate": 7.62653850717991e-06, "loss": 0.568, "step": 5333 }, { "epoch": 0.34, "grad_norm": 1.0717967748641968, "learning_rate": 7.6256489775129415e-06, "loss": 0.5096, "step": 5334 }, { "epoch": 0.34, "grad_norm": 1.2157598733901978, "learning_rate": 7.624759333085632e-06, "loss": 0.561, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.2158870697021484, "learning_rate": 7.623869573936863e-06, "loss": 0.5557, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.3301523923873901, "learning_rate": 7.6229797001055235e-06, "loss": 0.5762, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.2244553565979004, "learning_rate": 7.622089711630512e-06, "loss": 0.5854, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.2638280391693115, "learning_rate": 7.6211996085507224e-06, "loss": 0.5387, "step": 5339 }, { "epoch": 0.34, "grad_norm": 1.1498723030090332, "learning_rate": 7.62030939090506e-06, "loss": 0.5577, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.2727370262145996, "learning_rate": 7.619419058732434e-06, "loss": 0.5908, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.0758169889450073, "learning_rate": 7.6185286120717615e-06, "loss": 0.5878, "step": 5342 }, { "epoch": 0.34, "grad_norm": 1.2126446962356567, "learning_rate": 7.617638050961957e-06, "loss": 0.5692, "step": 5343 }, { "epoch": 0.34, "grad_norm": 1.2383177280426025, "learning_rate": 7.616747375441949e-06, "loss": 0.5178, "step": 5344 }, { "epoch": 0.35, "grad_norm": 1.2239452600479126, "learning_rate": 7.6158565855506635e-06, "loss": 0.5377, "step": 5345 }, { "epoch": 0.35, "grad_norm": 1.189140796661377, "learning_rate": 7.6149656813270355e-06, "loss": 0.5203, "step": 5346 }, { "epoch": 0.35, "grad_norm": 1.1163347959518433, "learning_rate": 7.614074662810005e-06, "loss": 0.5497, "step": 5347 }, { "epoch": 0.35, "grad_norm": 1.1895772218704224, "learning_rate": 7.613183530038515e-06, "loss": 0.5457, "step": 5348 }, { "epoch": 0.35, "grad_norm": 1.161159873008728, "learning_rate": 7.612292283051515e-06, "loss": 0.5617, "step": 5349 }, { "epoch": 0.35, "grad_norm": 1.1358160972595215, "learning_rate": 7.611400921887958e-06, "loss": 0.5392, "step": 5350 }, { "epoch": 0.35, "grad_norm": 1.0804537534713745, "learning_rate": 7.610509446586806e-06, "loss": 0.5252, "step": 5351 }, { "epoch": 0.35, "grad_norm": 1.269178032875061, "learning_rate": 7.60961785718702e-06, "loss": 0.5468, "step": 5352 }, { "epoch": 0.35, "grad_norm": 1.2356646060943604, "learning_rate": 7.60872615372757e-06, "loss": 0.5703, "step": 5353 }, { "epoch": 0.35, "grad_norm": 1.1670888662338257, "learning_rate": 7.607834336247433e-06, "loss": 0.5566, "step": 5354 }, { "epoch": 0.35, "grad_norm": 1.1419731378555298, "learning_rate": 7.6069424047855824e-06, "loss": 0.5603, "step": 5355 }, { "epoch": 0.35, "grad_norm": 1.1418609619140625, "learning_rate": 7.606050359381007e-06, "loss": 0.5323, "step": 5356 }, { "epoch": 0.35, "grad_norm": 1.0949897766113281, "learning_rate": 7.6051582000726945e-06, "loss": 0.5835, "step": 5357 }, { "epoch": 0.35, "grad_norm": 1.1713711023330688, "learning_rate": 7.604265926899639e-06, "loss": 0.5579, "step": 5358 }, { "epoch": 0.35, "grad_norm": 1.12054443359375, "learning_rate": 7.603373539900835e-06, "loss": 0.5274, "step": 5359 }, { "epoch": 0.35, "grad_norm": 1.1576460599899292, "learning_rate": 7.602481039115295e-06, "loss": 0.5101, "step": 5360 }, { "epoch": 0.35, "grad_norm": 1.2430750131607056, "learning_rate": 7.601588424582021e-06, "loss": 0.5622, "step": 5361 }, { "epoch": 0.35, "grad_norm": 1.1039743423461914, "learning_rate": 7.600695696340029e-06, "loss": 0.5781, "step": 5362 }, { "epoch": 0.35, "grad_norm": 1.0920823812484741, "learning_rate": 7.5998028544283395e-06, "loss": 0.5417, "step": 5363 }, { "epoch": 0.35, "grad_norm": 1.1716045141220093, "learning_rate": 7.598909898885973e-06, "loss": 0.5844, "step": 5364 }, { "epoch": 0.35, "grad_norm": 1.138023853302002, "learning_rate": 7.598016829751959e-06, "loss": 0.5332, "step": 5365 }, { "epoch": 0.35, "grad_norm": 1.1980936527252197, "learning_rate": 7.597123647065336e-06, "loss": 0.5567, "step": 5366 }, { "epoch": 0.35, "grad_norm": 1.1312016248703003, "learning_rate": 7.596230350865137e-06, "loss": 0.5496, "step": 5367 }, { "epoch": 0.35, "grad_norm": 1.1401221752166748, "learning_rate": 7.595336941190409e-06, "loss": 0.555, "step": 5368 }, { "epoch": 0.35, "grad_norm": 1.1091283559799194, "learning_rate": 7.594443418080197e-06, "loss": 0.5451, "step": 5369 }, { "epoch": 0.35, "grad_norm": 1.3306376934051514, "learning_rate": 7.593549781573559e-06, "loss": 0.5962, "step": 5370 }, { "epoch": 0.35, "grad_norm": 1.123010516166687, "learning_rate": 7.592656031709551e-06, "loss": 0.5333, "step": 5371 }, { "epoch": 0.35, "grad_norm": 1.134989619255066, "learning_rate": 7.591762168527237e-06, "loss": 0.5665, "step": 5372 }, { "epoch": 0.35, "grad_norm": 1.2090617418289185, "learning_rate": 7.5908681920656834e-06, "loss": 0.5721, "step": 5373 }, { "epoch": 0.35, "grad_norm": 1.158243179321289, "learning_rate": 7.589974102363968e-06, "loss": 0.5466, "step": 5374 }, { "epoch": 0.35, "grad_norm": 1.1269898414611816, "learning_rate": 7.589079899461167e-06, "loss": 0.5427, "step": 5375 }, { "epoch": 0.35, "grad_norm": 1.185671329498291, "learning_rate": 7.588185583396363e-06, "loss": 0.5447, "step": 5376 }, { "epoch": 0.35, "grad_norm": 1.1422100067138672, "learning_rate": 7.587291154208645e-06, "loss": 0.4959, "step": 5377 }, { "epoch": 0.35, "grad_norm": 1.1472878456115723, "learning_rate": 7.586396611937104e-06, "loss": 0.5123, "step": 5378 }, { "epoch": 0.35, "grad_norm": 1.1266852617263794, "learning_rate": 7.5855019566208425e-06, "loss": 0.5901, "step": 5379 }, { "epoch": 0.35, "grad_norm": 1.1988399028778076, "learning_rate": 7.58460718829896e-06, "loss": 0.5922, "step": 5380 }, { "epoch": 0.35, "grad_norm": 1.2177042961120605, "learning_rate": 7.583712307010566e-06, "loss": 0.5735, "step": 5381 }, { "epoch": 0.35, "grad_norm": 1.3645920753479004, "learning_rate": 7.5828173127947725e-06, "loss": 0.5975, "step": 5382 }, { "epoch": 0.35, "grad_norm": 1.1094369888305664, "learning_rate": 7.5819222056907e-06, "loss": 0.5866, "step": 5383 }, { "epoch": 0.35, "grad_norm": 1.0733152627944946, "learning_rate": 7.581026985737467e-06, "loss": 0.523, "step": 5384 }, { "epoch": 0.35, "grad_norm": 1.054534673690796, "learning_rate": 7.580131652974203e-06, "loss": 0.5226, "step": 5385 }, { "epoch": 0.35, "grad_norm": 1.2049810886383057, "learning_rate": 7.579236207440044e-06, "loss": 0.6115, "step": 5386 }, { "epoch": 0.35, "grad_norm": 1.1013836860656738, "learning_rate": 7.578340649174123e-06, "loss": 0.5751, "step": 5387 }, { "epoch": 0.35, "grad_norm": 1.2659319639205933, "learning_rate": 7.577444978215584e-06, "loss": 0.5987, "step": 5388 }, { "epoch": 0.35, "grad_norm": 1.252727746963501, "learning_rate": 7.576549194603578e-06, "loss": 0.5322, "step": 5389 }, { "epoch": 0.35, "grad_norm": 1.147745132446289, "learning_rate": 7.575653298377252e-06, "loss": 0.5751, "step": 5390 }, { "epoch": 0.35, "grad_norm": 1.0666148662567139, "learning_rate": 7.574757289575765e-06, "loss": 0.5547, "step": 5391 }, { "epoch": 0.35, "grad_norm": 1.1059188842773438, "learning_rate": 7.5738611682382816e-06, "loss": 0.5758, "step": 5392 }, { "epoch": 0.35, "grad_norm": 1.228482723236084, "learning_rate": 7.5729649344039655e-06, "loss": 0.5526, "step": 5393 }, { "epoch": 0.35, "grad_norm": 1.1480543613433838, "learning_rate": 7.572068588111991e-06, "loss": 0.5787, "step": 5394 }, { "epoch": 0.35, "grad_norm": 1.0857813358306885, "learning_rate": 7.5711721294015364e-06, "loss": 0.5492, "step": 5395 }, { "epoch": 0.35, "grad_norm": 1.0935235023498535, "learning_rate": 7.570275558311779e-06, "loss": 0.5598, "step": 5396 }, { "epoch": 0.35, "grad_norm": 1.107990026473999, "learning_rate": 7.56937887488191e-06, "loss": 0.5246, "step": 5397 }, { "epoch": 0.35, "grad_norm": 1.1287130117416382, "learning_rate": 7.568482079151118e-06, "loss": 0.5351, "step": 5398 }, { "epoch": 0.35, "grad_norm": 1.1564990282058716, "learning_rate": 7.5675851711586025e-06, "loss": 0.5713, "step": 5399 }, { "epoch": 0.35, "grad_norm": 1.1710011959075928, "learning_rate": 7.566688150943563e-06, "loss": 0.5193, "step": 5400 }, { "epoch": 0.35, "grad_norm": 1.1414436101913452, "learning_rate": 7.565791018545208e-06, "loss": 0.5667, "step": 5401 }, { "epoch": 0.35, "grad_norm": 1.1396621465682983, "learning_rate": 7.564893774002745e-06, "loss": 0.5561, "step": 5402 }, { "epoch": 0.35, "grad_norm": 1.1161457300186157, "learning_rate": 7.563996417355395e-06, "loss": 0.5621, "step": 5403 }, { "epoch": 0.35, "grad_norm": 1.0992064476013184, "learning_rate": 7.563098948642374e-06, "loss": 0.5172, "step": 5404 }, { "epoch": 0.35, "grad_norm": 1.1505564451217651, "learning_rate": 7.562201367902914e-06, "loss": 0.5516, "step": 5405 }, { "epoch": 0.35, "grad_norm": 1.1556353569030762, "learning_rate": 7.56130367517624e-06, "loss": 0.5609, "step": 5406 }, { "epoch": 0.35, "grad_norm": 1.1305575370788574, "learning_rate": 7.5604058705015926e-06, "loss": 0.587, "step": 5407 }, { "epoch": 0.35, "grad_norm": 1.1263940334320068, "learning_rate": 7.5595079539182106e-06, "loss": 0.5284, "step": 5408 }, { "epoch": 0.35, "grad_norm": 1.1622871160507202, "learning_rate": 7.558609925465338e-06, "loss": 0.5413, "step": 5409 }, { "epoch": 0.35, "grad_norm": 1.2562532424926758, "learning_rate": 7.557711785182228e-06, "loss": 0.5296, "step": 5410 }, { "epoch": 0.35, "grad_norm": 1.0573279857635498, "learning_rate": 7.556813533108134e-06, "loss": 0.5205, "step": 5411 }, { "epoch": 0.35, "grad_norm": 1.2013992071151733, "learning_rate": 7.555915169282317e-06, "loss": 0.5755, "step": 5412 }, { "epoch": 0.35, "grad_norm": 1.164459228515625, "learning_rate": 7.555016693744044e-06, "loss": 0.5415, "step": 5413 }, { "epoch": 0.35, "grad_norm": 1.2181628942489624, "learning_rate": 7.554118106532582e-06, "loss": 0.5642, "step": 5414 }, { "epoch": 0.35, "grad_norm": 1.2768744230270386, "learning_rate": 7.553219407687207e-06, "loss": 0.5864, "step": 5415 }, { "epoch": 0.35, "grad_norm": 1.0628116130828857, "learning_rate": 7.5523205972472e-06, "loss": 0.5341, "step": 5416 }, { "epoch": 0.35, "grad_norm": 1.309834361076355, "learning_rate": 7.551421675251844e-06, "loss": 0.5682, "step": 5417 }, { "epoch": 0.35, "grad_norm": 1.259482979774475, "learning_rate": 7.55052264174043e-06, "loss": 0.5486, "step": 5418 }, { "epoch": 0.35, "grad_norm": 1.1518080234527588, "learning_rate": 7.549623496752251e-06, "loss": 0.5749, "step": 5419 }, { "epoch": 0.35, "grad_norm": 1.1368176937103271, "learning_rate": 7.548724240326607e-06, "loss": 0.5287, "step": 5420 }, { "epoch": 0.35, "grad_norm": 1.2166739702224731, "learning_rate": 7.547824872502803e-06, "loss": 0.5736, "step": 5421 }, { "epoch": 0.35, "grad_norm": 1.2698787450790405, "learning_rate": 7.546925393320147e-06, "loss": 0.5818, "step": 5422 }, { "epoch": 0.35, "grad_norm": 1.1001555919647217, "learning_rate": 7.546025802817953e-06, "loss": 0.5132, "step": 5423 }, { "epoch": 0.35, "grad_norm": 1.222825050354004, "learning_rate": 7.54512610103554e-06, "loss": 0.5438, "step": 5424 }, { "epoch": 0.35, "grad_norm": 1.3056999444961548, "learning_rate": 7.544226288012233e-06, "loss": 0.5483, "step": 5425 }, { "epoch": 0.35, "grad_norm": 1.2389544248580933, "learning_rate": 7.543326363787358e-06, "loss": 0.5222, "step": 5426 }, { "epoch": 0.35, "grad_norm": 1.2160736322402954, "learning_rate": 7.54242632840025e-06, "loss": 0.5565, "step": 5427 }, { "epoch": 0.35, "grad_norm": 1.1134592294692993, "learning_rate": 7.541526181890245e-06, "loss": 0.5583, "step": 5428 }, { "epoch": 0.35, "grad_norm": 1.1329703330993652, "learning_rate": 7.540625924296689e-06, "loss": 0.4991, "step": 5429 }, { "epoch": 0.35, "grad_norm": 1.1154366731643677, "learning_rate": 7.53972555565893e-06, "loss": 0.5342, "step": 5430 }, { "epoch": 0.35, "grad_norm": 1.2119797468185425, "learning_rate": 7.538825076016318e-06, "loss": 0.5226, "step": 5431 }, { "epoch": 0.35, "grad_norm": 1.1957484483718872, "learning_rate": 7.537924485408212e-06, "loss": 0.5603, "step": 5432 }, { "epoch": 0.35, "grad_norm": 1.1601412296295166, "learning_rate": 7.537023783873976e-06, "loss": 0.4942, "step": 5433 }, { "epoch": 0.35, "grad_norm": 1.1691296100616455, "learning_rate": 7.536122971452975e-06, "loss": 0.5571, "step": 5434 }, { "epoch": 0.35, "grad_norm": 1.1438825130462646, "learning_rate": 7.535222048184581e-06, "loss": 0.5561, "step": 5435 }, { "epoch": 0.35, "grad_norm": 1.2145202159881592, "learning_rate": 7.534321014108175e-06, "loss": 0.573, "step": 5436 }, { "epoch": 0.35, "grad_norm": 1.1051675081253052, "learning_rate": 7.533419869263135e-06, "loss": 0.5113, "step": 5437 }, { "epoch": 0.35, "grad_norm": 1.1864917278289795, "learning_rate": 7.532518613688847e-06, "loss": 0.5849, "step": 5438 }, { "epoch": 0.35, "grad_norm": 1.0020220279693604, "learning_rate": 7.531617247424707e-06, "loss": 0.5316, "step": 5439 }, { "epoch": 0.35, "grad_norm": 1.1390008926391602, "learning_rate": 7.530715770510108e-06, "loss": 0.554, "step": 5440 }, { "epoch": 0.35, "grad_norm": 1.179360270500183, "learning_rate": 7.529814182984451e-06, "loss": 0.519, "step": 5441 }, { "epoch": 0.35, "grad_norm": 1.2843286991119385, "learning_rate": 7.528912484887144e-06, "loss": 0.5735, "step": 5442 }, { "epoch": 0.35, "grad_norm": 1.3086591958999634, "learning_rate": 7.528010676257596e-06, "loss": 0.5667, "step": 5443 }, { "epoch": 0.35, "grad_norm": 1.1810839176177979, "learning_rate": 7.527108757135224e-06, "loss": 0.5242, "step": 5444 }, { "epoch": 0.35, "grad_norm": 1.1344958543777466, "learning_rate": 7.52620672755945e-06, "loss": 0.5749, "step": 5445 }, { "epoch": 0.35, "grad_norm": 1.1840623617172241, "learning_rate": 7.525304587569696e-06, "loss": 0.5483, "step": 5446 }, { "epoch": 0.35, "grad_norm": 1.3128767013549805, "learning_rate": 7.524402337205395e-06, "loss": 0.591, "step": 5447 }, { "epoch": 0.35, "grad_norm": 1.1242873668670654, "learning_rate": 7.52349997650598e-06, "loss": 0.5122, "step": 5448 }, { "epoch": 0.35, "grad_norm": 1.0768061876296997, "learning_rate": 7.522597505510892e-06, "loss": 0.5484, "step": 5449 }, { "epoch": 0.35, "grad_norm": 1.0386093854904175, "learning_rate": 7.521694924259575e-06, "loss": 0.4671, "step": 5450 }, { "epoch": 0.35, "grad_norm": 1.2477993965148926, "learning_rate": 7.520792232791479e-06, "loss": 0.5293, "step": 5451 }, { "epoch": 0.35, "grad_norm": 1.2595700025558472, "learning_rate": 7.519889431146059e-06, "loss": 0.5511, "step": 5452 }, { "epoch": 0.35, "grad_norm": 1.2430750131607056, "learning_rate": 7.5189865193627735e-06, "loss": 0.5549, "step": 5453 }, { "epoch": 0.35, "grad_norm": 1.1728750467300415, "learning_rate": 7.518083497481086e-06, "loss": 0.5424, "step": 5454 }, { "epoch": 0.35, "grad_norm": 1.1700879335403442, "learning_rate": 7.517180365540464e-06, "loss": 0.5987, "step": 5455 }, { "epoch": 0.35, "grad_norm": 1.0949169397354126, "learning_rate": 7.516277123580384e-06, "loss": 0.5629, "step": 5456 }, { "epoch": 0.35, "grad_norm": 1.2231053113937378, "learning_rate": 7.515373771640324e-06, "loss": 0.5915, "step": 5457 }, { "epoch": 0.35, "grad_norm": 1.1767629384994507, "learning_rate": 7.514470309759765e-06, "loss": 0.5722, "step": 5458 }, { "epoch": 0.35, "grad_norm": 1.1205991506576538, "learning_rate": 7.513566737978196e-06, "loss": 0.5555, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.3602882623672485, "learning_rate": 7.51266305633511e-06, "loss": 0.5289, "step": 5460 }, { "epoch": 0.35, "grad_norm": 1.0612502098083496, "learning_rate": 7.511759264870005e-06, "loss": 0.5496, "step": 5461 }, { "epoch": 0.35, "grad_norm": 1.1239593029022217, "learning_rate": 7.510855363622382e-06, "loss": 0.5426, "step": 5462 }, { "epoch": 0.35, "grad_norm": 1.3332877159118652, "learning_rate": 7.50995135263175e-06, "loss": 0.5939, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.229878306388855, "learning_rate": 7.509047231937619e-06, "loss": 0.5657, "step": 5464 }, { "epoch": 0.35, "grad_norm": 1.2119086980819702, "learning_rate": 7.508143001579508e-06, "loss": 0.598, "step": 5465 }, { "epoch": 0.35, "grad_norm": 1.1171534061431885, "learning_rate": 7.507238661596938e-06, "loss": 0.536, "step": 5466 }, { "epoch": 0.35, "grad_norm": 1.1070027351379395, "learning_rate": 7.506334212029433e-06, "loss": 0.6254, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.124298334121704, "learning_rate": 7.505429652916529e-06, "loss": 0.5202, "step": 5468 }, { "epoch": 0.35, "grad_norm": 1.1564234495162964, "learning_rate": 7.5045249842977565e-06, "loss": 0.5357, "step": 5469 }, { "epoch": 0.35, "grad_norm": 1.2027894258499146, "learning_rate": 7.50362020621266e-06, "loss": 0.504, "step": 5470 }, { "epoch": 0.35, "grad_norm": 1.1728402376174927, "learning_rate": 7.502715318700784e-06, "loss": 0.5797, "step": 5471 }, { "epoch": 0.35, "grad_norm": 1.1693511009216309, "learning_rate": 7.501810321801677e-06, "loss": 0.5573, "step": 5472 }, { "epoch": 0.35, "grad_norm": 1.1928865909576416, "learning_rate": 7.500905215554896e-06, "loss": 0.59, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.1045122146606445, "learning_rate": 7.500000000000001e-06, "loss": 0.5441, "step": 5474 }, { "epoch": 0.35, "grad_norm": 1.1235836744308472, "learning_rate": 7.499094675176556e-06, "loss": 0.5186, "step": 5475 }, { "epoch": 0.35, "grad_norm": 1.1159747838974, "learning_rate": 7.49818924112413e-06, "loss": 0.5736, "step": 5476 }, { "epoch": 0.35, "grad_norm": 1.164484977722168, "learning_rate": 7.497283697882298e-06, "loss": 0.5372, "step": 5477 }, { "epoch": 0.35, "grad_norm": 1.2185794115066528, "learning_rate": 7.4963780454906385e-06, "loss": 0.6504, "step": 5478 }, { "epoch": 0.35, "grad_norm": 1.2706176042556763, "learning_rate": 7.495472283988735e-06, "loss": 0.5947, "step": 5479 }, { "epoch": 0.35, "grad_norm": 1.1351563930511475, "learning_rate": 7.494566413416178e-06, "loss": 0.536, "step": 5480 }, { "epoch": 0.35, "grad_norm": 1.2322174310684204, "learning_rate": 7.493660433812558e-06, "loss": 0.541, "step": 5481 }, { "epoch": 0.35, "grad_norm": 1.0683743953704834, "learning_rate": 7.4927543452174735e-06, "loss": 0.5168, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.2550463676452637, "learning_rate": 7.4918481476705285e-06, "loss": 0.5474, "step": 5483 }, { "epoch": 0.35, "grad_norm": 1.1691628694534302, "learning_rate": 7.49094184121133e-06, "loss": 0.5483, "step": 5484 }, { "epoch": 0.35, "grad_norm": 1.1408016681671143, "learning_rate": 7.490035425879491e-06, "loss": 0.5468, "step": 5485 }, { "epoch": 0.35, "grad_norm": 1.1903530359268188, "learning_rate": 7.489128901714627e-06, "loss": 0.5533, "step": 5486 }, { "epoch": 0.35, "grad_norm": 1.1244807243347168, "learning_rate": 7.488222268756361e-06, "loss": 0.5385, "step": 5487 }, { "epoch": 0.35, "grad_norm": 1.1444770097732544, "learning_rate": 7.487315527044319e-06, "loss": 0.5071, "step": 5488 }, { "epoch": 0.35, "grad_norm": 1.1901558637619019, "learning_rate": 7.486408676618135e-06, "loss": 0.5715, "step": 5489 }, { "epoch": 0.35, "grad_norm": 1.266455888748169, "learning_rate": 7.48550171751744e-06, "loss": 0.5841, "step": 5490 }, { "epoch": 0.35, "grad_norm": 1.2091716527938843, "learning_rate": 7.484594649781878e-06, "loss": 0.5606, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.1493537425994873, "learning_rate": 7.483687473451096e-06, "loss": 0.5225, "step": 5492 }, { "epoch": 0.35, "grad_norm": 1.1401935815811157, "learning_rate": 7.48278018856474e-06, "loss": 0.5441, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.1633353233337402, "learning_rate": 7.48187279516247e-06, "loss": 0.6318, "step": 5494 }, { "epoch": 0.35, "grad_norm": 1.0204540491104126, "learning_rate": 7.480965293283942e-06, "loss": 0.5273, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.2524417638778687, "learning_rate": 7.480057682968822e-06, "loss": 0.5525, "step": 5496 }, { "epoch": 0.35, "grad_norm": 1.084473729133606, "learning_rate": 7.47914996425678e-06, "loss": 0.5702, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.0942329168319702, "learning_rate": 7.478242137187488e-06, "loss": 0.4975, "step": 5498 }, { "epoch": 0.35, "grad_norm": 1.241229772567749, "learning_rate": 7.477334201800628e-06, "loss": 0.5655, "step": 5499 }, { "epoch": 0.36, "grad_norm": 1.1674082279205322, "learning_rate": 7.476426158135881e-06, "loss": 0.5128, "step": 5500 }, { "epoch": 0.36, "grad_norm": 1.13178288936615, "learning_rate": 7.475518006232936e-06, "loss": 0.5018, "step": 5501 }, { "epoch": 0.36, "grad_norm": 1.13345468044281, "learning_rate": 7.474609746131485e-06, "loss": 0.5461, "step": 5502 }, { "epoch": 0.36, "grad_norm": 1.281059980392456, "learning_rate": 7.473701377871227e-06, "loss": 0.5457, "step": 5503 }, { "epoch": 0.36, "grad_norm": 1.1241029500961304, "learning_rate": 7.4727929014918655e-06, "loss": 0.5593, "step": 5504 }, { "epoch": 0.36, "grad_norm": 1.1471863985061646, "learning_rate": 7.471884317033103e-06, "loss": 0.5792, "step": 5505 }, { "epoch": 0.36, "grad_norm": 1.0752359628677368, "learning_rate": 7.470975624534658e-06, "loss": 0.5486, "step": 5506 }, { "epoch": 0.36, "grad_norm": 1.0897670984268188, "learning_rate": 7.470066824036242e-06, "loss": 0.5567, "step": 5507 }, { "epoch": 0.36, "grad_norm": 1.0948748588562012, "learning_rate": 7.469157915577578e-06, "loss": 0.5285, "step": 5508 }, { "epoch": 0.36, "grad_norm": 1.0966328382492065, "learning_rate": 7.468248899198392e-06, "loss": 0.5678, "step": 5509 }, { "epoch": 0.36, "grad_norm": 1.1385775804519653, "learning_rate": 7.4673397749384135e-06, "loss": 0.5387, "step": 5510 }, { "epoch": 0.36, "grad_norm": 1.1329470872879028, "learning_rate": 7.46643054283738e-06, "loss": 0.4781, "step": 5511 }, { "epoch": 0.36, "grad_norm": 1.1307183504104614, "learning_rate": 7.465521202935033e-06, "loss": 0.5602, "step": 5512 }, { "epoch": 0.36, "grad_norm": 1.1621760129928589, "learning_rate": 7.464611755271114e-06, "loss": 0.5535, "step": 5513 }, { "epoch": 0.36, "grad_norm": 1.1362693309783936, "learning_rate": 7.4637021998853725e-06, "loss": 0.5402, "step": 5514 }, { "epoch": 0.36, "grad_norm": 1.090770959854126, "learning_rate": 7.462792536817567e-06, "loss": 0.5309, "step": 5515 }, { "epoch": 0.36, "grad_norm": 1.2866289615631104, "learning_rate": 7.461882766107451e-06, "loss": 0.5632, "step": 5516 }, { "epoch": 0.36, "grad_norm": 1.0624874830245972, "learning_rate": 7.460972887794793e-06, "loss": 0.5399, "step": 5517 }, { "epoch": 0.36, "grad_norm": 1.2681312561035156, "learning_rate": 7.460062901919359e-06, "loss": 0.5278, "step": 5518 }, { "epoch": 0.36, "grad_norm": 1.1057775020599365, "learning_rate": 7.459152808520922e-06, "loss": 0.5283, "step": 5519 }, { "epoch": 0.36, "grad_norm": 1.2596614360809326, "learning_rate": 7.458242607639261e-06, "loss": 0.5404, "step": 5520 }, { "epoch": 0.36, "grad_norm": 1.0604331493377686, "learning_rate": 7.457332299314159e-06, "loss": 0.564, "step": 5521 }, { "epoch": 0.36, "grad_norm": 1.171038269996643, "learning_rate": 7.4564218835854e-06, "loss": 0.5571, "step": 5522 }, { "epoch": 0.36, "grad_norm": 1.2882106304168701, "learning_rate": 7.455511360492779e-06, "loss": 0.5265, "step": 5523 }, { "epoch": 0.36, "grad_norm": 1.4391204118728638, "learning_rate": 7.454600730076093e-06, "loss": 0.5347, "step": 5524 }, { "epoch": 0.36, "grad_norm": 1.1651965379714966, "learning_rate": 7.4536899923751395e-06, "loss": 0.5227, "step": 5525 }, { "epoch": 0.36, "grad_norm": 1.1005122661590576, "learning_rate": 7.4527791474297285e-06, "loss": 0.5278, "step": 5526 }, { "epoch": 0.36, "grad_norm": 1.291439414024353, "learning_rate": 7.4518681952796705e-06, "loss": 0.53, "step": 5527 }, { "epoch": 0.36, "grad_norm": 1.115951657295227, "learning_rate": 7.4509571359647785e-06, "loss": 0.5713, "step": 5528 }, { "epoch": 0.36, "grad_norm": 1.2465462684631348, "learning_rate": 7.4500459695248725e-06, "loss": 0.5563, "step": 5529 }, { "epoch": 0.36, "grad_norm": 1.0506154298782349, "learning_rate": 7.44913469599978e-06, "loss": 0.4807, "step": 5530 }, { "epoch": 0.36, "grad_norm": 1.1468669176101685, "learning_rate": 7.448223315429328e-06, "loss": 0.5815, "step": 5531 }, { "epoch": 0.36, "grad_norm": 1.0422422885894775, "learning_rate": 7.447311827853351e-06, "loss": 0.5206, "step": 5532 }, { "epoch": 0.36, "grad_norm": 1.0737615823745728, "learning_rate": 7.446400233311689e-06, "loss": 0.5575, "step": 5533 }, { "epoch": 0.36, "grad_norm": 1.173399567604065, "learning_rate": 7.445488531844184e-06, "loss": 0.5326, "step": 5534 }, { "epoch": 0.36, "grad_norm": 1.2378158569335938, "learning_rate": 7.444576723490684e-06, "loss": 0.5656, "step": 5535 }, { "epoch": 0.36, "grad_norm": 1.1816930770874023, "learning_rate": 7.4436648082910446e-06, "loss": 0.5563, "step": 5536 }, { "epoch": 0.36, "grad_norm": 1.0493543148040771, "learning_rate": 7.44275278628512e-06, "loss": 0.5457, "step": 5537 }, { "epoch": 0.36, "grad_norm": 1.1291446685791016, "learning_rate": 7.4418406575127735e-06, "loss": 0.5448, "step": 5538 }, { "epoch": 0.36, "grad_norm": 1.3579275608062744, "learning_rate": 7.440928422013872e-06, "loss": 0.5281, "step": 5539 }, { "epoch": 0.36, "grad_norm": 1.1827322244644165, "learning_rate": 7.440016079828288e-06, "loss": 0.5591, "step": 5540 }, { "epoch": 0.36, "grad_norm": 1.2674354314804077, "learning_rate": 7.439103630995894e-06, "loss": 0.5843, "step": 5541 }, { "epoch": 0.36, "grad_norm": 1.1649702787399292, "learning_rate": 7.4381910755565745e-06, "loss": 0.5773, "step": 5542 }, { "epoch": 0.36, "grad_norm": 1.196000337600708, "learning_rate": 7.437278413550215e-06, "loss": 0.5625, "step": 5543 }, { "epoch": 0.36, "grad_norm": 1.2414393424987793, "learning_rate": 7.436365645016702e-06, "loss": 0.5587, "step": 5544 }, { "epoch": 0.36, "grad_norm": 1.19171142578125, "learning_rate": 7.435452769995935e-06, "loss": 0.5445, "step": 5545 }, { "epoch": 0.36, "grad_norm": 1.2204545736312866, "learning_rate": 7.4345397885278096e-06, "loss": 0.5524, "step": 5546 }, { "epoch": 0.36, "grad_norm": 1.08087158203125, "learning_rate": 7.43362670065223e-06, "loss": 0.5382, "step": 5547 }, { "epoch": 0.36, "grad_norm": 1.2337099313735962, "learning_rate": 7.432713506409108e-06, "loss": 0.6078, "step": 5548 }, { "epoch": 0.36, "grad_norm": 1.1620111465454102, "learning_rate": 7.431800205838355e-06, "loss": 0.5518, "step": 5549 }, { "epoch": 0.36, "grad_norm": 1.2376446723937988, "learning_rate": 7.430886798979888e-06, "loss": 0.5411, "step": 5550 }, { "epoch": 0.36, "grad_norm": 1.2793899774551392, "learning_rate": 7.429973285873632e-06, "loss": 0.5346, "step": 5551 }, { "epoch": 0.36, "grad_norm": 1.2180553674697876, "learning_rate": 7.429059666559512e-06, "loss": 0.5821, "step": 5552 }, { "epoch": 0.36, "grad_norm": 1.1166185140609741, "learning_rate": 7.428145941077463e-06, "loss": 0.5371, "step": 5553 }, { "epoch": 0.36, "grad_norm": 1.15378737449646, "learning_rate": 7.4272321094674175e-06, "loss": 0.5782, "step": 5554 }, { "epoch": 0.36, "grad_norm": 1.1211744546890259, "learning_rate": 7.426318171769319e-06, "loss": 0.5311, "step": 5555 }, { "epoch": 0.36, "grad_norm": 1.1950082778930664, "learning_rate": 7.425404128023114e-06, "loss": 0.5897, "step": 5556 }, { "epoch": 0.36, "grad_norm": 1.2908018827438354, "learning_rate": 7.424489978268752e-06, "loss": 0.6005, "step": 5557 }, { "epoch": 0.36, "grad_norm": 1.0332947969436646, "learning_rate": 7.423575722546188e-06, "loss": 0.5121, "step": 5558 }, { "epoch": 0.36, "grad_norm": 1.0740870237350464, "learning_rate": 7.422661360895382e-06, "loss": 0.4957, "step": 5559 }, { "epoch": 0.36, "grad_norm": 1.2792448997497559, "learning_rate": 7.421746893356299e-06, "loss": 0.5617, "step": 5560 }, { "epoch": 0.36, "grad_norm": 1.0629814863204956, "learning_rate": 7.420832319968907e-06, "loss": 0.5572, "step": 5561 }, { "epoch": 0.36, "grad_norm": 1.128578543663025, "learning_rate": 7.41991764077318e-06, "loss": 0.5722, "step": 5562 }, { "epoch": 0.36, "grad_norm": 1.1423484086990356, "learning_rate": 7.419002855809096e-06, "loss": 0.5238, "step": 5563 }, { "epoch": 0.36, "grad_norm": 1.3197636604309082, "learning_rate": 7.4180879651166394e-06, "loss": 0.5618, "step": 5564 }, { "epoch": 0.36, "grad_norm": 1.2743921279907227, "learning_rate": 7.417172968735797e-06, "loss": 0.566, "step": 5565 }, { "epoch": 0.36, "grad_norm": 1.3383973836898804, "learning_rate": 7.41625786670656e-06, "loss": 0.5812, "step": 5566 }, { "epoch": 0.36, "grad_norm": 1.1005091667175293, "learning_rate": 7.415342659068925e-06, "loss": 0.5216, "step": 5567 }, { "epoch": 0.36, "grad_norm": 1.0925246477127075, "learning_rate": 7.414427345862895e-06, "loss": 0.5564, "step": 5568 }, { "epoch": 0.36, "grad_norm": 1.1962809562683105, "learning_rate": 7.413511927128474e-06, "loss": 0.5653, "step": 5569 }, { "epoch": 0.36, "grad_norm": 1.2055100202560425, "learning_rate": 7.412596402905674e-06, "loss": 0.5694, "step": 5570 }, { "epoch": 0.36, "grad_norm": 1.1602998971939087, "learning_rate": 7.41168077323451e-06, "loss": 0.5796, "step": 5571 }, { "epoch": 0.36, "grad_norm": 1.137847900390625, "learning_rate": 7.410765038155001e-06, "loss": 0.5281, "step": 5572 }, { "epoch": 0.36, "grad_norm": 1.1651768684387207, "learning_rate": 7.409849197707173e-06, "loss": 0.6083, "step": 5573 }, { "epoch": 0.36, "grad_norm": 1.0996882915496826, "learning_rate": 7.408933251931054e-06, "loss": 0.5175, "step": 5574 }, { "epoch": 0.36, "grad_norm": 1.1513992547988892, "learning_rate": 7.408017200866677e-06, "loss": 0.4957, "step": 5575 }, { "epoch": 0.36, "grad_norm": 1.097699522972107, "learning_rate": 7.4071010445540804e-06, "loss": 0.5303, "step": 5576 }, { "epoch": 0.36, "grad_norm": 1.1685315370559692, "learning_rate": 7.406184783033309e-06, "loss": 0.5711, "step": 5577 }, { "epoch": 0.36, "grad_norm": 1.1051154136657715, "learning_rate": 7.405268416344408e-06, "loss": 0.5898, "step": 5578 }, { "epoch": 0.36, "grad_norm": 1.2651057243347168, "learning_rate": 7.4043519445274304e-06, "loss": 0.6126, "step": 5579 }, { "epoch": 0.36, "grad_norm": 1.1454801559448242, "learning_rate": 7.403435367622434e-06, "loss": 0.5729, "step": 5580 }, { "epoch": 0.36, "grad_norm": 1.0778753757476807, "learning_rate": 7.402518685669475e-06, "loss": 0.5024, "step": 5581 }, { "epoch": 0.36, "grad_norm": 1.2001028060913086, "learning_rate": 7.4016018987086255e-06, "loss": 0.6023, "step": 5582 }, { "epoch": 0.36, "grad_norm": 1.2296545505523682, "learning_rate": 7.400685006779953e-06, "loss": 0.5603, "step": 5583 }, { "epoch": 0.36, "grad_norm": 1.1140049695968628, "learning_rate": 7.399768009923533e-06, "loss": 0.5626, "step": 5584 }, { "epoch": 0.36, "grad_norm": 1.2228472232818604, "learning_rate": 7.398850908179445e-06, "loss": 0.5353, "step": 5585 }, { "epoch": 0.36, "grad_norm": 1.1199146509170532, "learning_rate": 7.3979337015877715e-06, "loss": 0.5606, "step": 5586 }, { "epoch": 0.36, "grad_norm": 1.2539582252502441, "learning_rate": 7.397016390188603e-06, "loss": 0.5409, "step": 5587 }, { "epoch": 0.36, "grad_norm": 1.2292884588241577, "learning_rate": 7.396098974022032e-06, "loss": 0.584, "step": 5588 }, { "epoch": 0.36, "grad_norm": 1.1895663738250732, "learning_rate": 7.395181453128158e-06, "loss": 0.5259, "step": 5589 }, { "epoch": 0.36, "grad_norm": 1.0665720701217651, "learning_rate": 7.394263827547083e-06, "loss": 0.5512, "step": 5590 }, { "epoch": 0.36, "grad_norm": 1.3202285766601562, "learning_rate": 7.393346097318912e-06, "loss": 0.6162, "step": 5591 }, { "epoch": 0.36, "grad_norm": 1.156246304512024, "learning_rate": 7.392428262483758e-06, "loss": 0.5409, "step": 5592 }, { "epoch": 0.36, "grad_norm": 1.1741524934768677, "learning_rate": 7.391510323081738e-06, "loss": 0.5497, "step": 5593 }, { "epoch": 0.36, "grad_norm": 1.1514745950698853, "learning_rate": 7.390592279152973e-06, "loss": 0.5507, "step": 5594 }, { "epoch": 0.36, "grad_norm": 1.0927292108535767, "learning_rate": 7.389674130737585e-06, "loss": 0.5086, "step": 5595 }, { "epoch": 0.36, "grad_norm": 1.1635595560073853, "learning_rate": 7.388755877875706e-06, "loss": 0.5358, "step": 5596 }, { "epoch": 0.36, "grad_norm": 1.14754056930542, "learning_rate": 7.387837520607473e-06, "loss": 0.4978, "step": 5597 }, { "epoch": 0.36, "grad_norm": 1.2189937829971313, "learning_rate": 7.386919058973021e-06, "loss": 0.539, "step": 5598 }, { "epoch": 0.36, "grad_norm": 1.2159640789031982, "learning_rate": 7.3860004930124955e-06, "loss": 0.5364, "step": 5599 }, { "epoch": 0.36, "grad_norm": 1.1882342100143433, "learning_rate": 7.385081822766045e-06, "loss": 0.5976, "step": 5600 }, { "epoch": 0.36, "grad_norm": 1.1795450448989868, "learning_rate": 7.3841630482738205e-06, "loss": 0.5315, "step": 5601 }, { "epoch": 0.36, "grad_norm": 1.2128121852874756, "learning_rate": 7.38324416957598e-06, "loss": 0.5408, "step": 5602 }, { "epoch": 0.36, "grad_norm": 1.1850249767303467, "learning_rate": 7.382325186712688e-06, "loss": 0.5906, "step": 5603 }, { "epoch": 0.36, "grad_norm": 1.0967975854873657, "learning_rate": 7.381406099724107e-06, "loss": 0.5501, "step": 5604 }, { "epoch": 0.36, "grad_norm": 1.2015533447265625, "learning_rate": 7.380486908650408e-06, "loss": 0.5994, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.41315758228302, "learning_rate": 7.3795676135317704e-06, "loss": 0.5625, "step": 5606 }, { "epoch": 0.36, "grad_norm": 1.2584609985351562, "learning_rate": 7.378648214408371e-06, "loss": 0.5399, "step": 5607 }, { "epoch": 0.36, "grad_norm": 1.3055232763290405, "learning_rate": 7.3777287113203955e-06, "loss": 0.5866, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.1479839086532593, "learning_rate": 7.376809104308032e-06, "loss": 0.5322, "step": 5609 }, { "epoch": 0.36, "grad_norm": 1.138612151145935, "learning_rate": 7.375889393411475e-06, "loss": 0.5216, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.0899081230163574, "learning_rate": 7.374969578670923e-06, "loss": 0.5329, "step": 5611 }, { "epoch": 0.36, "grad_norm": 1.1664243936538696, "learning_rate": 7.374049660126579e-06, "loss": 0.5962, "step": 5612 }, { "epoch": 0.36, "grad_norm": 1.271498203277588, "learning_rate": 7.373129637818648e-06, "loss": 0.5626, "step": 5613 }, { "epoch": 0.36, "grad_norm": 1.168816089630127, "learning_rate": 7.372209511787342e-06, "loss": 0.5346, "step": 5614 }, { "epoch": 0.36, "grad_norm": 1.0416576862335205, "learning_rate": 7.371289282072882e-06, "loss": 0.534, "step": 5615 }, { "epoch": 0.36, "grad_norm": 1.2224944829940796, "learning_rate": 7.370368948715483e-06, "loss": 0.5188, "step": 5616 }, { "epoch": 0.36, "grad_norm": 1.1361415386199951, "learning_rate": 7.369448511755373e-06, "loss": 0.5249, "step": 5617 }, { "epoch": 0.36, "grad_norm": 1.2336713075637817, "learning_rate": 7.368527971232784e-06, "loss": 0.5352, "step": 5618 }, { "epoch": 0.36, "grad_norm": 1.228845477104187, "learning_rate": 7.367607327187945e-06, "loss": 0.5254, "step": 5619 }, { "epoch": 0.36, "grad_norm": 1.24238121509552, "learning_rate": 7.3666865796611006e-06, "loss": 0.5405, "step": 5620 }, { "epoch": 0.36, "grad_norm": 1.1875838041305542, "learning_rate": 7.365765728692492e-06, "loss": 0.5906, "step": 5621 }, { "epoch": 0.36, "grad_norm": 1.2144149541854858, "learning_rate": 7.364844774322366e-06, "loss": 0.5354, "step": 5622 }, { "epoch": 0.36, "grad_norm": 1.22421395778656, "learning_rate": 7.363923716590976e-06, "loss": 0.5162, "step": 5623 }, { "epoch": 0.36, "grad_norm": 1.0888527631759644, "learning_rate": 7.3630025555385805e-06, "loss": 0.5304, "step": 5624 }, { "epoch": 0.36, "grad_norm": 1.0734505653381348, "learning_rate": 7.36208129120544e-06, "loss": 0.5409, "step": 5625 }, { "epoch": 0.36, "grad_norm": 1.1444995403289795, "learning_rate": 7.36115992363182e-06, "loss": 0.5977, "step": 5626 }, { "epoch": 0.36, "grad_norm": 1.1755620241165161, "learning_rate": 7.3602384528579914e-06, "loss": 0.575, "step": 5627 }, { "epoch": 0.36, "grad_norm": 1.2134054899215698, "learning_rate": 7.3593168789242294e-06, "loss": 0.5769, "step": 5628 }, { "epoch": 0.36, "grad_norm": 1.1201062202453613, "learning_rate": 7.358395201870815e-06, "loss": 0.5373, "step": 5629 }, { "epoch": 0.36, "grad_norm": 1.137150764465332, "learning_rate": 7.35747342173803e-06, "loss": 0.5927, "step": 5630 }, { "epoch": 0.36, "grad_norm": 1.0974582433700562, "learning_rate": 7.356551538566164e-06, "loss": 0.5002, "step": 5631 }, { "epoch": 0.36, "grad_norm": 1.1316862106323242, "learning_rate": 7.355629552395511e-06, "loss": 0.5205, "step": 5632 }, { "epoch": 0.36, "grad_norm": 1.1814055442810059, "learning_rate": 7.354707463266367e-06, "loss": 0.5853, "step": 5633 }, { "epoch": 0.36, "grad_norm": 1.1240674257278442, "learning_rate": 7.353785271219036e-06, "loss": 0.5543, "step": 5634 }, { "epoch": 0.36, "grad_norm": 1.119915246963501, "learning_rate": 7.352862976293823e-06, "loss": 0.5991, "step": 5635 }, { "epoch": 0.36, "grad_norm": 1.0895845890045166, "learning_rate": 7.351940578531039e-06, "loss": 0.5394, "step": 5636 }, { "epoch": 0.36, "grad_norm": 1.1849263906478882, "learning_rate": 7.351018077971002e-06, "loss": 0.607, "step": 5637 }, { "epoch": 0.36, "grad_norm": 1.2839784622192383, "learning_rate": 7.35009547465403e-06, "loss": 0.5376, "step": 5638 }, { "epoch": 0.36, "grad_norm": 1.114722728729248, "learning_rate": 7.3491727686204484e-06, "loss": 0.5117, "step": 5639 }, { "epoch": 0.36, "grad_norm": 1.1598553657531738, "learning_rate": 7.348249959910585e-06, "loss": 0.5478, "step": 5640 }, { "epoch": 0.36, "grad_norm": 1.1571632623672485, "learning_rate": 7.347327048564775e-06, "loss": 0.5915, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.113567590713501, "learning_rate": 7.346404034623356e-06, "loss": 0.5814, "step": 5642 }, { "epoch": 0.36, "grad_norm": 1.1165026426315308, "learning_rate": 7.345480918126669e-06, "loss": 0.5323, "step": 5643 }, { "epoch": 0.36, "grad_norm": 1.1958017349243164, "learning_rate": 7.344557699115064e-06, "loss": 0.5163, "step": 5644 }, { "epoch": 0.36, "grad_norm": 1.0525249242782593, "learning_rate": 7.343634377628892e-06, "loss": 0.5298, "step": 5645 }, { "epoch": 0.36, "grad_norm": 1.170751929283142, "learning_rate": 7.342710953708506e-06, "loss": 0.5364, "step": 5646 }, { "epoch": 0.36, "grad_norm": 1.2476434707641602, "learning_rate": 7.341787427394269e-06, "loss": 0.5579, "step": 5647 }, { "epoch": 0.36, "grad_norm": 1.2212053537368774, "learning_rate": 7.340863798726546e-06, "loss": 0.5177, "step": 5648 }, { "epoch": 0.36, "grad_norm": 1.2210757732391357, "learning_rate": 7.339940067745705e-06, "loss": 0.5657, "step": 5649 }, { "epoch": 0.36, "grad_norm": 1.1418302059173584, "learning_rate": 7.3390162344921225e-06, "loss": 0.5096, "step": 5650 }, { "epoch": 0.36, "grad_norm": 1.1104822158813477, "learning_rate": 7.338092299006173e-06, "loss": 0.5523, "step": 5651 }, { "epoch": 0.36, "grad_norm": 1.2375762462615967, "learning_rate": 7.337168261328243e-06, "loss": 0.5266, "step": 5652 }, { "epoch": 0.36, "grad_norm": 1.1362711191177368, "learning_rate": 7.336244121498718e-06, "loss": 0.5852, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.1835466623306274, "learning_rate": 7.335319879557989e-06, "loss": 0.5635, "step": 5654 }, { "epoch": 0.37, "grad_norm": 1.2241095304489136, "learning_rate": 7.334395535546453e-06, "loss": 0.5215, "step": 5655 }, { "epoch": 0.37, "grad_norm": 1.116811752319336, "learning_rate": 7.333471089504514e-06, "loss": 0.5691, "step": 5656 }, { "epoch": 0.37, "grad_norm": 1.1790446043014526, "learning_rate": 7.332546541472571e-06, "loss": 0.5045, "step": 5657 }, { "epoch": 0.37, "grad_norm": 1.2714204788208008, "learning_rate": 7.331621891491038e-06, "loss": 0.6061, "step": 5658 }, { "epoch": 0.37, "grad_norm": 1.1003875732421875, "learning_rate": 7.330697139600328e-06, "loss": 0.5693, "step": 5659 }, { "epoch": 0.37, "grad_norm": 1.2296850681304932, "learning_rate": 7.329772285840858e-06, "loss": 0.5361, "step": 5660 }, { "epoch": 0.37, "grad_norm": 1.3128952980041504, "learning_rate": 7.328847330253052e-06, "loss": 0.5523, "step": 5661 }, { "epoch": 0.37, "grad_norm": 1.1502240896224976, "learning_rate": 7.32792227287734e-06, "loss": 0.5744, "step": 5662 }, { "epoch": 0.37, "grad_norm": 1.3162270784378052, "learning_rate": 7.326997113754151e-06, "loss": 0.5604, "step": 5663 }, { "epoch": 0.37, "grad_norm": 1.1063525676727295, "learning_rate": 7.326071852923921e-06, "loss": 0.5322, "step": 5664 }, { "epoch": 0.37, "grad_norm": 1.1422526836395264, "learning_rate": 7.325146490427092e-06, "loss": 0.4908, "step": 5665 }, { "epoch": 0.37, "grad_norm": 1.0877641439437866, "learning_rate": 7.324221026304109e-06, "loss": 0.597, "step": 5666 }, { "epoch": 0.37, "grad_norm": 1.0900601148605347, "learning_rate": 7.323295460595422e-06, "loss": 0.5481, "step": 5667 }, { "epoch": 0.37, "grad_norm": 1.2302056550979614, "learning_rate": 7.322369793341484e-06, "loss": 0.5421, "step": 5668 }, { "epoch": 0.37, "grad_norm": 1.2003591060638428, "learning_rate": 7.321444024582753e-06, "loss": 0.5197, "step": 5669 }, { "epoch": 0.37, "grad_norm": 1.2067997455596924, "learning_rate": 7.320518154359695e-06, "loss": 0.5408, "step": 5670 }, { "epoch": 0.37, "grad_norm": 1.1242495775222778, "learning_rate": 7.319592182712775e-06, "loss": 0.574, "step": 5671 }, { "epoch": 0.37, "grad_norm": 1.1774888038635254, "learning_rate": 7.318666109682464e-06, "loss": 0.5781, "step": 5672 }, { "epoch": 0.37, "grad_norm": 1.086575984954834, "learning_rate": 7.3177399353092415e-06, "loss": 0.5144, "step": 5673 }, { "epoch": 0.37, "grad_norm": 1.1077046394348145, "learning_rate": 7.3168136596335845e-06, "loss": 0.5464, "step": 5674 }, { "epoch": 0.37, "grad_norm": 1.15811288356781, "learning_rate": 7.315887282695981e-06, "loss": 0.5852, "step": 5675 }, { "epoch": 0.37, "grad_norm": 1.1312737464904785, "learning_rate": 7.314960804536919e-06, "loss": 0.5326, "step": 5676 }, { "epoch": 0.37, "grad_norm": 1.2380162477493286, "learning_rate": 7.314034225196893e-06, "loss": 0.5868, "step": 5677 }, { "epoch": 0.37, "grad_norm": 1.174134612083435, "learning_rate": 7.313107544716402e-06, "loss": 0.588, "step": 5678 }, { "epoch": 0.37, "grad_norm": 1.1631232500076294, "learning_rate": 7.312180763135948e-06, "loss": 0.5064, "step": 5679 }, { "epoch": 0.37, "grad_norm": 1.206087350845337, "learning_rate": 7.311253880496036e-06, "loss": 0.5772, "step": 5680 }, { "epoch": 0.37, "grad_norm": 1.2813851833343506, "learning_rate": 7.310326896837183e-06, "loss": 0.5477, "step": 5681 }, { "epoch": 0.37, "grad_norm": 1.0493805408477783, "learning_rate": 7.309399812199901e-06, "loss": 0.5358, "step": 5682 }, { "epoch": 0.37, "grad_norm": 1.0391411781311035, "learning_rate": 7.3084726266247105e-06, "loss": 0.4617, "step": 5683 }, { "epoch": 0.37, "grad_norm": 1.1277152299880981, "learning_rate": 7.30754534015214e-06, "loss": 0.5381, "step": 5684 }, { "epoch": 0.37, "grad_norm": 1.1659657955169678, "learning_rate": 7.306617952822714e-06, "loss": 0.5402, "step": 5685 }, { "epoch": 0.37, "grad_norm": 1.168610692024231, "learning_rate": 7.30569046467697e-06, "loss": 0.5519, "step": 5686 }, { "epoch": 0.37, "grad_norm": 1.1991264820098877, "learning_rate": 7.3047628757554425e-06, "loss": 0.5472, "step": 5687 }, { "epoch": 0.37, "grad_norm": 1.1156401634216309, "learning_rate": 7.3038351860986774e-06, "loss": 0.5214, "step": 5688 }, { "epoch": 0.37, "grad_norm": 1.1550531387329102, "learning_rate": 7.302907395747221e-06, "loss": 0.5379, "step": 5689 }, { "epoch": 0.37, "grad_norm": 1.1169308423995972, "learning_rate": 7.301979504741622e-06, "loss": 0.548, "step": 5690 }, { "epoch": 0.37, "grad_norm": 1.0841573476791382, "learning_rate": 7.30105151312244e-06, "loss": 0.545, "step": 5691 }, { "epoch": 0.37, "grad_norm": 1.1924899816513062, "learning_rate": 7.3001234209302315e-06, "loss": 0.5601, "step": 5692 }, { "epoch": 0.37, "grad_norm": 1.201799988746643, "learning_rate": 7.2991952282055634e-06, "loss": 0.5493, "step": 5693 }, { "epoch": 0.37, "grad_norm": 1.0918487310409546, "learning_rate": 7.298266934989005e-06, "loss": 0.5231, "step": 5694 }, { "epoch": 0.37, "grad_norm": 1.2691515684127808, "learning_rate": 7.297338541321126e-06, "loss": 0.5535, "step": 5695 }, { "epoch": 0.37, "grad_norm": 1.050613284111023, "learning_rate": 7.296410047242508e-06, "loss": 0.53, "step": 5696 }, { "epoch": 0.37, "grad_norm": 1.1244384050369263, "learning_rate": 7.295481452793732e-06, "loss": 0.5102, "step": 5697 }, { "epoch": 0.37, "grad_norm": 1.2350740432739258, "learning_rate": 7.294552758015383e-06, "loss": 0.5812, "step": 5698 }, { "epoch": 0.37, "grad_norm": 1.1093435287475586, "learning_rate": 7.293623962948053e-06, "loss": 0.5185, "step": 5699 }, { "epoch": 0.37, "grad_norm": 1.1545640230178833, "learning_rate": 7.2926950676323385e-06, "loss": 0.541, "step": 5700 }, { "epoch": 0.37, "grad_norm": 1.1949628591537476, "learning_rate": 7.291766072108837e-06, "loss": 0.5751, "step": 5701 }, { "epoch": 0.37, "grad_norm": 1.1106243133544922, "learning_rate": 7.290836976418153e-06, "loss": 0.5619, "step": 5702 }, { "epoch": 0.37, "grad_norm": 1.1819723844528198, "learning_rate": 7.289907780600896e-06, "loss": 0.5213, "step": 5703 }, { "epoch": 0.37, "grad_norm": 1.1743876934051514, "learning_rate": 7.288978484697678e-06, "loss": 0.5661, "step": 5704 }, { "epoch": 0.37, "grad_norm": 1.1661611795425415, "learning_rate": 7.288049088749115e-06, "loss": 0.5385, "step": 5705 }, { "epoch": 0.37, "grad_norm": 1.190329909324646, "learning_rate": 7.287119592795831e-06, "loss": 0.5318, "step": 5706 }, { "epoch": 0.37, "grad_norm": 1.1279124021530151, "learning_rate": 7.28618999687845e-06, "loss": 0.5413, "step": 5707 }, { "epoch": 0.37, "grad_norm": 1.2001405954360962, "learning_rate": 7.285260301037603e-06, "loss": 0.5749, "step": 5708 }, { "epoch": 0.37, "grad_norm": 1.2578269243240356, "learning_rate": 7.284330505313924e-06, "loss": 0.6006, "step": 5709 }, { "epoch": 0.37, "grad_norm": 1.0687938928604126, "learning_rate": 7.28340060974805e-06, "loss": 0.5195, "step": 5710 }, { "epoch": 0.37, "grad_norm": 1.1530332565307617, "learning_rate": 7.28247061438063e-06, "loss": 0.5033, "step": 5711 }, { "epoch": 0.37, "grad_norm": 1.1615771055221558, "learning_rate": 7.2815405192523055e-06, "loss": 0.5384, "step": 5712 }, { "epoch": 0.37, "grad_norm": 1.046248435974121, "learning_rate": 7.280610324403733e-06, "loss": 0.515, "step": 5713 }, { "epoch": 0.37, "grad_norm": 1.3179404735565186, "learning_rate": 7.279680029875568e-06, "loss": 0.5797, "step": 5714 }, { "epoch": 0.37, "grad_norm": 1.1612770557403564, "learning_rate": 7.2787496357084686e-06, "loss": 0.5301, "step": 5715 }, { "epoch": 0.37, "grad_norm": 1.0120980739593506, "learning_rate": 7.277819141943103e-06, "loss": 0.5288, "step": 5716 }, { "epoch": 0.37, "grad_norm": 1.108380913734436, "learning_rate": 7.27688854862014e-06, "loss": 0.5117, "step": 5717 }, { "epoch": 0.37, "grad_norm": 1.3068398237228394, "learning_rate": 7.275957855780252e-06, "loss": 0.564, "step": 5718 }, { "epoch": 0.37, "grad_norm": 1.2480427026748657, "learning_rate": 7.2750270634641176e-06, "loss": 0.5438, "step": 5719 }, { "epoch": 0.37, "grad_norm": 1.2297406196594238, "learning_rate": 7.274096171712422e-06, "loss": 0.5641, "step": 5720 }, { "epoch": 0.37, "grad_norm": 1.3231711387634277, "learning_rate": 7.273165180565849e-06, "loss": 0.5692, "step": 5721 }, { "epoch": 0.37, "grad_norm": 1.119573712348938, "learning_rate": 7.272234090065089e-06, "loss": 0.493, "step": 5722 }, { "epoch": 0.37, "grad_norm": 1.3073246479034424, "learning_rate": 7.271302900250843e-06, "loss": 0.551, "step": 5723 }, { "epoch": 0.37, "grad_norm": 1.207917332649231, "learning_rate": 7.270371611163805e-06, "loss": 0.5904, "step": 5724 }, { "epoch": 0.37, "grad_norm": 1.102921724319458, "learning_rate": 7.2694402228446805e-06, "loss": 0.5621, "step": 5725 }, { "epoch": 0.37, "grad_norm": 1.2411013841629028, "learning_rate": 7.268508735334181e-06, "loss": 0.5981, "step": 5726 }, { "epoch": 0.37, "grad_norm": 1.0554594993591309, "learning_rate": 7.267577148673016e-06, "loss": 0.5492, "step": 5727 }, { "epoch": 0.37, "grad_norm": 1.0855646133422852, "learning_rate": 7.266645462901907e-06, "loss": 0.5342, "step": 5728 }, { "epoch": 0.37, "grad_norm": 1.0349678993225098, "learning_rate": 7.26571367806157e-06, "loss": 0.5227, "step": 5729 }, { "epoch": 0.37, "grad_norm": 1.0914286375045776, "learning_rate": 7.264781794192736e-06, "loss": 0.5173, "step": 5730 }, { "epoch": 0.37, "grad_norm": 1.2989475727081299, "learning_rate": 7.26384981133613e-06, "loss": 0.5398, "step": 5731 }, { "epoch": 0.37, "grad_norm": 1.269661784172058, "learning_rate": 7.262917729532491e-06, "loss": 0.5827, "step": 5732 }, { "epoch": 0.37, "grad_norm": 1.1898289918899536, "learning_rate": 7.261985548822558e-06, "loss": 0.5308, "step": 5733 }, { "epoch": 0.37, "grad_norm": 1.0806317329406738, "learning_rate": 7.2610532692470715e-06, "loss": 0.557, "step": 5734 }, { "epoch": 0.37, "grad_norm": 1.172336459159851, "learning_rate": 7.26012089084678e-06, "loss": 0.593, "step": 5735 }, { "epoch": 0.37, "grad_norm": 1.1896017789840698, "learning_rate": 7.259188413662436e-06, "loss": 0.5535, "step": 5736 }, { "epoch": 0.37, "grad_norm": 1.0572518110275269, "learning_rate": 7.258255837734794e-06, "loss": 0.5427, "step": 5737 }, { "epoch": 0.37, "grad_norm": 1.1257312297821045, "learning_rate": 7.257323163104617e-06, "loss": 0.5676, "step": 5738 }, { "epoch": 0.37, "grad_norm": 1.1029785871505737, "learning_rate": 7.256390389812667e-06, "loss": 0.4788, "step": 5739 }, { "epoch": 0.37, "grad_norm": 1.1390739679336548, "learning_rate": 7.255457517899715e-06, "loss": 0.5482, "step": 5740 }, { "epoch": 0.37, "grad_norm": 1.0571955442428589, "learning_rate": 7.254524547406536e-06, "loss": 0.5478, "step": 5741 }, { "epoch": 0.37, "grad_norm": 1.2199081182479858, "learning_rate": 7.253591478373905e-06, "loss": 0.5274, "step": 5742 }, { "epoch": 0.37, "grad_norm": 1.3256365060806274, "learning_rate": 7.2526583108426044e-06, "loss": 0.5608, "step": 5743 }, { "epoch": 0.37, "grad_norm": 1.2011208534240723, "learning_rate": 7.2517250448534214e-06, "loss": 0.5725, "step": 5744 }, { "epoch": 0.37, "grad_norm": 1.2043511867523193, "learning_rate": 7.250791680447145e-06, "loss": 0.5877, "step": 5745 }, { "epoch": 0.37, "grad_norm": 1.2123358249664307, "learning_rate": 7.249858217664571e-06, "loss": 0.5901, "step": 5746 }, { "epoch": 0.37, "grad_norm": 1.2032604217529297, "learning_rate": 7.248924656546501e-06, "loss": 0.5587, "step": 5747 }, { "epoch": 0.37, "grad_norm": 1.0528265237808228, "learning_rate": 7.247990997133736e-06, "loss": 0.5289, "step": 5748 }, { "epoch": 0.37, "grad_norm": 1.208188772201538, "learning_rate": 7.247057239467084e-06, "loss": 0.5871, "step": 5749 }, { "epoch": 0.37, "grad_norm": 1.3076770305633545, "learning_rate": 7.246123383587358e-06, "loss": 0.5367, "step": 5750 }, { "epoch": 0.37, "grad_norm": 1.151496171951294, "learning_rate": 7.245189429535373e-06, "loss": 0.5391, "step": 5751 }, { "epoch": 0.37, "grad_norm": 1.2852808237075806, "learning_rate": 7.244255377351951e-06, "loss": 0.571, "step": 5752 }, { "epoch": 0.37, "grad_norm": 1.1832083463668823, "learning_rate": 7.2433212270779175e-06, "loss": 0.5512, "step": 5753 }, { "epoch": 0.37, "grad_norm": 1.1869926452636719, "learning_rate": 7.2423869787541e-06, "loss": 0.5147, "step": 5754 }, { "epoch": 0.37, "grad_norm": 1.1049416065216064, "learning_rate": 7.2414526324213355e-06, "loss": 0.566, "step": 5755 }, { "epoch": 0.37, "grad_norm": 1.2389614582061768, "learning_rate": 7.2405181881204564e-06, "loss": 0.5063, "step": 5756 }, { "epoch": 0.37, "grad_norm": 1.2829859256744385, "learning_rate": 7.2395836458923085e-06, "loss": 0.5889, "step": 5757 }, { "epoch": 0.37, "grad_norm": 1.2559632062911987, "learning_rate": 7.238649005777739e-06, "loss": 0.5905, "step": 5758 }, { "epoch": 0.37, "grad_norm": 1.1751359701156616, "learning_rate": 7.237714267817596e-06, "loss": 0.5738, "step": 5759 }, { "epoch": 0.37, "grad_norm": 1.1287460327148438, "learning_rate": 7.236779432052736e-06, "loss": 0.595, "step": 5760 }, { "epoch": 0.37, "grad_norm": 1.2539925575256348, "learning_rate": 7.235844498524019e-06, "loss": 0.6256, "step": 5761 }, { "epoch": 0.37, "grad_norm": 1.1265925168991089, "learning_rate": 7.234909467272306e-06, "loss": 0.5287, "step": 5762 }, { "epoch": 0.37, "grad_norm": 1.1372203826904297, "learning_rate": 7.233974338338467e-06, "loss": 0.5588, "step": 5763 }, { "epoch": 0.37, "grad_norm": 1.1653119325637817, "learning_rate": 7.233039111763375e-06, "loss": 0.5231, "step": 5764 }, { "epoch": 0.37, "grad_norm": 1.0201700925827026, "learning_rate": 7.232103787587902e-06, "loss": 0.5394, "step": 5765 }, { "epoch": 0.37, "grad_norm": 1.151314377784729, "learning_rate": 7.2311683658529315e-06, "loss": 0.497, "step": 5766 }, { "epoch": 0.37, "grad_norm": 1.1124215126037598, "learning_rate": 7.230232846599351e-06, "loss": 0.5534, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.071043610572815, "learning_rate": 7.229297229868044e-06, "loss": 0.5465, "step": 5768 }, { "epoch": 0.37, "grad_norm": 1.2673600912094116, "learning_rate": 7.228361515699909e-06, "loss": 0.5556, "step": 5769 }, { "epoch": 0.37, "grad_norm": 1.2198686599731445, "learning_rate": 7.22742570413584e-06, "loss": 0.5337, "step": 5770 }, { "epoch": 0.37, "grad_norm": 1.2100132703781128, "learning_rate": 7.22648979521674e-06, "loss": 0.5173, "step": 5771 }, { "epoch": 0.37, "grad_norm": 1.2523349523544312, "learning_rate": 7.225553788983516e-06, "loss": 0.5804, "step": 5772 }, { "epoch": 0.37, "grad_norm": 1.1790745258331299, "learning_rate": 7.2246176854770785e-06, "loss": 0.5658, "step": 5773 }, { "epoch": 0.37, "grad_norm": 1.196999192237854, "learning_rate": 7.223681484738341e-06, "loss": 0.6025, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.1288695335388184, "learning_rate": 7.222745186808223e-06, "loss": 0.5869, "step": 5775 }, { "epoch": 0.37, "grad_norm": 1.1620633602142334, "learning_rate": 7.221808791727648e-06, "loss": 0.5508, "step": 5776 }, { "epoch": 0.37, "grad_norm": 1.0982884168624878, "learning_rate": 7.220872299537543e-06, "loss": 0.5478, "step": 5777 }, { "epoch": 0.37, "grad_norm": 1.1754827499389648, "learning_rate": 7.219935710278839e-06, "loss": 0.5504, "step": 5778 }, { "epoch": 0.37, "grad_norm": 1.1308120489120483, "learning_rate": 7.218999023992475e-06, "loss": 0.533, "step": 5779 }, { "epoch": 0.37, "grad_norm": 1.1243635416030884, "learning_rate": 7.218062240719386e-06, "loss": 0.6061, "step": 5780 }, { "epoch": 0.37, "grad_norm": 1.1403840780258179, "learning_rate": 7.217125360500519e-06, "loss": 0.5229, "step": 5781 }, { "epoch": 0.37, "grad_norm": 1.0526148080825806, "learning_rate": 7.216188383376824e-06, "loss": 0.5202, "step": 5782 }, { "epoch": 0.37, "grad_norm": 1.1716594696044922, "learning_rate": 7.215251309389253e-06, "loss": 0.5532, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.3252540826797485, "learning_rate": 7.214314138578761e-06, "loss": 0.5677, "step": 5784 }, { "epoch": 0.37, "grad_norm": 1.2348546981811523, "learning_rate": 7.213376870986313e-06, "loss": 0.572, "step": 5785 }, { "epoch": 0.37, "grad_norm": 1.1366153955459595, "learning_rate": 7.212439506652871e-06, "loss": 0.5383, "step": 5786 }, { "epoch": 0.37, "grad_norm": 1.1713887453079224, "learning_rate": 7.211502045619406e-06, "loss": 0.5376, "step": 5787 }, { "epoch": 0.37, "grad_norm": 1.2791463136672974, "learning_rate": 7.210564487926894e-06, "loss": 0.5399, "step": 5788 }, { "epoch": 0.37, "grad_norm": 1.1178141832351685, "learning_rate": 7.20962683361631e-06, "loss": 0.5751, "step": 5789 }, { "epoch": 0.37, "grad_norm": 1.1533840894699097, "learning_rate": 7.208689082728639e-06, "loss": 0.5132, "step": 5790 }, { "epoch": 0.37, "grad_norm": 1.1210908889770508, "learning_rate": 7.2077512353048676e-06, "loss": 0.4973, "step": 5791 }, { "epoch": 0.37, "grad_norm": 1.2548011541366577, "learning_rate": 7.206813291385984e-06, "loss": 0.5324, "step": 5792 }, { "epoch": 0.37, "grad_norm": 1.2109514474868774, "learning_rate": 7.205875251012985e-06, "loss": 0.5499, "step": 5793 }, { "epoch": 0.37, "grad_norm": 1.2042735815048218, "learning_rate": 7.204937114226871e-06, "loss": 0.591, "step": 5794 }, { "epoch": 0.37, "grad_norm": 1.1264816522598267, "learning_rate": 7.203998881068645e-06, "loss": 0.541, "step": 5795 }, { "epoch": 0.37, "grad_norm": 1.2050350904464722, "learning_rate": 7.203060551579314e-06, "loss": 0.4531, "step": 5796 }, { "epoch": 0.37, "grad_norm": 1.2122163772583008, "learning_rate": 7.2021221257998885e-06, "loss": 0.5187, "step": 5797 }, { "epoch": 0.37, "grad_norm": 1.2047815322875977, "learning_rate": 7.2011836037713875e-06, "loss": 0.5307, "step": 5798 }, { "epoch": 0.37, "grad_norm": 1.1528726816177368, "learning_rate": 7.200244985534832e-06, "loss": 0.5414, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.0813578367233276, "learning_rate": 7.199306271131244e-06, "loss": 0.5356, "step": 5800 }, { "epoch": 0.37, "grad_norm": 1.1713374853134155, "learning_rate": 7.198367460601651e-06, "loss": 0.5318, "step": 5801 }, { "epoch": 0.37, "grad_norm": 1.2509236335754395, "learning_rate": 7.197428553987091e-06, "loss": 0.5684, "step": 5802 }, { "epoch": 0.37, "grad_norm": 1.1558974981307983, "learning_rate": 7.1964895513285975e-06, "loss": 0.5379, "step": 5803 }, { "epoch": 0.37, "grad_norm": 1.219514012336731, "learning_rate": 7.195550452667212e-06, "loss": 0.5878, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.21390700340271, "learning_rate": 7.194611258043981e-06, "loss": 0.5522, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.0790561437606812, "learning_rate": 7.1936719674999535e-06, "loss": 0.5399, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.1336605548858643, "learning_rate": 7.192732581076185e-06, "loss": 0.4935, "step": 5807 }, { "epoch": 0.37, "grad_norm": 1.2275872230529785, "learning_rate": 7.191793098813733e-06, "loss": 0.578, "step": 5808 }, { "epoch": 0.37, "grad_norm": 1.2399828433990479, "learning_rate": 7.190853520753659e-06, "loss": 0.5996, "step": 5809 }, { "epoch": 0.38, "grad_norm": 1.1283091306686401, "learning_rate": 7.18991384693703e-06, "loss": 0.5339, "step": 5810 }, { "epoch": 0.38, "grad_norm": 1.2699087858200073, "learning_rate": 7.188974077404917e-06, "loss": 0.5747, "step": 5811 }, { "epoch": 0.38, "grad_norm": 1.2900972366333008, "learning_rate": 7.188034212198395e-06, "loss": 0.5569, "step": 5812 }, { "epoch": 0.38, "grad_norm": 1.1028287410736084, "learning_rate": 7.187094251358542e-06, "loss": 0.6051, "step": 5813 }, { "epoch": 0.38, "grad_norm": 1.0998378992080688, "learning_rate": 7.1861541949264435e-06, "loss": 0.5256, "step": 5814 }, { "epoch": 0.38, "grad_norm": 1.1961936950683594, "learning_rate": 7.185214042943184e-06, "loss": 0.5652, "step": 5815 }, { "epoch": 0.38, "grad_norm": 1.2026958465576172, "learning_rate": 7.1842737954498565e-06, "loss": 0.5376, "step": 5816 }, { "epoch": 0.38, "grad_norm": 1.1593708992004395, "learning_rate": 7.183333452487559e-06, "loss": 0.5065, "step": 5817 }, { "epoch": 0.38, "grad_norm": 1.1331236362457275, "learning_rate": 7.182393014097386e-06, "loss": 0.5415, "step": 5818 }, { "epoch": 0.38, "grad_norm": 1.260648250579834, "learning_rate": 7.181452480320449e-06, "loss": 0.5458, "step": 5819 }, { "epoch": 0.38, "grad_norm": 1.185605764389038, "learning_rate": 7.180511851197849e-06, "loss": 0.5403, "step": 5820 }, { "epoch": 0.38, "grad_norm": 1.1001445055007935, "learning_rate": 7.179571126770704e-06, "loss": 0.5906, "step": 5821 }, { "epoch": 0.38, "grad_norm": 1.183261752128601, "learning_rate": 7.178630307080127e-06, "loss": 0.5562, "step": 5822 }, { "epoch": 0.38, "grad_norm": 1.1376618146896362, "learning_rate": 7.177689392167239e-06, "loss": 0.5269, "step": 5823 }, { "epoch": 0.38, "grad_norm": 1.213853120803833, "learning_rate": 7.17674838207317e-06, "loss": 0.6241, "step": 5824 }, { "epoch": 0.38, "grad_norm": 1.2213525772094727, "learning_rate": 7.1758072768390426e-06, "loss": 0.5814, "step": 5825 }, { "epoch": 0.38, "grad_norm": 1.0947715044021606, "learning_rate": 7.1748660765059945e-06, "loss": 0.553, "step": 5826 }, { "epoch": 0.38, "grad_norm": 1.2184007167816162, "learning_rate": 7.173924781115159e-06, "loss": 0.5899, "step": 5827 }, { "epoch": 0.38, "grad_norm": 1.2104908227920532, "learning_rate": 7.1729833907076815e-06, "loss": 0.5398, "step": 5828 }, { "epoch": 0.38, "grad_norm": 1.1875090599060059, "learning_rate": 7.172041905324707e-06, "loss": 0.5702, "step": 5829 }, { "epoch": 0.38, "grad_norm": 1.182926893234253, "learning_rate": 7.171100325007383e-06, "loss": 0.5989, "step": 5830 }, { "epoch": 0.38, "grad_norm": 1.246800184249878, "learning_rate": 7.170158649796866e-06, "loss": 0.5691, "step": 5831 }, { "epoch": 0.38, "grad_norm": 1.0940747261047363, "learning_rate": 7.1692168797343156e-06, "loss": 0.5067, "step": 5832 }, { "epoch": 0.38, "grad_norm": 1.0162845849990845, "learning_rate": 7.168275014860889e-06, "loss": 0.5403, "step": 5833 }, { "epoch": 0.38, "grad_norm": 1.2027497291564941, "learning_rate": 7.167333055217757e-06, "loss": 0.594, "step": 5834 }, { "epoch": 0.38, "grad_norm": 1.207471251487732, "learning_rate": 7.1663910008460894e-06, "loss": 0.5685, "step": 5835 }, { "epoch": 0.38, "grad_norm": 1.1420252323150635, "learning_rate": 7.165448851787059e-06, "loss": 0.5278, "step": 5836 }, { "epoch": 0.38, "grad_norm": 1.1805298328399658, "learning_rate": 7.164506608081847e-06, "loss": 0.5492, "step": 5837 }, { "epoch": 0.38, "grad_norm": 1.1730459928512573, "learning_rate": 7.163564269771637e-06, "loss": 0.5614, "step": 5838 }, { "epoch": 0.38, "grad_norm": 1.270294189453125, "learning_rate": 7.162621836897613e-06, "loss": 0.5316, "step": 5839 }, { "epoch": 0.38, "grad_norm": 1.120287299156189, "learning_rate": 7.16167930950097e-06, "loss": 0.5355, "step": 5840 }, { "epoch": 0.38, "grad_norm": 1.0978341102600098, "learning_rate": 7.1607366876229e-06, "loss": 0.5683, "step": 5841 }, { "epoch": 0.38, "grad_norm": 1.1617077589035034, "learning_rate": 7.159793971304605e-06, "loss": 0.5608, "step": 5842 }, { "epoch": 0.38, "grad_norm": 1.2396286725997925, "learning_rate": 7.158851160587288e-06, "loss": 0.5213, "step": 5843 }, { "epoch": 0.38, "grad_norm": 1.0758823156356812, "learning_rate": 7.157908255512156e-06, "loss": 0.5329, "step": 5844 }, { "epoch": 0.38, "grad_norm": 1.2401196956634521, "learning_rate": 7.1569652561204206e-06, "loss": 0.5564, "step": 5845 }, { "epoch": 0.38, "grad_norm": 1.2270708084106445, "learning_rate": 7.156022162453301e-06, "loss": 0.541, "step": 5846 }, { "epoch": 0.38, "grad_norm": 1.0196689367294312, "learning_rate": 7.155078974552014e-06, "loss": 0.5028, "step": 5847 }, { "epoch": 0.38, "grad_norm": 1.1954830884933472, "learning_rate": 7.154135692457785e-06, "loss": 0.5317, "step": 5848 }, { "epoch": 0.38, "grad_norm": 1.062595248222351, "learning_rate": 7.153192316211845e-06, "loss": 0.5379, "step": 5849 }, { "epoch": 0.38, "grad_norm": 1.1993967294692993, "learning_rate": 7.152248845855421e-06, "loss": 0.5542, "step": 5850 }, { "epoch": 0.38, "grad_norm": 1.1795965433120728, "learning_rate": 7.1513052814297545e-06, "loss": 0.5373, "step": 5851 }, { "epoch": 0.38, "grad_norm": 1.1769273281097412, "learning_rate": 7.1503616229760844e-06, "loss": 0.5654, "step": 5852 }, { "epoch": 0.38, "grad_norm": 1.2237850427627563, "learning_rate": 7.1494178705356555e-06, "loss": 0.5425, "step": 5853 }, { "epoch": 0.38, "grad_norm": 1.1166274547576904, "learning_rate": 7.1484740241497165e-06, "loss": 0.5618, "step": 5854 }, { "epoch": 0.38, "grad_norm": 1.1856451034545898, "learning_rate": 7.147530083859523e-06, "loss": 0.5231, "step": 5855 }, { "epoch": 0.38, "grad_norm": 1.1233056783676147, "learning_rate": 7.146586049706328e-06, "loss": 0.5755, "step": 5856 }, { "epoch": 0.38, "grad_norm": 1.1166242361068726, "learning_rate": 7.145641921731397e-06, "loss": 0.5579, "step": 5857 }, { "epoch": 0.38, "grad_norm": 1.092622995376587, "learning_rate": 7.144697699975992e-06, "loss": 0.5631, "step": 5858 }, { "epoch": 0.38, "grad_norm": 1.1753989458084106, "learning_rate": 7.1437533844813845e-06, "loss": 0.5579, "step": 5859 }, { "epoch": 0.38, "grad_norm": 1.0985023975372314, "learning_rate": 7.142808975288846e-06, "loss": 0.5447, "step": 5860 }, { "epoch": 0.38, "grad_norm": 1.1280579566955566, "learning_rate": 7.141864472439659e-06, "loss": 0.5348, "step": 5861 }, { "epoch": 0.38, "grad_norm": 1.134055733680725, "learning_rate": 7.140919875975098e-06, "loss": 0.5387, "step": 5862 }, { "epoch": 0.38, "grad_norm": 1.187336802482605, "learning_rate": 7.139975185936456e-06, "loss": 0.5312, "step": 5863 }, { "epoch": 0.38, "grad_norm": 1.102538824081421, "learning_rate": 7.139030402365019e-06, "loss": 0.5548, "step": 5864 }, { "epoch": 0.38, "grad_norm": 1.2118993997573853, "learning_rate": 7.138085525302082e-06, "loss": 0.5266, "step": 5865 }, { "epoch": 0.38, "grad_norm": 1.2904717922210693, "learning_rate": 7.137140554788943e-06, "loss": 0.5596, "step": 5866 }, { "epoch": 0.38, "grad_norm": 1.1922576427459717, "learning_rate": 7.136195490866904e-06, "loss": 0.5462, "step": 5867 }, { "epoch": 0.38, "grad_norm": 1.277056097984314, "learning_rate": 7.135250333577272e-06, "loss": 0.5655, "step": 5868 }, { "epoch": 0.38, "grad_norm": 1.1791486740112305, "learning_rate": 7.134305082961356e-06, "loss": 0.5694, "step": 5869 }, { "epoch": 0.38, "grad_norm": 1.0768704414367676, "learning_rate": 7.133359739060471e-06, "loss": 0.5661, "step": 5870 }, { "epoch": 0.38, "grad_norm": 1.3962079286575317, "learning_rate": 7.132414301915937e-06, "loss": 0.5904, "step": 5871 }, { "epoch": 0.38, "grad_norm": 1.1684479713439941, "learning_rate": 7.131468771569076e-06, "loss": 0.5668, "step": 5872 }, { "epoch": 0.38, "grad_norm": 1.1093254089355469, "learning_rate": 7.1305231480612145e-06, "loss": 0.5086, "step": 5873 }, { "epoch": 0.38, "grad_norm": 1.079606533050537, "learning_rate": 7.129577431433682e-06, "loss": 0.5176, "step": 5874 }, { "epoch": 0.38, "grad_norm": 1.2229093313217163, "learning_rate": 7.128631621727814e-06, "loss": 0.6111, "step": 5875 }, { "epoch": 0.38, "grad_norm": 1.0268090963363647, "learning_rate": 7.1276857189849515e-06, "loss": 0.5649, "step": 5876 }, { "epoch": 0.38, "grad_norm": 1.0969514846801758, "learning_rate": 7.126739723246433e-06, "loss": 0.5253, "step": 5877 }, { "epoch": 0.38, "grad_norm": 1.1722850799560547, "learning_rate": 7.125793634553611e-06, "loss": 0.6163, "step": 5878 }, { "epoch": 0.38, "grad_norm": 1.1206034421920776, "learning_rate": 7.124847452947832e-06, "loss": 0.5338, "step": 5879 }, { "epoch": 0.38, "grad_norm": 1.1833761930465698, "learning_rate": 7.1239011784704535e-06, "loss": 0.5329, "step": 5880 }, { "epoch": 0.38, "grad_norm": 1.1419885158538818, "learning_rate": 7.122954811162834e-06, "loss": 0.5725, "step": 5881 }, { "epoch": 0.38, "grad_norm": 1.2405177354812622, "learning_rate": 7.122008351066339e-06, "loss": 0.5354, "step": 5882 }, { "epoch": 0.38, "grad_norm": 1.109251618385315, "learning_rate": 7.121061798222331e-06, "loss": 0.5949, "step": 5883 }, { "epoch": 0.38, "grad_norm": 1.129345178604126, "learning_rate": 7.120115152672186e-06, "loss": 0.5452, "step": 5884 }, { "epoch": 0.38, "grad_norm": 1.1716216802597046, "learning_rate": 7.119168414457276e-06, "loss": 0.5228, "step": 5885 }, { "epoch": 0.38, "grad_norm": 1.0288782119750977, "learning_rate": 7.118221583618983e-06, "loss": 0.527, "step": 5886 }, { "epoch": 0.38, "grad_norm": 1.125312328338623, "learning_rate": 7.117274660198691e-06, "loss": 0.5217, "step": 5887 }, { "epoch": 0.38, "grad_norm": 1.1530115604400635, "learning_rate": 7.116327644237785e-06, "loss": 0.5597, "step": 5888 }, { "epoch": 0.38, "grad_norm": 1.3321495056152344, "learning_rate": 7.1153805357776575e-06, "loss": 0.5321, "step": 5889 }, { "epoch": 0.38, "grad_norm": 1.1722452640533447, "learning_rate": 7.114433334859705e-06, "loss": 0.5958, "step": 5890 }, { "epoch": 0.38, "grad_norm": 1.1999024152755737, "learning_rate": 7.113486041525326e-06, "loss": 0.6267, "step": 5891 }, { "epoch": 0.38, "grad_norm": 1.2218945026397705, "learning_rate": 7.112538655815926e-06, "loss": 0.5558, "step": 5892 }, { "epoch": 0.38, "grad_norm": 1.1272554397583008, "learning_rate": 7.111591177772912e-06, "loss": 0.5622, "step": 5893 }, { "epoch": 0.38, "grad_norm": 1.1566001176834106, "learning_rate": 7.110643607437695e-06, "loss": 0.5753, "step": 5894 }, { "epoch": 0.38, "grad_norm": 1.0941451787948608, "learning_rate": 7.109695944851691e-06, "loss": 0.5466, "step": 5895 }, { "epoch": 0.38, "grad_norm": 1.1834310293197632, "learning_rate": 7.108748190056322e-06, "loss": 0.5598, "step": 5896 }, { "epoch": 0.38, "grad_norm": 1.2212058305740356, "learning_rate": 7.1078003430930085e-06, "loss": 0.5627, "step": 5897 }, { "epoch": 0.38, "grad_norm": 1.140204906463623, "learning_rate": 7.106852404003181e-06, "loss": 0.563, "step": 5898 }, { "epoch": 0.38, "grad_norm": 1.2981072664260864, "learning_rate": 7.1059043728282705e-06, "loss": 0.5885, "step": 5899 }, { "epoch": 0.38, "grad_norm": 1.1624181270599365, "learning_rate": 7.1049562496097135e-06, "loss": 0.5579, "step": 5900 }, { "epoch": 0.38, "grad_norm": 1.127376675605774, "learning_rate": 7.1040080343889484e-06, "loss": 0.579, "step": 5901 }, { "epoch": 0.38, "grad_norm": 1.4308329820632935, "learning_rate": 7.103059727207422e-06, "loss": 0.6288, "step": 5902 }, { "epoch": 0.38, "grad_norm": 1.0566595792770386, "learning_rate": 7.10211132810658e-06, "loss": 0.534, "step": 5903 }, { "epoch": 0.38, "grad_norm": 1.0849971771240234, "learning_rate": 7.101162837127875e-06, "loss": 0.5442, "step": 5904 }, { "epoch": 0.38, "grad_norm": 1.235209345817566, "learning_rate": 7.100214254312765e-06, "loss": 0.5506, "step": 5905 }, { "epoch": 0.38, "grad_norm": 1.093534231185913, "learning_rate": 7.0992655797027076e-06, "loss": 0.5292, "step": 5906 }, { "epoch": 0.38, "grad_norm": 1.2067729234695435, "learning_rate": 7.0983168133391674e-06, "loss": 0.5613, "step": 5907 }, { "epoch": 0.38, "grad_norm": 1.205297589302063, "learning_rate": 7.097367955263614e-06, "loss": 0.5593, "step": 5908 }, { "epoch": 0.38, "grad_norm": 1.1132402420043945, "learning_rate": 7.096419005517518e-06, "loss": 0.5261, "step": 5909 }, { "epoch": 0.38, "grad_norm": 1.1648036241531372, "learning_rate": 7.0954699641423566e-06, "loss": 0.5181, "step": 5910 }, { "epoch": 0.38, "grad_norm": 1.1310566663742065, "learning_rate": 7.09452083117961e-06, "loss": 0.5582, "step": 5911 }, { "epoch": 0.38, "grad_norm": 1.1619608402252197, "learning_rate": 7.09357160667076e-06, "loss": 0.5777, "step": 5912 }, { "epoch": 0.38, "grad_norm": 1.1818745136260986, "learning_rate": 7.092622290657298e-06, "loss": 0.5639, "step": 5913 }, { "epoch": 0.38, "grad_norm": 1.2391283512115479, "learning_rate": 7.091672883180715e-06, "loss": 0.5407, "step": 5914 }, { "epoch": 0.38, "grad_norm": 1.1132333278656006, "learning_rate": 7.090723384282507e-06, "loss": 0.5482, "step": 5915 }, { "epoch": 0.38, "grad_norm": 1.252042293548584, "learning_rate": 7.089773794004172e-06, "loss": 0.5998, "step": 5916 }, { "epoch": 0.38, "grad_norm": 1.1291967630386353, "learning_rate": 7.088824112387218e-06, "loss": 0.5972, "step": 5917 }, { "epoch": 0.38, "grad_norm": 1.2278239727020264, "learning_rate": 7.087874339473151e-06, "loss": 0.547, "step": 5918 }, { "epoch": 0.38, "grad_norm": 1.0507817268371582, "learning_rate": 7.086924475303482e-06, "loss": 0.5069, "step": 5919 }, { "epoch": 0.38, "grad_norm": 1.3200109004974365, "learning_rate": 7.08597451991973e-06, "loss": 0.5898, "step": 5920 }, { "epoch": 0.38, "grad_norm": 1.0606679916381836, "learning_rate": 7.085024473363414e-06, "loss": 0.5616, "step": 5921 }, { "epoch": 0.38, "grad_norm": 1.262665033340454, "learning_rate": 7.084074335676056e-06, "loss": 0.5961, "step": 5922 }, { "epoch": 0.38, "grad_norm": 1.0339683294296265, "learning_rate": 7.083124106899187e-06, "loss": 0.5158, "step": 5923 }, { "epoch": 0.38, "grad_norm": 1.1102458238601685, "learning_rate": 7.082173787074338e-06, "loss": 0.5642, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.3803186416625977, "learning_rate": 7.081223376243045e-06, "loss": 0.5752, "step": 5925 }, { "epoch": 0.38, "grad_norm": 1.1194926500320435, "learning_rate": 7.080272874446847e-06, "loss": 0.558, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.1106921434402466, "learning_rate": 7.079322281727288e-06, "loss": 0.5764, "step": 5927 }, { "epoch": 0.38, "grad_norm": 1.1613093614578247, "learning_rate": 7.078371598125919e-06, "loss": 0.5216, "step": 5928 }, { "epoch": 0.38, "grad_norm": 1.2396891117095947, "learning_rate": 7.077420823684287e-06, "loss": 0.645, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.136828899383545, "learning_rate": 7.076469958443952e-06, "loss": 0.4916, "step": 5930 }, { "epoch": 0.38, "grad_norm": 1.1257729530334473, "learning_rate": 7.075519002446474e-06, "loss": 0.5646, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.0732311010360718, "learning_rate": 7.074567955733413e-06, "loss": 0.523, "step": 5932 }, { "epoch": 0.38, "grad_norm": 1.1384631395339966, "learning_rate": 7.073616818346339e-06, "loss": 0.5401, "step": 5933 }, { "epoch": 0.38, "grad_norm": 1.121858835220337, "learning_rate": 7.072665590326826e-06, "loss": 0.5742, "step": 5934 }, { "epoch": 0.38, "grad_norm": 1.2632606029510498, "learning_rate": 7.071714271716445e-06, "loss": 0.535, "step": 5935 }, { "epoch": 0.38, "grad_norm": 1.2425942420959473, "learning_rate": 7.0707628625567805e-06, "loss": 0.5588, "step": 5936 }, { "epoch": 0.38, "grad_norm": 1.3214445114135742, "learning_rate": 7.069811362889414e-06, "loss": 0.5849, "step": 5937 }, { "epoch": 0.38, "grad_norm": 1.1006072759628296, "learning_rate": 7.0688597727559316e-06, "loss": 0.5404, "step": 5938 }, { "epoch": 0.38, "grad_norm": 1.1337168216705322, "learning_rate": 7.067908092197926e-06, "loss": 0.4709, "step": 5939 }, { "epoch": 0.38, "grad_norm": 1.2509974241256714, "learning_rate": 7.0669563212569946e-06, "loss": 0.5695, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.2295137643814087, "learning_rate": 7.066004459974735e-06, "loss": 0.5621, "step": 5941 }, { "epoch": 0.38, "grad_norm": 1.3713997602462769, "learning_rate": 7.065052508392749e-06, "loss": 0.5951, "step": 5942 }, { "epoch": 0.38, "grad_norm": 1.1970453262329102, "learning_rate": 7.064100466552648e-06, "loss": 0.5328, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.163084864616394, "learning_rate": 7.06314833449604e-06, "loss": 0.4949, "step": 5944 }, { "epoch": 0.38, "grad_norm": 1.1376888751983643, "learning_rate": 7.062196112264541e-06, "loss": 0.5445, "step": 5945 }, { "epoch": 0.38, "grad_norm": 1.2043371200561523, "learning_rate": 7.0612437998997705e-06, "loss": 0.5701, "step": 5946 }, { "epoch": 0.38, "grad_norm": 1.2555267810821533, "learning_rate": 7.0602913974433514e-06, "loss": 0.5985, "step": 5947 }, { "epoch": 0.38, "grad_norm": 1.1379119157791138, "learning_rate": 7.0593389049369125e-06, "loss": 0.4837, "step": 5948 }, { "epoch": 0.38, "grad_norm": 1.274133563041687, "learning_rate": 7.058386322422082e-06, "loss": 0.504, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.102485179901123, "learning_rate": 7.057433649940496e-06, "loss": 0.5522, "step": 5950 }, { "epoch": 0.38, "grad_norm": 1.3883492946624756, "learning_rate": 7.056480887533793e-06, "loss": 0.5529, "step": 5951 }, { "epoch": 0.38, "grad_norm": 1.1092784404754639, "learning_rate": 7.0555280352436175e-06, "loss": 0.5018, "step": 5952 }, { "epoch": 0.38, "grad_norm": 1.1429247856140137, "learning_rate": 7.054575093111614e-06, "loss": 0.5804, "step": 5953 }, { "epoch": 0.38, "grad_norm": 1.3457599878311157, "learning_rate": 7.053622061179435e-06, "loss": 0.5805, "step": 5954 }, { "epoch": 0.38, "grad_norm": 1.2160340547561646, "learning_rate": 7.052668939488734e-06, "loss": 0.578, "step": 5955 }, { "epoch": 0.38, "grad_norm": 1.1565308570861816, "learning_rate": 7.051715728081168e-06, "loss": 0.537, "step": 5956 }, { "epoch": 0.38, "grad_norm": 1.2257615327835083, "learning_rate": 7.050762426998403e-06, "loss": 0.5197, "step": 5957 }, { "epoch": 0.38, "grad_norm": 1.3751704692840576, "learning_rate": 7.049809036282102e-06, "loss": 0.5444, "step": 5958 }, { "epoch": 0.38, "grad_norm": 1.1222777366638184, "learning_rate": 7.0488555559739365e-06, "loss": 0.5442, "step": 5959 }, { "epoch": 0.38, "grad_norm": 1.1759331226348877, "learning_rate": 7.047901986115582e-06, "loss": 0.5486, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.1738823652267456, "learning_rate": 7.046948326748714e-06, "loss": 0.54, "step": 5961 }, { "epoch": 0.38, "grad_norm": 1.270519733428955, "learning_rate": 7.045994577915018e-06, "loss": 0.5751, "step": 5962 }, { "epoch": 0.38, "grad_norm": 1.172080397605896, "learning_rate": 7.045040739656175e-06, "loss": 0.5494, "step": 5963 }, { "epoch": 0.38, "grad_norm": 1.1190766096115112, "learning_rate": 7.0440868120138795e-06, "loss": 0.5103, "step": 5964 }, { "epoch": 0.39, "grad_norm": 1.1153640747070312, "learning_rate": 7.043132795029822e-06, "loss": 0.5522, "step": 5965 }, { "epoch": 0.39, "grad_norm": 1.17256498336792, "learning_rate": 7.042178688745702e-06, "loss": 0.5103, "step": 5966 }, { "epoch": 0.39, "grad_norm": 1.0911903381347656, "learning_rate": 7.04122449320322e-06, "loss": 0.568, "step": 5967 }, { "epoch": 0.39, "grad_norm": 1.1115723848342896, "learning_rate": 7.040270208444082e-06, "loss": 0.542, "step": 5968 }, { "epoch": 0.39, "grad_norm": 1.2751787900924683, "learning_rate": 7.039315834509999e-06, "loss": 0.5842, "step": 5969 }, { "epoch": 0.39, "grad_norm": 1.1389261484146118, "learning_rate": 7.038361371442679e-06, "loss": 0.4911, "step": 5970 }, { "epoch": 0.39, "grad_norm": 1.1324448585510254, "learning_rate": 7.037406819283845e-06, "loss": 0.6208, "step": 5971 }, { "epoch": 0.39, "grad_norm": 1.1927802562713623, "learning_rate": 7.036452178075215e-06, "loss": 0.529, "step": 5972 }, { "epoch": 0.39, "grad_norm": 1.0914695262908936, "learning_rate": 7.035497447858514e-06, "loss": 0.5589, "step": 5973 }, { "epoch": 0.39, "grad_norm": 1.2278850078582764, "learning_rate": 7.03454262867547e-06, "loss": 0.5925, "step": 5974 }, { "epoch": 0.39, "grad_norm": 1.0827195644378662, "learning_rate": 7.03358772056782e-06, "loss": 0.5654, "step": 5975 }, { "epoch": 0.39, "grad_norm": 1.2122312784194946, "learning_rate": 7.032632723577295e-06, "loss": 0.59, "step": 5976 }, { "epoch": 0.39, "grad_norm": 1.1600488424301147, "learning_rate": 7.031677637745637e-06, "loss": 0.5057, "step": 5977 }, { "epoch": 0.39, "grad_norm": 1.183225393295288, "learning_rate": 7.030722463114594e-06, "loss": 0.5879, "step": 5978 }, { "epoch": 0.39, "grad_norm": 1.1917356252670288, "learning_rate": 7.02976719972591e-06, "loss": 0.5725, "step": 5979 }, { "epoch": 0.39, "grad_norm": 1.1490625143051147, "learning_rate": 7.028811847621338e-06, "loss": 0.5342, "step": 5980 }, { "epoch": 0.39, "grad_norm": 1.0759623050689697, "learning_rate": 7.0278564068426366e-06, "loss": 0.6, "step": 5981 }, { "epoch": 0.39, "grad_norm": 1.0471049547195435, "learning_rate": 7.026900877431562e-06, "loss": 0.4889, "step": 5982 }, { "epoch": 0.39, "grad_norm": 1.0237011909484863, "learning_rate": 7.025945259429879e-06, "loss": 0.5021, "step": 5983 }, { "epoch": 0.39, "grad_norm": 1.0073984861373901, "learning_rate": 7.024989552879357e-06, "loss": 0.516, "step": 5984 }, { "epoch": 0.39, "grad_norm": 1.1593754291534424, "learning_rate": 7.024033757821766e-06, "loss": 0.5281, "step": 5985 }, { "epoch": 0.39, "grad_norm": 1.0300569534301758, "learning_rate": 7.023077874298881e-06, "loss": 0.5208, "step": 5986 }, { "epoch": 0.39, "grad_norm": 1.1544256210327148, "learning_rate": 7.0221219023524836e-06, "loss": 0.5426, "step": 5987 }, { "epoch": 0.39, "grad_norm": 1.131250023841858, "learning_rate": 7.021165842024352e-06, "loss": 0.5253, "step": 5988 }, { "epoch": 0.39, "grad_norm": 1.1583726406097412, "learning_rate": 7.020209693356278e-06, "loss": 0.5121, "step": 5989 }, { "epoch": 0.39, "grad_norm": 1.1274409294128418, "learning_rate": 7.019253456390051e-06, "loss": 0.5419, "step": 5990 }, { "epoch": 0.39, "grad_norm": 1.0660803318023682, "learning_rate": 7.018297131167464e-06, "loss": 0.4907, "step": 5991 }, { "epoch": 0.39, "grad_norm": 1.111977219581604, "learning_rate": 7.017340717730317e-06, "loss": 0.5603, "step": 5992 }, { "epoch": 0.39, "grad_norm": 1.1652241945266724, "learning_rate": 7.016384216120412e-06, "loss": 0.6039, "step": 5993 }, { "epoch": 0.39, "grad_norm": 1.1423523426055908, "learning_rate": 7.015427626379554e-06, "loss": 0.5846, "step": 5994 }, { "epoch": 0.39, "grad_norm": 1.2844923734664917, "learning_rate": 7.014470948549555e-06, "loss": 0.5628, "step": 5995 }, { "epoch": 0.39, "grad_norm": 1.31515634059906, "learning_rate": 7.01351418267223e-06, "loss": 0.6002, "step": 5996 }, { "epoch": 0.39, "grad_norm": 1.365286946296692, "learning_rate": 7.012557328789393e-06, "loss": 0.577, "step": 5997 }, { "epoch": 0.39, "grad_norm": 1.121494174003601, "learning_rate": 7.011600386942868e-06, "loss": 0.5973, "step": 5998 }, { "epoch": 0.39, "grad_norm": 1.0499173402786255, "learning_rate": 7.010643357174483e-06, "loss": 0.5278, "step": 5999 }, { "epoch": 0.39, "grad_norm": 1.1012037992477417, "learning_rate": 7.00968623952606e-06, "loss": 0.5077, "step": 6000 }, { "epoch": 0.39, "grad_norm": 1.152089238166809, "learning_rate": 7.008729034039439e-06, "loss": 0.5512, "step": 6001 }, { "epoch": 0.39, "grad_norm": 1.1079769134521484, "learning_rate": 7.007771740756454e-06, "loss": 0.5572, "step": 6002 }, { "epoch": 0.39, "grad_norm": 1.0332717895507812, "learning_rate": 7.006814359718945e-06, "loss": 0.4821, "step": 6003 }, { "epoch": 0.39, "grad_norm": 1.2203813791275024, "learning_rate": 7.005856890968761e-06, "loss": 0.6062, "step": 6004 }, { "epoch": 0.39, "grad_norm": 1.1566096544265747, "learning_rate": 7.004899334547746e-06, "loss": 0.5814, "step": 6005 }, { "epoch": 0.39, "grad_norm": 1.3286107778549194, "learning_rate": 7.003941690497753e-06, "loss": 0.6087, "step": 6006 }, { "epoch": 0.39, "grad_norm": 1.2182289361953735, "learning_rate": 7.00298395886064e-06, "loss": 0.5813, "step": 6007 }, { "epoch": 0.39, "grad_norm": 1.0740230083465576, "learning_rate": 7.002026139678264e-06, "loss": 0.5332, "step": 6008 }, { "epoch": 0.39, "grad_norm": 1.2105324268341064, "learning_rate": 7.001068232992494e-06, "loss": 0.6177, "step": 6009 }, { "epoch": 0.39, "grad_norm": 1.2289000749588013, "learning_rate": 7.000110238845192e-06, "loss": 0.5618, "step": 6010 }, { "epoch": 0.39, "grad_norm": 1.1695048809051514, "learning_rate": 6.999152157278233e-06, "loss": 0.4943, "step": 6011 }, { "epoch": 0.39, "grad_norm": 1.1715737581253052, "learning_rate": 6.9981939883334896e-06, "loss": 0.5038, "step": 6012 }, { "epoch": 0.39, "grad_norm": 1.2402842044830322, "learning_rate": 6.997235732052844e-06, "loss": 0.6068, "step": 6013 }, { "epoch": 0.39, "grad_norm": 1.3024351596832275, "learning_rate": 6.996277388478176e-06, "loss": 0.5157, "step": 6014 }, { "epoch": 0.39, "grad_norm": 1.2516709566116333, "learning_rate": 6.995318957651373e-06, "loss": 0.5725, "step": 6015 }, { "epoch": 0.39, "grad_norm": 1.1151095628738403, "learning_rate": 6.99436043961433e-06, "loss": 0.5272, "step": 6016 }, { "epoch": 0.39, "grad_norm": 1.0935468673706055, "learning_rate": 6.993401834408935e-06, "loss": 0.4808, "step": 6017 }, { "epoch": 0.39, "grad_norm": 1.249038577079773, "learning_rate": 6.992443142077089e-06, "loss": 0.5481, "step": 6018 }, { "epoch": 0.39, "grad_norm": 1.261025071144104, "learning_rate": 6.991484362660695e-06, "loss": 0.5349, "step": 6019 }, { "epoch": 0.39, "grad_norm": 1.2629719972610474, "learning_rate": 6.990525496201657e-06, "loss": 0.5491, "step": 6020 }, { "epoch": 0.39, "grad_norm": 1.221947431564331, "learning_rate": 6.989566542741884e-06, "loss": 0.5591, "step": 6021 }, { "epoch": 0.39, "grad_norm": 1.1607176065444946, "learning_rate": 6.988607502323293e-06, "loss": 0.5488, "step": 6022 }, { "epoch": 0.39, "grad_norm": 1.160613775253296, "learning_rate": 6.9876483749877964e-06, "loss": 0.5315, "step": 6023 }, { "epoch": 0.39, "grad_norm": 1.1581149101257324, "learning_rate": 6.986689160777318e-06, "loss": 0.5182, "step": 6024 }, { "epoch": 0.39, "grad_norm": 1.1878185272216797, "learning_rate": 6.985729859733783e-06, "loss": 0.5427, "step": 6025 }, { "epoch": 0.39, "grad_norm": 1.1034772396087646, "learning_rate": 6.9847704718991184e-06, "loss": 0.5272, "step": 6026 }, { "epoch": 0.39, "grad_norm": 1.4101601839065552, "learning_rate": 6.983810997315257e-06, "loss": 0.5449, "step": 6027 }, { "epoch": 0.39, "grad_norm": 1.3281725645065308, "learning_rate": 6.982851436024136e-06, "loss": 0.6066, "step": 6028 }, { "epoch": 0.39, "grad_norm": 1.269468903541565, "learning_rate": 6.981891788067694e-06, "loss": 0.5991, "step": 6029 }, { "epoch": 0.39, "grad_norm": 1.3466609716415405, "learning_rate": 6.980932053487875e-06, "loss": 0.5453, "step": 6030 }, { "epoch": 0.39, "grad_norm": 1.160348892211914, "learning_rate": 6.979972232326629e-06, "loss": 0.5852, "step": 6031 }, { "epoch": 0.39, "grad_norm": 1.1268566846847534, "learning_rate": 6.979012324625902e-06, "loss": 0.5433, "step": 6032 }, { "epoch": 0.39, "grad_norm": 1.199669361114502, "learning_rate": 6.978052330427654e-06, "loss": 0.5105, "step": 6033 }, { "epoch": 0.39, "grad_norm": 1.1791691780090332, "learning_rate": 6.977092249773842e-06, "loss": 0.546, "step": 6034 }, { "epoch": 0.39, "grad_norm": 1.0759509801864624, "learning_rate": 6.976132082706428e-06, "loss": 0.5201, "step": 6035 }, { "epoch": 0.39, "grad_norm": 1.1391018629074097, "learning_rate": 6.975171829267379e-06, "loss": 0.5616, "step": 6036 }, { "epoch": 0.39, "grad_norm": 1.0531902313232422, "learning_rate": 6.9742114894986675e-06, "loss": 0.55, "step": 6037 }, { "epoch": 0.39, "grad_norm": 1.2269883155822754, "learning_rate": 6.973251063442264e-06, "loss": 0.562, "step": 6038 }, { "epoch": 0.39, "grad_norm": 1.3020291328430176, "learning_rate": 6.972290551140146e-06, "loss": 0.4922, "step": 6039 }, { "epoch": 0.39, "grad_norm": 1.1781184673309326, "learning_rate": 6.9713299526343e-06, "loss": 0.5493, "step": 6040 }, { "epoch": 0.39, "grad_norm": 1.218240737915039, "learning_rate": 6.970369267966705e-06, "loss": 0.5422, "step": 6041 }, { "epoch": 0.39, "grad_norm": 1.3272016048431396, "learning_rate": 6.969408497179353e-06, "loss": 0.5654, "step": 6042 }, { "epoch": 0.39, "grad_norm": 1.138597011566162, "learning_rate": 6.968447640314238e-06, "loss": 0.5314, "step": 6043 }, { "epoch": 0.39, "grad_norm": 1.0984880924224854, "learning_rate": 6.967486697413355e-06, "loss": 0.5842, "step": 6044 }, { "epoch": 0.39, "grad_norm": 1.196541666984558, "learning_rate": 6.966525668518704e-06, "loss": 0.5323, "step": 6045 }, { "epoch": 0.39, "grad_norm": 1.118553638458252, "learning_rate": 6.96556455367229e-06, "loss": 0.5435, "step": 6046 }, { "epoch": 0.39, "grad_norm": 1.1273646354675293, "learning_rate": 6.964603352916122e-06, "loss": 0.5393, "step": 6047 }, { "epoch": 0.39, "grad_norm": 1.297393798828125, "learning_rate": 6.963642066292207e-06, "loss": 0.5288, "step": 6048 }, { "epoch": 0.39, "grad_norm": 1.1897549629211426, "learning_rate": 6.962680693842567e-06, "loss": 0.5621, "step": 6049 }, { "epoch": 0.39, "grad_norm": 1.170188307762146, "learning_rate": 6.961719235609217e-06, "loss": 0.547, "step": 6050 }, { "epoch": 0.39, "grad_norm": 1.0355745553970337, "learning_rate": 6.960757691634179e-06, "loss": 0.4888, "step": 6051 }, { "epoch": 0.39, "grad_norm": 1.1023086309432983, "learning_rate": 6.9597960619594815e-06, "loss": 0.5449, "step": 6052 }, { "epoch": 0.39, "grad_norm": 1.2165300846099854, "learning_rate": 6.958834346627156e-06, "loss": 0.4906, "step": 6053 }, { "epoch": 0.39, "grad_norm": 1.1041756868362427, "learning_rate": 6.957872545679233e-06, "loss": 0.5154, "step": 6054 }, { "epoch": 0.39, "grad_norm": 1.2305055856704712, "learning_rate": 6.956910659157753e-06, "loss": 0.5332, "step": 6055 }, { "epoch": 0.39, "grad_norm": 1.0779824256896973, "learning_rate": 6.9559486871047575e-06, "loss": 0.5009, "step": 6056 }, { "epoch": 0.39, "grad_norm": 1.3828777074813843, "learning_rate": 6.954986629562292e-06, "loss": 0.5386, "step": 6057 }, { "epoch": 0.39, "grad_norm": 1.3330185413360596, "learning_rate": 6.954024486572404e-06, "loss": 0.5924, "step": 6058 }, { "epoch": 0.39, "grad_norm": 1.1372451782226562, "learning_rate": 6.953062258177146e-06, "loss": 0.5589, "step": 6059 }, { "epoch": 0.39, "grad_norm": 1.1341618299484253, "learning_rate": 6.952099944418578e-06, "loss": 0.5788, "step": 6060 }, { "epoch": 0.39, "grad_norm": 1.3333853483200073, "learning_rate": 6.951137545338757e-06, "loss": 0.544, "step": 6061 }, { "epoch": 0.39, "grad_norm": 1.2189106941223145, "learning_rate": 6.950175060979747e-06, "loss": 0.5349, "step": 6062 }, { "epoch": 0.39, "grad_norm": 1.1392309665679932, "learning_rate": 6.949212491383617e-06, "loss": 0.5015, "step": 6063 }, { "epoch": 0.39, "grad_norm": 1.3420276641845703, "learning_rate": 6.9482498365924375e-06, "loss": 0.5944, "step": 6064 }, { "epoch": 0.39, "grad_norm": 1.235608458518982, "learning_rate": 6.947287096648285e-06, "loss": 0.5454, "step": 6065 }, { "epoch": 0.39, "grad_norm": 1.10098397731781, "learning_rate": 6.946324271593238e-06, "loss": 0.5209, "step": 6066 }, { "epoch": 0.39, "grad_norm": 1.3501427173614502, "learning_rate": 6.945361361469379e-06, "loss": 0.5534, "step": 6067 }, { "epoch": 0.39, "grad_norm": 1.1535749435424805, "learning_rate": 6.944398366318792e-06, "loss": 0.5382, "step": 6068 }, { "epoch": 0.39, "grad_norm": 1.1553162336349487, "learning_rate": 6.9434352861835704e-06, "loss": 0.5639, "step": 6069 }, { "epoch": 0.39, "grad_norm": 1.162508487701416, "learning_rate": 6.9424721211058054e-06, "loss": 0.5693, "step": 6070 }, { "epoch": 0.39, "grad_norm": 1.2420175075531006, "learning_rate": 6.941508871127597e-06, "loss": 0.6023, "step": 6071 }, { "epoch": 0.39, "grad_norm": 1.287576675415039, "learning_rate": 6.940545536291045e-06, "loss": 0.5576, "step": 6072 }, { "epoch": 0.39, "grad_norm": 1.1756218671798706, "learning_rate": 6.939582116638252e-06, "loss": 0.555, "step": 6073 }, { "epoch": 0.39, "grad_norm": 1.2404284477233887, "learning_rate": 6.938618612211332e-06, "loss": 0.6254, "step": 6074 }, { "epoch": 0.39, "grad_norm": 1.1259849071502686, "learning_rate": 6.937655023052393e-06, "loss": 0.5273, "step": 6075 }, { "epoch": 0.39, "grad_norm": 1.155800700187683, "learning_rate": 6.936691349203551e-06, "loss": 0.5188, "step": 6076 }, { "epoch": 0.39, "grad_norm": 1.1240016222000122, "learning_rate": 6.935727590706928e-06, "loss": 0.5447, "step": 6077 }, { "epoch": 0.39, "grad_norm": 1.308215618133545, "learning_rate": 6.934763747604647e-06, "loss": 0.5384, "step": 6078 }, { "epoch": 0.39, "grad_norm": 1.1890195608139038, "learning_rate": 6.933799819938833e-06, "loss": 0.5288, "step": 6079 }, { "epoch": 0.39, "grad_norm": 1.0816084146499634, "learning_rate": 6.932835807751618e-06, "loss": 0.4904, "step": 6080 }, { "epoch": 0.39, "grad_norm": 1.1856869459152222, "learning_rate": 6.931871711085139e-06, "loss": 0.5494, "step": 6081 }, { "epoch": 0.39, "grad_norm": 1.0613024234771729, "learning_rate": 6.930907529981529e-06, "loss": 0.4673, "step": 6082 }, { "epoch": 0.39, "grad_norm": 1.157705307006836, "learning_rate": 6.929943264482932e-06, "loss": 0.551, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.1117149591445923, "learning_rate": 6.928978914631498e-06, "loss": 0.5332, "step": 6084 }, { "epoch": 0.39, "grad_norm": 1.2020317316055298, "learning_rate": 6.928014480469369e-06, "loss": 0.522, "step": 6085 }, { "epoch": 0.39, "grad_norm": 1.2499897480010986, "learning_rate": 6.927049962038703e-06, "loss": 0.5472, "step": 6086 }, { "epoch": 0.39, "grad_norm": 1.0832566022872925, "learning_rate": 6.926085359381656e-06, "loss": 0.5244, "step": 6087 }, { "epoch": 0.39, "grad_norm": 1.1607149839401245, "learning_rate": 6.9251206725403854e-06, "loss": 0.5385, "step": 6088 }, { "epoch": 0.39, "grad_norm": 1.182313084602356, "learning_rate": 6.924155901557057e-06, "loss": 0.5622, "step": 6089 }, { "epoch": 0.39, "grad_norm": 1.1370834112167358, "learning_rate": 6.92319104647384e-06, "loss": 0.5503, "step": 6090 }, { "epoch": 0.39, "grad_norm": 1.0400400161743164, "learning_rate": 6.922226107332903e-06, "loss": 0.4962, "step": 6091 }, { "epoch": 0.39, "grad_norm": 1.1370179653167725, "learning_rate": 6.9212610841764226e-06, "loss": 0.5846, "step": 6092 }, { "epoch": 0.39, "grad_norm": 1.0927611589431763, "learning_rate": 6.920295977046578e-06, "loss": 0.5192, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.1084381341934204, "learning_rate": 6.919330785985549e-06, "loss": 0.4967, "step": 6094 }, { "epoch": 0.39, "grad_norm": 1.2064365148544312, "learning_rate": 6.918365511035527e-06, "loss": 0.5619, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.226320505142212, "learning_rate": 6.917400152238694e-06, "loss": 0.5767, "step": 6096 }, { "epoch": 0.39, "grad_norm": 1.110213279724121, "learning_rate": 6.916434709637248e-06, "loss": 0.4685, "step": 6097 }, { "epoch": 0.39, "grad_norm": 1.1119186878204346, "learning_rate": 6.9154691832733865e-06, "loss": 0.5033, "step": 6098 }, { "epoch": 0.39, "grad_norm": 1.0962107181549072, "learning_rate": 6.914503573189308e-06, "loss": 0.613, "step": 6099 }, { "epoch": 0.39, "grad_norm": 1.1141856908798218, "learning_rate": 6.913537879427219e-06, "loss": 0.5468, "step": 6100 }, { "epoch": 0.39, "grad_norm": 1.1862989664077759, "learning_rate": 6.9125721020293255e-06, "loss": 0.5301, "step": 6101 }, { "epoch": 0.39, "grad_norm": 1.0286545753479004, "learning_rate": 6.9116062410378405e-06, "loss": 0.5086, "step": 6102 }, { "epoch": 0.39, "grad_norm": 1.1831122636795044, "learning_rate": 6.910640296494977e-06, "loss": 0.5668, "step": 6103 }, { "epoch": 0.39, "grad_norm": 1.1462862491607666, "learning_rate": 6.909674268442957e-06, "loss": 0.5045, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.1306096315383911, "learning_rate": 6.908708156924002e-06, "loss": 0.5474, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.1910330057144165, "learning_rate": 6.907741961980337e-06, "loss": 0.5143, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.2889235019683838, "learning_rate": 6.9067756836541945e-06, "loss": 0.6005, "step": 6107 }, { "epoch": 0.39, "grad_norm": 1.2152642011642456, "learning_rate": 6.905809321987805e-06, "loss": 0.5317, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.2721601724624634, "learning_rate": 6.904842877023407e-06, "loss": 0.5249, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.1971611976623535, "learning_rate": 6.9038763488032425e-06, "loss": 0.5914, "step": 6110 }, { "epoch": 0.39, "grad_norm": 1.0841588973999023, "learning_rate": 6.902909737369554e-06, "loss": 0.5496, "step": 6111 }, { "epoch": 0.39, "grad_norm": 1.228035807609558, "learning_rate": 6.9019430427645895e-06, "loss": 0.5594, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.2929840087890625, "learning_rate": 6.9009762650306036e-06, "loss": 0.5709, "step": 6113 }, { "epoch": 0.39, "grad_norm": 1.1694189310073853, "learning_rate": 6.900009404209849e-06, "loss": 0.5636, "step": 6114 }, { "epoch": 0.39, "grad_norm": 1.1296736001968384, "learning_rate": 6.899042460344585e-06, "loss": 0.5613, "step": 6115 }, { "epoch": 0.39, "grad_norm": 1.1676527261734009, "learning_rate": 6.898075433477076e-06, "loss": 0.5164, "step": 6116 }, { "epoch": 0.39, "grad_norm": 1.1821339130401611, "learning_rate": 6.897108323649585e-06, "loss": 0.5263, "step": 6117 }, { "epoch": 0.39, "grad_norm": 1.140968680381775, "learning_rate": 6.8961411309043845e-06, "loss": 0.5253, "step": 6118 }, { "epoch": 0.39, "grad_norm": 1.1595821380615234, "learning_rate": 6.895173855283748e-06, "loss": 0.5075, "step": 6119 }, { "epoch": 0.4, "grad_norm": 1.2225855588912964, "learning_rate": 6.8942064968299515e-06, "loss": 0.5876, "step": 6120 }, { "epoch": 0.4, "grad_norm": 1.127852201461792, "learning_rate": 6.893239055585275e-06, "loss": 0.5228, "step": 6121 }, { "epoch": 0.4, "grad_norm": 1.1002229452133179, "learning_rate": 6.892271531592006e-06, "loss": 0.5185, "step": 6122 }, { "epoch": 0.4, "grad_norm": 1.1367205381393433, "learning_rate": 6.8913039248924295e-06, "loss": 0.5658, "step": 6123 }, { "epoch": 0.4, "grad_norm": 1.235048532485962, "learning_rate": 6.8903362355288376e-06, "loss": 0.5398, "step": 6124 }, { "epoch": 0.4, "grad_norm": 1.137373924255371, "learning_rate": 6.889368463543527e-06, "loss": 0.5657, "step": 6125 }, { "epoch": 0.4, "grad_norm": 1.1165766716003418, "learning_rate": 6.8884006089787945e-06, "loss": 0.5543, "step": 6126 }, { "epoch": 0.4, "grad_norm": 1.2754043340682983, "learning_rate": 6.887432671876943e-06, "loss": 0.5902, "step": 6127 }, { "epoch": 0.4, "grad_norm": 1.2752269506454468, "learning_rate": 6.886464652280282e-06, "loss": 0.5573, "step": 6128 }, { "epoch": 0.4, "grad_norm": 1.1119431257247925, "learning_rate": 6.885496550231115e-06, "loss": 0.5573, "step": 6129 }, { "epoch": 0.4, "grad_norm": 1.2171928882598877, "learning_rate": 6.884528365771759e-06, "loss": 0.5799, "step": 6130 }, { "epoch": 0.4, "grad_norm": 1.1726396083831787, "learning_rate": 6.883560098944532e-06, "loss": 0.5684, "step": 6131 }, { "epoch": 0.4, "grad_norm": 1.0792771577835083, "learning_rate": 6.882591749791752e-06, "loss": 0.518, "step": 6132 }, { "epoch": 0.4, "grad_norm": 1.142101764678955, "learning_rate": 6.881623318355742e-06, "loss": 0.5596, "step": 6133 }, { "epoch": 0.4, "grad_norm": 1.2917985916137695, "learning_rate": 6.880654804678833e-06, "loss": 0.5688, "step": 6134 }, { "epoch": 0.4, "grad_norm": 1.1225167512893677, "learning_rate": 6.879686208803354e-06, "loss": 0.5501, "step": 6135 }, { "epoch": 0.4, "grad_norm": 1.2047563791275024, "learning_rate": 6.878717530771642e-06, "loss": 0.5714, "step": 6136 }, { "epoch": 0.4, "grad_norm": 1.1681021451950073, "learning_rate": 6.877748770626033e-06, "loss": 0.5762, "step": 6137 }, { "epoch": 0.4, "grad_norm": 1.1962636709213257, "learning_rate": 6.8767799284088696e-06, "loss": 0.5578, "step": 6138 }, { "epoch": 0.4, "grad_norm": 1.1685866117477417, "learning_rate": 6.875811004162498e-06, "loss": 0.5497, "step": 6139 }, { "epoch": 0.4, "grad_norm": 1.3156406879425049, "learning_rate": 6.874841997929267e-06, "loss": 0.565, "step": 6140 }, { "epoch": 0.4, "grad_norm": 1.1825908422470093, "learning_rate": 6.8738729097515285e-06, "loss": 0.5466, "step": 6141 }, { "epoch": 0.4, "grad_norm": 1.0602933168411255, "learning_rate": 6.872903739671641e-06, "loss": 0.5444, "step": 6142 }, { "epoch": 0.4, "grad_norm": 1.235167384147644, "learning_rate": 6.871934487731962e-06, "loss": 0.5747, "step": 6143 }, { "epoch": 0.4, "grad_norm": 1.224481463432312, "learning_rate": 6.870965153974858e-06, "loss": 0.5575, "step": 6144 }, { "epoch": 0.4, "grad_norm": 1.2230557203292847, "learning_rate": 6.869995738442693e-06, "loss": 0.5136, "step": 6145 }, { "epoch": 0.4, "grad_norm": 1.1425994634628296, "learning_rate": 6.869026241177838e-06, "loss": 0.5318, "step": 6146 }, { "epoch": 0.4, "grad_norm": 1.127217173576355, "learning_rate": 6.868056662222671e-06, "loss": 0.5206, "step": 6147 }, { "epoch": 0.4, "grad_norm": 1.1490542888641357, "learning_rate": 6.867087001619564e-06, "loss": 0.5072, "step": 6148 }, { "epoch": 0.4, "grad_norm": 1.1053237915039062, "learning_rate": 6.8661172594109035e-06, "loss": 0.5337, "step": 6149 }, { "epoch": 0.4, "grad_norm": 1.1072332859039307, "learning_rate": 6.865147435639071e-06, "loss": 0.5614, "step": 6150 }, { "epoch": 0.4, "grad_norm": 1.141748070716858, "learning_rate": 6.8641775303464575e-06, "loss": 0.5623, "step": 6151 }, { "epoch": 0.4, "grad_norm": 1.2590699195861816, "learning_rate": 6.863207543575452e-06, "loss": 0.5777, "step": 6152 }, { "epoch": 0.4, "grad_norm": 1.1894596815109253, "learning_rate": 6.862237475368453e-06, "loss": 0.5385, "step": 6153 }, { "epoch": 0.4, "grad_norm": 1.274526834487915, "learning_rate": 6.861267325767859e-06, "loss": 0.5232, "step": 6154 }, { "epoch": 0.4, "grad_norm": 1.219224452972412, "learning_rate": 6.8602970948160705e-06, "loss": 0.5333, "step": 6155 }, { "epoch": 0.4, "grad_norm": 1.171020269393921, "learning_rate": 6.859326782555497e-06, "loss": 0.5411, "step": 6156 }, { "epoch": 0.4, "grad_norm": 1.1788372993469238, "learning_rate": 6.858356389028548e-06, "loss": 0.5558, "step": 6157 }, { "epoch": 0.4, "grad_norm": 1.1266663074493408, "learning_rate": 6.857385914277633e-06, "loss": 0.5303, "step": 6158 }, { "epoch": 0.4, "grad_norm": 1.162420630455017, "learning_rate": 6.8564153583451745e-06, "loss": 0.5927, "step": 6159 }, { "epoch": 0.4, "grad_norm": 1.1025091409683228, "learning_rate": 6.855444721273589e-06, "loss": 0.586, "step": 6160 }, { "epoch": 0.4, "grad_norm": 1.3147732019424438, "learning_rate": 6.8544740031053015e-06, "loss": 0.5942, "step": 6161 }, { "epoch": 0.4, "grad_norm": 1.1288824081420898, "learning_rate": 6.853503203882741e-06, "loss": 0.554, "step": 6162 }, { "epoch": 0.4, "grad_norm": 1.1374365091323853, "learning_rate": 6.852532323648337e-06, "loss": 0.5253, "step": 6163 }, { "epoch": 0.4, "grad_norm": 1.1132382154464722, "learning_rate": 6.851561362444525e-06, "loss": 0.5195, "step": 6164 }, { "epoch": 0.4, "grad_norm": 1.1583560705184937, "learning_rate": 6.8505903203137416e-06, "loss": 0.549, "step": 6165 }, { "epoch": 0.4, "grad_norm": 1.1371887922286987, "learning_rate": 6.849619197298431e-06, "loss": 0.5189, "step": 6166 }, { "epoch": 0.4, "grad_norm": 1.1734853982925415, "learning_rate": 6.848647993441038e-06, "loss": 0.5658, "step": 6167 }, { "epoch": 0.4, "grad_norm": 1.1767969131469727, "learning_rate": 6.84767670878401e-06, "loss": 0.5709, "step": 6168 }, { "epoch": 0.4, "grad_norm": 1.4434725046157837, "learning_rate": 6.846705343369801e-06, "loss": 0.579, "step": 6169 }, { "epoch": 0.4, "grad_norm": 1.1053180694580078, "learning_rate": 6.845733897240865e-06, "loss": 0.5353, "step": 6170 }, { "epoch": 0.4, "grad_norm": 1.2441545724868774, "learning_rate": 6.8447623704396615e-06, "loss": 0.5695, "step": 6171 }, { "epoch": 0.4, "grad_norm": 1.0617939233779907, "learning_rate": 6.843790763008657e-06, "loss": 0.51, "step": 6172 }, { "epoch": 0.4, "grad_norm": 1.1638846397399902, "learning_rate": 6.842819074990312e-06, "loss": 0.5659, "step": 6173 }, { "epoch": 0.4, "grad_norm": 1.1758981943130493, "learning_rate": 6.841847306427102e-06, "loss": 0.5973, "step": 6174 }, { "epoch": 0.4, "grad_norm": 1.2003722190856934, "learning_rate": 6.840875457361499e-06, "loss": 0.5419, "step": 6175 }, { "epoch": 0.4, "grad_norm": 1.1963058710098267, "learning_rate": 6.839903527835977e-06, "loss": 0.5416, "step": 6176 }, { "epoch": 0.4, "grad_norm": 1.2201193571090698, "learning_rate": 6.838931517893019e-06, "loss": 0.5432, "step": 6177 }, { "epoch": 0.4, "grad_norm": 1.1270015239715576, "learning_rate": 6.8379594275751115e-06, "loss": 0.5511, "step": 6178 }, { "epoch": 0.4, "grad_norm": 1.2811580896377563, "learning_rate": 6.836987256924737e-06, "loss": 0.6132, "step": 6179 }, { "epoch": 0.4, "grad_norm": 1.2309744358062744, "learning_rate": 6.836015005984389e-06, "loss": 0.5965, "step": 6180 }, { "epoch": 0.4, "grad_norm": 1.0574713945388794, "learning_rate": 6.8350426747965635e-06, "loss": 0.4923, "step": 6181 }, { "epoch": 0.4, "grad_norm": 1.0433019399642944, "learning_rate": 6.834070263403756e-06, "loss": 0.5067, "step": 6182 }, { "epoch": 0.4, "grad_norm": 1.069512963294983, "learning_rate": 6.833097771848471e-06, "loss": 0.5129, "step": 6183 }, { "epoch": 0.4, "grad_norm": 1.2669780254364014, "learning_rate": 6.83212520017321e-06, "loss": 0.5622, "step": 6184 }, { "epoch": 0.4, "grad_norm": 1.060896635055542, "learning_rate": 6.831152548420483e-06, "loss": 0.5641, "step": 6185 }, { "epoch": 0.4, "grad_norm": 1.0976567268371582, "learning_rate": 6.830179816632805e-06, "loss": 0.5629, "step": 6186 }, { "epoch": 0.4, "grad_norm": 1.1793856620788574, "learning_rate": 6.829207004852687e-06, "loss": 0.5607, "step": 6187 }, { "epoch": 0.4, "grad_norm": 1.164475917816162, "learning_rate": 6.82823411312265e-06, "loss": 0.5013, "step": 6188 }, { "epoch": 0.4, "grad_norm": 1.1587343215942383, "learning_rate": 6.827261141485219e-06, "loss": 0.5535, "step": 6189 }, { "epoch": 0.4, "grad_norm": 1.1384254693984985, "learning_rate": 6.826288089982916e-06, "loss": 0.5572, "step": 6190 }, { "epoch": 0.4, "grad_norm": 1.2110923528671265, "learning_rate": 6.825314958658273e-06, "loss": 0.5059, "step": 6191 }, { "epoch": 0.4, "grad_norm": 1.1341521739959717, "learning_rate": 6.824341747553823e-06, "loss": 0.4954, "step": 6192 }, { "epoch": 0.4, "grad_norm": 1.061622142791748, "learning_rate": 6.823368456712102e-06, "loss": 0.511, "step": 6193 }, { "epoch": 0.4, "grad_norm": 1.2237030267715454, "learning_rate": 6.82239508617565e-06, "loss": 0.609, "step": 6194 }, { "epoch": 0.4, "grad_norm": 1.2021212577819824, "learning_rate": 6.82142163598701e-06, "loss": 0.5813, "step": 6195 }, { "epoch": 0.4, "grad_norm": 1.120028018951416, "learning_rate": 6.8204481061887306e-06, "loss": 0.513, "step": 6196 }, { "epoch": 0.4, "grad_norm": 1.2263654470443726, "learning_rate": 6.81947449682336e-06, "loss": 0.6019, "step": 6197 }, { "epoch": 0.4, "grad_norm": 1.0577806234359741, "learning_rate": 6.818500807933455e-06, "loss": 0.5287, "step": 6198 }, { "epoch": 0.4, "grad_norm": 1.0778228044509888, "learning_rate": 6.817527039561571e-06, "loss": 0.512, "step": 6199 }, { "epoch": 0.4, "grad_norm": 1.2022285461425781, "learning_rate": 6.816553191750268e-06, "loss": 0.6124, "step": 6200 }, { "epoch": 0.4, "grad_norm": 1.1082208156585693, "learning_rate": 6.8155792645421136e-06, "loss": 0.5635, "step": 6201 }, { "epoch": 0.4, "grad_norm": 1.1202552318572998, "learning_rate": 6.814605257979673e-06, "loss": 0.5229, "step": 6202 }, { "epoch": 0.4, "grad_norm": 1.1562079191207886, "learning_rate": 6.8136311721055186e-06, "loss": 0.5246, "step": 6203 }, { "epoch": 0.4, "grad_norm": 1.1903308629989624, "learning_rate": 6.8126570069622245e-06, "loss": 0.5677, "step": 6204 }, { "epoch": 0.4, "grad_norm": 1.264663577079773, "learning_rate": 6.8116827625923686e-06, "loss": 0.5911, "step": 6205 }, { "epoch": 0.4, "grad_norm": 1.2879760265350342, "learning_rate": 6.810708439038535e-06, "loss": 0.5404, "step": 6206 }, { "epoch": 0.4, "grad_norm": 1.256507158279419, "learning_rate": 6.809734036343307e-06, "loss": 0.6145, "step": 6207 }, { "epoch": 0.4, "grad_norm": 1.0830813646316528, "learning_rate": 6.8087595545492725e-06, "loss": 0.5273, "step": 6208 }, { "epoch": 0.4, "grad_norm": 1.1100066900253296, "learning_rate": 6.8077849936990245e-06, "loss": 0.5399, "step": 6209 }, { "epoch": 0.4, "grad_norm": 1.2983148097991943, "learning_rate": 6.80681035383516e-06, "loss": 0.5568, "step": 6210 }, { "epoch": 0.4, "grad_norm": 1.1740249395370483, "learning_rate": 6.805835635000275e-06, "loss": 0.5468, "step": 6211 }, { "epoch": 0.4, "grad_norm": 1.11387038230896, "learning_rate": 6.804860837236973e-06, "loss": 0.532, "step": 6212 }, { "epoch": 0.4, "grad_norm": 1.2114689350128174, "learning_rate": 6.803885960587863e-06, "loss": 0.5406, "step": 6213 }, { "epoch": 0.4, "grad_norm": 1.135493516921997, "learning_rate": 6.802911005095549e-06, "loss": 0.4731, "step": 6214 }, { "epoch": 0.4, "grad_norm": 1.2647852897644043, "learning_rate": 6.8019359708026474e-06, "loss": 0.5618, "step": 6215 }, { "epoch": 0.4, "grad_norm": 1.1029995679855347, "learning_rate": 6.800960857751775e-06, "loss": 0.5178, "step": 6216 }, { "epoch": 0.4, "grad_norm": 1.0246410369873047, "learning_rate": 6.799985665985549e-06, "loss": 0.555, "step": 6217 }, { "epoch": 0.4, "grad_norm": 1.0813298225402832, "learning_rate": 6.799010395546592e-06, "loss": 0.5144, "step": 6218 }, { "epoch": 0.4, "grad_norm": 1.017193078994751, "learning_rate": 6.798035046477535e-06, "loss": 0.4846, "step": 6219 }, { "epoch": 0.4, "grad_norm": 1.1529186964035034, "learning_rate": 6.797059618821004e-06, "loss": 0.5234, "step": 6220 }, { "epoch": 0.4, "grad_norm": 1.073604941368103, "learning_rate": 6.796084112619633e-06, "loss": 0.5467, "step": 6221 }, { "epoch": 0.4, "grad_norm": 1.1191658973693848, "learning_rate": 6.79510852791606e-06, "loss": 0.5389, "step": 6222 }, { "epoch": 0.4, "grad_norm": 1.1216115951538086, "learning_rate": 6.794132864752925e-06, "loss": 0.5852, "step": 6223 }, { "epoch": 0.4, "grad_norm": 1.1300222873687744, "learning_rate": 6.793157123172871e-06, "loss": 0.5497, "step": 6224 }, { "epoch": 0.4, "grad_norm": 1.2048866748809814, "learning_rate": 6.792181303218544e-06, "loss": 0.5766, "step": 6225 }, { "epoch": 0.4, "grad_norm": 1.0980743169784546, "learning_rate": 6.7912054049325985e-06, "loss": 0.5766, "step": 6226 }, { "epoch": 0.4, "grad_norm": 1.1443074941635132, "learning_rate": 6.790229428357685e-06, "loss": 0.5427, "step": 6227 }, { "epoch": 0.4, "grad_norm": 1.2571258544921875, "learning_rate": 6.789253373536462e-06, "loss": 0.5413, "step": 6228 }, { "epoch": 0.4, "grad_norm": 1.0964856147766113, "learning_rate": 6.7882772405115904e-06, "loss": 0.5149, "step": 6229 }, { "epoch": 0.4, "grad_norm": 1.154441237449646, "learning_rate": 6.787301029325735e-06, "loss": 0.529, "step": 6230 }, { "epoch": 0.4, "grad_norm": 1.1493343114852905, "learning_rate": 6.786324740021563e-06, "loss": 0.5042, "step": 6231 }, { "epoch": 0.4, "grad_norm": 1.132506251335144, "learning_rate": 6.785348372641743e-06, "loss": 0.555, "step": 6232 }, { "epoch": 0.4, "grad_norm": 1.1958585977554321, "learning_rate": 6.784371927228956e-06, "loss": 0.6142, "step": 6233 }, { "epoch": 0.4, "grad_norm": 1.1657016277313232, "learning_rate": 6.783395403825872e-06, "loss": 0.5581, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.1391973495483398, "learning_rate": 6.782418802475178e-06, "loss": 0.571, "step": 6235 }, { "epoch": 0.4, "grad_norm": 1.2203484773635864, "learning_rate": 6.781442123219557e-06, "loss": 0.5481, "step": 6236 }, { "epoch": 0.4, "grad_norm": 1.1346732378005981, "learning_rate": 6.780465366101696e-06, "loss": 0.5566, "step": 6237 }, { "epoch": 0.4, "grad_norm": 1.2492563724517822, "learning_rate": 6.779488531164289e-06, "loss": 0.5061, "step": 6238 }, { "epoch": 0.4, "grad_norm": 1.1482207775115967, "learning_rate": 6.778511618450028e-06, "loss": 0.5498, "step": 6239 }, { "epoch": 0.4, "grad_norm": 0.9758678078651428, "learning_rate": 6.777534628001614e-06, "loss": 0.486, "step": 6240 }, { "epoch": 0.4, "grad_norm": 1.0964272022247314, "learning_rate": 6.776557559861746e-06, "loss": 0.5293, "step": 6241 }, { "epoch": 0.4, "grad_norm": 1.0478154420852661, "learning_rate": 6.7755804140731306e-06, "loss": 0.5072, "step": 6242 }, { "epoch": 0.4, "grad_norm": 1.156906008720398, "learning_rate": 6.774603190678478e-06, "loss": 0.5491, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.0266377925872803, "learning_rate": 6.773625889720497e-06, "loss": 0.5301, "step": 6244 }, { "epoch": 0.4, "grad_norm": 1.2333756685256958, "learning_rate": 6.772648511241906e-06, "loss": 0.5809, "step": 6245 }, { "epoch": 0.4, "grad_norm": 1.1691718101501465, "learning_rate": 6.771671055285422e-06, "loss": 0.5684, "step": 6246 }, { "epoch": 0.4, "grad_norm": 1.2058039903640747, "learning_rate": 6.770693521893765e-06, "loss": 0.5987, "step": 6247 }, { "epoch": 0.4, "grad_norm": 1.0195897817611694, "learning_rate": 6.769715911109664e-06, "loss": 0.4787, "step": 6248 }, { "epoch": 0.4, "grad_norm": 1.1055066585540771, "learning_rate": 6.7687382229758455e-06, "loss": 0.5211, "step": 6249 }, { "epoch": 0.4, "grad_norm": 1.233374834060669, "learning_rate": 6.767760457535044e-06, "loss": 0.5693, "step": 6250 }, { "epoch": 0.4, "grad_norm": 1.2746943235397339, "learning_rate": 6.766782614829994e-06, "loss": 0.551, "step": 6251 }, { "epoch": 0.4, "grad_norm": 1.1243152618408203, "learning_rate": 6.765804694903433e-06, "loss": 0.5397, "step": 6252 }, { "epoch": 0.4, "grad_norm": 1.14677894115448, "learning_rate": 6.764826697798105e-06, "loss": 0.5453, "step": 6253 }, { "epoch": 0.4, "grad_norm": 1.04678213596344, "learning_rate": 6.763848623556756e-06, "loss": 0.5189, "step": 6254 }, { "epoch": 0.4, "grad_norm": 1.1775751113891602, "learning_rate": 6.762870472222134e-06, "loss": 0.5779, "step": 6255 }, { "epoch": 0.4, "grad_norm": 1.1438959836959839, "learning_rate": 6.761892243836992e-06, "loss": 0.5051, "step": 6256 }, { "epoch": 0.4, "grad_norm": 1.0450092554092407, "learning_rate": 6.7609139384440844e-06, "loss": 0.5699, "step": 6257 }, { "epoch": 0.4, "grad_norm": 1.2851392030715942, "learning_rate": 6.759935556086174e-06, "loss": 0.5467, "step": 6258 }, { "epoch": 0.4, "grad_norm": 1.1169039011001587, "learning_rate": 6.758957096806019e-06, "loss": 0.5598, "step": 6259 }, { "epoch": 0.4, "grad_norm": 1.2297056913375854, "learning_rate": 6.75797856064639e-06, "loss": 0.5866, "step": 6260 }, { "epoch": 0.4, "grad_norm": 1.1827945709228516, "learning_rate": 6.756999947650052e-06, "loss": 0.5776, "step": 6261 }, { "epoch": 0.4, "grad_norm": 1.0756721496582031, "learning_rate": 6.75602125785978e-06, "loss": 0.5552, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.0931344032287598, "learning_rate": 6.755042491318349e-06, "loss": 0.552, "step": 6263 }, { "epoch": 0.4, "grad_norm": 1.089536428451538, "learning_rate": 6.754063648068538e-06, "loss": 0.5688, "step": 6264 }, { "epoch": 0.4, "grad_norm": 1.2518059015274048, "learning_rate": 6.753084728153132e-06, "loss": 0.5297, "step": 6265 }, { "epoch": 0.4, "grad_norm": 1.1687607765197754, "learning_rate": 6.752105731614915e-06, "loss": 0.5643, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.0466270446777344, "learning_rate": 6.751126658496678e-06, "loss": 0.5616, "step": 6267 }, { "epoch": 0.4, "grad_norm": 1.094366192817688, "learning_rate": 6.750147508841211e-06, "loss": 0.5885, "step": 6268 }, { "epoch": 0.4, "grad_norm": 1.1145809888839722, "learning_rate": 6.7491682826913115e-06, "loss": 0.5385, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.1392521858215332, "learning_rate": 6.748188980089781e-06, "loss": 0.5328, "step": 6270 }, { "epoch": 0.4, "grad_norm": 1.1769298315048218, "learning_rate": 6.747209601079421e-06, "loss": 0.5478, "step": 6271 }, { "epoch": 0.4, "grad_norm": 1.2151753902435303, "learning_rate": 6.746230145703035e-06, "loss": 0.5249, "step": 6272 }, { "epoch": 0.4, "grad_norm": 1.0883586406707764, "learning_rate": 6.745250614003436e-06, "loss": 0.5217, "step": 6273 }, { "epoch": 0.4, "grad_norm": 1.1880080699920654, "learning_rate": 6.744271006023435e-06, "loss": 0.5645, "step": 6274 }, { "epoch": 0.41, "grad_norm": 1.10122549533844, "learning_rate": 6.743291321805849e-06, "loss": 0.5284, "step": 6275 }, { "epoch": 0.41, "grad_norm": 1.1182899475097656, "learning_rate": 6.7423115613934965e-06, "loss": 0.5461, "step": 6276 }, { "epoch": 0.41, "grad_norm": 1.122790813446045, "learning_rate": 6.741331724829202e-06, "loss": 0.5181, "step": 6277 }, { "epoch": 0.41, "grad_norm": 1.1554746627807617, "learning_rate": 6.740351812155789e-06, "loss": 0.5574, "step": 6278 }, { "epoch": 0.41, "grad_norm": 1.185194730758667, "learning_rate": 6.739371823416089e-06, "loss": 0.5292, "step": 6279 }, { "epoch": 0.41, "grad_norm": 1.1467399597167969, "learning_rate": 6.738391758652936e-06, "loss": 0.5425, "step": 6280 }, { "epoch": 0.41, "grad_norm": 1.1716421842575073, "learning_rate": 6.737411617909162e-06, "loss": 0.557, "step": 6281 }, { "epoch": 0.41, "grad_norm": 1.1658376455307007, "learning_rate": 6.736431401227609e-06, "loss": 0.5153, "step": 6282 }, { "epoch": 0.41, "grad_norm": 1.2030457258224487, "learning_rate": 6.735451108651121e-06, "loss": 0.5855, "step": 6283 }, { "epoch": 0.41, "grad_norm": 1.0651942491531372, "learning_rate": 6.734470740222541e-06, "loss": 0.5646, "step": 6284 }, { "epoch": 0.41, "grad_norm": 1.11702299118042, "learning_rate": 6.733490295984722e-06, "loss": 0.5121, "step": 6285 }, { "epoch": 0.41, "grad_norm": 1.1901098489761353, "learning_rate": 6.732509775980512e-06, "loss": 0.5074, "step": 6286 }, { "epoch": 0.41, "grad_norm": 1.4055209159851074, "learning_rate": 6.731529180252772e-06, "loss": 0.5932, "step": 6287 }, { "epoch": 0.41, "grad_norm": 1.280261754989624, "learning_rate": 6.730548508844357e-06, "loss": 0.601, "step": 6288 }, { "epoch": 0.41, "grad_norm": 1.1971303224563599, "learning_rate": 6.729567761798132e-06, "loss": 0.5259, "step": 6289 }, { "epoch": 0.41, "grad_norm": 1.1573731899261475, "learning_rate": 6.728586939156962e-06, "loss": 0.5369, "step": 6290 }, { "epoch": 0.41, "grad_norm": 1.1583197116851807, "learning_rate": 6.727606040963718e-06, "loss": 0.5343, "step": 6291 }, { "epoch": 0.41, "grad_norm": 1.1185407638549805, "learning_rate": 6.726625067261272e-06, "loss": 0.5183, "step": 6292 }, { "epoch": 0.41, "grad_norm": 1.1885689496994019, "learning_rate": 6.725644018092497e-06, "loss": 0.5688, "step": 6293 }, { "epoch": 0.41, "grad_norm": 1.256095051765442, "learning_rate": 6.724662893500275e-06, "loss": 0.5291, "step": 6294 }, { "epoch": 0.41, "grad_norm": 1.1036159992218018, "learning_rate": 6.723681693527488e-06, "loss": 0.5153, "step": 6295 }, { "epoch": 0.41, "grad_norm": 1.1492301225662231, "learning_rate": 6.7227004182170205e-06, "loss": 0.5205, "step": 6296 }, { "epoch": 0.41, "grad_norm": 1.2431910037994385, "learning_rate": 6.721719067611763e-06, "loss": 0.5522, "step": 6297 }, { "epoch": 0.41, "grad_norm": 1.1456705331802368, "learning_rate": 6.720737641754607e-06, "loss": 0.561, "step": 6298 }, { "epoch": 0.41, "grad_norm": 1.0209542512893677, "learning_rate": 6.719756140688447e-06, "loss": 0.5325, "step": 6299 }, { "epoch": 0.41, "grad_norm": 1.2147228717803955, "learning_rate": 6.718774564456184e-06, "loss": 0.589, "step": 6300 }, { "epoch": 0.41, "grad_norm": 1.1776267290115356, "learning_rate": 6.71779291310072e-06, "loss": 0.5283, "step": 6301 }, { "epoch": 0.41, "grad_norm": 1.2467334270477295, "learning_rate": 6.71681118666496e-06, "loss": 0.5467, "step": 6302 }, { "epoch": 0.41, "grad_norm": 1.3062012195587158, "learning_rate": 6.715829385191811e-06, "loss": 0.5468, "step": 6303 }, { "epoch": 0.41, "grad_norm": 1.1605373620986938, "learning_rate": 6.714847508724188e-06, "loss": 0.5534, "step": 6304 }, { "epoch": 0.41, "grad_norm": 1.1769636869430542, "learning_rate": 6.7138655573050035e-06, "loss": 0.531, "step": 6305 }, { "epoch": 0.41, "grad_norm": 1.1623165607452393, "learning_rate": 6.712883530977178e-06, "loss": 0.5627, "step": 6306 }, { "epoch": 0.41, "grad_norm": 1.227622628211975, "learning_rate": 6.711901429783633e-06, "loss": 0.5846, "step": 6307 }, { "epoch": 0.41, "grad_norm": 1.202528953552246, "learning_rate": 6.710919253767291e-06, "loss": 0.5561, "step": 6308 }, { "epoch": 0.41, "grad_norm": 1.0370571613311768, "learning_rate": 6.709937002971086e-06, "loss": 0.5348, "step": 6309 }, { "epoch": 0.41, "grad_norm": 1.151185393333435, "learning_rate": 6.708954677437944e-06, "loss": 0.513, "step": 6310 }, { "epoch": 0.41, "grad_norm": 1.1768633127212524, "learning_rate": 6.707972277210804e-06, "loss": 0.5213, "step": 6311 }, { "epoch": 0.41, "grad_norm": 1.1964523792266846, "learning_rate": 6.706989802332601e-06, "loss": 0.5298, "step": 6312 }, { "epoch": 0.41, "grad_norm": 1.1625571250915527, "learning_rate": 6.7060072528462785e-06, "loss": 0.5269, "step": 6313 }, { "epoch": 0.41, "grad_norm": 1.2007899284362793, "learning_rate": 6.705024628794779e-06, "loss": 0.5207, "step": 6314 }, { "epoch": 0.41, "grad_norm": 1.146846890449524, "learning_rate": 6.7040419302210535e-06, "loss": 0.5195, "step": 6315 }, { "epoch": 0.41, "grad_norm": 1.2867826223373413, "learning_rate": 6.7030591571680516e-06, "loss": 0.5452, "step": 6316 }, { "epoch": 0.41, "grad_norm": 1.1866170167922974, "learning_rate": 6.702076309678727e-06, "loss": 0.5628, "step": 6317 }, { "epoch": 0.41, "grad_norm": 1.2565979957580566, "learning_rate": 6.701093387796039e-06, "loss": 0.5486, "step": 6318 }, { "epoch": 0.41, "grad_norm": 1.20365571975708, "learning_rate": 6.700110391562949e-06, "loss": 0.474, "step": 6319 }, { "epoch": 0.41, "grad_norm": 1.0465160608291626, "learning_rate": 6.699127321022419e-06, "loss": 0.5101, "step": 6320 }, { "epoch": 0.41, "grad_norm": 1.1653847694396973, "learning_rate": 6.698144176217417e-06, "loss": 0.549, "step": 6321 }, { "epoch": 0.41, "grad_norm": 1.2128024101257324, "learning_rate": 6.697160957190915e-06, "loss": 0.5497, "step": 6322 }, { "epoch": 0.41, "grad_norm": 1.1027005910873413, "learning_rate": 6.696177663985886e-06, "loss": 0.5846, "step": 6323 }, { "epoch": 0.41, "grad_norm": 1.1410253047943115, "learning_rate": 6.695194296645307e-06, "loss": 0.5921, "step": 6324 }, { "epoch": 0.41, "grad_norm": 1.1728044748306274, "learning_rate": 6.69421085521216e-06, "loss": 0.5802, "step": 6325 }, { "epoch": 0.41, "grad_norm": 1.1309561729431152, "learning_rate": 6.6932273397294265e-06, "loss": 0.5537, "step": 6326 }, { "epoch": 0.41, "grad_norm": 1.2311294078826904, "learning_rate": 6.692243750240097e-06, "loss": 0.5658, "step": 6327 }, { "epoch": 0.41, "grad_norm": 1.2538518905639648, "learning_rate": 6.691260086787157e-06, "loss": 0.5799, "step": 6328 }, { "epoch": 0.41, "grad_norm": 1.110002040863037, "learning_rate": 6.6902763494136034e-06, "loss": 0.5676, "step": 6329 }, { "epoch": 0.41, "grad_norm": 1.1669672727584839, "learning_rate": 6.689292538162431e-06, "loss": 0.5672, "step": 6330 }, { "epoch": 0.41, "grad_norm": 1.1729036569595337, "learning_rate": 6.6883086530766395e-06, "loss": 0.5157, "step": 6331 }, { "epoch": 0.41, "grad_norm": 1.2224267721176147, "learning_rate": 6.6873246941992335e-06, "loss": 0.5875, "step": 6332 }, { "epoch": 0.41, "grad_norm": 1.1500275135040283, "learning_rate": 6.686340661573218e-06, "loss": 0.5681, "step": 6333 }, { "epoch": 0.41, "grad_norm": 1.1554208993911743, "learning_rate": 6.685356555241605e-06, "loss": 0.574, "step": 6334 }, { "epoch": 0.41, "grad_norm": 1.2515990734100342, "learning_rate": 6.684372375247402e-06, "loss": 0.5525, "step": 6335 }, { "epoch": 0.41, "grad_norm": 1.117400050163269, "learning_rate": 6.6833881216336304e-06, "loss": 0.5439, "step": 6336 }, { "epoch": 0.41, "grad_norm": 1.266126036643982, "learning_rate": 6.682403794443306e-06, "loss": 0.589, "step": 6337 }, { "epoch": 0.41, "grad_norm": 1.1382323503494263, "learning_rate": 6.6814193937194525e-06, "loss": 0.5218, "step": 6338 }, { "epoch": 0.41, "grad_norm": 1.1491038799285889, "learning_rate": 6.6804349195050965e-06, "loss": 0.557, "step": 6339 }, { "epoch": 0.41, "grad_norm": 1.2072148323059082, "learning_rate": 6.679450371843264e-06, "loss": 0.5964, "step": 6340 }, { "epoch": 0.41, "grad_norm": 1.2006354331970215, "learning_rate": 6.67846575077699e-06, "loss": 0.5452, "step": 6341 }, { "epoch": 0.41, "grad_norm": 1.1840280294418335, "learning_rate": 6.677481056349309e-06, "loss": 0.551, "step": 6342 }, { "epoch": 0.41, "grad_norm": 1.2193806171417236, "learning_rate": 6.676496288603258e-06, "loss": 0.5408, "step": 6343 }, { "epoch": 0.41, "grad_norm": 1.1710160970687866, "learning_rate": 6.675511447581879e-06, "loss": 0.584, "step": 6344 }, { "epoch": 0.41, "grad_norm": 1.1271004676818848, "learning_rate": 6.674526533328221e-06, "loss": 0.545, "step": 6345 }, { "epoch": 0.41, "grad_norm": 1.134392261505127, "learning_rate": 6.673541545885325e-06, "loss": 0.5038, "step": 6346 }, { "epoch": 0.41, "grad_norm": 1.1542716026306152, "learning_rate": 6.672556485296246e-06, "loss": 0.5518, "step": 6347 }, { "epoch": 0.41, "grad_norm": 1.1988677978515625, "learning_rate": 6.67157135160404e-06, "loss": 0.5469, "step": 6348 }, { "epoch": 0.41, "grad_norm": 1.1352051496505737, "learning_rate": 6.670586144851762e-06, "loss": 0.5672, "step": 6349 }, { "epoch": 0.41, "grad_norm": 1.2292823791503906, "learning_rate": 6.669600865082473e-06, "loss": 0.6121, "step": 6350 }, { "epoch": 0.41, "grad_norm": 1.068996787071228, "learning_rate": 6.66861551233924e-06, "loss": 0.5448, "step": 6351 }, { "epoch": 0.41, "grad_norm": 1.21305513381958, "learning_rate": 6.667630086665126e-06, "loss": 0.5381, "step": 6352 }, { "epoch": 0.41, "grad_norm": 1.083625078201294, "learning_rate": 6.666644588103205e-06, "loss": 0.524, "step": 6353 }, { "epoch": 0.41, "grad_norm": 1.3192425966262817, "learning_rate": 6.665659016696548e-06, "loss": 0.5551, "step": 6354 }, { "epoch": 0.41, "grad_norm": 1.2251273393630981, "learning_rate": 6.664673372488233e-06, "loss": 0.574, "step": 6355 }, { "epoch": 0.41, "grad_norm": 1.33936607837677, "learning_rate": 6.66368765552134e-06, "loss": 0.583, "step": 6356 }, { "epoch": 0.41, "grad_norm": 1.2004035711288452, "learning_rate": 6.662701865838952e-06, "loss": 0.5366, "step": 6357 }, { "epoch": 0.41, "grad_norm": 1.1118152141571045, "learning_rate": 6.6617160034841545e-06, "loss": 0.4922, "step": 6358 }, { "epoch": 0.41, "grad_norm": 1.0830525159835815, "learning_rate": 6.660730068500039e-06, "loss": 0.5295, "step": 6359 }, { "epoch": 0.41, "grad_norm": 1.1804012060165405, "learning_rate": 6.659744060929696e-06, "loss": 0.5579, "step": 6360 }, { "epoch": 0.41, "grad_norm": 1.096242070198059, "learning_rate": 6.658757980816221e-06, "loss": 0.5601, "step": 6361 }, { "epoch": 0.41, "grad_norm": 1.1759225130081177, "learning_rate": 6.657771828202717e-06, "loss": 0.5318, "step": 6362 }, { "epoch": 0.41, "grad_norm": 1.0963573455810547, "learning_rate": 6.656785603132283e-06, "loss": 0.5613, "step": 6363 }, { "epoch": 0.41, "grad_norm": 1.0952262878417969, "learning_rate": 6.655799305648023e-06, "loss": 0.5251, "step": 6364 }, { "epoch": 0.41, "grad_norm": 1.1451870203018188, "learning_rate": 6.65481293579305e-06, "loss": 0.5774, "step": 6365 }, { "epoch": 0.41, "grad_norm": 1.1811652183532715, "learning_rate": 6.653826493610471e-06, "loss": 0.5891, "step": 6366 }, { "epoch": 0.41, "grad_norm": 1.0731854438781738, "learning_rate": 6.652839979143404e-06, "loss": 0.525, "step": 6367 }, { "epoch": 0.41, "grad_norm": 1.4018687009811401, "learning_rate": 6.651853392434966e-06, "loss": 0.5286, "step": 6368 }, { "epoch": 0.41, "grad_norm": 1.1871150732040405, "learning_rate": 6.650866733528276e-06, "loss": 0.557, "step": 6369 }, { "epoch": 0.41, "grad_norm": 1.2895883321762085, "learning_rate": 6.6498800024664625e-06, "loss": 0.5689, "step": 6370 }, { "epoch": 0.41, "grad_norm": 1.0161128044128418, "learning_rate": 6.648893199292651e-06, "loss": 0.5515, "step": 6371 }, { "epoch": 0.41, "grad_norm": 1.2568004131317139, "learning_rate": 6.647906324049971e-06, "loss": 0.527, "step": 6372 }, { "epoch": 0.41, "grad_norm": 1.1755331754684448, "learning_rate": 6.646919376781556e-06, "loss": 0.5215, "step": 6373 }, { "epoch": 0.41, "grad_norm": 1.2173248529434204, "learning_rate": 6.6459323575305464e-06, "loss": 0.5679, "step": 6374 }, { "epoch": 0.41, "grad_norm": 1.1039235591888428, "learning_rate": 6.64494526634008e-06, "loss": 0.5369, "step": 6375 }, { "epoch": 0.41, "grad_norm": 1.1955426931381226, "learning_rate": 6.6439581032533e-06, "loss": 0.5536, "step": 6376 }, { "epoch": 0.41, "grad_norm": 1.1012099981307983, "learning_rate": 6.642970868313351e-06, "loss": 0.5544, "step": 6377 }, { "epoch": 0.41, "grad_norm": 1.170602798461914, "learning_rate": 6.641983561563385e-06, "loss": 0.5551, "step": 6378 }, { "epoch": 0.41, "grad_norm": 1.1975862979888916, "learning_rate": 6.640996183046555e-06, "loss": 0.5907, "step": 6379 }, { "epoch": 0.41, "grad_norm": 1.2511367797851562, "learning_rate": 6.6400087328060135e-06, "loss": 0.5434, "step": 6380 }, { "epoch": 0.41, "grad_norm": 1.051764726638794, "learning_rate": 6.639021210884924e-06, "loss": 0.5005, "step": 6381 }, { "epoch": 0.41, "grad_norm": 1.2102220058441162, "learning_rate": 6.638033617326445e-06, "loss": 0.5684, "step": 6382 }, { "epoch": 0.41, "grad_norm": 1.1723636388778687, "learning_rate": 6.637045952173744e-06, "loss": 0.5483, "step": 6383 }, { "epoch": 0.41, "grad_norm": 1.1824206113815308, "learning_rate": 6.636058215469986e-06, "loss": 0.5299, "step": 6384 }, { "epoch": 0.41, "grad_norm": 1.2315547466278076, "learning_rate": 6.635070407258345e-06, "loss": 0.5465, "step": 6385 }, { "epoch": 0.41, "grad_norm": 1.2243916988372803, "learning_rate": 6.634082527581995e-06, "loss": 0.598, "step": 6386 }, { "epoch": 0.41, "grad_norm": 1.17058527469635, "learning_rate": 6.633094576484115e-06, "loss": 0.5327, "step": 6387 }, { "epoch": 0.41, "grad_norm": 1.2349302768707275, "learning_rate": 6.632106554007882e-06, "loss": 0.538, "step": 6388 }, { "epoch": 0.41, "grad_norm": 1.1229583024978638, "learning_rate": 6.631118460196485e-06, "loss": 0.4865, "step": 6389 }, { "epoch": 0.41, "grad_norm": 1.1268433332443237, "learning_rate": 6.630130295093108e-06, "loss": 0.5124, "step": 6390 }, { "epoch": 0.41, "grad_norm": 1.1061879396438599, "learning_rate": 6.629142058740941e-06, "loss": 0.5163, "step": 6391 }, { "epoch": 0.41, "grad_norm": 1.1793768405914307, "learning_rate": 6.628153751183178e-06, "loss": 0.5445, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.1699786186218262, "learning_rate": 6.6271653724630145e-06, "loss": 0.5416, "step": 6393 }, { "epoch": 0.41, "grad_norm": 1.2632052898406982, "learning_rate": 6.62617692262365e-06, "loss": 0.5788, "step": 6394 }, { "epoch": 0.41, "grad_norm": 1.1178058385849, "learning_rate": 6.625188401708291e-06, "loss": 0.5851, "step": 6395 }, { "epoch": 0.41, "grad_norm": 1.06032133102417, "learning_rate": 6.624199809760138e-06, "loss": 0.5576, "step": 6396 }, { "epoch": 0.41, "grad_norm": 1.0654114484786987, "learning_rate": 6.623211146822403e-06, "loss": 0.5117, "step": 6397 }, { "epoch": 0.41, "grad_norm": 1.1455568075180054, "learning_rate": 6.622222412938295e-06, "loss": 0.5465, "step": 6398 }, { "epoch": 0.41, "grad_norm": 1.0963408946990967, "learning_rate": 6.6212336081510295e-06, "loss": 0.4989, "step": 6399 }, { "epoch": 0.41, "grad_norm": 1.259628415107727, "learning_rate": 6.6202447325038285e-06, "loss": 0.5444, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.2005512714385986, "learning_rate": 6.6192557860399094e-06, "loss": 0.5595, "step": 6401 }, { "epoch": 0.41, "grad_norm": 1.168248176574707, "learning_rate": 6.618266768802498e-06, "loss": 0.5919, "step": 6402 }, { "epoch": 0.41, "grad_norm": 1.411630392074585, "learning_rate": 6.617277680834823e-06, "loss": 0.6024, "step": 6403 }, { "epoch": 0.41, "grad_norm": 1.2355128526687622, "learning_rate": 6.61628852218011e-06, "loss": 0.49, "step": 6404 }, { "epoch": 0.41, "grad_norm": 1.2309452295303345, "learning_rate": 6.615299292881597e-06, "loss": 0.5236, "step": 6405 }, { "epoch": 0.41, "grad_norm": 1.378099799156189, "learning_rate": 6.61430999298252e-06, "loss": 0.5658, "step": 6406 }, { "epoch": 0.41, "grad_norm": 1.3019894361495972, "learning_rate": 6.613320622526118e-06, "loss": 0.5565, "step": 6407 }, { "epoch": 0.41, "grad_norm": 1.1030197143554688, "learning_rate": 6.612331181555634e-06, "loss": 0.5127, "step": 6408 }, { "epoch": 0.41, "grad_norm": 1.342702865600586, "learning_rate": 6.611341670114314e-06, "loss": 0.5542, "step": 6409 }, { "epoch": 0.41, "grad_norm": 1.1623096466064453, "learning_rate": 6.610352088245407e-06, "loss": 0.5866, "step": 6410 }, { "epoch": 0.41, "grad_norm": 1.1968433856964111, "learning_rate": 6.6093624359921636e-06, "loss": 0.538, "step": 6411 }, { "epoch": 0.41, "grad_norm": 1.0935519933700562, "learning_rate": 6.608372713397843e-06, "loss": 0.506, "step": 6412 }, { "epoch": 0.41, "grad_norm": 1.2367308139801025, "learning_rate": 6.6073829205056985e-06, "loss": 0.525, "step": 6413 }, { "epoch": 0.41, "grad_norm": 1.298614740371704, "learning_rate": 6.606393057358994e-06, "loss": 0.5884, "step": 6414 }, { "epoch": 0.41, "grad_norm": 1.0914673805236816, "learning_rate": 6.605403124000994e-06, "loss": 0.5085, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.1336848735809326, "learning_rate": 6.604413120474965e-06, "loss": 0.5207, "step": 6416 }, { "epoch": 0.41, "grad_norm": 1.0858327150344849, "learning_rate": 6.603423046824178e-06, "loss": 0.519, "step": 6417 }, { "epoch": 0.41, "grad_norm": 1.138595461845398, "learning_rate": 6.6024329030919074e-06, "loss": 0.5293, "step": 6418 }, { "epoch": 0.41, "grad_norm": 1.107965350151062, "learning_rate": 6.601442689321428e-06, "loss": 0.4918, "step": 6419 }, { "epoch": 0.41, "grad_norm": 1.2376289367675781, "learning_rate": 6.60045240555602e-06, "loss": 0.5722, "step": 6420 }, { "epoch": 0.41, "grad_norm": 1.2686485052108765, "learning_rate": 6.599462051838968e-06, "loss": 0.5357, "step": 6421 }, { "epoch": 0.41, "grad_norm": 1.1631369590759277, "learning_rate": 6.598471628213556e-06, "loss": 0.5663, "step": 6422 }, { "epoch": 0.41, "grad_norm": 1.1431550979614258, "learning_rate": 6.597481134723074e-06, "loss": 0.5269, "step": 6423 }, { "epoch": 0.41, "grad_norm": 1.033138632774353, "learning_rate": 6.596490571410811e-06, "loss": 0.5269, "step": 6424 }, { "epoch": 0.41, "grad_norm": 1.2308365106582642, "learning_rate": 6.595499938320066e-06, "loss": 0.5276, "step": 6425 }, { "epoch": 0.41, "grad_norm": 1.1304383277893066, "learning_rate": 6.594509235494134e-06, "loss": 0.5255, "step": 6426 }, { "epoch": 0.41, "grad_norm": 1.3158416748046875, "learning_rate": 6.593518462976317e-06, "loss": 0.5979, "step": 6427 }, { "epoch": 0.41, "grad_norm": 1.122625708580017, "learning_rate": 6.592527620809921e-06, "loss": 0.4996, "step": 6428 }, { "epoch": 0.41, "grad_norm": 1.2616710662841797, "learning_rate": 6.59153670903825e-06, "loss": 0.5408, "step": 6429 }, { "epoch": 0.42, "grad_norm": 1.0955191850662231, "learning_rate": 6.590545727704616e-06, "loss": 0.5663, "step": 6430 }, { "epoch": 0.42, "grad_norm": 1.2140111923217773, "learning_rate": 6.589554676852332e-06, "loss": 0.5642, "step": 6431 }, { "epoch": 0.42, "grad_norm": 1.2185280323028564, "learning_rate": 6.588563556524714e-06, "loss": 0.5617, "step": 6432 }, { "epoch": 0.42, "grad_norm": 1.2568849325180054, "learning_rate": 6.587572366765083e-06, "loss": 0.6031, "step": 6433 }, { "epoch": 0.42, "grad_norm": 1.1378408670425415, "learning_rate": 6.586581107616756e-06, "loss": 0.5539, "step": 6434 }, { "epoch": 0.42, "grad_norm": 0.9244281649589539, "learning_rate": 6.585589779123065e-06, "loss": 0.4383, "step": 6435 }, { "epoch": 0.42, "grad_norm": 1.2214806079864502, "learning_rate": 6.584598381327336e-06, "loss": 0.612, "step": 6436 }, { "epoch": 0.42, "grad_norm": 1.2123531103134155, "learning_rate": 6.5836069142729e-06, "loss": 0.5499, "step": 6437 }, { "epoch": 0.42, "grad_norm": 1.1468191146850586, "learning_rate": 6.582615378003091e-06, "loss": 0.548, "step": 6438 }, { "epoch": 0.42, "grad_norm": 1.214171051979065, "learning_rate": 6.581623772561247e-06, "loss": 0.5438, "step": 6439 }, { "epoch": 0.42, "grad_norm": 1.1217727661132812, "learning_rate": 6.580632097990707e-06, "loss": 0.537, "step": 6440 }, { "epoch": 0.42, "grad_norm": 1.180538535118103, "learning_rate": 6.579640354334819e-06, "loss": 0.542, "step": 6441 }, { "epoch": 0.42, "grad_norm": 1.0619580745697021, "learning_rate": 6.578648541636925e-06, "loss": 0.5369, "step": 6442 }, { "epoch": 0.42, "grad_norm": 1.1541341543197632, "learning_rate": 6.577656659940376e-06, "loss": 0.5454, "step": 6443 }, { "epoch": 0.42, "grad_norm": 1.2314313650131226, "learning_rate": 6.576664709288525e-06, "loss": 0.5459, "step": 6444 }, { "epoch": 0.42, "grad_norm": 1.1731164455413818, "learning_rate": 6.575672689724728e-06, "loss": 0.6153, "step": 6445 }, { "epoch": 0.42, "grad_norm": 1.146694540977478, "learning_rate": 6.574680601292342e-06, "loss": 0.5744, "step": 6446 }, { "epoch": 0.42, "grad_norm": 1.2653840780258179, "learning_rate": 6.57368844403473e-06, "loss": 0.5452, "step": 6447 }, { "epoch": 0.42, "grad_norm": 1.3024829626083374, "learning_rate": 6.572696217995257e-06, "loss": 0.5249, "step": 6448 }, { "epoch": 0.42, "grad_norm": 1.0661342144012451, "learning_rate": 6.571703923217289e-06, "loss": 0.5023, "step": 6449 }, { "epoch": 0.42, "grad_norm": 1.0703691244125366, "learning_rate": 6.570711559744199e-06, "loss": 0.5111, "step": 6450 }, { "epoch": 0.42, "grad_norm": 1.1806811094284058, "learning_rate": 6.569719127619357e-06, "loss": 0.5489, "step": 6451 }, { "epoch": 0.42, "grad_norm": 1.1601269245147705, "learning_rate": 6.568726626886144e-06, "loss": 0.5663, "step": 6452 }, { "epoch": 0.42, "grad_norm": 1.1337939500808716, "learning_rate": 6.567734057587937e-06, "loss": 0.5653, "step": 6453 }, { "epoch": 0.42, "grad_norm": 1.2052937746047974, "learning_rate": 6.566741419768118e-06, "loss": 0.5087, "step": 6454 }, { "epoch": 0.42, "grad_norm": 1.2714141607284546, "learning_rate": 6.565748713470075e-06, "loss": 0.572, "step": 6455 }, { "epoch": 0.42, "grad_norm": 1.1335417032241821, "learning_rate": 6.564755938737195e-06, "loss": 0.5586, "step": 6456 }, { "epoch": 0.42, "grad_norm": 1.1569414138793945, "learning_rate": 6.563763095612869e-06, "loss": 0.5769, "step": 6457 }, { "epoch": 0.42, "grad_norm": 1.201076865196228, "learning_rate": 6.562770184140494e-06, "loss": 0.5606, "step": 6458 }, { "epoch": 0.42, "grad_norm": 1.1677947044372559, "learning_rate": 6.561777204363466e-06, "loss": 0.5383, "step": 6459 }, { "epoch": 0.42, "grad_norm": 1.1738440990447998, "learning_rate": 6.560784156325187e-06, "loss": 0.5661, "step": 6460 }, { "epoch": 0.42, "grad_norm": 1.2563058137893677, "learning_rate": 6.559791040069057e-06, "loss": 0.5585, "step": 6461 }, { "epoch": 0.42, "grad_norm": 1.2404048442840576, "learning_rate": 6.558797855638487e-06, "loss": 0.6084, "step": 6462 }, { "epoch": 0.42, "grad_norm": 1.1106928586959839, "learning_rate": 6.557804603076883e-06, "loss": 0.5157, "step": 6463 }, { "epoch": 0.42, "grad_norm": 1.0759639739990234, "learning_rate": 6.556811282427659e-06, "loss": 0.5551, "step": 6464 }, { "epoch": 0.42, "grad_norm": 1.1361122131347656, "learning_rate": 6.555817893734232e-06, "loss": 0.5778, "step": 6465 }, { "epoch": 0.42, "grad_norm": 1.178391695022583, "learning_rate": 6.554824437040017e-06, "loss": 0.5184, "step": 6466 }, { "epoch": 0.42, "grad_norm": 1.0828596353530884, "learning_rate": 6.5538309123884385e-06, "loss": 0.5094, "step": 6467 }, { "epoch": 0.42, "grad_norm": 1.1790050268173218, "learning_rate": 6.55283731982292e-06, "loss": 0.5367, "step": 6468 }, { "epoch": 0.42, "grad_norm": 1.2084314823150635, "learning_rate": 6.551843659386888e-06, "loss": 0.5374, "step": 6469 }, { "epoch": 0.42, "grad_norm": 1.2667276859283447, "learning_rate": 6.550849931123775e-06, "loss": 0.5051, "step": 6470 }, { "epoch": 0.42, "grad_norm": 1.0852926969528198, "learning_rate": 6.549856135077011e-06, "loss": 0.4775, "step": 6471 }, { "epoch": 0.42, "grad_norm": 1.3176331520080566, "learning_rate": 6.548862271290035e-06, "loss": 0.589, "step": 6472 }, { "epoch": 0.42, "grad_norm": 0.9936521053314209, "learning_rate": 6.547868339806284e-06, "loss": 0.5257, "step": 6473 }, { "epoch": 0.42, "grad_norm": 1.1750985383987427, "learning_rate": 6.546874340669202e-06, "loss": 0.5257, "step": 6474 }, { "epoch": 0.42, "grad_norm": 1.1563140153884888, "learning_rate": 6.545880273922234e-06, "loss": 0.5494, "step": 6475 }, { "epoch": 0.42, "grad_norm": 1.1912215948104858, "learning_rate": 6.544886139608827e-06, "loss": 0.5306, "step": 6476 }, { "epoch": 0.42, "grad_norm": 1.081659197807312, "learning_rate": 6.543891937772434e-06, "loss": 0.5612, "step": 6477 }, { "epoch": 0.42, "grad_norm": 1.2453172206878662, "learning_rate": 6.542897668456506e-06, "loss": 0.5305, "step": 6478 }, { "epoch": 0.42, "grad_norm": 1.1311237812042236, "learning_rate": 6.541903331704502e-06, "loss": 0.5427, "step": 6479 }, { "epoch": 0.42, "grad_norm": 1.1115033626556396, "learning_rate": 6.540908927559882e-06, "loss": 0.5343, "step": 6480 }, { "epoch": 0.42, "grad_norm": 1.2927124500274658, "learning_rate": 6.539914456066109e-06, "loss": 0.5801, "step": 6481 }, { "epoch": 0.42, "grad_norm": 1.3170307874679565, "learning_rate": 6.538919917266647e-06, "loss": 0.5113, "step": 6482 }, { "epoch": 0.42, "grad_norm": 1.1460731029510498, "learning_rate": 6.5379253112049664e-06, "loss": 0.5512, "step": 6483 }, { "epoch": 0.42, "grad_norm": 1.2150250673294067, "learning_rate": 6.536930637924538e-06, "loss": 0.5537, "step": 6484 }, { "epoch": 0.42, "grad_norm": 1.3829330205917358, "learning_rate": 6.535935897468838e-06, "loss": 0.5686, "step": 6485 }, { "epoch": 0.42, "grad_norm": 1.1818227767944336, "learning_rate": 6.534941089881341e-06, "loss": 0.5598, "step": 6486 }, { "epoch": 0.42, "grad_norm": 1.084725260734558, "learning_rate": 6.53394621520553e-06, "loss": 0.4978, "step": 6487 }, { "epoch": 0.42, "grad_norm": 1.17100191116333, "learning_rate": 6.532951273484888e-06, "loss": 0.4976, "step": 6488 }, { "epoch": 0.42, "grad_norm": 1.2034944295883179, "learning_rate": 6.5319562647629e-06, "loss": 0.5654, "step": 6489 }, { "epoch": 0.42, "grad_norm": 1.1468963623046875, "learning_rate": 6.530961189083056e-06, "loss": 0.5576, "step": 6490 }, { "epoch": 0.42, "grad_norm": 1.097683310508728, "learning_rate": 6.529966046488849e-06, "loss": 0.5332, "step": 6491 }, { "epoch": 0.42, "grad_norm": 1.1047625541687012, "learning_rate": 6.528970837023773e-06, "loss": 0.4728, "step": 6492 }, { "epoch": 0.42, "grad_norm": 1.0632836818695068, "learning_rate": 6.527975560731327e-06, "loss": 0.5048, "step": 6493 }, { "epoch": 0.42, "grad_norm": 1.1984783411026, "learning_rate": 6.526980217655012e-06, "loss": 0.5639, "step": 6494 }, { "epoch": 0.42, "grad_norm": 1.1513334512710571, "learning_rate": 6.52598480783833e-06, "loss": 0.5681, "step": 6495 }, { "epoch": 0.42, "grad_norm": 1.227001667022705, "learning_rate": 6.5249893313247894e-06, "loss": 0.5445, "step": 6496 }, { "epoch": 0.42, "grad_norm": 1.243669867515564, "learning_rate": 6.523993788157901e-06, "loss": 0.5722, "step": 6497 }, { "epoch": 0.42, "grad_norm": 1.1914722919464111, "learning_rate": 6.522998178381175e-06, "loss": 0.6005, "step": 6498 }, { "epoch": 0.42, "grad_norm": 1.160252332687378, "learning_rate": 6.522002502038127e-06, "loss": 0.552, "step": 6499 }, { "epoch": 0.42, "grad_norm": 1.1074241399765015, "learning_rate": 6.521006759172279e-06, "loss": 0.5445, "step": 6500 }, { "epoch": 0.42, "grad_norm": 1.0862497091293335, "learning_rate": 6.520010949827148e-06, "loss": 0.52, "step": 6501 }, { "epoch": 0.42, "grad_norm": 1.2841413021087646, "learning_rate": 6.51901507404626e-06, "loss": 0.5546, "step": 6502 }, { "epoch": 0.42, "grad_norm": 1.1143382787704468, "learning_rate": 6.518019131873144e-06, "loss": 0.5782, "step": 6503 }, { "epoch": 0.42, "grad_norm": 1.2278432846069336, "learning_rate": 6.517023123351326e-06, "loss": 0.5504, "step": 6504 }, { "epoch": 0.42, "grad_norm": 1.1630496978759766, "learning_rate": 6.516027048524341e-06, "loss": 0.5246, "step": 6505 }, { "epoch": 0.42, "grad_norm": 1.2954891920089722, "learning_rate": 6.515030907435728e-06, "loss": 0.5283, "step": 6506 }, { "epoch": 0.42, "grad_norm": 1.194011926651001, "learning_rate": 6.514034700129021e-06, "loss": 0.5493, "step": 6507 }, { "epoch": 0.42, "grad_norm": 1.1157996654510498, "learning_rate": 6.513038426647763e-06, "loss": 0.5665, "step": 6508 }, { "epoch": 0.42, "grad_norm": 1.1275120973587036, "learning_rate": 6.5120420870355e-06, "loss": 0.5463, "step": 6509 }, { "epoch": 0.42, "grad_norm": 1.1664215326309204, "learning_rate": 6.511045681335778e-06, "loss": 0.546, "step": 6510 }, { "epoch": 0.42, "grad_norm": 1.1856566667556763, "learning_rate": 6.5100492095921485e-06, "loss": 0.5476, "step": 6511 }, { "epoch": 0.42, "grad_norm": 1.085951805114746, "learning_rate": 6.509052671848164e-06, "loss": 0.5137, "step": 6512 }, { "epoch": 0.42, "grad_norm": 1.2214829921722412, "learning_rate": 6.508056068147379e-06, "loss": 0.5308, "step": 6513 }, { "epoch": 0.42, "grad_norm": 1.1401190757751465, "learning_rate": 6.507059398533357e-06, "loss": 0.5698, "step": 6514 }, { "epoch": 0.42, "grad_norm": 1.2145204544067383, "learning_rate": 6.506062663049655e-06, "loss": 0.5563, "step": 6515 }, { "epoch": 0.42, "grad_norm": 1.1598267555236816, "learning_rate": 6.505065861739839e-06, "loss": 0.5198, "step": 6516 }, { "epoch": 0.42, "grad_norm": 1.0989022254943848, "learning_rate": 6.504068994647479e-06, "loss": 0.4841, "step": 6517 }, { "epoch": 0.42, "grad_norm": 1.0424436330795288, "learning_rate": 6.503072061816142e-06, "loss": 0.5654, "step": 6518 }, { "epoch": 0.42, "grad_norm": 1.0843185186386108, "learning_rate": 6.5020750632894056e-06, "loss": 0.5272, "step": 6519 }, { "epoch": 0.42, "grad_norm": 1.1769781112670898, "learning_rate": 6.501077999110842e-06, "loss": 0.5515, "step": 6520 }, { "epoch": 0.42, "grad_norm": 1.1620430946350098, "learning_rate": 6.500080869324032e-06, "loss": 0.5008, "step": 6521 }, { "epoch": 0.42, "grad_norm": 1.2195816040039062, "learning_rate": 6.499083673972558e-06, "loss": 0.5401, "step": 6522 }, { "epoch": 0.42, "grad_norm": 1.1984553337097168, "learning_rate": 6.498086413100004e-06, "loss": 0.5284, "step": 6523 }, { "epoch": 0.42, "grad_norm": 1.1165575981140137, "learning_rate": 6.497089086749958e-06, "loss": 0.5305, "step": 6524 }, { "epoch": 0.42, "grad_norm": 1.3566195964813232, "learning_rate": 6.49609169496601e-06, "loss": 0.5731, "step": 6525 }, { "epoch": 0.42, "grad_norm": 1.244009256362915, "learning_rate": 6.495094237791756e-06, "loss": 0.4944, "step": 6526 }, { "epoch": 0.42, "grad_norm": 1.1200028657913208, "learning_rate": 6.494096715270788e-06, "loss": 0.5157, "step": 6527 }, { "epoch": 0.42, "grad_norm": 1.119694709777832, "learning_rate": 6.493099127446707e-06, "loss": 0.5177, "step": 6528 }, { "epoch": 0.42, "grad_norm": 1.2170084714889526, "learning_rate": 6.492101474363117e-06, "loss": 0.5558, "step": 6529 }, { "epoch": 0.42, "grad_norm": 1.2728379964828491, "learning_rate": 6.4911037560636216e-06, "loss": 0.5332, "step": 6530 }, { "epoch": 0.42, "grad_norm": 1.145156979560852, "learning_rate": 6.490105972591827e-06, "loss": 0.5117, "step": 6531 }, { "epoch": 0.42, "grad_norm": 1.2222323417663574, "learning_rate": 6.4891081239913455e-06, "loss": 0.5477, "step": 6532 }, { "epoch": 0.42, "grad_norm": 1.161497712135315, "learning_rate": 6.488110210305789e-06, "loss": 0.5095, "step": 6533 }, { "epoch": 0.42, "grad_norm": 1.1802942752838135, "learning_rate": 6.4871122315787735e-06, "loss": 0.5358, "step": 6534 }, { "epoch": 0.42, "grad_norm": 1.2568918466567993, "learning_rate": 6.486114187853922e-06, "loss": 0.5235, "step": 6535 }, { "epoch": 0.42, "grad_norm": 1.2871118783950806, "learning_rate": 6.4851160791748525e-06, "loss": 0.5808, "step": 6536 }, { "epoch": 0.42, "grad_norm": 1.1954400539398193, "learning_rate": 6.48411790558519e-06, "loss": 0.5601, "step": 6537 }, { "epoch": 0.42, "grad_norm": 1.054521083831787, "learning_rate": 6.483119667128564e-06, "loss": 0.5411, "step": 6538 }, { "epoch": 0.42, "grad_norm": 1.2769381999969482, "learning_rate": 6.482121363848603e-06, "loss": 0.5485, "step": 6539 }, { "epoch": 0.42, "grad_norm": 1.1276302337646484, "learning_rate": 6.481122995788941e-06, "loss": 0.5553, "step": 6540 }, { "epoch": 0.42, "grad_norm": 1.1836591958999634, "learning_rate": 6.4801245629932155e-06, "loss": 0.5473, "step": 6541 }, { "epoch": 0.42, "grad_norm": 1.1741724014282227, "learning_rate": 6.479126065505063e-06, "loss": 0.5453, "step": 6542 }, { "epoch": 0.42, "grad_norm": 1.1631110906600952, "learning_rate": 6.478127503368126e-06, "loss": 0.5724, "step": 6543 }, { "epoch": 0.42, "grad_norm": 1.2217190265655518, "learning_rate": 6.477128876626051e-06, "loss": 0.5074, "step": 6544 }, { "epoch": 0.42, "grad_norm": 1.2191715240478516, "learning_rate": 6.476130185322483e-06, "loss": 0.5203, "step": 6545 }, { "epoch": 0.42, "grad_norm": 1.2208924293518066, "learning_rate": 6.475131429501073e-06, "loss": 0.5286, "step": 6546 }, { "epoch": 0.42, "grad_norm": 1.2019959688186646, "learning_rate": 6.4741326092054745e-06, "loss": 0.5816, "step": 6547 }, { "epoch": 0.42, "grad_norm": 1.1330933570861816, "learning_rate": 6.473133724479342e-06, "loss": 0.5599, "step": 6548 }, { "epoch": 0.42, "grad_norm": 1.185291051864624, "learning_rate": 6.472134775366336e-06, "loss": 0.514, "step": 6549 }, { "epoch": 0.42, "grad_norm": 1.2323737144470215, "learning_rate": 6.471135761910117e-06, "loss": 0.5825, "step": 6550 }, { "epoch": 0.42, "grad_norm": 1.2045947313308716, "learning_rate": 6.470136684154349e-06, "loss": 0.5733, "step": 6551 }, { "epoch": 0.42, "grad_norm": 1.2465046644210815, "learning_rate": 6.469137542142699e-06, "loss": 0.5679, "step": 6552 }, { "epoch": 0.42, "grad_norm": 1.2201220989227295, "learning_rate": 6.468138335918839e-06, "loss": 0.5531, "step": 6553 }, { "epoch": 0.42, "grad_norm": 1.1146903038024902, "learning_rate": 6.4671390655264395e-06, "loss": 0.5611, "step": 6554 }, { "epoch": 0.42, "grad_norm": 1.2473349571228027, "learning_rate": 6.466139731009176e-06, "loss": 0.4825, "step": 6555 }, { "epoch": 0.42, "grad_norm": 1.1073189973831177, "learning_rate": 6.465140332410728e-06, "loss": 0.5503, "step": 6556 }, { "epoch": 0.42, "grad_norm": 1.0782474279403687, "learning_rate": 6.464140869774777e-06, "loss": 0.5015, "step": 6557 }, { "epoch": 0.42, "grad_norm": 1.3525093793869019, "learning_rate": 6.463141343145005e-06, "loss": 0.6091, "step": 6558 }, { "epoch": 0.42, "grad_norm": 1.3064671754837036, "learning_rate": 6.462141752565101e-06, "loss": 0.5799, "step": 6559 }, { "epoch": 0.42, "grad_norm": 1.2298293113708496, "learning_rate": 6.461142098078752e-06, "loss": 0.5401, "step": 6560 }, { "epoch": 0.42, "grad_norm": 1.119503140449524, "learning_rate": 6.4601423797296535e-06, "loss": 0.5244, "step": 6561 }, { "epoch": 0.42, "grad_norm": 1.0844284296035767, "learning_rate": 6.459142597561497e-06, "loss": 0.5331, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.1083976030349731, "learning_rate": 6.458142751617984e-06, "loss": 0.5142, "step": 6563 }, { "epoch": 0.42, "grad_norm": 1.1576032638549805, "learning_rate": 6.4571428419428115e-06, "loss": 0.5097, "step": 6564 }, { "epoch": 0.42, "grad_norm": 1.1547585725784302, "learning_rate": 6.456142868579686e-06, "loss": 0.5415, "step": 6565 }, { "epoch": 0.42, "grad_norm": 1.1888320446014404, "learning_rate": 6.455142831572313e-06, "loss": 0.5253, "step": 6566 }, { "epoch": 0.42, "grad_norm": 1.2864222526550293, "learning_rate": 6.4541427309644e-06, "loss": 0.5512, "step": 6567 }, { "epoch": 0.42, "grad_norm": 1.155187964439392, "learning_rate": 6.4531425667996615e-06, "loss": 0.5733, "step": 6568 }, { "epoch": 0.42, "grad_norm": 1.1031780242919922, "learning_rate": 6.452142339121808e-06, "loss": 0.5011, "step": 6569 }, { "epoch": 0.42, "grad_norm": 1.1033365726470947, "learning_rate": 6.451142047974562e-06, "loss": 0.5087, "step": 6570 }, { "epoch": 0.42, "grad_norm": 1.0494935512542725, "learning_rate": 6.45014169340164e-06, "loss": 0.5749, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.1919910907745361, "learning_rate": 6.4491412754467655e-06, "loss": 0.5537, "step": 6572 }, { "epoch": 0.42, "grad_norm": 1.1241531372070312, "learning_rate": 6.448140794153666e-06, "loss": 0.5045, "step": 6573 }, { "epoch": 0.42, "grad_norm": 1.2140698432922363, "learning_rate": 6.447140249566067e-06, "loss": 0.509, "step": 6574 }, { "epoch": 0.42, "grad_norm": 1.1315535306930542, "learning_rate": 6.446139641727702e-06, "loss": 0.5553, "step": 6575 }, { "epoch": 0.42, "grad_norm": 1.1722344160079956, "learning_rate": 6.445138970682305e-06, "loss": 0.5579, "step": 6576 }, { "epoch": 0.42, "grad_norm": 1.226839303970337, "learning_rate": 6.444138236473612e-06, "loss": 0.5501, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.1184508800506592, "learning_rate": 6.443137439145362e-06, "loss": 0.4971, "step": 6578 }, { "epoch": 0.42, "grad_norm": 1.2314963340759277, "learning_rate": 6.442136578741298e-06, "loss": 0.6087, "step": 6579 }, { "epoch": 0.42, "grad_norm": 1.1103886365890503, "learning_rate": 6.441135655305165e-06, "loss": 0.613, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.2330327033996582, "learning_rate": 6.440134668880711e-06, "loss": 0.5651, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.1710935831069946, "learning_rate": 6.439133619511686e-06, "loss": 0.572, "step": 6582 }, { "epoch": 0.42, "grad_norm": 1.16312575340271, "learning_rate": 6.438132507241843e-06, "loss": 0.5239, "step": 6583 }, { "epoch": 0.42, "grad_norm": 1.1307291984558105, "learning_rate": 6.4371313321149385e-06, "loss": 0.5623, "step": 6584 }, { "epoch": 0.43, "grad_norm": 1.0567017793655396, "learning_rate": 6.436130094174733e-06, "loss": 0.5195, "step": 6585 }, { "epoch": 0.43, "grad_norm": 1.251857042312622, "learning_rate": 6.435128793464984e-06, "loss": 0.5483, "step": 6586 }, { "epoch": 0.43, "grad_norm": 1.138644814491272, "learning_rate": 6.4341274300294595e-06, "loss": 0.5388, "step": 6587 }, { "epoch": 0.43, "grad_norm": 1.346029281616211, "learning_rate": 6.433126003911925e-06, "loss": 0.5648, "step": 6588 }, { "epoch": 0.43, "grad_norm": 1.187017798423767, "learning_rate": 6.43212451515615e-06, "loss": 0.5641, "step": 6589 }, { "epoch": 0.43, "grad_norm": 1.0825623273849487, "learning_rate": 6.431122963805907e-06, "loss": 0.5501, "step": 6590 }, { "epoch": 0.43, "grad_norm": 1.179492712020874, "learning_rate": 6.430121349904973e-06, "loss": 0.5112, "step": 6591 }, { "epoch": 0.43, "grad_norm": 1.138490915298462, "learning_rate": 6.429119673497124e-06, "loss": 0.5403, "step": 6592 }, { "epoch": 0.43, "grad_norm": 1.277349591255188, "learning_rate": 6.42811793462614e-06, "loss": 0.5615, "step": 6593 }, { "epoch": 0.43, "grad_norm": 1.1628623008728027, "learning_rate": 6.427116133335808e-06, "loss": 0.5589, "step": 6594 }, { "epoch": 0.43, "grad_norm": 1.3342269659042358, "learning_rate": 6.42611426966991e-06, "loss": 0.5212, "step": 6595 }, { "epoch": 0.43, "grad_norm": 1.1112178564071655, "learning_rate": 6.425112343672238e-06, "loss": 0.5656, "step": 6596 }, { "epoch": 0.43, "grad_norm": 1.1740977764129639, "learning_rate": 6.424110355386581e-06, "loss": 0.5602, "step": 6597 }, { "epoch": 0.43, "grad_norm": 1.129568099975586, "learning_rate": 6.423108304856736e-06, "loss": 0.5432, "step": 6598 }, { "epoch": 0.43, "grad_norm": 1.1887847185134888, "learning_rate": 6.422106192126496e-06, "loss": 0.5845, "step": 6599 }, { "epoch": 0.43, "grad_norm": 1.223628044128418, "learning_rate": 6.421104017239667e-06, "loss": 0.5837, "step": 6600 }, { "epoch": 0.43, "grad_norm": 1.1120392084121704, "learning_rate": 6.420101780240046e-06, "loss": 0.5076, "step": 6601 }, { "epoch": 0.43, "grad_norm": 1.2164313793182373, "learning_rate": 6.419099481171439e-06, "loss": 0.505, "step": 6602 }, { "epoch": 0.43, "grad_norm": 1.06851065158844, "learning_rate": 6.418097120077658e-06, "loss": 0.4792, "step": 6603 }, { "epoch": 0.43, "grad_norm": 1.2134630680084229, "learning_rate": 6.417094697002507e-06, "loss": 0.5613, "step": 6604 }, { "epoch": 0.43, "grad_norm": 1.231666088104248, "learning_rate": 6.416092211989805e-06, "loss": 0.5205, "step": 6605 }, { "epoch": 0.43, "grad_norm": 1.2104313373565674, "learning_rate": 6.415089665083367e-06, "loss": 0.5517, "step": 6606 }, { "epoch": 0.43, "grad_norm": 1.1207605600357056, "learning_rate": 6.414087056327009e-06, "loss": 0.5649, "step": 6607 }, { "epoch": 0.43, "grad_norm": 1.0961625576019287, "learning_rate": 6.413084385764555e-06, "loss": 0.4994, "step": 6608 }, { "epoch": 0.43, "grad_norm": 1.279389500617981, "learning_rate": 6.412081653439828e-06, "loss": 0.5604, "step": 6609 }, { "epoch": 0.43, "grad_norm": 1.1544851064682007, "learning_rate": 6.411078859396655e-06, "loss": 0.5026, "step": 6610 }, { "epoch": 0.43, "grad_norm": 1.1514633893966675, "learning_rate": 6.410076003678866e-06, "loss": 0.5753, "step": 6611 }, { "epoch": 0.43, "grad_norm": 1.2217381000518799, "learning_rate": 6.409073086330293e-06, "loss": 0.5887, "step": 6612 }, { "epoch": 0.43, "grad_norm": 1.0254279375076294, "learning_rate": 6.4080701073947705e-06, "loss": 0.5653, "step": 6613 }, { "epoch": 0.43, "grad_norm": 1.1473677158355713, "learning_rate": 6.407067066916136e-06, "loss": 0.539, "step": 6614 }, { "epoch": 0.43, "grad_norm": 1.315632700920105, "learning_rate": 6.406063964938232e-06, "loss": 0.6143, "step": 6615 }, { "epoch": 0.43, "grad_norm": 1.1848424673080444, "learning_rate": 6.405060801504899e-06, "loss": 0.5513, "step": 6616 }, { "epoch": 0.43, "grad_norm": 1.1267943382263184, "learning_rate": 6.404057576659983e-06, "loss": 0.5655, "step": 6617 }, { "epoch": 0.43, "grad_norm": 1.1279431581497192, "learning_rate": 6.403054290447332e-06, "loss": 0.5397, "step": 6618 }, { "epoch": 0.43, "grad_norm": 1.148730754852295, "learning_rate": 6.4020509429107995e-06, "loss": 0.5343, "step": 6619 }, { "epoch": 0.43, "grad_norm": 1.188791036605835, "learning_rate": 6.401047534094237e-06, "loss": 0.5532, "step": 6620 }, { "epoch": 0.43, "grad_norm": 1.2213340997695923, "learning_rate": 6.4000440640415015e-06, "loss": 0.5925, "step": 6621 }, { "epoch": 0.43, "grad_norm": 1.0299311876296997, "learning_rate": 6.399040532796451e-06, "loss": 0.5309, "step": 6622 }, { "epoch": 0.43, "grad_norm": 1.203584909439087, "learning_rate": 6.398036940402951e-06, "loss": 0.5427, "step": 6623 }, { "epoch": 0.43, "grad_norm": 1.0805253982543945, "learning_rate": 6.397033286904861e-06, "loss": 0.5442, "step": 6624 }, { "epoch": 0.43, "grad_norm": 1.1812987327575684, "learning_rate": 6.396029572346051e-06, "loss": 0.6021, "step": 6625 }, { "epoch": 0.43, "grad_norm": 1.0691444873809814, "learning_rate": 6.3950257967703914e-06, "loss": 0.5671, "step": 6626 }, { "epoch": 0.43, "grad_norm": 1.2303935289382935, "learning_rate": 6.3940219602217525e-06, "loss": 0.5632, "step": 6627 }, { "epoch": 0.43, "grad_norm": 1.2286838293075562, "learning_rate": 6.393018062744009e-06, "loss": 0.5775, "step": 6628 }, { "epoch": 0.43, "grad_norm": 1.1486866474151611, "learning_rate": 6.392014104381042e-06, "loss": 0.5235, "step": 6629 }, { "epoch": 0.43, "grad_norm": 1.3349716663360596, "learning_rate": 6.3910100851767295e-06, "loss": 0.5646, "step": 6630 }, { "epoch": 0.43, "grad_norm": 1.1280685663223267, "learning_rate": 6.390006005174955e-06, "loss": 0.5047, "step": 6631 }, { "epoch": 0.43, "grad_norm": 1.133534550666809, "learning_rate": 6.389001864419604e-06, "loss": 0.555, "step": 6632 }, { "epoch": 0.43, "grad_norm": 1.1288557052612305, "learning_rate": 6.3879976629545645e-06, "loss": 0.5362, "step": 6633 }, { "epoch": 0.43, "grad_norm": 1.3132325410842896, "learning_rate": 6.386993400823729e-06, "loss": 0.5717, "step": 6634 }, { "epoch": 0.43, "grad_norm": 1.1655572652816772, "learning_rate": 6.385989078070991e-06, "loss": 0.5467, "step": 6635 }, { "epoch": 0.43, "grad_norm": 1.157091498374939, "learning_rate": 6.384984694740246e-06, "loss": 0.5016, "step": 6636 }, { "epoch": 0.43, "grad_norm": 1.1884989738464355, "learning_rate": 6.383980250875392e-06, "loss": 0.5238, "step": 6637 }, { "epoch": 0.43, "grad_norm": 1.0953603982925415, "learning_rate": 6.3829757465203345e-06, "loss": 0.5149, "step": 6638 }, { "epoch": 0.43, "grad_norm": 1.0973867177963257, "learning_rate": 6.381971181718973e-06, "loss": 0.5256, "step": 6639 }, { "epoch": 0.43, "grad_norm": 1.175165057182312, "learning_rate": 6.380966556515217e-06, "loss": 0.5759, "step": 6640 }, { "epoch": 0.43, "grad_norm": 1.11885666847229, "learning_rate": 6.379961870952976e-06, "loss": 0.5576, "step": 6641 }, { "epoch": 0.43, "grad_norm": 1.0745065212249756, "learning_rate": 6.378957125076163e-06, "loss": 0.5205, "step": 6642 }, { "epoch": 0.43, "grad_norm": 1.2098608016967773, "learning_rate": 6.377952318928691e-06, "loss": 0.5378, "step": 6643 }, { "epoch": 0.43, "grad_norm": 1.2507938146591187, "learning_rate": 6.376947452554478e-06, "loss": 0.5615, "step": 6644 }, { "epoch": 0.43, "grad_norm": 1.1873574256896973, "learning_rate": 6.375942525997445e-06, "loss": 0.5286, "step": 6645 }, { "epoch": 0.43, "grad_norm": 1.1325312852859497, "learning_rate": 6.374937539301514e-06, "loss": 0.5581, "step": 6646 }, { "epoch": 0.43, "grad_norm": 1.129692554473877, "learning_rate": 6.373932492510611e-06, "loss": 0.5465, "step": 6647 }, { "epoch": 0.43, "grad_norm": 0.9921581149101257, "learning_rate": 6.3729273856686615e-06, "loss": 0.4919, "step": 6648 }, { "epoch": 0.43, "grad_norm": 1.2173042297363281, "learning_rate": 6.371922218819599e-06, "loss": 0.5061, "step": 6649 }, { "epoch": 0.43, "grad_norm": 1.1615071296691895, "learning_rate": 6.370916992007357e-06, "loss": 0.5207, "step": 6650 }, { "epoch": 0.43, "grad_norm": 1.0929685831069946, "learning_rate": 6.36991170527587e-06, "loss": 0.5454, "step": 6651 }, { "epoch": 0.43, "grad_norm": 1.080666184425354, "learning_rate": 6.368906358669078e-06, "loss": 0.5227, "step": 6652 }, { "epoch": 0.43, "grad_norm": 1.1975092887878418, "learning_rate": 6.367900952230919e-06, "loss": 0.5604, "step": 6653 }, { "epoch": 0.43, "grad_norm": 1.35564124584198, "learning_rate": 6.366895486005338e-06, "loss": 0.5687, "step": 6654 }, { "epoch": 0.43, "grad_norm": 1.1031297445297241, "learning_rate": 6.365889960036285e-06, "loss": 0.5601, "step": 6655 }, { "epoch": 0.43, "grad_norm": 1.2017227411270142, "learning_rate": 6.364884374367702e-06, "loss": 0.5403, "step": 6656 }, { "epoch": 0.43, "grad_norm": 1.1492711305618286, "learning_rate": 6.363878729043548e-06, "loss": 0.5446, "step": 6657 }, { "epoch": 0.43, "grad_norm": 1.1351749897003174, "learning_rate": 6.3628730241077715e-06, "loss": 0.5344, "step": 6658 }, { "epoch": 0.43, "grad_norm": 1.1813573837280273, "learning_rate": 6.3618672596043325e-06, "loss": 0.5851, "step": 6659 }, { "epoch": 0.43, "grad_norm": 1.2490595579147339, "learning_rate": 6.360861435577188e-06, "loss": 0.5468, "step": 6660 }, { "epoch": 0.43, "grad_norm": 1.2459640502929688, "learning_rate": 6.359855552070303e-06, "loss": 0.5573, "step": 6661 }, { "epoch": 0.43, "grad_norm": 1.2059787511825562, "learning_rate": 6.358849609127638e-06, "loss": 0.5266, "step": 6662 }, { "epoch": 0.43, "grad_norm": 1.208771824836731, "learning_rate": 6.357843606793164e-06, "loss": 0.5385, "step": 6663 }, { "epoch": 0.43, "grad_norm": 1.0617045164108276, "learning_rate": 6.35683754511085e-06, "loss": 0.5072, "step": 6664 }, { "epoch": 0.43, "grad_norm": 1.0791255235671997, "learning_rate": 6.355831424124665e-06, "loss": 0.4904, "step": 6665 }, { "epoch": 0.43, "grad_norm": 1.1288702487945557, "learning_rate": 6.354825243878588e-06, "loss": 0.5656, "step": 6666 }, { "epoch": 0.43, "grad_norm": 1.3236563205718994, "learning_rate": 6.353819004416596e-06, "loss": 0.5275, "step": 6667 }, { "epoch": 0.43, "grad_norm": 1.1537981033325195, "learning_rate": 6.352812705782667e-06, "loss": 0.6133, "step": 6668 }, { "epoch": 0.43, "grad_norm": 1.1847662925720215, "learning_rate": 6.351806348020785e-06, "loss": 0.509, "step": 6669 }, { "epoch": 0.43, "grad_norm": 1.1303380727767944, "learning_rate": 6.350799931174936e-06, "loss": 0.5139, "step": 6670 }, { "epoch": 0.43, "grad_norm": 1.0975379943847656, "learning_rate": 6.349793455289106e-06, "loss": 0.5214, "step": 6671 }, { "epoch": 0.43, "grad_norm": 1.245522141456604, "learning_rate": 6.348786920407286e-06, "loss": 0.5756, "step": 6672 }, { "epoch": 0.43, "grad_norm": 1.045872449874878, "learning_rate": 6.347780326573471e-06, "loss": 0.5043, "step": 6673 }, { "epoch": 0.43, "grad_norm": 1.0814244747161865, "learning_rate": 6.3467736738316555e-06, "loss": 0.5372, "step": 6674 }, { "epoch": 0.43, "grad_norm": 1.3269814252853394, "learning_rate": 6.3457669622258364e-06, "loss": 0.5932, "step": 6675 }, { "epoch": 0.43, "grad_norm": 1.2324572801589966, "learning_rate": 6.344760191800017e-06, "loss": 0.5633, "step": 6676 }, { "epoch": 0.43, "grad_norm": 1.2351977825164795, "learning_rate": 6.343753362598198e-06, "loss": 0.5662, "step": 6677 }, { "epoch": 0.43, "grad_norm": 1.1592023372650146, "learning_rate": 6.3427464746643865e-06, "loss": 0.5304, "step": 6678 }, { "epoch": 0.43, "grad_norm": 1.0792720317840576, "learning_rate": 6.341739528042592e-06, "loss": 0.5447, "step": 6679 }, { "epoch": 0.43, "grad_norm": 1.1341445446014404, "learning_rate": 6.3407325227768225e-06, "loss": 0.5386, "step": 6680 }, { "epoch": 0.43, "grad_norm": 1.1487798690795898, "learning_rate": 6.339725458911094e-06, "loss": 0.5513, "step": 6681 }, { "epoch": 0.43, "grad_norm": 1.4244287014007568, "learning_rate": 6.3387183364894245e-06, "loss": 0.5338, "step": 6682 }, { "epoch": 0.43, "grad_norm": 1.2064626216888428, "learning_rate": 6.337711155555828e-06, "loss": 0.5932, "step": 6683 }, { "epoch": 0.43, "grad_norm": 1.1867822408676147, "learning_rate": 6.336703916154329e-06, "loss": 0.5112, "step": 6684 }, { "epoch": 0.43, "grad_norm": 1.1184139251708984, "learning_rate": 6.335696618328951e-06, "loss": 0.5525, "step": 6685 }, { "epoch": 0.43, "grad_norm": 1.0915924310684204, "learning_rate": 6.334689262123719e-06, "loss": 0.5172, "step": 6686 }, { "epoch": 0.43, "grad_norm": 1.1091763973236084, "learning_rate": 6.333681847582662e-06, "loss": 0.5385, "step": 6687 }, { "epoch": 0.43, "grad_norm": 1.1292967796325684, "learning_rate": 6.3326743747498155e-06, "loss": 0.5519, "step": 6688 }, { "epoch": 0.43, "grad_norm": 1.1103756427764893, "learning_rate": 6.3316668436692085e-06, "loss": 0.5533, "step": 6689 }, { "epoch": 0.43, "grad_norm": 1.1017751693725586, "learning_rate": 6.330659254384879e-06, "loss": 0.5788, "step": 6690 }, { "epoch": 0.43, "grad_norm": 1.1455641984939575, "learning_rate": 6.329651606940869e-06, "loss": 0.5097, "step": 6691 }, { "epoch": 0.43, "grad_norm": 1.2292841672897339, "learning_rate": 6.328643901381215e-06, "loss": 0.542, "step": 6692 }, { "epoch": 0.43, "grad_norm": 1.2608271837234497, "learning_rate": 6.327636137749965e-06, "loss": 0.5774, "step": 6693 }, { "epoch": 0.43, "grad_norm": 1.1127986907958984, "learning_rate": 6.326628316091166e-06, "loss": 0.5212, "step": 6694 }, { "epoch": 0.43, "grad_norm": 1.1808961629867554, "learning_rate": 6.325620436448865e-06, "loss": 0.5294, "step": 6695 }, { "epoch": 0.43, "grad_norm": 1.1238285303115845, "learning_rate": 6.324612498867115e-06, "loss": 0.5651, "step": 6696 }, { "epoch": 0.43, "grad_norm": 1.1303997039794922, "learning_rate": 6.3236045033899696e-06, "loss": 0.5362, "step": 6697 }, { "epoch": 0.43, "grad_norm": 1.3803335428237915, "learning_rate": 6.322596450061487e-06, "loss": 0.5134, "step": 6698 }, { "epoch": 0.43, "grad_norm": 1.0673322677612305, "learning_rate": 6.321588338925726e-06, "loss": 0.4917, "step": 6699 }, { "epoch": 0.43, "grad_norm": 1.1752586364746094, "learning_rate": 6.3205801700267476e-06, "loss": 0.5605, "step": 6700 }, { "epoch": 0.43, "grad_norm": 1.0984467267990112, "learning_rate": 6.319571943408619e-06, "loss": 0.5256, "step": 6701 }, { "epoch": 0.43, "grad_norm": 1.1406782865524292, "learning_rate": 6.3185636591154035e-06, "loss": 0.5187, "step": 6702 }, { "epoch": 0.43, "grad_norm": 1.14393949508667, "learning_rate": 6.317555317191172e-06, "loss": 0.567, "step": 6703 }, { "epoch": 0.43, "grad_norm": 1.2339775562286377, "learning_rate": 6.316546917679998e-06, "loss": 0.5791, "step": 6704 }, { "epoch": 0.43, "grad_norm": 1.2755604982376099, "learning_rate": 6.315538460625955e-06, "loss": 0.5608, "step": 6705 }, { "epoch": 0.43, "grad_norm": 1.3232152462005615, "learning_rate": 6.31452994607312e-06, "loss": 0.5954, "step": 6706 }, { "epoch": 0.43, "grad_norm": 1.1122984886169434, "learning_rate": 6.313521374065572e-06, "loss": 0.6005, "step": 6707 }, { "epoch": 0.43, "grad_norm": 1.0708887577056885, "learning_rate": 6.3125127446473955e-06, "loss": 0.5349, "step": 6708 }, { "epoch": 0.43, "grad_norm": 1.173997163772583, "learning_rate": 6.311504057862672e-06, "loss": 0.5706, "step": 6709 }, { "epoch": 0.43, "grad_norm": 1.380385398864746, "learning_rate": 6.310495313755489e-06, "loss": 0.5417, "step": 6710 }, { "epoch": 0.43, "grad_norm": 1.2725497484207153, "learning_rate": 6.309486512369938e-06, "loss": 0.5543, "step": 6711 }, { "epoch": 0.43, "grad_norm": 1.1294059753417969, "learning_rate": 6.3084776537501095e-06, "loss": 0.5765, "step": 6712 }, { "epoch": 0.43, "grad_norm": 1.1973272562026978, "learning_rate": 6.307468737940099e-06, "loss": 0.4861, "step": 6713 }, { "epoch": 0.43, "grad_norm": 1.1852600574493408, "learning_rate": 6.306459764984003e-06, "loss": 0.5294, "step": 6714 }, { "epoch": 0.43, "grad_norm": 1.1862261295318604, "learning_rate": 6.305450734925922e-06, "loss": 0.5798, "step": 6715 }, { "epoch": 0.43, "grad_norm": 1.0680848360061646, "learning_rate": 6.304441647809956e-06, "loss": 0.51, "step": 6716 }, { "epoch": 0.43, "grad_norm": 1.1967236995697021, "learning_rate": 6.303432503680213e-06, "loss": 0.5669, "step": 6717 }, { "epoch": 0.43, "grad_norm": 1.135853886604309, "learning_rate": 6.302423302580797e-06, "loss": 0.542, "step": 6718 }, { "epoch": 0.43, "grad_norm": 1.061126708984375, "learning_rate": 6.301414044555818e-06, "loss": 0.4726, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.1675070524215698, "learning_rate": 6.300404729649391e-06, "loss": 0.5491, "step": 6720 }, { "epoch": 0.43, "grad_norm": 1.1430466175079346, "learning_rate": 6.299395357905627e-06, "loss": 0.5447, "step": 6721 }, { "epoch": 0.43, "grad_norm": 1.1651065349578857, "learning_rate": 6.298385929368645e-06, "loss": 0.5464, "step": 6722 }, { "epoch": 0.43, "grad_norm": 1.2074726819992065, "learning_rate": 6.297376444082563e-06, "loss": 0.5876, "step": 6723 }, { "epoch": 0.43, "grad_norm": 1.195034146308899, "learning_rate": 6.296366902091504e-06, "loss": 0.5266, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.0903384685516357, "learning_rate": 6.295357303439592e-06, "loss": 0.5474, "step": 6725 }, { "epoch": 0.43, "grad_norm": 1.092481017112732, "learning_rate": 6.294347648170957e-06, "loss": 0.5091, "step": 6726 }, { "epoch": 0.43, "grad_norm": 1.1371031999588013, "learning_rate": 6.293337936329722e-06, "loss": 0.5117, "step": 6727 }, { "epoch": 0.43, "grad_norm": 1.1473870277404785, "learning_rate": 6.292328167960025e-06, "loss": 0.5614, "step": 6728 }, { "epoch": 0.43, "grad_norm": 1.2164448499679565, "learning_rate": 6.291318343105997e-06, "loss": 0.5274, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.1511257886886597, "learning_rate": 6.290308461811777e-06, "loss": 0.5705, "step": 6730 }, { "epoch": 0.43, "grad_norm": 1.2114503383636475, "learning_rate": 6.289298524121502e-06, "loss": 0.5415, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.1016029119491577, "learning_rate": 6.288288530079316e-06, "loss": 0.5595, "step": 6732 }, { "epoch": 0.43, "grad_norm": 1.1568325757980347, "learning_rate": 6.287278479729362e-06, "loss": 0.5907, "step": 6733 }, { "epoch": 0.43, "grad_norm": 1.0815144777297974, "learning_rate": 6.286268373115786e-06, "loss": 0.5103, "step": 6734 }, { "epoch": 0.43, "grad_norm": 1.2289143800735474, "learning_rate": 6.285258210282738e-06, "loss": 0.4916, "step": 6735 }, { "epoch": 0.43, "grad_norm": 1.2151367664337158, "learning_rate": 6.284247991274372e-06, "loss": 0.5758, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.309800148010254, "learning_rate": 6.283237716134838e-06, "loss": 0.5187, "step": 6737 }, { "epoch": 0.43, "grad_norm": 1.0966089963912964, "learning_rate": 6.282227384908294e-06, "loss": 0.5253, "step": 6738 }, { "epoch": 0.43, "grad_norm": 1.1494872570037842, "learning_rate": 6.281216997638898e-06, "loss": 0.5309, "step": 6739 }, { "epoch": 0.44, "grad_norm": 1.1756571531295776, "learning_rate": 6.280206554370816e-06, "loss": 0.544, "step": 6740 }, { "epoch": 0.44, "grad_norm": 1.2683603763580322, "learning_rate": 6.279196055148206e-06, "loss": 0.5311, "step": 6741 }, { "epoch": 0.44, "grad_norm": 1.2944855690002441, "learning_rate": 6.278185500015236e-06, "loss": 0.5596, "step": 6742 }, { "epoch": 0.44, "grad_norm": 1.2681838274002075, "learning_rate": 6.277174889016078e-06, "loss": 0.5211, "step": 6743 }, { "epoch": 0.44, "grad_norm": 1.2191762924194336, "learning_rate": 6.276164222194899e-06, "loss": 0.5621, "step": 6744 }, { "epoch": 0.44, "grad_norm": 1.1472587585449219, "learning_rate": 6.275153499595874e-06, "loss": 0.5128, "step": 6745 }, { "epoch": 0.44, "grad_norm": 1.1296839714050293, "learning_rate": 6.274142721263181e-06, "loss": 0.5666, "step": 6746 }, { "epoch": 0.44, "grad_norm": 1.2434459924697876, "learning_rate": 6.273131887240996e-06, "loss": 0.5613, "step": 6747 }, { "epoch": 0.44, "grad_norm": 1.1882269382476807, "learning_rate": 6.2721209975735034e-06, "loss": 0.5306, "step": 6748 }, { "epoch": 0.44, "grad_norm": 1.1770398616790771, "learning_rate": 6.271110052304881e-06, "loss": 0.538, "step": 6749 }, { "epoch": 0.44, "grad_norm": 1.189838171005249, "learning_rate": 6.270099051479319e-06, "loss": 0.4995, "step": 6750 }, { "epoch": 0.44, "grad_norm": 1.23344886302948, "learning_rate": 6.269087995141004e-06, "loss": 0.5928, "step": 6751 }, { "epoch": 0.44, "grad_norm": 1.2089359760284424, "learning_rate": 6.268076883334127e-06, "loss": 0.512, "step": 6752 }, { "epoch": 0.44, "grad_norm": 1.1183671951293945, "learning_rate": 6.2670657161028826e-06, "loss": 0.5284, "step": 6753 }, { "epoch": 0.44, "grad_norm": 1.2292174100875854, "learning_rate": 6.266054493491462e-06, "loss": 0.5398, "step": 6754 }, { "epoch": 0.44, "grad_norm": 1.1819254159927368, "learning_rate": 6.265043215544069e-06, "loss": 0.5596, "step": 6755 }, { "epoch": 0.44, "grad_norm": 1.131583333015442, "learning_rate": 6.2640318823048996e-06, "loss": 0.5565, "step": 6756 }, { "epoch": 0.44, "grad_norm": 1.1999036073684692, "learning_rate": 6.263020493818157e-06, "loss": 0.5802, "step": 6757 }, { "epoch": 0.44, "grad_norm": 1.2208539247512817, "learning_rate": 6.2620090501280484e-06, "loss": 0.5479, "step": 6758 }, { "epoch": 0.44, "grad_norm": 1.1515026092529297, "learning_rate": 6.260997551278779e-06, "loss": 0.5295, "step": 6759 }, { "epoch": 0.44, "grad_norm": 1.196018934249878, "learning_rate": 6.25998599731456e-06, "loss": 0.5007, "step": 6760 }, { "epoch": 0.44, "grad_norm": 1.1870059967041016, "learning_rate": 6.258974388279606e-06, "loss": 0.5714, "step": 6761 }, { "epoch": 0.44, "grad_norm": 1.0037510395050049, "learning_rate": 6.257962724218127e-06, "loss": 0.5295, "step": 6762 }, { "epoch": 0.44, "grad_norm": 1.2523930072784424, "learning_rate": 6.256951005174345e-06, "loss": 0.5624, "step": 6763 }, { "epoch": 0.44, "grad_norm": 1.4893923997879028, "learning_rate": 6.2559392311924774e-06, "loss": 0.5845, "step": 6764 }, { "epoch": 0.44, "grad_norm": 1.1023805141448975, "learning_rate": 6.254927402316745e-06, "loss": 0.5095, "step": 6765 }, { "epoch": 0.44, "grad_norm": 1.1563806533813477, "learning_rate": 6.253915518591374e-06, "loss": 0.5427, "step": 6766 }, { "epoch": 0.44, "grad_norm": 1.0851210355758667, "learning_rate": 6.252903580060592e-06, "loss": 0.5333, "step": 6767 }, { "epoch": 0.44, "grad_norm": 1.1057111024856567, "learning_rate": 6.251891586768626e-06, "loss": 0.5355, "step": 6768 }, { "epoch": 0.44, "grad_norm": 1.2233909368515015, "learning_rate": 6.250879538759709e-06, "loss": 0.5494, "step": 6769 }, { "epoch": 0.44, "grad_norm": 1.182733416557312, "learning_rate": 6.2498674360780745e-06, "loss": 0.5068, "step": 6770 }, { "epoch": 0.44, "grad_norm": 1.205485463142395, "learning_rate": 6.248855278767959e-06, "loss": 0.5675, "step": 6771 }, { "epoch": 0.44, "grad_norm": 1.0751527547836304, "learning_rate": 6.247843066873601e-06, "loss": 0.5223, "step": 6772 }, { "epoch": 0.44, "grad_norm": 1.315495491027832, "learning_rate": 6.246830800439243e-06, "loss": 0.5353, "step": 6773 }, { "epoch": 0.44, "grad_norm": 1.1986193656921387, "learning_rate": 6.245818479509127e-06, "loss": 0.5267, "step": 6774 }, { "epoch": 0.44, "grad_norm": 1.0868256092071533, "learning_rate": 6.2448061041275e-06, "loss": 0.6003, "step": 6775 }, { "epoch": 0.44, "grad_norm": 1.2363835573196411, "learning_rate": 6.24379367433861e-06, "loss": 0.561, "step": 6776 }, { "epoch": 0.44, "grad_norm": 1.1596654653549194, "learning_rate": 6.242781190186707e-06, "loss": 0.5361, "step": 6777 }, { "epoch": 0.44, "grad_norm": 1.3279205560684204, "learning_rate": 6.2417686517160435e-06, "loss": 0.5165, "step": 6778 }, { "epoch": 0.44, "grad_norm": 1.0950692892074585, "learning_rate": 6.2407560589708786e-06, "loss": 0.5533, "step": 6779 }, { "epoch": 0.44, "grad_norm": 1.2047258615493774, "learning_rate": 6.239743411995465e-06, "loss": 0.5375, "step": 6780 }, { "epoch": 0.44, "grad_norm": 1.1128743886947632, "learning_rate": 6.2387307108340665e-06, "loss": 0.5014, "step": 6781 }, { "epoch": 0.44, "grad_norm": 1.205800175666809, "learning_rate": 6.237717955530944e-06, "loss": 0.5996, "step": 6782 }, { "epoch": 0.44, "grad_norm": 1.2907524108886719, "learning_rate": 6.236705146130364e-06, "loss": 0.5574, "step": 6783 }, { "epoch": 0.44, "grad_norm": 1.2035510540008545, "learning_rate": 6.235692282676593e-06, "loss": 0.5548, "step": 6784 }, { "epoch": 0.44, "grad_norm": 1.0500940084457397, "learning_rate": 6.234679365213899e-06, "loss": 0.5363, "step": 6785 }, { "epoch": 0.44, "grad_norm": 1.1030137538909912, "learning_rate": 6.233666393786557e-06, "loss": 0.5199, "step": 6786 }, { "epoch": 0.44, "grad_norm": 1.153594732284546, "learning_rate": 6.232653368438839e-06, "loss": 0.5203, "step": 6787 }, { "epoch": 0.44, "grad_norm": 1.1269268989562988, "learning_rate": 6.231640289215023e-06, "loss": 0.5302, "step": 6788 }, { "epoch": 0.44, "grad_norm": 1.1981059312820435, "learning_rate": 6.230627156159387e-06, "loss": 0.5524, "step": 6789 }, { "epoch": 0.44, "grad_norm": 1.1417278051376343, "learning_rate": 6.229613969316214e-06, "loss": 0.5445, "step": 6790 }, { "epoch": 0.44, "grad_norm": 1.212053894996643, "learning_rate": 6.228600728729786e-06, "loss": 0.5793, "step": 6791 }, { "epoch": 0.44, "grad_norm": 1.3741447925567627, "learning_rate": 6.22758743444439e-06, "loss": 0.6064, "step": 6792 }, { "epoch": 0.44, "grad_norm": 1.2508163452148438, "learning_rate": 6.226574086504316e-06, "loss": 0.5464, "step": 6793 }, { "epoch": 0.44, "grad_norm": 1.19459867477417, "learning_rate": 6.225560684953851e-06, "loss": 0.5646, "step": 6794 }, { "epoch": 0.44, "grad_norm": 1.1799181699752808, "learning_rate": 6.2245472298372925e-06, "loss": 0.5299, "step": 6795 }, { "epoch": 0.44, "grad_norm": 1.1762018203735352, "learning_rate": 6.223533721198932e-06, "loss": 0.5589, "step": 6796 }, { "epoch": 0.44, "grad_norm": 1.0845612287521362, "learning_rate": 6.222520159083071e-06, "loss": 0.5655, "step": 6797 }, { "epoch": 0.44, "grad_norm": 1.128162145614624, "learning_rate": 6.221506543534007e-06, "loss": 0.5012, "step": 6798 }, { "epoch": 0.44, "grad_norm": 1.255300521850586, "learning_rate": 6.220492874596044e-06, "loss": 0.5701, "step": 6799 }, { "epoch": 0.44, "grad_norm": 1.2035983800888062, "learning_rate": 6.219479152313485e-06, "loss": 0.4999, "step": 6800 }, { "epoch": 0.44, "grad_norm": 1.1504770517349243, "learning_rate": 6.21846537673064e-06, "loss": 0.5836, "step": 6801 }, { "epoch": 0.44, "grad_norm": 1.1710485219955444, "learning_rate": 6.217451547891817e-06, "loss": 0.575, "step": 6802 }, { "epoch": 0.44, "grad_norm": 1.2430715560913086, "learning_rate": 6.216437665841328e-06, "loss": 0.5543, "step": 6803 }, { "epoch": 0.44, "grad_norm": 1.1131243705749512, "learning_rate": 6.215423730623487e-06, "loss": 0.5578, "step": 6804 }, { "epoch": 0.44, "grad_norm": 1.0806444883346558, "learning_rate": 6.21440974228261e-06, "loss": 0.5445, "step": 6805 }, { "epoch": 0.44, "grad_norm": 1.0826936960220337, "learning_rate": 6.213395700863016e-06, "loss": 0.5885, "step": 6806 }, { "epoch": 0.44, "grad_norm": 1.1116716861724854, "learning_rate": 6.212381606409027e-06, "loss": 0.5722, "step": 6807 }, { "epoch": 0.44, "grad_norm": 1.3260307312011719, "learning_rate": 6.211367458964966e-06, "loss": 0.6112, "step": 6808 }, { "epoch": 0.44, "grad_norm": 1.0990418195724487, "learning_rate": 6.2103532585751594e-06, "loss": 0.4977, "step": 6809 }, { "epoch": 0.44, "grad_norm": 1.10409414768219, "learning_rate": 6.209339005283934e-06, "loss": 0.5473, "step": 6810 }, { "epoch": 0.44, "grad_norm": 1.2042126655578613, "learning_rate": 6.208324699135621e-06, "loss": 0.5193, "step": 6811 }, { "epoch": 0.44, "grad_norm": 1.0610891580581665, "learning_rate": 6.207310340174553e-06, "loss": 0.5013, "step": 6812 }, { "epoch": 0.44, "grad_norm": 1.3609453439712524, "learning_rate": 6.206295928445064e-06, "loss": 0.5489, "step": 6813 }, { "epoch": 0.44, "grad_norm": 1.1665799617767334, "learning_rate": 6.2052814639914935e-06, "loss": 0.5649, "step": 6814 }, { "epoch": 0.44, "grad_norm": 1.2962816953659058, "learning_rate": 6.204266946858179e-06, "loss": 0.5814, "step": 6815 }, { "epoch": 0.44, "grad_norm": 1.1365458965301514, "learning_rate": 6.2032523770894635e-06, "loss": 0.5636, "step": 6816 }, { "epoch": 0.44, "grad_norm": 1.1331777572631836, "learning_rate": 6.202237754729692e-06, "loss": 0.5902, "step": 6817 }, { "epoch": 0.44, "grad_norm": 1.2504254579544067, "learning_rate": 6.201223079823209e-06, "loss": 0.5582, "step": 6818 }, { "epoch": 0.44, "grad_norm": 1.1318262815475464, "learning_rate": 6.2002083524143645e-06, "loss": 0.5343, "step": 6819 }, { "epoch": 0.44, "grad_norm": 1.2116131782531738, "learning_rate": 6.199193572547511e-06, "loss": 0.5676, "step": 6820 }, { "epoch": 0.44, "grad_norm": 1.1973435878753662, "learning_rate": 6.198178740266998e-06, "loss": 0.4994, "step": 6821 }, { "epoch": 0.44, "grad_norm": 1.1052238941192627, "learning_rate": 6.197163855617184e-06, "loss": 0.5531, "step": 6822 }, { "epoch": 0.44, "grad_norm": 1.241852045059204, "learning_rate": 6.196148918642428e-06, "loss": 0.5633, "step": 6823 }, { "epoch": 0.44, "grad_norm": 1.1600520610809326, "learning_rate": 6.195133929387087e-06, "loss": 0.5266, "step": 6824 }, { "epoch": 0.44, "grad_norm": 1.099768877029419, "learning_rate": 6.194118887895528e-06, "loss": 0.5744, "step": 6825 }, { "epoch": 0.44, "grad_norm": 1.1344801187515259, "learning_rate": 6.193103794212111e-06, "loss": 0.5666, "step": 6826 }, { "epoch": 0.44, "grad_norm": 1.0778671503067017, "learning_rate": 6.192088648381206e-06, "loss": 0.5405, "step": 6827 }, { "epoch": 0.44, "grad_norm": 1.2843166589736938, "learning_rate": 6.1910734504471825e-06, "loss": 0.4985, "step": 6828 }, { "epoch": 0.44, "grad_norm": 1.346063494682312, "learning_rate": 6.1900582004544105e-06, "loss": 0.5344, "step": 6829 }, { "epoch": 0.44, "grad_norm": 1.2234519720077515, "learning_rate": 6.189042898447266e-06, "loss": 0.5475, "step": 6830 }, { "epoch": 0.44, "grad_norm": 1.214995265007019, "learning_rate": 6.188027544470124e-06, "loss": 0.549, "step": 6831 }, { "epoch": 0.44, "grad_norm": 1.2876015901565552, "learning_rate": 6.187012138567362e-06, "loss": 0.5805, "step": 6832 }, { "epoch": 0.44, "grad_norm": 1.064635157585144, "learning_rate": 6.185996680783363e-06, "loss": 0.5104, "step": 6833 }, { "epoch": 0.44, "grad_norm": 1.100745439529419, "learning_rate": 6.18498117116251e-06, "loss": 0.5159, "step": 6834 }, { "epoch": 0.44, "grad_norm": 1.150368571281433, "learning_rate": 6.183965609749185e-06, "loss": 0.5565, "step": 6835 }, { "epoch": 0.44, "grad_norm": 1.1655161380767822, "learning_rate": 6.182949996587779e-06, "loss": 0.5787, "step": 6836 }, { "epoch": 0.44, "grad_norm": 1.1012574434280396, "learning_rate": 6.181934331722681e-06, "loss": 0.5191, "step": 6837 }, { "epoch": 0.44, "grad_norm": 1.142985224723816, "learning_rate": 6.180918615198283e-06, "loss": 0.5533, "step": 6838 }, { "epoch": 0.44, "grad_norm": 1.1668835878372192, "learning_rate": 6.179902847058977e-06, "loss": 0.5396, "step": 6839 }, { "epoch": 0.44, "grad_norm": 1.3387573957443237, "learning_rate": 6.178887027349164e-06, "loss": 0.5563, "step": 6840 }, { "epoch": 0.44, "grad_norm": 1.2068159580230713, "learning_rate": 6.1778711561132396e-06, "loss": 0.5285, "step": 6841 }, { "epoch": 0.44, "grad_norm": 1.1703168153762817, "learning_rate": 6.176855233395607e-06, "loss": 0.5395, "step": 6842 }, { "epoch": 0.44, "grad_norm": 1.0629645586013794, "learning_rate": 6.175839259240668e-06, "loss": 0.5056, "step": 6843 }, { "epoch": 0.44, "grad_norm": 1.1129047870635986, "learning_rate": 6.1748232336928285e-06, "loss": 0.5282, "step": 6844 }, { "epoch": 0.44, "grad_norm": 1.1376420259475708, "learning_rate": 6.173807156796497e-06, "loss": 0.5656, "step": 6845 }, { "epoch": 0.44, "grad_norm": 1.1830896139144897, "learning_rate": 6.172791028596082e-06, "loss": 0.5775, "step": 6846 }, { "epoch": 0.44, "grad_norm": 1.1860480308532715, "learning_rate": 6.1717748491359976e-06, "loss": 0.5581, "step": 6847 }, { "epoch": 0.44, "grad_norm": 1.2150946855545044, "learning_rate": 6.170758618460657e-06, "loss": 0.5512, "step": 6848 }, { "epoch": 0.44, "grad_norm": 1.2508666515350342, "learning_rate": 6.169742336614479e-06, "loss": 0.5636, "step": 6849 }, { "epoch": 0.44, "grad_norm": 1.256807565689087, "learning_rate": 6.168726003641882e-06, "loss": 0.5361, "step": 6850 }, { "epoch": 0.44, "grad_norm": 1.189353346824646, "learning_rate": 6.167709619587286e-06, "loss": 0.5415, "step": 6851 }, { "epoch": 0.44, "grad_norm": 1.3421300649642944, "learning_rate": 6.166693184495114e-06, "loss": 0.5679, "step": 6852 }, { "epoch": 0.44, "grad_norm": 1.1568185091018677, "learning_rate": 6.165676698409794e-06, "loss": 0.4746, "step": 6853 }, { "epoch": 0.44, "grad_norm": 1.0798197984695435, "learning_rate": 6.1646601613757525e-06, "loss": 0.4836, "step": 6854 }, { "epoch": 0.44, "grad_norm": 1.1420283317565918, "learning_rate": 6.1636435734374215e-06, "loss": 0.5359, "step": 6855 }, { "epoch": 0.44, "grad_norm": 1.1259574890136719, "learning_rate": 6.162626934639231e-06, "loss": 0.4932, "step": 6856 }, { "epoch": 0.44, "grad_norm": 1.1614348888397217, "learning_rate": 6.161610245025616e-06, "loss": 0.5174, "step": 6857 }, { "epoch": 0.44, "grad_norm": 1.1322102546691895, "learning_rate": 6.160593504641017e-06, "loss": 0.5478, "step": 6858 }, { "epoch": 0.44, "grad_norm": 1.4071545600891113, "learning_rate": 6.159576713529868e-06, "loss": 0.6075, "step": 6859 }, { "epoch": 0.44, "grad_norm": 1.1745421886444092, "learning_rate": 6.1585598717366125e-06, "loss": 0.5387, "step": 6860 }, { "epoch": 0.44, "grad_norm": 1.2689851522445679, "learning_rate": 6.157542979305696e-06, "loss": 0.4969, "step": 6861 }, { "epoch": 0.44, "grad_norm": 1.3587299585342407, "learning_rate": 6.156526036281561e-06, "loss": 0.528, "step": 6862 }, { "epoch": 0.44, "grad_norm": 1.1894311904907227, "learning_rate": 6.155509042708656e-06, "loss": 0.5249, "step": 6863 }, { "epoch": 0.44, "grad_norm": 1.203784704208374, "learning_rate": 6.154491998631434e-06, "loss": 0.5866, "step": 6864 }, { "epoch": 0.44, "grad_norm": 1.170100212097168, "learning_rate": 6.153474904094344e-06, "loss": 0.5294, "step": 6865 }, { "epoch": 0.44, "grad_norm": 1.2339046001434326, "learning_rate": 6.152457759141843e-06, "loss": 0.5856, "step": 6866 }, { "epoch": 0.44, "grad_norm": 1.1352452039718628, "learning_rate": 6.151440563818386e-06, "loss": 0.504, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.1479681730270386, "learning_rate": 6.150423318168432e-06, "loss": 0.5416, "step": 6868 }, { "epoch": 0.44, "grad_norm": 1.2320725917816162, "learning_rate": 6.149406022236445e-06, "loss": 0.5341, "step": 6869 }, { "epoch": 0.44, "grad_norm": 1.0997556447982788, "learning_rate": 6.1483886760668846e-06, "loss": 0.5894, "step": 6870 }, { "epoch": 0.44, "grad_norm": 1.0862987041473389, "learning_rate": 6.147371279704218e-06, "loss": 0.5399, "step": 6871 }, { "epoch": 0.44, "grad_norm": 1.149946928024292, "learning_rate": 6.146353833192914e-06, "loss": 0.5257, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.205517053604126, "learning_rate": 6.145336336577441e-06, "loss": 0.5266, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.2027140855789185, "learning_rate": 6.1443187899022706e-06, "loss": 0.5311, "step": 6874 }, { "epoch": 0.44, "grad_norm": 1.1767908334732056, "learning_rate": 6.14330119321188e-06, "loss": 0.5711, "step": 6875 }, { "epoch": 0.44, "grad_norm": 1.159110426902771, "learning_rate": 6.142283546550743e-06, "loss": 0.5933, "step": 6876 }, { "epoch": 0.44, "grad_norm": 1.0859628915786743, "learning_rate": 6.141265849963341e-06, "loss": 0.5443, "step": 6877 }, { "epoch": 0.44, "grad_norm": 1.117640495300293, "learning_rate": 6.140248103494152e-06, "loss": 0.497, "step": 6878 }, { "epoch": 0.44, "grad_norm": 1.1615726947784424, "learning_rate": 6.139230307187659e-06, "loss": 0.5353, "step": 6879 }, { "epoch": 0.44, "grad_norm": 1.105777621269226, "learning_rate": 6.13821246108835e-06, "loss": 0.5474, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.0338246822357178, "learning_rate": 6.1371945652407115e-06, "loss": 0.498, "step": 6881 }, { "epoch": 0.44, "grad_norm": 1.2316995859146118, "learning_rate": 6.136176619689231e-06, "loss": 0.5742, "step": 6882 }, { "epoch": 0.44, "grad_norm": 1.1649198532104492, "learning_rate": 6.1351586244784025e-06, "loss": 0.5416, "step": 6883 }, { "epoch": 0.44, "grad_norm": 1.08013117313385, "learning_rate": 6.1341405796527196e-06, "loss": 0.5143, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.1496353149414062, "learning_rate": 6.133122485256677e-06, "loss": 0.5693, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.2254605293273926, "learning_rate": 6.132104341334776e-06, "loss": 0.5168, "step": 6886 }, { "epoch": 0.44, "grad_norm": 1.085961103439331, "learning_rate": 6.131086147931515e-06, "loss": 0.5064, "step": 6887 }, { "epoch": 0.44, "grad_norm": 1.3374356031417847, "learning_rate": 6.130067905091395e-06, "loss": 0.564, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.1483800411224365, "learning_rate": 6.129049612858923e-06, "loss": 0.535, "step": 6889 }, { "epoch": 0.44, "grad_norm": 1.274415135383606, "learning_rate": 6.128031271278605e-06, "loss": 0.5588, "step": 6890 }, { "epoch": 0.44, "grad_norm": 1.070971131324768, "learning_rate": 6.127012880394952e-06, "loss": 0.5267, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.1772958040237427, "learning_rate": 6.125994440252473e-06, "loss": 0.5215, "step": 6892 }, { "epoch": 0.44, "grad_norm": 1.1364041566848755, "learning_rate": 6.1249759508956815e-06, "loss": 0.5251, "step": 6893 }, { "epoch": 0.44, "grad_norm": 1.276293396949768, "learning_rate": 6.123957412369095e-06, "loss": 0.5337, "step": 6894 }, { "epoch": 0.45, "grad_norm": 1.1166425943374634, "learning_rate": 6.122938824717228e-06, "loss": 0.5459, "step": 6895 }, { "epoch": 0.45, "grad_norm": 1.2419952154159546, "learning_rate": 6.121920187984603e-06, "loss": 0.6152, "step": 6896 }, { "epoch": 0.45, "grad_norm": 1.123056173324585, "learning_rate": 6.120901502215742e-06, "loss": 0.5715, "step": 6897 }, { "epoch": 0.45, "grad_norm": 1.2045583724975586, "learning_rate": 6.119882767455168e-06, "loss": 0.5583, "step": 6898 }, { "epoch": 0.45, "grad_norm": 1.1461639404296875, "learning_rate": 6.118863983747407e-06, "loss": 0.5171, "step": 6899 }, { "epoch": 0.45, "grad_norm": 1.2066595554351807, "learning_rate": 6.117845151136987e-06, "loss": 0.511, "step": 6900 }, { "epoch": 0.45, "grad_norm": 1.2811715602874756, "learning_rate": 6.116826269668441e-06, "loss": 0.6115, "step": 6901 }, { "epoch": 0.45, "grad_norm": 1.2878565788269043, "learning_rate": 6.115807339386302e-06, "loss": 0.5353, "step": 6902 }, { "epoch": 0.45, "grad_norm": 1.2333589792251587, "learning_rate": 6.1147883603351e-06, "loss": 0.5462, "step": 6903 }, { "epoch": 0.45, "grad_norm": 1.1998435258865356, "learning_rate": 6.1137693325593746e-06, "loss": 0.5493, "step": 6904 }, { "epoch": 0.45, "grad_norm": 1.1754056215286255, "learning_rate": 6.112750256103668e-06, "loss": 0.534, "step": 6905 }, { "epoch": 0.45, "grad_norm": 1.0580908060073853, "learning_rate": 6.111731131012516e-06, "loss": 0.5324, "step": 6906 }, { "epoch": 0.45, "grad_norm": 1.1103858947753906, "learning_rate": 6.110711957330466e-06, "loss": 0.5283, "step": 6907 }, { "epoch": 0.45, "grad_norm": 1.2017755508422852, "learning_rate": 6.109692735102062e-06, "loss": 0.5203, "step": 6908 }, { "epoch": 0.45, "grad_norm": 1.1003741025924683, "learning_rate": 6.108673464371852e-06, "loss": 0.5245, "step": 6909 }, { "epoch": 0.45, "grad_norm": 1.2565042972564697, "learning_rate": 6.1076541451843855e-06, "loss": 0.5008, "step": 6910 }, { "epoch": 0.45, "grad_norm": 1.2812249660491943, "learning_rate": 6.106634777584212e-06, "loss": 0.579, "step": 6911 }, { "epoch": 0.45, "grad_norm": 1.2687897682189941, "learning_rate": 6.105615361615888e-06, "loss": 0.5653, "step": 6912 }, { "epoch": 0.45, "grad_norm": 1.1418226957321167, "learning_rate": 6.10459589732397e-06, "loss": 0.5271, "step": 6913 }, { "epoch": 0.45, "grad_norm": 1.1955909729003906, "learning_rate": 6.103576384753016e-06, "loss": 0.5589, "step": 6914 }, { "epoch": 0.45, "grad_norm": 1.1373348236083984, "learning_rate": 6.102556823947584e-06, "loss": 0.5333, "step": 6915 }, { "epoch": 0.45, "grad_norm": 1.2555791139602661, "learning_rate": 6.101537214952239e-06, "loss": 0.5909, "step": 6916 }, { "epoch": 0.45, "grad_norm": 1.1201770305633545, "learning_rate": 6.100517557811543e-06, "loss": 0.5191, "step": 6917 }, { "epoch": 0.45, "grad_norm": 1.1109187602996826, "learning_rate": 6.099497852570064e-06, "loss": 0.5922, "step": 6918 }, { "epoch": 0.45, "grad_norm": 1.1129502058029175, "learning_rate": 6.098478099272372e-06, "loss": 0.5256, "step": 6919 }, { "epoch": 0.45, "grad_norm": 1.1379363536834717, "learning_rate": 6.097458297963036e-06, "loss": 0.5531, "step": 6920 }, { "epoch": 0.45, "grad_norm": 1.4197282791137695, "learning_rate": 6.096438448686628e-06, "loss": 0.5007, "step": 6921 }, { "epoch": 0.45, "grad_norm": 1.2429826259613037, "learning_rate": 6.095418551487726e-06, "loss": 0.5527, "step": 6922 }, { "epoch": 0.45, "grad_norm": 1.192883014678955, "learning_rate": 6.094398606410905e-06, "loss": 0.5676, "step": 6923 }, { "epoch": 0.45, "grad_norm": 1.125188946723938, "learning_rate": 6.093378613500743e-06, "loss": 0.5629, "step": 6924 }, { "epoch": 0.45, "grad_norm": 1.143788456916809, "learning_rate": 6.092358572801824e-06, "loss": 0.5761, "step": 6925 }, { "epoch": 0.45, "grad_norm": 1.2048099040985107, "learning_rate": 6.091338484358731e-06, "loss": 0.5694, "step": 6926 }, { "epoch": 0.45, "grad_norm": 1.1391016244888306, "learning_rate": 6.090318348216047e-06, "loss": 0.5523, "step": 6927 }, { "epoch": 0.45, "grad_norm": 1.2975993156433105, "learning_rate": 6.089298164418361e-06, "loss": 0.5448, "step": 6928 }, { "epoch": 0.45, "grad_norm": 1.2026902437210083, "learning_rate": 6.088277933010264e-06, "loss": 0.4988, "step": 6929 }, { "epoch": 0.45, "grad_norm": 1.1874703168869019, "learning_rate": 6.0872576540363446e-06, "loss": 0.5499, "step": 6930 }, { "epoch": 0.45, "grad_norm": 1.3025424480438232, "learning_rate": 6.0862373275412005e-06, "loss": 0.5194, "step": 6931 }, { "epoch": 0.45, "grad_norm": 1.1151621341705322, "learning_rate": 6.085216953569423e-06, "loss": 0.5128, "step": 6932 }, { "epoch": 0.45, "grad_norm": 1.3313432931900024, "learning_rate": 6.084196532165613e-06, "loss": 0.6616, "step": 6933 }, { "epoch": 0.45, "grad_norm": 1.1578853130340576, "learning_rate": 6.0831760633743705e-06, "loss": 0.5986, "step": 6934 }, { "epoch": 0.45, "grad_norm": 1.2367453575134277, "learning_rate": 6.082155547240298e-06, "loss": 0.4838, "step": 6935 }, { "epoch": 0.45, "grad_norm": 1.1424942016601562, "learning_rate": 6.081134983807997e-06, "loss": 0.4957, "step": 6936 }, { "epoch": 0.45, "grad_norm": 1.1837821006774902, "learning_rate": 6.080114373122074e-06, "loss": 0.6002, "step": 6937 }, { "epoch": 0.45, "grad_norm": 1.3252869844436646, "learning_rate": 6.079093715227141e-06, "loss": 0.5503, "step": 6938 }, { "epoch": 0.45, "grad_norm": 1.1789227724075317, "learning_rate": 6.0780730101678044e-06, "loss": 0.5355, "step": 6939 }, { "epoch": 0.45, "grad_norm": 1.1361958980560303, "learning_rate": 6.0770522579886795e-06, "loss": 0.5042, "step": 6940 }, { "epoch": 0.45, "grad_norm": 1.226374626159668, "learning_rate": 6.076031458734377e-06, "loss": 0.5543, "step": 6941 }, { "epoch": 0.45, "grad_norm": 1.1038196086883545, "learning_rate": 6.075010612449516e-06, "loss": 0.4968, "step": 6942 }, { "epoch": 0.45, "grad_norm": 1.1220859289169312, "learning_rate": 6.073989719178717e-06, "loss": 0.5252, "step": 6943 }, { "epoch": 0.45, "grad_norm": 1.2320291996002197, "learning_rate": 6.072968778966596e-06, "loss": 0.5896, "step": 6944 }, { "epoch": 0.45, "grad_norm": 1.1234543323516846, "learning_rate": 6.071947791857779e-06, "loss": 0.5737, "step": 6945 }, { "epoch": 0.45, "grad_norm": 1.059490442276001, "learning_rate": 6.070926757896889e-06, "loss": 0.5072, "step": 6946 }, { "epoch": 0.45, "grad_norm": 1.0287580490112305, "learning_rate": 6.069905677128554e-06, "loss": 0.5077, "step": 6947 }, { "epoch": 0.45, "grad_norm": 1.1467387676239014, "learning_rate": 6.0688845495974015e-06, "loss": 0.5344, "step": 6948 }, { "epoch": 0.45, "grad_norm": 1.0704792737960815, "learning_rate": 6.067863375348064e-06, "loss": 0.5172, "step": 6949 }, { "epoch": 0.45, "grad_norm": 1.2045773267745972, "learning_rate": 6.066842154425172e-06, "loss": 0.5591, "step": 6950 }, { "epoch": 0.45, "grad_norm": 1.2028446197509766, "learning_rate": 6.065820886873364e-06, "loss": 0.6162, "step": 6951 }, { "epoch": 0.45, "grad_norm": 1.1603527069091797, "learning_rate": 6.064799572737274e-06, "loss": 0.5821, "step": 6952 }, { "epoch": 0.45, "grad_norm": 1.2501369714736938, "learning_rate": 6.063778212061541e-06, "loss": 0.6014, "step": 6953 }, { "epoch": 0.45, "grad_norm": 1.3015440702438354, "learning_rate": 6.062756804890808e-06, "loss": 0.5485, "step": 6954 }, { "epoch": 0.45, "grad_norm": 1.1642909049987793, "learning_rate": 6.061735351269717e-06, "loss": 0.5085, "step": 6955 }, { "epoch": 0.45, "grad_norm": 1.2431012392044067, "learning_rate": 6.060713851242912e-06, "loss": 0.5601, "step": 6956 }, { "epoch": 0.45, "grad_norm": 1.1892461776733398, "learning_rate": 6.059692304855042e-06, "loss": 0.5381, "step": 6957 }, { "epoch": 0.45, "grad_norm": 1.1218311786651611, "learning_rate": 6.058670712150756e-06, "loss": 0.5223, "step": 6958 }, { "epoch": 0.45, "grad_norm": 1.124794840812683, "learning_rate": 6.057649073174704e-06, "loss": 0.5327, "step": 6959 }, { "epoch": 0.45, "grad_norm": 1.136541724205017, "learning_rate": 6.056627387971541e-06, "loss": 0.5563, "step": 6960 }, { "epoch": 0.45, "grad_norm": 1.2420718669891357, "learning_rate": 6.055605656585919e-06, "loss": 0.5861, "step": 6961 }, { "epoch": 0.45, "grad_norm": 1.2616393566131592, "learning_rate": 6.0545838790624976e-06, "loss": 0.5763, "step": 6962 }, { "epoch": 0.45, "grad_norm": 1.2301294803619385, "learning_rate": 6.0535620554459375e-06, "loss": 0.5662, "step": 6963 }, { "epoch": 0.45, "grad_norm": 1.0470224618911743, "learning_rate": 6.052540185780896e-06, "loss": 0.4895, "step": 6964 }, { "epoch": 0.45, "grad_norm": 1.0339360237121582, "learning_rate": 6.051518270112039e-06, "loss": 0.5317, "step": 6965 }, { "epoch": 0.45, "grad_norm": 1.083105206489563, "learning_rate": 6.050496308484033e-06, "loss": 0.549, "step": 6966 }, { "epoch": 0.45, "grad_norm": 1.1693315505981445, "learning_rate": 6.049474300941541e-06, "loss": 0.5528, "step": 6967 }, { "epoch": 0.45, "grad_norm": 1.4059628248214722, "learning_rate": 6.048452247529237e-06, "loss": 0.5424, "step": 6968 }, { "epoch": 0.45, "grad_norm": 1.1858714818954468, "learning_rate": 6.0474301482917906e-06, "loss": 0.5435, "step": 6969 }, { "epoch": 0.45, "grad_norm": 1.175567388534546, "learning_rate": 6.0464080032738734e-06, "loss": 0.5401, "step": 6970 }, { "epoch": 0.45, "grad_norm": 1.116309404373169, "learning_rate": 6.0453858125201625e-06, "loss": 0.5075, "step": 6971 }, { "epoch": 0.45, "grad_norm": 1.21378493309021, "learning_rate": 6.044363576075336e-06, "loss": 0.5866, "step": 6972 }, { "epoch": 0.45, "grad_norm": 1.198718786239624, "learning_rate": 6.043341293984071e-06, "loss": 0.557, "step": 6973 }, { "epoch": 0.45, "grad_norm": 1.1098276376724243, "learning_rate": 6.042318966291048e-06, "loss": 0.5308, "step": 6974 }, { "epoch": 0.45, "grad_norm": 1.2023783922195435, "learning_rate": 6.0412965930409565e-06, "loss": 0.5713, "step": 6975 }, { "epoch": 0.45, "grad_norm": 1.2298327684402466, "learning_rate": 6.040274174278474e-06, "loss": 0.5905, "step": 6976 }, { "epoch": 0.45, "grad_norm": 1.1733394861221313, "learning_rate": 6.039251710048293e-06, "loss": 0.5254, "step": 6977 }, { "epoch": 0.45, "grad_norm": 1.1789696216583252, "learning_rate": 6.0382292003951e-06, "loss": 0.5431, "step": 6978 }, { "epoch": 0.45, "grad_norm": 1.1449627876281738, "learning_rate": 6.037206645363589e-06, "loss": 0.5767, "step": 6979 }, { "epoch": 0.45, "grad_norm": 1.1783287525177002, "learning_rate": 6.036184044998451e-06, "loss": 0.5287, "step": 6980 }, { "epoch": 0.45, "grad_norm": 1.135278582572937, "learning_rate": 6.03516139934438e-06, "loss": 0.5116, "step": 6981 }, { "epoch": 0.45, "grad_norm": 1.1376773118972778, "learning_rate": 6.034138708446078e-06, "loss": 0.572, "step": 6982 }, { "epoch": 0.45, "grad_norm": 1.1538376808166504, "learning_rate": 6.033115972348239e-06, "loss": 0.5506, "step": 6983 }, { "epoch": 0.45, "grad_norm": 1.212990641593933, "learning_rate": 6.032093191095567e-06, "loss": 0.5272, "step": 6984 }, { "epoch": 0.45, "grad_norm": 1.2472628355026245, "learning_rate": 6.031070364732764e-06, "loss": 0.5545, "step": 6985 }, { "epoch": 0.45, "grad_norm": 1.147210717201233, "learning_rate": 6.030047493304536e-06, "loss": 0.5376, "step": 6986 }, { "epoch": 0.45, "grad_norm": 1.1983224153518677, "learning_rate": 6.02902457685559e-06, "loss": 0.5301, "step": 6987 }, { "epoch": 0.45, "grad_norm": 1.1243195533752441, "learning_rate": 6.028001615430633e-06, "loss": 0.5398, "step": 6988 }, { "epoch": 0.45, "grad_norm": 1.1818209886550903, "learning_rate": 6.026978609074379e-06, "loss": 0.5327, "step": 6989 }, { "epoch": 0.45, "grad_norm": 1.0997995138168335, "learning_rate": 6.02595555783154e-06, "loss": 0.598, "step": 6990 }, { "epoch": 0.45, "grad_norm": 1.1080836057662964, "learning_rate": 6.024932461746829e-06, "loss": 0.526, "step": 6991 }, { "epoch": 0.45, "grad_norm": 1.0571951866149902, "learning_rate": 6.0239093208649655e-06, "loss": 0.5201, "step": 6992 }, { "epoch": 0.45, "grad_norm": 1.129940390586853, "learning_rate": 6.022886135230668e-06, "loss": 0.5355, "step": 6993 }, { "epoch": 0.45, "grad_norm": 1.1094368696212769, "learning_rate": 6.021862904888655e-06, "loss": 0.5241, "step": 6994 }, { "epoch": 0.45, "grad_norm": 1.1842767000198364, "learning_rate": 6.020839629883652e-06, "loss": 0.5355, "step": 6995 }, { "epoch": 0.45, "grad_norm": 1.1161704063415527, "learning_rate": 6.019816310260383e-06, "loss": 0.5704, "step": 6996 }, { "epoch": 0.45, "grad_norm": 1.178011417388916, "learning_rate": 6.018792946063573e-06, "loss": 0.5865, "step": 6997 }, { "epoch": 0.45, "grad_norm": 1.2122691869735718, "learning_rate": 6.017769537337953e-06, "loss": 0.5261, "step": 6998 }, { "epoch": 0.45, "grad_norm": 1.141683578491211, "learning_rate": 6.016746084128251e-06, "loss": 0.5385, "step": 6999 }, { "epoch": 0.45, "grad_norm": 1.0923717021942139, "learning_rate": 6.0157225864792015e-06, "loss": 0.5135, "step": 7000 }, { "epoch": 0.45, "grad_norm": 1.1151437759399414, "learning_rate": 6.014699044435539e-06, "loss": 0.5628, "step": 7001 }, { "epoch": 0.45, "grad_norm": 1.128084659576416, "learning_rate": 6.013675458041999e-06, "loss": 0.5358, "step": 7002 }, { "epoch": 0.45, "grad_norm": 1.13532555103302, "learning_rate": 6.0126518273433184e-06, "loss": 0.5749, "step": 7003 }, { "epoch": 0.45, "grad_norm": 1.3040001392364502, "learning_rate": 6.011628152384241e-06, "loss": 0.5975, "step": 7004 }, { "epoch": 0.45, "grad_norm": 1.158665418624878, "learning_rate": 6.010604433209507e-06, "loss": 0.5783, "step": 7005 }, { "epoch": 0.45, "grad_norm": 1.2915266752243042, "learning_rate": 6.009580669863859e-06, "loss": 0.5363, "step": 7006 }, { "epoch": 0.45, "grad_norm": 1.1785944700241089, "learning_rate": 6.0085568623920475e-06, "loss": 0.5433, "step": 7007 }, { "epoch": 0.45, "grad_norm": 1.051912546157837, "learning_rate": 6.007533010838816e-06, "loss": 0.5069, "step": 7008 }, { "epoch": 0.45, "grad_norm": 1.1329219341278076, "learning_rate": 6.006509115248915e-06, "loss": 0.5288, "step": 7009 }, { "epoch": 0.45, "grad_norm": 1.1081008911132812, "learning_rate": 6.0054851756671e-06, "loss": 0.5553, "step": 7010 }, { "epoch": 0.45, "grad_norm": 1.1036452054977417, "learning_rate": 6.004461192138119e-06, "loss": 0.4886, "step": 7011 }, { "epoch": 0.45, "grad_norm": 1.0853906869888306, "learning_rate": 6.003437164706733e-06, "loss": 0.5518, "step": 7012 }, { "epoch": 0.45, "grad_norm": 1.0918858051300049, "learning_rate": 6.002413093417697e-06, "loss": 0.5475, "step": 7013 }, { "epoch": 0.45, "grad_norm": 1.3135555982589722, "learning_rate": 6.001388978315771e-06, "loss": 0.5359, "step": 7014 }, { "epoch": 0.45, "grad_norm": 1.1895381212234497, "learning_rate": 6.000364819445715e-06, "loss": 0.502, "step": 7015 }, { "epoch": 0.45, "grad_norm": 1.2498503923416138, "learning_rate": 5.999340616852296e-06, "loss": 0.5588, "step": 7016 }, { "epoch": 0.45, "grad_norm": 1.1100401878356934, "learning_rate": 5.998316370580276e-06, "loss": 0.5698, "step": 7017 }, { "epoch": 0.45, "grad_norm": 1.1009092330932617, "learning_rate": 5.997292080674423e-06, "loss": 0.5311, "step": 7018 }, { "epoch": 0.45, "grad_norm": 1.1471046209335327, "learning_rate": 5.996267747179506e-06, "loss": 0.5837, "step": 7019 }, { "epoch": 0.45, "grad_norm": 1.12753427028656, "learning_rate": 5.995243370140297e-06, "loss": 0.5395, "step": 7020 }, { "epoch": 0.45, "grad_norm": 1.133609652519226, "learning_rate": 5.994218949601567e-06, "loss": 0.474, "step": 7021 }, { "epoch": 0.45, "grad_norm": 1.1180349588394165, "learning_rate": 5.9931944856080924e-06, "loss": 0.5675, "step": 7022 }, { "epoch": 0.45, "grad_norm": 1.1419007778167725, "learning_rate": 5.992169978204649e-06, "loss": 0.4941, "step": 7023 }, { "epoch": 0.45, "grad_norm": 1.220670461654663, "learning_rate": 5.991145427436017e-06, "loss": 0.5052, "step": 7024 }, { "epoch": 0.45, "grad_norm": 1.06138277053833, "learning_rate": 5.990120833346974e-06, "loss": 0.5371, "step": 7025 }, { "epoch": 0.45, "grad_norm": 1.096816062927246, "learning_rate": 5.989096195982304e-06, "loss": 0.5685, "step": 7026 }, { "epoch": 0.45, "grad_norm": 1.148297667503357, "learning_rate": 5.988071515386792e-06, "loss": 0.5152, "step": 7027 }, { "epoch": 0.45, "grad_norm": 1.0805389881134033, "learning_rate": 5.9870467916052224e-06, "loss": 0.5715, "step": 7028 }, { "epoch": 0.45, "grad_norm": 1.2956234216690063, "learning_rate": 5.986022024682385e-06, "loss": 0.5096, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.1507041454315186, "learning_rate": 5.984997214663069e-06, "loss": 0.5519, "step": 7030 }, { "epoch": 0.45, "grad_norm": 1.3216300010681152, "learning_rate": 5.9839723615920665e-06, "loss": 0.4849, "step": 7031 }, { "epoch": 0.45, "grad_norm": 1.0301127433776855, "learning_rate": 5.98294746551417e-06, "loss": 0.5163, "step": 7032 }, { "epoch": 0.45, "grad_norm": 1.2603439092636108, "learning_rate": 5.981922526474174e-06, "loss": 0.4943, "step": 7033 }, { "epoch": 0.45, "grad_norm": 1.1709715127944946, "learning_rate": 5.98089754451688e-06, "loss": 0.5217, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.1007013320922852, "learning_rate": 5.979872519687084e-06, "loss": 0.5546, "step": 7035 }, { "epoch": 0.45, "grad_norm": 1.1157145500183105, "learning_rate": 5.978847452029589e-06, "loss": 0.498, "step": 7036 }, { "epoch": 0.45, "grad_norm": 1.1912013292312622, "learning_rate": 5.977822341589198e-06, "loss": 0.5581, "step": 7037 }, { "epoch": 0.45, "grad_norm": 1.1249502897262573, "learning_rate": 5.976797188410713e-06, "loss": 0.5356, "step": 7038 }, { "epoch": 0.45, "grad_norm": 1.2992759943008423, "learning_rate": 5.9757719925389455e-06, "loss": 0.5847, "step": 7039 }, { "epoch": 0.45, "grad_norm": 1.2629941701889038, "learning_rate": 5.974746754018701e-06, "loss": 0.5586, "step": 7040 }, { "epoch": 0.45, "grad_norm": 1.2308690547943115, "learning_rate": 5.97372147289479e-06, "loss": 0.5329, "step": 7041 }, { "epoch": 0.45, "grad_norm": 1.1007192134857178, "learning_rate": 5.9726961492120275e-06, "loss": 0.5287, "step": 7042 }, { "epoch": 0.45, "grad_norm": 1.1376594305038452, "learning_rate": 5.971670783015223e-06, "loss": 0.5426, "step": 7043 }, { "epoch": 0.45, "grad_norm": 1.08981192111969, "learning_rate": 5.970645374349197e-06, "loss": 0.5075, "step": 7044 }, { "epoch": 0.45, "grad_norm": 1.26777184009552, "learning_rate": 5.969619923258767e-06, "loss": 0.5516, "step": 7045 }, { "epoch": 0.45, "grad_norm": 1.1337201595306396, "learning_rate": 5.96859442978875e-06, "loss": 0.5339, "step": 7046 }, { "epoch": 0.45, "grad_norm": 1.2601927518844604, "learning_rate": 5.96756889398397e-06, "loss": 0.5665, "step": 7047 }, { "epoch": 0.45, "grad_norm": 1.1123794317245483, "learning_rate": 5.966543315889251e-06, "loss": 0.5197, "step": 7048 }, { "epoch": 0.46, "grad_norm": 1.2102056741714478, "learning_rate": 5.965517695549416e-06, "loss": 0.5102, "step": 7049 }, { "epoch": 0.46, "grad_norm": 1.2553540468215942, "learning_rate": 5.964492033009295e-06, "loss": 0.5504, "step": 7050 }, { "epoch": 0.46, "grad_norm": 1.1361783742904663, "learning_rate": 5.963466328313715e-06, "loss": 0.5456, "step": 7051 }, { "epoch": 0.46, "grad_norm": 1.2059879302978516, "learning_rate": 5.962440581507507e-06, "loss": 0.527, "step": 7052 }, { "epoch": 0.46, "grad_norm": 1.4651079177856445, "learning_rate": 5.961414792635505e-06, "loss": 0.5933, "step": 7053 }, { "epoch": 0.46, "grad_norm": 1.1848951578140259, "learning_rate": 5.960388961742543e-06, "loss": 0.55, "step": 7054 }, { "epoch": 0.46, "grad_norm": 1.2397123575210571, "learning_rate": 5.959363088873457e-06, "loss": 0.5759, "step": 7055 }, { "epoch": 0.46, "grad_norm": 1.204705834388733, "learning_rate": 5.958337174073084e-06, "loss": 0.5447, "step": 7056 }, { "epoch": 0.46, "grad_norm": 1.328669786453247, "learning_rate": 5.957311217386269e-06, "loss": 0.5289, "step": 7057 }, { "epoch": 0.46, "grad_norm": 1.1649115085601807, "learning_rate": 5.956285218857848e-06, "loss": 0.5441, "step": 7058 }, { "epoch": 0.46, "grad_norm": 1.1933577060699463, "learning_rate": 5.955259178532667e-06, "loss": 0.5669, "step": 7059 }, { "epoch": 0.46, "grad_norm": 1.144672155380249, "learning_rate": 5.954233096455575e-06, "loss": 0.5058, "step": 7060 }, { "epoch": 0.46, "grad_norm": 1.1832162141799927, "learning_rate": 5.953206972671414e-06, "loss": 0.5239, "step": 7061 }, { "epoch": 0.46, "grad_norm": 1.3020633459091187, "learning_rate": 5.952180807225035e-06, "loss": 0.4983, "step": 7062 }, { "epoch": 0.46, "grad_norm": 1.2527557611465454, "learning_rate": 5.9511546001612915e-06, "loss": 0.6019, "step": 7063 }, { "epoch": 0.46, "grad_norm": 1.1798521280288696, "learning_rate": 5.950128351525033e-06, "loss": 0.5764, "step": 7064 }, { "epoch": 0.46, "grad_norm": 1.2391186952590942, "learning_rate": 5.9491020613611165e-06, "loss": 0.5366, "step": 7065 }, { "epoch": 0.46, "grad_norm": 1.1717358827590942, "learning_rate": 5.948075729714398e-06, "loss": 0.5415, "step": 7066 }, { "epoch": 0.46, "grad_norm": 1.1242059469223022, "learning_rate": 5.9470493566297324e-06, "loss": 0.5578, "step": 7067 }, { "epoch": 0.46, "grad_norm": 1.3448609113693237, "learning_rate": 5.9460229421519856e-06, "loss": 0.5788, "step": 7068 }, { "epoch": 0.46, "grad_norm": 1.1465730667114258, "learning_rate": 5.944996486326017e-06, "loss": 0.5386, "step": 7069 }, { "epoch": 0.46, "grad_norm": 1.3668200969696045, "learning_rate": 5.943969989196688e-06, "loss": 0.5247, "step": 7070 }, { "epoch": 0.46, "grad_norm": 1.3023626804351807, "learning_rate": 5.942943450808869e-06, "loss": 0.6193, "step": 7071 }, { "epoch": 0.46, "grad_norm": 1.3515015840530396, "learning_rate": 5.941916871207423e-06, "loss": 0.5617, "step": 7072 }, { "epoch": 0.46, "grad_norm": 1.2155603170394897, "learning_rate": 5.940890250437222e-06, "loss": 0.5147, "step": 7073 }, { "epoch": 0.46, "grad_norm": 1.1511644124984741, "learning_rate": 5.939863588543133e-06, "loss": 0.565, "step": 7074 }, { "epoch": 0.46, "grad_norm": 1.0994306802749634, "learning_rate": 5.9388368855700325e-06, "loss": 0.5383, "step": 7075 }, { "epoch": 0.46, "grad_norm": 1.200391173362732, "learning_rate": 5.937810141562796e-06, "loss": 0.527, "step": 7076 }, { "epoch": 0.46, "grad_norm": 1.1874831914901733, "learning_rate": 5.9367833565662956e-06, "loss": 0.5311, "step": 7077 }, { "epoch": 0.46, "grad_norm": 1.1639721393585205, "learning_rate": 5.935756530625413e-06, "loss": 0.5753, "step": 7078 }, { "epoch": 0.46, "grad_norm": 1.183159589767456, "learning_rate": 5.934729663785026e-06, "loss": 0.518, "step": 7079 }, { "epoch": 0.46, "grad_norm": 1.2083972692489624, "learning_rate": 5.9337027560900175e-06, "loss": 0.5561, "step": 7080 }, { "epoch": 0.46, "grad_norm": 1.227286458015442, "learning_rate": 5.93267580758527e-06, "loss": 0.5639, "step": 7081 }, { "epoch": 0.46, "grad_norm": 1.1906683444976807, "learning_rate": 5.931648818315669e-06, "loss": 0.4865, "step": 7082 }, { "epoch": 0.46, "grad_norm": 1.1117982864379883, "learning_rate": 5.930621788326103e-06, "loss": 0.5123, "step": 7083 }, { "epoch": 0.46, "grad_norm": 1.148348093032837, "learning_rate": 5.9295947176614575e-06, "loss": 0.5225, "step": 7084 }, { "epoch": 0.46, "grad_norm": 1.1892168521881104, "learning_rate": 5.928567606366626e-06, "loss": 0.5598, "step": 7085 }, { "epoch": 0.46, "grad_norm": 1.2229039669036865, "learning_rate": 5.927540454486503e-06, "loss": 0.5664, "step": 7086 }, { "epoch": 0.46, "grad_norm": 1.1351975202560425, "learning_rate": 5.926513262065976e-06, "loss": 0.5432, "step": 7087 }, { "epoch": 0.46, "grad_norm": 1.181786298751831, "learning_rate": 5.925486029149946e-06, "loss": 0.579, "step": 7088 }, { "epoch": 0.46, "grad_norm": 1.103115200996399, "learning_rate": 5.924458755783311e-06, "loss": 0.5047, "step": 7089 }, { "epoch": 0.46, "grad_norm": 1.1913024187088013, "learning_rate": 5.923431442010968e-06, "loss": 0.5462, "step": 7090 }, { "epoch": 0.46, "grad_norm": 1.1999688148498535, "learning_rate": 5.922404087877818e-06, "loss": 0.5624, "step": 7091 }, { "epoch": 0.46, "grad_norm": 1.3509241342544556, "learning_rate": 5.921376693428766e-06, "loss": 0.5352, "step": 7092 }, { "epoch": 0.46, "grad_norm": 1.2193914651870728, "learning_rate": 5.920349258708716e-06, "loss": 0.5628, "step": 7093 }, { "epoch": 0.46, "grad_norm": 1.1618402004241943, "learning_rate": 5.919321783762574e-06, "loss": 0.5328, "step": 7094 }, { "epoch": 0.46, "grad_norm": 1.2360836267471313, "learning_rate": 5.91829426863525e-06, "loss": 0.582, "step": 7095 }, { "epoch": 0.46, "grad_norm": 1.2369436025619507, "learning_rate": 5.917266713371651e-06, "loss": 0.4997, "step": 7096 }, { "epoch": 0.46, "grad_norm": 1.1576792001724243, "learning_rate": 5.916239118016691e-06, "loss": 0.5184, "step": 7097 }, { "epoch": 0.46, "grad_norm": 1.2064887285232544, "learning_rate": 5.915211482615284e-06, "loss": 0.5638, "step": 7098 }, { "epoch": 0.46, "grad_norm": 1.2707489728927612, "learning_rate": 5.914183807212344e-06, "loss": 0.5791, "step": 7099 }, { "epoch": 0.46, "grad_norm": 1.0910022258758545, "learning_rate": 5.913156091852787e-06, "loss": 0.5828, "step": 7100 }, { "epoch": 0.46, "grad_norm": 1.1299078464508057, "learning_rate": 5.912128336581536e-06, "loss": 0.5299, "step": 7101 }, { "epoch": 0.46, "grad_norm": 1.250430941581726, "learning_rate": 5.911100541443507e-06, "loss": 0.5577, "step": 7102 }, { "epoch": 0.46, "grad_norm": 1.2062722444534302, "learning_rate": 5.910072706483624e-06, "loss": 0.5206, "step": 7103 }, { "epoch": 0.46, "grad_norm": 1.1620376110076904, "learning_rate": 5.909044831746812e-06, "loss": 0.539, "step": 7104 }, { "epoch": 0.46, "grad_norm": 1.2470544576644897, "learning_rate": 5.908016917277995e-06, "loss": 0.5557, "step": 7105 }, { "epoch": 0.46, "grad_norm": 1.2086061239242554, "learning_rate": 5.9069889631221e-06, "loss": 0.5779, "step": 7106 }, { "epoch": 0.46, "grad_norm": 1.2281237840652466, "learning_rate": 5.90596096932406e-06, "loss": 0.5359, "step": 7107 }, { "epoch": 0.46, "grad_norm": 1.1145950555801392, "learning_rate": 5.904932935928801e-06, "loss": 0.5597, "step": 7108 }, { "epoch": 0.46, "grad_norm": 1.1388436555862427, "learning_rate": 5.903904862981259e-06, "loss": 0.5445, "step": 7109 }, { "epoch": 0.46, "grad_norm": 1.2146440744400024, "learning_rate": 5.902876750526369e-06, "loss": 0.495, "step": 7110 }, { "epoch": 0.46, "grad_norm": 1.1551258563995361, "learning_rate": 5.9018485986090636e-06, "loss": 0.4854, "step": 7111 }, { "epoch": 0.46, "grad_norm": 1.2976340055465698, "learning_rate": 5.900820407274284e-06, "loss": 0.5496, "step": 7112 }, { "epoch": 0.46, "grad_norm": 1.2068699598312378, "learning_rate": 5.8997921765669685e-06, "loss": 0.4823, "step": 7113 }, { "epoch": 0.46, "grad_norm": 1.1521987915039062, "learning_rate": 5.898763906532058e-06, "loss": 0.4974, "step": 7114 }, { "epoch": 0.46, "grad_norm": 1.2401506900787354, "learning_rate": 5.8977355972144956e-06, "loss": 0.5627, "step": 7115 }, { "epoch": 0.46, "grad_norm": 1.1744056940078735, "learning_rate": 5.896707248659228e-06, "loss": 0.5685, "step": 7116 }, { "epoch": 0.46, "grad_norm": 1.2511736154556274, "learning_rate": 5.895678860911198e-06, "loss": 0.5985, "step": 7117 }, { "epoch": 0.46, "grad_norm": 1.1359459161758423, "learning_rate": 5.894650434015358e-06, "loss": 0.5724, "step": 7118 }, { "epoch": 0.46, "grad_norm": 1.2132835388183594, "learning_rate": 5.893621968016654e-06, "loss": 0.5421, "step": 7119 }, { "epoch": 0.46, "grad_norm": 1.0692241191864014, "learning_rate": 5.892593462960041e-06, "loss": 0.5108, "step": 7120 }, { "epoch": 0.46, "grad_norm": 1.1267825365066528, "learning_rate": 5.891564918890469e-06, "loss": 0.514, "step": 7121 }, { "epoch": 0.46, "grad_norm": 1.1410753726959229, "learning_rate": 5.890536335852895e-06, "loss": 0.5128, "step": 7122 }, { "epoch": 0.46, "grad_norm": 1.2318369150161743, "learning_rate": 5.889507713892278e-06, "loss": 0.5044, "step": 7123 }, { "epoch": 0.46, "grad_norm": 1.0461605787277222, "learning_rate": 5.88847905305357e-06, "loss": 0.5228, "step": 7124 }, { "epoch": 0.46, "grad_norm": 1.2099930047988892, "learning_rate": 5.887450353381737e-06, "loss": 0.4835, "step": 7125 }, { "epoch": 0.46, "grad_norm": 1.1189651489257812, "learning_rate": 5.886421614921738e-06, "loss": 0.5357, "step": 7126 }, { "epoch": 0.46, "grad_norm": 1.2236427068710327, "learning_rate": 5.885392837718538e-06, "loss": 0.5505, "step": 7127 }, { "epoch": 0.46, "grad_norm": 1.147836446762085, "learning_rate": 5.8843640218171e-06, "loss": 0.5097, "step": 7128 }, { "epoch": 0.46, "grad_norm": 1.0567926168441772, "learning_rate": 5.8833351672623924e-06, "loss": 0.5364, "step": 7129 }, { "epoch": 0.46, "grad_norm": 1.1172949075698853, "learning_rate": 5.882306274099385e-06, "loss": 0.5173, "step": 7130 }, { "epoch": 0.46, "grad_norm": 1.1532988548278809, "learning_rate": 5.881277342373046e-06, "loss": 0.5285, "step": 7131 }, { "epoch": 0.46, "grad_norm": 1.2508212327957153, "learning_rate": 5.880248372128349e-06, "loss": 0.5964, "step": 7132 }, { "epoch": 0.46, "grad_norm": 1.2713563442230225, "learning_rate": 5.879219363410266e-06, "loss": 0.5435, "step": 7133 }, { "epoch": 0.46, "grad_norm": 1.0671669244766235, "learning_rate": 5.878190316263772e-06, "loss": 0.5559, "step": 7134 }, { "epoch": 0.46, "grad_norm": 1.246773600578308, "learning_rate": 5.877161230733847e-06, "loss": 0.5868, "step": 7135 }, { "epoch": 0.46, "grad_norm": 1.2480053901672363, "learning_rate": 5.876132106865467e-06, "loss": 0.5193, "step": 7136 }, { "epoch": 0.46, "grad_norm": 1.0801043510437012, "learning_rate": 5.875102944703612e-06, "loss": 0.5401, "step": 7137 }, { "epoch": 0.46, "grad_norm": 1.1630189418792725, "learning_rate": 5.874073744293265e-06, "loss": 0.5536, "step": 7138 }, { "epoch": 0.46, "grad_norm": 1.094649076461792, "learning_rate": 5.873044505679411e-06, "loss": 0.5255, "step": 7139 }, { "epoch": 0.46, "grad_norm": 1.1093640327453613, "learning_rate": 5.872015228907034e-06, "loss": 0.5391, "step": 7140 }, { "epoch": 0.46, "grad_norm": 1.059866189956665, "learning_rate": 5.870985914021121e-06, "loss": 0.5397, "step": 7141 }, { "epoch": 0.46, "grad_norm": 1.3353691101074219, "learning_rate": 5.869956561066661e-06, "loss": 0.544, "step": 7142 }, { "epoch": 0.46, "grad_norm": 1.210226058959961, "learning_rate": 5.8689271700886445e-06, "loss": 0.5844, "step": 7143 }, { "epoch": 0.46, "grad_norm": 1.2633999586105347, "learning_rate": 5.867897741132061e-06, "loss": 0.5579, "step": 7144 }, { "epoch": 0.46, "grad_norm": 1.1621229648590088, "learning_rate": 5.866868274241909e-06, "loss": 0.5393, "step": 7145 }, { "epoch": 0.46, "grad_norm": 1.2291841506958008, "learning_rate": 5.8658387694631815e-06, "loss": 0.5342, "step": 7146 }, { "epoch": 0.46, "grad_norm": 1.1242361068725586, "learning_rate": 5.864809226840873e-06, "loss": 0.544, "step": 7147 }, { "epoch": 0.46, "grad_norm": 1.1954939365386963, "learning_rate": 5.863779646419987e-06, "loss": 0.5154, "step": 7148 }, { "epoch": 0.46, "grad_norm": 1.2198059558868408, "learning_rate": 5.86275002824552e-06, "loss": 0.5939, "step": 7149 }, { "epoch": 0.46, "grad_norm": 1.2360706329345703, "learning_rate": 5.861720372362474e-06, "loss": 0.51, "step": 7150 }, { "epoch": 0.46, "grad_norm": 1.252976655960083, "learning_rate": 5.860690678815856e-06, "loss": 0.5464, "step": 7151 }, { "epoch": 0.46, "grad_norm": 1.3179508447647095, "learning_rate": 5.859660947650667e-06, "loss": 0.5663, "step": 7152 }, { "epoch": 0.46, "grad_norm": 1.2314503192901611, "learning_rate": 5.858631178911917e-06, "loss": 0.5283, "step": 7153 }, { "epoch": 0.46, "grad_norm": 1.2126014232635498, "learning_rate": 5.857601372644613e-06, "loss": 0.5548, "step": 7154 }, { "epoch": 0.46, "grad_norm": 1.0586429834365845, "learning_rate": 5.856571528893766e-06, "loss": 0.5323, "step": 7155 }, { "epoch": 0.46, "grad_norm": 1.2017481327056885, "learning_rate": 5.8555416477043884e-06, "loss": 0.5027, "step": 7156 }, { "epoch": 0.46, "grad_norm": 1.3212392330169678, "learning_rate": 5.854511729121492e-06, "loss": 0.5477, "step": 7157 }, { "epoch": 0.46, "grad_norm": 1.3286858797073364, "learning_rate": 5.853481773190092e-06, "loss": 0.5929, "step": 7158 }, { "epoch": 0.46, "grad_norm": 1.2241542339324951, "learning_rate": 5.852451779955206e-06, "loss": 0.5344, "step": 7159 }, { "epoch": 0.46, "grad_norm": 1.1748000383377075, "learning_rate": 5.851421749461852e-06, "loss": 0.5372, "step": 7160 }, { "epoch": 0.46, "grad_norm": 1.1051560640335083, "learning_rate": 5.85039168175505e-06, "loss": 0.5703, "step": 7161 }, { "epoch": 0.46, "grad_norm": 1.221572995185852, "learning_rate": 5.849361576879821e-06, "loss": 0.547, "step": 7162 }, { "epoch": 0.46, "grad_norm": 1.1137073040008545, "learning_rate": 5.848331434881191e-06, "loss": 0.5319, "step": 7163 }, { "epoch": 0.46, "grad_norm": 1.1604585647583008, "learning_rate": 5.847301255804181e-06, "loss": 0.5631, "step": 7164 }, { "epoch": 0.46, "grad_norm": 1.1175224781036377, "learning_rate": 5.846271039693821e-06, "loss": 0.5444, "step": 7165 }, { "epoch": 0.46, "grad_norm": 1.1462916135787964, "learning_rate": 5.8452407865951346e-06, "loss": 0.4899, "step": 7166 }, { "epoch": 0.46, "grad_norm": 1.253014087677002, "learning_rate": 5.8442104965531556e-06, "loss": 0.5609, "step": 7167 }, { "epoch": 0.46, "grad_norm": 1.1545838117599487, "learning_rate": 5.8431801696129145e-06, "loss": 0.5425, "step": 7168 }, { "epoch": 0.46, "grad_norm": 1.2241910696029663, "learning_rate": 5.842149805819442e-06, "loss": 0.5535, "step": 7169 }, { "epoch": 0.46, "grad_norm": 1.1698452234268188, "learning_rate": 5.841119405217775e-06, "loss": 0.5282, "step": 7170 }, { "epoch": 0.46, "grad_norm": 1.2112250328063965, "learning_rate": 5.840088967852949e-06, "loss": 0.5986, "step": 7171 }, { "epoch": 0.46, "grad_norm": 1.1762975454330444, "learning_rate": 5.839058493770003e-06, "loss": 0.5829, "step": 7172 }, { "epoch": 0.46, "grad_norm": 1.1610665321350098, "learning_rate": 5.838027983013973e-06, "loss": 0.5553, "step": 7173 }, { "epoch": 0.46, "grad_norm": 1.176383137702942, "learning_rate": 5.836997435629903e-06, "loss": 0.5806, "step": 7174 }, { "epoch": 0.46, "grad_norm": 1.3649547100067139, "learning_rate": 5.8359668516628344e-06, "loss": 0.5426, "step": 7175 }, { "epoch": 0.46, "grad_norm": 1.296371579170227, "learning_rate": 5.83493623115781e-06, "loss": 0.6175, "step": 7176 }, { "epoch": 0.46, "grad_norm": 1.2163527011871338, "learning_rate": 5.8339055741598794e-06, "loss": 0.5928, "step": 7177 }, { "epoch": 0.46, "grad_norm": 1.2003259658813477, "learning_rate": 5.832874880714087e-06, "loss": 0.5548, "step": 7178 }, { "epoch": 0.46, "grad_norm": 1.1812655925750732, "learning_rate": 5.831844150865481e-06, "loss": 0.523, "step": 7179 }, { "epoch": 0.46, "grad_norm": 1.175706386566162, "learning_rate": 5.830813384659115e-06, "loss": 0.545, "step": 7180 }, { "epoch": 0.46, "grad_norm": 1.1339889764785767, "learning_rate": 5.829782582140039e-06, "loss": 0.5159, "step": 7181 }, { "epoch": 0.46, "grad_norm": 1.1552784442901611, "learning_rate": 5.828751743353307e-06, "loss": 0.5208, "step": 7182 }, { "epoch": 0.46, "grad_norm": 1.081678867340088, "learning_rate": 5.827720868343976e-06, "loss": 0.5152, "step": 7183 }, { "epoch": 0.46, "grad_norm": 1.1482439041137695, "learning_rate": 5.826689957157099e-06, "loss": 0.5532, "step": 7184 }, { "epoch": 0.46, "grad_norm": 1.1184988021850586, "learning_rate": 5.8256590098377374e-06, "loss": 0.4961, "step": 7185 }, { "epoch": 0.46, "grad_norm": 1.2735705375671387, "learning_rate": 5.824628026430952e-06, "loss": 0.5333, "step": 7186 }, { "epoch": 0.46, "grad_norm": 1.2559492588043213, "learning_rate": 5.823597006981803e-06, "loss": 0.5721, "step": 7187 }, { "epoch": 0.46, "grad_norm": 1.162428855895996, "learning_rate": 5.822565951535352e-06, "loss": 0.557, "step": 7188 }, { "epoch": 0.46, "grad_norm": 1.286717176437378, "learning_rate": 5.821534860136667e-06, "loss": 0.5705, "step": 7189 }, { "epoch": 0.46, "grad_norm": 1.2541226148605347, "learning_rate": 5.820503732830812e-06, "loss": 0.5442, "step": 7190 }, { "epoch": 0.46, "grad_norm": 1.1627603769302368, "learning_rate": 5.8194725696628565e-06, "loss": 0.5308, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.111397385597229, "learning_rate": 5.8184413706778695e-06, "loss": 0.5401, "step": 7192 }, { "epoch": 0.46, "grad_norm": 1.132348656654358, "learning_rate": 5.817410135920921e-06, "loss": 0.5133, "step": 7193 }, { "epoch": 0.46, "grad_norm": 1.1581875085830688, "learning_rate": 5.816378865437085e-06, "loss": 0.5689, "step": 7194 }, { "epoch": 0.46, "grad_norm": 1.066846489906311, "learning_rate": 5.8153475592714345e-06, "loss": 0.475, "step": 7195 }, { "epoch": 0.46, "grad_norm": 1.3011525869369507, "learning_rate": 5.814316217469046e-06, "loss": 0.58, "step": 7196 }, { "epoch": 0.46, "grad_norm": 1.1437499523162842, "learning_rate": 5.813284840074996e-06, "loss": 0.5457, "step": 7197 }, { "epoch": 0.46, "grad_norm": 1.4464155435562134, "learning_rate": 5.812253427134366e-06, "loss": 0.5224, "step": 7198 }, { "epoch": 0.46, "grad_norm": 1.4026434421539307, "learning_rate": 5.811221978692232e-06, "loss": 0.5527, "step": 7199 }, { "epoch": 0.46, "grad_norm": 1.299089789390564, "learning_rate": 5.8101904947936795e-06, "loss": 0.5567, "step": 7200 }, { "epoch": 0.46, "grad_norm": 1.249489188194275, "learning_rate": 5.809158975483791e-06, "loss": 0.5266, "step": 7201 }, { "epoch": 0.46, "grad_norm": 1.1579229831695557, "learning_rate": 5.80812742080765e-06, "loss": 0.5726, "step": 7202 }, { "epoch": 0.46, "grad_norm": 1.1627153158187866, "learning_rate": 5.807095830810346e-06, "loss": 0.5023, "step": 7203 }, { "epoch": 0.47, "grad_norm": 1.2744877338409424, "learning_rate": 5.8060642055369645e-06, "loss": 0.5613, "step": 7204 }, { "epoch": 0.47, "grad_norm": 1.0973200798034668, "learning_rate": 5.8050325450325965e-06, "loss": 0.5089, "step": 7205 }, { "epoch": 0.47, "grad_norm": 1.1296730041503906, "learning_rate": 5.8040008493423324e-06, "loss": 0.5334, "step": 7206 }, { "epoch": 0.47, "grad_norm": 1.1748251914978027, "learning_rate": 5.802969118511267e-06, "loss": 0.5783, "step": 7207 }, { "epoch": 0.47, "grad_norm": 1.0700501203536987, "learning_rate": 5.801937352584493e-06, "loss": 0.5255, "step": 7208 }, { "epoch": 0.47, "grad_norm": 1.2018234729766846, "learning_rate": 5.800905551607106e-06, "loss": 0.5796, "step": 7209 }, { "epoch": 0.47, "grad_norm": 1.181500792503357, "learning_rate": 5.799873715624202e-06, "loss": 0.5459, "step": 7210 }, { "epoch": 0.47, "grad_norm": 1.179744005203247, "learning_rate": 5.798841844680883e-06, "loss": 0.5733, "step": 7211 }, { "epoch": 0.47, "grad_norm": 1.194836139678955, "learning_rate": 5.797809938822249e-06, "loss": 0.4773, "step": 7212 }, { "epoch": 0.47, "grad_norm": 1.2443724870681763, "learning_rate": 5.796777998093399e-06, "loss": 0.5169, "step": 7213 }, { "epoch": 0.47, "grad_norm": 1.262325406074524, "learning_rate": 5.79574602253944e-06, "loss": 0.5917, "step": 7214 }, { "epoch": 0.47, "grad_norm": 1.1820968389511108, "learning_rate": 5.794714012205474e-06, "loss": 0.5225, "step": 7215 }, { "epoch": 0.47, "grad_norm": 1.206945538520813, "learning_rate": 5.793681967136612e-06, "loss": 0.4595, "step": 7216 }, { "epoch": 0.47, "grad_norm": 1.179128885269165, "learning_rate": 5.792649887377956e-06, "loss": 0.5348, "step": 7217 }, { "epoch": 0.47, "grad_norm": 1.0428766012191772, "learning_rate": 5.791617772974619e-06, "loss": 0.5361, "step": 7218 }, { "epoch": 0.47, "grad_norm": 1.136227011680603, "learning_rate": 5.790585623971712e-06, "loss": 0.541, "step": 7219 }, { "epoch": 0.47, "grad_norm": 1.3587874174118042, "learning_rate": 5.789553440414346e-06, "loss": 0.5341, "step": 7220 }, { "epoch": 0.47, "grad_norm": 1.2320612668991089, "learning_rate": 5.788521222347638e-06, "loss": 0.5538, "step": 7221 }, { "epoch": 0.47, "grad_norm": 1.241917610168457, "learning_rate": 5.7874889698167e-06, "loss": 0.5234, "step": 7222 }, { "epoch": 0.47, "grad_norm": 1.1511484384536743, "learning_rate": 5.786456682866652e-06, "loss": 0.5508, "step": 7223 }, { "epoch": 0.47, "grad_norm": 1.176261067390442, "learning_rate": 5.785424361542611e-06, "loss": 0.5096, "step": 7224 }, { "epoch": 0.47, "grad_norm": 1.1109082698822021, "learning_rate": 5.784392005889698e-06, "loss": 0.5046, "step": 7225 }, { "epoch": 0.47, "grad_norm": 1.0975091457366943, "learning_rate": 5.7833596159530325e-06, "loss": 0.5258, "step": 7226 }, { "epoch": 0.47, "grad_norm": 1.1671857833862305, "learning_rate": 5.782327191777741e-06, "loss": 0.5268, "step": 7227 }, { "epoch": 0.47, "grad_norm": 1.161896824836731, "learning_rate": 5.781294733408946e-06, "loss": 0.5685, "step": 7228 }, { "epoch": 0.47, "grad_norm": 1.2742457389831543, "learning_rate": 5.780262240891774e-06, "loss": 0.5618, "step": 7229 }, { "epoch": 0.47, "grad_norm": 1.2804501056671143, "learning_rate": 5.7792297142713534e-06, "loss": 0.597, "step": 7230 }, { "epoch": 0.47, "grad_norm": 1.0884604454040527, "learning_rate": 5.778197153592811e-06, "loss": 0.5113, "step": 7231 }, { "epoch": 0.47, "grad_norm": 1.1937174797058105, "learning_rate": 5.777164558901279e-06, "loss": 0.5669, "step": 7232 }, { "epoch": 0.47, "grad_norm": 1.171720266342163, "learning_rate": 5.776131930241891e-06, "loss": 0.4968, "step": 7233 }, { "epoch": 0.47, "grad_norm": 1.1570345163345337, "learning_rate": 5.775099267659776e-06, "loss": 0.5281, "step": 7234 }, { "epoch": 0.47, "grad_norm": 1.068222999572754, "learning_rate": 5.774066571200073e-06, "loss": 0.5358, "step": 7235 }, { "epoch": 0.47, "grad_norm": 1.1363463401794434, "learning_rate": 5.773033840907919e-06, "loss": 0.5744, "step": 7236 }, { "epoch": 0.47, "grad_norm": 1.1393052339553833, "learning_rate": 5.772001076828448e-06, "loss": 0.544, "step": 7237 }, { "epoch": 0.47, "grad_norm": 1.1759463548660278, "learning_rate": 5.770968279006803e-06, "loss": 0.5092, "step": 7238 }, { "epoch": 0.47, "grad_norm": 1.2242093086242676, "learning_rate": 5.769935447488123e-06, "loss": 0.5233, "step": 7239 }, { "epoch": 0.47, "grad_norm": 1.2012656927108765, "learning_rate": 5.76890258231755e-06, "loss": 0.4889, "step": 7240 }, { "epoch": 0.47, "grad_norm": 1.2202996015548706, "learning_rate": 5.7678696835402305e-06, "loss": 0.5472, "step": 7241 }, { "epoch": 0.47, "grad_norm": 1.2504611015319824, "learning_rate": 5.766836751201308e-06, "loss": 0.5487, "step": 7242 }, { "epoch": 0.47, "grad_norm": 1.2210071086883545, "learning_rate": 5.7658037853459295e-06, "loss": 0.5347, "step": 7243 }, { "epoch": 0.47, "grad_norm": 1.0732014179229736, "learning_rate": 5.764770786019243e-06, "loss": 0.4735, "step": 7244 }, { "epoch": 0.47, "grad_norm": 1.2162115573883057, "learning_rate": 5.7637377532664e-06, "loss": 0.5448, "step": 7245 }, { "epoch": 0.47, "grad_norm": 1.1529276371002197, "learning_rate": 5.762704687132548e-06, "loss": 0.5201, "step": 7246 }, { "epoch": 0.47, "grad_norm": 1.1530059576034546, "learning_rate": 5.761671587662843e-06, "loss": 0.4832, "step": 7247 }, { "epoch": 0.47, "grad_norm": 1.209709882736206, "learning_rate": 5.7606384549024385e-06, "loss": 0.5588, "step": 7248 }, { "epoch": 0.47, "grad_norm": 1.2996402978897095, "learning_rate": 5.759605288896489e-06, "loss": 0.6001, "step": 7249 }, { "epoch": 0.47, "grad_norm": 1.2192944288253784, "learning_rate": 5.758572089690152e-06, "loss": 0.5905, "step": 7250 }, { "epoch": 0.47, "grad_norm": 1.2044199705123901, "learning_rate": 5.757538857328587e-06, "loss": 0.5462, "step": 7251 }, { "epoch": 0.47, "grad_norm": 1.0928486585617065, "learning_rate": 5.756505591856952e-06, "loss": 0.4889, "step": 7252 }, { "epoch": 0.47, "grad_norm": 1.2142056226730347, "learning_rate": 5.75547229332041e-06, "loss": 0.5047, "step": 7253 }, { "epoch": 0.47, "grad_norm": 1.1331971883773804, "learning_rate": 5.7544389617641225e-06, "loss": 0.527, "step": 7254 }, { "epoch": 0.47, "grad_norm": 1.2109256982803345, "learning_rate": 5.753405597233255e-06, "loss": 0.5762, "step": 7255 }, { "epoch": 0.47, "grad_norm": 1.1691864728927612, "learning_rate": 5.752372199772973e-06, "loss": 0.5405, "step": 7256 }, { "epoch": 0.47, "grad_norm": 1.0658754110336304, "learning_rate": 5.751338769428443e-06, "loss": 0.5344, "step": 7257 }, { "epoch": 0.47, "grad_norm": 1.1489115953445435, "learning_rate": 5.750305306244834e-06, "loss": 0.5502, "step": 7258 }, { "epoch": 0.47, "grad_norm": 1.2037489414215088, "learning_rate": 5.749271810267316e-06, "loss": 0.5243, "step": 7259 }, { "epoch": 0.47, "grad_norm": 1.1464831829071045, "learning_rate": 5.74823828154106e-06, "loss": 0.5397, "step": 7260 }, { "epoch": 0.47, "grad_norm": 1.1295496225357056, "learning_rate": 5.747204720111239e-06, "loss": 0.5677, "step": 7261 }, { "epoch": 0.47, "grad_norm": 1.20768141746521, "learning_rate": 5.746171126023028e-06, "loss": 0.6344, "step": 7262 }, { "epoch": 0.47, "grad_norm": 1.1278836727142334, "learning_rate": 5.745137499321602e-06, "loss": 0.5472, "step": 7263 }, { "epoch": 0.47, "grad_norm": 1.2797412872314453, "learning_rate": 5.744103840052138e-06, "loss": 0.5325, "step": 7264 }, { "epoch": 0.47, "grad_norm": 1.1874698400497437, "learning_rate": 5.743070148259817e-06, "loss": 0.5361, "step": 7265 }, { "epoch": 0.47, "grad_norm": 1.0997222661972046, "learning_rate": 5.742036423989814e-06, "loss": 0.5082, "step": 7266 }, { "epoch": 0.47, "grad_norm": 1.1415494680404663, "learning_rate": 5.741002667287315e-06, "loss": 0.5094, "step": 7267 }, { "epoch": 0.47, "grad_norm": 1.116712212562561, "learning_rate": 5.739968878197502e-06, "loss": 0.5561, "step": 7268 }, { "epoch": 0.47, "grad_norm": 1.120242953300476, "learning_rate": 5.738935056765556e-06, "loss": 0.5356, "step": 7269 }, { "epoch": 0.47, "grad_norm": 1.1203124523162842, "learning_rate": 5.737901203036666e-06, "loss": 0.5061, "step": 7270 }, { "epoch": 0.47, "grad_norm": 1.22609281539917, "learning_rate": 5.736867317056019e-06, "loss": 0.5926, "step": 7271 }, { "epoch": 0.47, "grad_norm": 1.1933239698410034, "learning_rate": 5.735833398868803e-06, "loss": 0.563, "step": 7272 }, { "epoch": 0.47, "grad_norm": 1.3461095094680786, "learning_rate": 5.734799448520206e-06, "loss": 0.6066, "step": 7273 }, { "epoch": 0.47, "grad_norm": 1.0845367908477783, "learning_rate": 5.733765466055423e-06, "loss": 0.5232, "step": 7274 }, { "epoch": 0.47, "grad_norm": 1.1771855354309082, "learning_rate": 5.732731451519643e-06, "loss": 0.5362, "step": 7275 }, { "epoch": 0.47, "grad_norm": 1.3316630125045776, "learning_rate": 5.731697404958062e-06, "loss": 0.4956, "step": 7276 }, { "epoch": 0.47, "grad_norm": 1.1932517290115356, "learning_rate": 5.7306633264158764e-06, "loss": 0.4945, "step": 7277 }, { "epoch": 0.47, "grad_norm": 1.2484925985336304, "learning_rate": 5.72962921593828e-06, "loss": 0.5504, "step": 7278 }, { "epoch": 0.47, "grad_norm": 1.1243408918380737, "learning_rate": 5.728595073570474e-06, "loss": 0.5295, "step": 7279 }, { "epoch": 0.47, "grad_norm": 1.298840045928955, "learning_rate": 5.7275608993576586e-06, "loss": 0.5701, "step": 7280 }, { "epoch": 0.47, "grad_norm": 1.2263392210006714, "learning_rate": 5.7265266933450316e-06, "loss": 0.5232, "step": 7281 }, { "epoch": 0.47, "grad_norm": 1.2392489910125732, "learning_rate": 5.725492455577798e-06, "loss": 0.5849, "step": 7282 }, { "epoch": 0.47, "grad_norm": 1.1606427431106567, "learning_rate": 5.724458186101161e-06, "loss": 0.5122, "step": 7283 }, { "epoch": 0.47, "grad_norm": 1.2044318914413452, "learning_rate": 5.723423884960325e-06, "loss": 0.5581, "step": 7284 }, { "epoch": 0.47, "grad_norm": 1.151931643486023, "learning_rate": 5.722389552200498e-06, "loss": 0.5198, "step": 7285 }, { "epoch": 0.47, "grad_norm": 1.1369937658309937, "learning_rate": 5.721355187866888e-06, "loss": 0.5539, "step": 7286 }, { "epoch": 0.47, "grad_norm": 1.1847953796386719, "learning_rate": 5.720320792004703e-06, "loss": 0.5156, "step": 7287 }, { "epoch": 0.47, "grad_norm": 1.2365010976791382, "learning_rate": 5.719286364659155e-06, "loss": 0.5324, "step": 7288 }, { "epoch": 0.47, "grad_norm": 1.072968602180481, "learning_rate": 5.718251905875456e-06, "loss": 0.5554, "step": 7289 }, { "epoch": 0.47, "grad_norm": 1.0796452760696411, "learning_rate": 5.717217415698818e-06, "loss": 0.5188, "step": 7290 }, { "epoch": 0.47, "grad_norm": 1.3103773593902588, "learning_rate": 5.716182894174458e-06, "loss": 0.5536, "step": 7291 }, { "epoch": 0.47, "grad_norm": 1.1612085103988647, "learning_rate": 5.715148341347593e-06, "loss": 0.5421, "step": 7292 }, { "epoch": 0.47, "grad_norm": 1.0961323976516724, "learning_rate": 5.714113757263437e-06, "loss": 0.5378, "step": 7293 }, { "epoch": 0.47, "grad_norm": 1.1178137063980103, "learning_rate": 5.7130791419672125e-06, "loss": 0.5385, "step": 7294 }, { "epoch": 0.47, "grad_norm": 1.1490551233291626, "learning_rate": 5.712044495504138e-06, "loss": 0.5082, "step": 7295 }, { "epoch": 0.47, "grad_norm": 1.2319395542144775, "learning_rate": 5.711009817919435e-06, "loss": 0.5364, "step": 7296 }, { "epoch": 0.47, "grad_norm": 1.164250373840332, "learning_rate": 5.709975109258329e-06, "loss": 0.5875, "step": 7297 }, { "epoch": 0.47, "grad_norm": 1.16639244556427, "learning_rate": 5.708940369566041e-06, "loss": 0.4959, "step": 7298 }, { "epoch": 0.47, "grad_norm": 1.2350678443908691, "learning_rate": 5.707905598887798e-06, "loss": 0.5465, "step": 7299 }, { "epoch": 0.47, "grad_norm": 1.3073400259017944, "learning_rate": 5.706870797268831e-06, "loss": 0.5124, "step": 7300 }, { "epoch": 0.47, "grad_norm": 1.184635877609253, "learning_rate": 5.7058359647543625e-06, "loss": 0.5532, "step": 7301 }, { "epoch": 0.47, "grad_norm": 1.1802558898925781, "learning_rate": 5.7048011013896255e-06, "loss": 0.5072, "step": 7302 }, { "epoch": 0.47, "grad_norm": 1.2019025087356567, "learning_rate": 5.703766207219851e-06, "loss": 0.5756, "step": 7303 }, { "epoch": 0.47, "grad_norm": 1.3785700798034668, "learning_rate": 5.70273128229027e-06, "loss": 0.5714, "step": 7304 }, { "epoch": 0.47, "grad_norm": 1.196584939956665, "learning_rate": 5.70169632664612e-06, "loss": 0.5342, "step": 7305 }, { "epoch": 0.47, "grad_norm": 1.266177773475647, "learning_rate": 5.700661340332633e-06, "loss": 0.5331, "step": 7306 }, { "epoch": 0.47, "grad_norm": 1.1753085851669312, "learning_rate": 5.699626323395046e-06, "loss": 0.5422, "step": 7307 }, { "epoch": 0.47, "grad_norm": 1.3526986837387085, "learning_rate": 5.698591275878599e-06, "loss": 0.5744, "step": 7308 }, { "epoch": 0.47, "grad_norm": 1.3314452171325684, "learning_rate": 5.6975561978285275e-06, "loss": 0.556, "step": 7309 }, { "epoch": 0.47, "grad_norm": 1.1297588348388672, "learning_rate": 5.696521089290077e-06, "loss": 0.5411, "step": 7310 }, { "epoch": 0.47, "grad_norm": 1.084571123123169, "learning_rate": 5.695485950308484e-06, "loss": 0.4997, "step": 7311 }, { "epoch": 0.47, "grad_norm": 1.1589901447296143, "learning_rate": 5.694450780928997e-06, "loss": 0.5283, "step": 7312 }, { "epoch": 0.47, "grad_norm": 1.1705058813095093, "learning_rate": 5.6934155811968565e-06, "loss": 0.5601, "step": 7313 }, { "epoch": 0.47, "grad_norm": 1.1671676635742188, "learning_rate": 5.69238035115731e-06, "loss": 0.5191, "step": 7314 }, { "epoch": 0.47, "grad_norm": 1.0952956676483154, "learning_rate": 5.691345090855605e-06, "loss": 0.5534, "step": 7315 }, { "epoch": 0.47, "grad_norm": 1.151823878288269, "learning_rate": 5.690309800336989e-06, "loss": 0.5516, "step": 7316 }, { "epoch": 0.47, "grad_norm": 1.0717395544052124, "learning_rate": 5.689274479646714e-06, "loss": 0.4754, "step": 7317 }, { "epoch": 0.47, "grad_norm": 1.1732017993927002, "learning_rate": 5.68823912883003e-06, "loss": 0.5371, "step": 7318 }, { "epoch": 0.47, "grad_norm": 1.2329660654067993, "learning_rate": 5.687203747932187e-06, "loss": 0.5534, "step": 7319 }, { "epoch": 0.47, "grad_norm": 1.2168726921081543, "learning_rate": 5.686168336998444e-06, "loss": 0.5345, "step": 7320 }, { "epoch": 0.47, "grad_norm": 1.1125175952911377, "learning_rate": 5.685132896074052e-06, "loss": 0.5077, "step": 7321 }, { "epoch": 0.47, "grad_norm": 1.4166851043701172, "learning_rate": 5.684097425204268e-06, "loss": 0.5607, "step": 7322 }, { "epoch": 0.47, "grad_norm": 1.196679949760437, "learning_rate": 5.683061924434351e-06, "loss": 0.5535, "step": 7323 }, { "epoch": 0.47, "grad_norm": 1.1940780878067017, "learning_rate": 5.682026393809561e-06, "loss": 0.5446, "step": 7324 }, { "epoch": 0.47, "grad_norm": 1.146020770072937, "learning_rate": 5.680990833375155e-06, "loss": 0.5448, "step": 7325 }, { "epoch": 0.47, "grad_norm": 1.1703016757965088, "learning_rate": 5.679955243176398e-06, "loss": 0.5101, "step": 7326 }, { "epoch": 0.47, "grad_norm": 1.2316566705703735, "learning_rate": 5.678919623258552e-06, "loss": 0.5429, "step": 7327 }, { "epoch": 0.47, "grad_norm": 1.1433426141738892, "learning_rate": 5.67788397366688e-06, "loss": 0.5349, "step": 7328 }, { "epoch": 0.47, "grad_norm": 1.1991231441497803, "learning_rate": 5.676848294446648e-06, "loss": 0.5708, "step": 7329 }, { "epoch": 0.47, "grad_norm": 1.1955552101135254, "learning_rate": 5.675812585643124e-06, "loss": 0.5513, "step": 7330 }, { "epoch": 0.47, "grad_norm": 1.2832142114639282, "learning_rate": 5.674776847301575e-06, "loss": 0.535, "step": 7331 }, { "epoch": 0.47, "grad_norm": 1.3097350597381592, "learning_rate": 5.673741079467272e-06, "loss": 0.5428, "step": 7332 }, { "epoch": 0.47, "grad_norm": 1.161712408065796, "learning_rate": 5.672705282185484e-06, "loss": 0.5092, "step": 7333 }, { "epoch": 0.47, "grad_norm": 1.1734837293624878, "learning_rate": 5.671669455501484e-06, "loss": 0.5765, "step": 7334 }, { "epoch": 0.47, "grad_norm": 1.0733187198638916, "learning_rate": 5.6706335994605445e-06, "loss": 0.4923, "step": 7335 }, { "epoch": 0.47, "grad_norm": 1.2475993633270264, "learning_rate": 5.6695977141079415e-06, "loss": 0.5788, "step": 7336 }, { "epoch": 0.47, "grad_norm": 1.2780604362487793, "learning_rate": 5.66856179948895e-06, "loss": 0.544, "step": 7337 }, { "epoch": 0.47, "grad_norm": 1.1814031600952148, "learning_rate": 5.6675258556488465e-06, "loss": 0.6074, "step": 7338 }, { "epoch": 0.47, "grad_norm": 1.1492490768432617, "learning_rate": 5.666489882632911e-06, "loss": 0.531, "step": 7339 }, { "epoch": 0.47, "grad_norm": 1.232742428779602, "learning_rate": 5.66545388048642e-06, "loss": 0.5586, "step": 7340 }, { "epoch": 0.47, "grad_norm": 1.1003855466842651, "learning_rate": 5.66441784925466e-06, "loss": 0.5386, "step": 7341 }, { "epoch": 0.47, "grad_norm": 1.2231792211532593, "learning_rate": 5.663381788982907e-06, "loss": 0.4937, "step": 7342 }, { "epoch": 0.47, "grad_norm": 1.2316664457321167, "learning_rate": 5.662345699716449e-06, "loss": 0.5438, "step": 7343 }, { "epoch": 0.47, "grad_norm": 1.2306331396102905, "learning_rate": 5.6613095815005705e-06, "loss": 0.5466, "step": 7344 }, { "epoch": 0.47, "grad_norm": 1.1409529447555542, "learning_rate": 5.660273434380554e-06, "loss": 0.5314, "step": 7345 }, { "epoch": 0.47, "grad_norm": 1.3176580667495728, "learning_rate": 5.6592372584016895e-06, "loss": 0.5787, "step": 7346 }, { "epoch": 0.47, "grad_norm": 1.107416033744812, "learning_rate": 5.658201053609267e-06, "loss": 0.4958, "step": 7347 }, { "epoch": 0.47, "grad_norm": 1.1645156145095825, "learning_rate": 5.657164820048574e-06, "loss": 0.5418, "step": 7348 }, { "epoch": 0.47, "grad_norm": 1.1168419122695923, "learning_rate": 5.656128557764901e-06, "loss": 0.4798, "step": 7349 }, { "epoch": 0.47, "grad_norm": 1.1791855096817017, "learning_rate": 5.655092266803544e-06, "loss": 0.5379, "step": 7350 }, { "epoch": 0.47, "grad_norm": 1.1186221837997437, "learning_rate": 5.6540559472097925e-06, "loss": 0.5295, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.0734858512878418, "learning_rate": 5.6530195990289435e-06, "loss": 0.523, "step": 7352 }, { "epoch": 0.47, "grad_norm": 1.0710773468017578, "learning_rate": 5.651983222306292e-06, "loss": 0.4651, "step": 7353 }, { "epoch": 0.47, "grad_norm": 1.285923719406128, "learning_rate": 5.650946817087137e-06, "loss": 0.5131, "step": 7354 }, { "epoch": 0.47, "grad_norm": 1.1292014122009277, "learning_rate": 5.649910383416776e-06, "loss": 0.5558, "step": 7355 }, { "epoch": 0.47, "grad_norm": 1.1084808111190796, "learning_rate": 5.648873921340509e-06, "loss": 0.5343, "step": 7356 }, { "epoch": 0.47, "grad_norm": 1.0465142726898193, "learning_rate": 5.647837430903635e-06, "loss": 0.4995, "step": 7357 }, { "epoch": 0.47, "grad_norm": 1.1775082349777222, "learning_rate": 5.64680091215146e-06, "loss": 0.4997, "step": 7358 }, { "epoch": 0.48, "grad_norm": 1.122299313545227, "learning_rate": 5.645764365129287e-06, "loss": 0.5569, "step": 7359 }, { "epoch": 0.48, "grad_norm": 1.1747353076934814, "learning_rate": 5.644727789882417e-06, "loss": 0.5295, "step": 7360 }, { "epoch": 0.48, "grad_norm": 1.190399408340454, "learning_rate": 5.64369118645616e-06, "loss": 0.5526, "step": 7361 }, { "epoch": 0.48, "grad_norm": 1.1292719841003418, "learning_rate": 5.642654554895823e-06, "loss": 0.5061, "step": 7362 }, { "epoch": 0.48, "grad_norm": 1.1638281345367432, "learning_rate": 5.6416178952467125e-06, "loss": 0.5523, "step": 7363 }, { "epoch": 0.48, "grad_norm": 1.101309061050415, "learning_rate": 5.640581207554139e-06, "loss": 0.5342, "step": 7364 }, { "epoch": 0.48, "grad_norm": 1.120405673980713, "learning_rate": 5.639544491863414e-06, "loss": 0.5867, "step": 7365 }, { "epoch": 0.48, "grad_norm": 1.1212835311889648, "learning_rate": 5.638507748219849e-06, "loss": 0.5452, "step": 7366 }, { "epoch": 0.48, "grad_norm": 1.3072446584701538, "learning_rate": 5.6374709766687575e-06, "loss": 0.5881, "step": 7367 }, { "epoch": 0.48, "grad_norm": 1.2935326099395752, "learning_rate": 5.6364341772554555e-06, "loss": 0.5413, "step": 7368 }, { "epoch": 0.48, "grad_norm": 1.1630290746688843, "learning_rate": 5.635397350025257e-06, "loss": 0.5518, "step": 7369 }, { "epoch": 0.48, "grad_norm": 1.1442545652389526, "learning_rate": 5.634360495023479e-06, "loss": 0.5355, "step": 7370 }, { "epoch": 0.48, "grad_norm": 1.1508458852767944, "learning_rate": 5.633323612295441e-06, "loss": 0.5029, "step": 7371 }, { "epoch": 0.48, "grad_norm": 1.1462135314941406, "learning_rate": 5.632286701886462e-06, "loss": 0.5378, "step": 7372 }, { "epoch": 0.48, "grad_norm": 1.267900824546814, "learning_rate": 5.6312497638418616e-06, "loss": 0.5553, "step": 7373 }, { "epoch": 0.48, "grad_norm": 1.2382103204727173, "learning_rate": 5.630212798206964e-06, "loss": 0.5426, "step": 7374 }, { "epoch": 0.48, "grad_norm": 1.123482584953308, "learning_rate": 5.62917580502709e-06, "loss": 0.4939, "step": 7375 }, { "epoch": 0.48, "grad_norm": 1.1294193267822266, "learning_rate": 5.628138784347565e-06, "loss": 0.5001, "step": 7376 }, { "epoch": 0.48, "grad_norm": 1.1049503087997437, "learning_rate": 5.627101736213716e-06, "loss": 0.4818, "step": 7377 }, { "epoch": 0.48, "grad_norm": 1.2430757284164429, "learning_rate": 5.6260646606708665e-06, "loss": 0.5814, "step": 7378 }, { "epoch": 0.48, "grad_norm": 1.1193368434906006, "learning_rate": 5.625027557764345e-06, "loss": 0.5061, "step": 7379 }, { "epoch": 0.48, "grad_norm": 1.214971661567688, "learning_rate": 5.623990427539484e-06, "loss": 0.5723, "step": 7380 }, { "epoch": 0.48, "grad_norm": 1.1999611854553223, "learning_rate": 5.62295327004161e-06, "loss": 0.5093, "step": 7381 }, { "epoch": 0.48, "grad_norm": 1.129082441329956, "learning_rate": 5.621916085316056e-06, "loss": 0.5243, "step": 7382 }, { "epoch": 0.48, "grad_norm": 1.275686264038086, "learning_rate": 5.6208788734081544e-06, "loss": 0.5382, "step": 7383 }, { "epoch": 0.48, "grad_norm": 1.089695692062378, "learning_rate": 5.619841634363239e-06, "loss": 0.4994, "step": 7384 }, { "epoch": 0.48, "grad_norm": 1.1986641883850098, "learning_rate": 5.618804368226646e-06, "loss": 0.5382, "step": 7385 }, { "epoch": 0.48, "grad_norm": 1.2308392524719238, "learning_rate": 5.6177670750437085e-06, "loss": 0.6316, "step": 7386 }, { "epoch": 0.48, "grad_norm": 1.1218442916870117, "learning_rate": 5.6167297548597665e-06, "loss": 0.5175, "step": 7387 }, { "epoch": 0.48, "grad_norm": 1.1042224168777466, "learning_rate": 5.6156924077201605e-06, "loss": 0.5263, "step": 7388 }, { "epoch": 0.48, "grad_norm": 1.088131070137024, "learning_rate": 5.6146550336702255e-06, "loss": 0.4713, "step": 7389 }, { "epoch": 0.48, "grad_norm": 1.1686369180679321, "learning_rate": 5.613617632755305e-06, "loss": 0.5406, "step": 7390 }, { "epoch": 0.48, "grad_norm": 1.1116900444030762, "learning_rate": 5.6125802050207425e-06, "loss": 0.5177, "step": 7391 }, { "epoch": 0.48, "grad_norm": 1.3075282573699951, "learning_rate": 5.611542750511878e-06, "loss": 0.5325, "step": 7392 }, { "epoch": 0.48, "grad_norm": 1.1972472667694092, "learning_rate": 5.610505269274058e-06, "loss": 0.5599, "step": 7393 }, { "epoch": 0.48, "grad_norm": 1.3445974588394165, "learning_rate": 5.609467761352628e-06, "loss": 0.5836, "step": 7394 }, { "epoch": 0.48, "grad_norm": 1.1089301109313965, "learning_rate": 5.608430226792934e-06, "loss": 0.4793, "step": 7395 }, { "epoch": 0.48, "grad_norm": 1.1769744157791138, "learning_rate": 5.607392665640326e-06, "loss": 0.5053, "step": 7396 }, { "epoch": 0.48, "grad_norm": 1.2184284925460815, "learning_rate": 5.606355077940151e-06, "loss": 0.5795, "step": 7397 }, { "epoch": 0.48, "grad_norm": 1.142028570175171, "learning_rate": 5.60531746373776e-06, "loss": 0.5553, "step": 7398 }, { "epoch": 0.48, "grad_norm": 1.1111090183258057, "learning_rate": 5.604279823078505e-06, "loss": 0.5275, "step": 7399 }, { "epoch": 0.48, "grad_norm": 1.2327908277511597, "learning_rate": 5.603242156007737e-06, "loss": 0.5488, "step": 7400 }, { "epoch": 0.48, "grad_norm": 1.1939854621887207, "learning_rate": 5.60220446257081e-06, "loss": 0.55, "step": 7401 }, { "epoch": 0.48, "grad_norm": 1.1794737577438354, "learning_rate": 5.601166742813081e-06, "loss": 0.5443, "step": 7402 }, { "epoch": 0.48, "grad_norm": 1.2536227703094482, "learning_rate": 5.600128996779905e-06, "loss": 0.5763, "step": 7403 }, { "epoch": 0.48, "grad_norm": 1.220594882965088, "learning_rate": 5.599091224516638e-06, "loss": 0.5396, "step": 7404 }, { "epoch": 0.48, "grad_norm": 0.9804705381393433, "learning_rate": 5.598053426068639e-06, "loss": 0.4807, "step": 7405 }, { "epoch": 0.48, "grad_norm": 1.2161142826080322, "learning_rate": 5.597015601481269e-06, "loss": 0.5604, "step": 7406 }, { "epoch": 0.48, "grad_norm": 1.1386032104492188, "learning_rate": 5.5959777507998865e-06, "loss": 0.5178, "step": 7407 }, { "epoch": 0.48, "grad_norm": 1.2543987035751343, "learning_rate": 5.594939874069853e-06, "loss": 0.5574, "step": 7408 }, { "epoch": 0.48, "grad_norm": 1.2180044651031494, "learning_rate": 5.593901971336536e-06, "loss": 0.525, "step": 7409 }, { "epoch": 0.48, "grad_norm": 1.2856501340866089, "learning_rate": 5.592864042645293e-06, "loss": 0.5447, "step": 7410 }, { "epoch": 0.48, "grad_norm": 1.2006208896636963, "learning_rate": 5.591826088041493e-06, "loss": 0.499, "step": 7411 }, { "epoch": 0.48, "grad_norm": 1.1369978189468384, "learning_rate": 5.590788107570503e-06, "loss": 0.5155, "step": 7412 }, { "epoch": 0.48, "grad_norm": 1.127638339996338, "learning_rate": 5.5897501012776874e-06, "loss": 0.4846, "step": 7413 }, { "epoch": 0.48, "grad_norm": 1.1356931924819946, "learning_rate": 5.588712069208416e-06, "loss": 0.5084, "step": 7414 }, { "epoch": 0.48, "grad_norm": 1.1752568483352661, "learning_rate": 5.587674011408062e-06, "loss": 0.5482, "step": 7415 }, { "epoch": 0.48, "grad_norm": 1.1434810161590576, "learning_rate": 5.586635927921991e-06, "loss": 0.5065, "step": 7416 }, { "epoch": 0.48, "grad_norm": 1.216009259223938, "learning_rate": 5.585597818795576e-06, "loss": 0.582, "step": 7417 }, { "epoch": 0.48, "grad_norm": 1.1266969442367554, "learning_rate": 5.584559684074193e-06, "loss": 0.5247, "step": 7418 }, { "epoch": 0.48, "grad_norm": 1.2449439764022827, "learning_rate": 5.583521523803214e-06, "loss": 0.5256, "step": 7419 }, { "epoch": 0.48, "grad_norm": 1.094051480293274, "learning_rate": 5.582483338028014e-06, "loss": 0.5188, "step": 7420 }, { "epoch": 0.48, "grad_norm": 1.0908280611038208, "learning_rate": 5.5814451267939715e-06, "loss": 0.4833, "step": 7421 }, { "epoch": 0.48, "grad_norm": 1.275408387184143, "learning_rate": 5.5804068901464615e-06, "loss": 0.552, "step": 7422 }, { "epoch": 0.48, "grad_norm": 1.2808630466461182, "learning_rate": 5.5793686281308645e-06, "loss": 0.5315, "step": 7423 }, { "epoch": 0.48, "grad_norm": 1.2362663745880127, "learning_rate": 5.578330340792559e-06, "loss": 0.5194, "step": 7424 }, { "epoch": 0.48, "grad_norm": 1.2671676874160767, "learning_rate": 5.577292028176926e-06, "loss": 0.5682, "step": 7425 }, { "epoch": 0.48, "grad_norm": 1.1129413843154907, "learning_rate": 5.57625369032935e-06, "loss": 0.5194, "step": 7426 }, { "epoch": 0.48, "grad_norm": 1.0642670392990112, "learning_rate": 5.57521532729521e-06, "loss": 0.4981, "step": 7427 }, { "epoch": 0.48, "grad_norm": 1.3844115734100342, "learning_rate": 5.574176939119892e-06, "loss": 0.4543, "step": 7428 }, { "epoch": 0.48, "grad_norm": 1.1798300743103027, "learning_rate": 5.573138525848783e-06, "loss": 0.4658, "step": 7429 }, { "epoch": 0.48, "grad_norm": 1.1165355443954468, "learning_rate": 5.572100087527266e-06, "loss": 0.5229, "step": 7430 }, { "epoch": 0.48, "grad_norm": 1.1284679174423218, "learning_rate": 5.571061624200731e-06, "loss": 0.5191, "step": 7431 }, { "epoch": 0.48, "grad_norm": 1.1103107929229736, "learning_rate": 5.570023135914566e-06, "loss": 0.5193, "step": 7432 }, { "epoch": 0.48, "grad_norm": 1.1626291275024414, "learning_rate": 5.56898462271416e-06, "loss": 0.5661, "step": 7433 }, { "epoch": 0.48, "grad_norm": 1.034399151802063, "learning_rate": 5.567946084644904e-06, "loss": 0.498, "step": 7434 }, { "epoch": 0.48, "grad_norm": 1.097126841545105, "learning_rate": 5.56690752175219e-06, "loss": 0.5591, "step": 7435 }, { "epoch": 0.48, "grad_norm": 1.2174564599990845, "learning_rate": 5.5658689340814106e-06, "loss": 0.4609, "step": 7436 }, { "epoch": 0.48, "grad_norm": 1.2078808546066284, "learning_rate": 5.564830321677961e-06, "loss": 0.5627, "step": 7437 }, { "epoch": 0.48, "grad_norm": 1.200832486152649, "learning_rate": 5.563791684587235e-06, "loss": 0.5276, "step": 7438 }, { "epoch": 0.48, "grad_norm": 1.1463117599487305, "learning_rate": 5.562753022854629e-06, "loss": 0.5477, "step": 7439 }, { "epoch": 0.48, "grad_norm": 1.1073706150054932, "learning_rate": 5.56171433652554e-06, "loss": 0.4948, "step": 7440 }, { "epoch": 0.48, "grad_norm": 1.1868702173233032, "learning_rate": 5.560675625645368e-06, "loss": 0.572, "step": 7441 }, { "epoch": 0.48, "grad_norm": 1.0597845315933228, "learning_rate": 5.559636890259509e-06, "loss": 0.4988, "step": 7442 }, { "epoch": 0.48, "grad_norm": 1.1161922216415405, "learning_rate": 5.558598130413366e-06, "loss": 0.5233, "step": 7443 }, { "epoch": 0.48, "grad_norm": 1.167739987373352, "learning_rate": 5.55755934615234e-06, "loss": 0.5368, "step": 7444 }, { "epoch": 0.48, "grad_norm": 1.2489598989486694, "learning_rate": 5.556520537521834e-06, "loss": 0.5615, "step": 7445 }, { "epoch": 0.48, "grad_norm": 1.1282446384429932, "learning_rate": 5.555481704567251e-06, "loss": 0.5384, "step": 7446 }, { "epoch": 0.48, "grad_norm": 1.2043570280075073, "learning_rate": 5.554442847333995e-06, "loss": 0.5287, "step": 7447 }, { "epoch": 0.48, "grad_norm": 1.0843989849090576, "learning_rate": 5.553403965867474e-06, "loss": 0.4813, "step": 7448 }, { "epoch": 0.48, "grad_norm": 1.2118008136749268, "learning_rate": 5.552365060213093e-06, "loss": 0.5234, "step": 7449 }, { "epoch": 0.48, "grad_norm": 1.185767412185669, "learning_rate": 5.55132613041626e-06, "loss": 0.5783, "step": 7450 }, { "epoch": 0.48, "grad_norm": 1.1086757183074951, "learning_rate": 5.550287176522384e-06, "loss": 0.5007, "step": 7451 }, { "epoch": 0.48, "grad_norm": 1.0778818130493164, "learning_rate": 5.549248198576875e-06, "loss": 0.5021, "step": 7452 }, { "epoch": 0.48, "grad_norm": 1.2819104194641113, "learning_rate": 5.548209196625146e-06, "loss": 0.5261, "step": 7453 }, { "epoch": 0.48, "grad_norm": 1.1498173475265503, "learning_rate": 5.5471701707126054e-06, "loss": 0.4967, "step": 7454 }, { "epoch": 0.48, "grad_norm": 1.2154611349105835, "learning_rate": 5.54613112088467e-06, "loss": 0.5468, "step": 7455 }, { "epoch": 0.48, "grad_norm": 1.1577284336090088, "learning_rate": 5.545092047186752e-06, "loss": 0.5561, "step": 7456 }, { "epoch": 0.48, "grad_norm": 1.2826188802719116, "learning_rate": 5.5440529496642656e-06, "loss": 0.4688, "step": 7457 }, { "epoch": 0.48, "grad_norm": 1.3368346691131592, "learning_rate": 5.54301382836263e-06, "loss": 0.5372, "step": 7458 }, { "epoch": 0.48, "grad_norm": 1.1156913042068481, "learning_rate": 5.541974683327261e-06, "loss": 0.5155, "step": 7459 }, { "epoch": 0.48, "grad_norm": 1.1882025003433228, "learning_rate": 5.540935514603576e-06, "loss": 0.5614, "step": 7460 }, { "epoch": 0.48, "grad_norm": 1.483730673789978, "learning_rate": 5.539896322236995e-06, "loss": 0.5424, "step": 7461 }, { "epoch": 0.48, "grad_norm": 1.3236299753189087, "learning_rate": 5.53885710627294e-06, "loss": 0.5662, "step": 7462 }, { "epoch": 0.48, "grad_norm": 1.1419572830200195, "learning_rate": 5.537817866756831e-06, "loss": 0.5212, "step": 7463 }, { "epoch": 0.48, "grad_norm": 1.113572597503662, "learning_rate": 5.536778603734088e-06, "loss": 0.4982, "step": 7464 }, { "epoch": 0.48, "grad_norm": 1.149654746055603, "learning_rate": 5.53573931725014e-06, "loss": 0.467, "step": 7465 }, { "epoch": 0.48, "grad_norm": 1.1790484189987183, "learning_rate": 5.5347000073504085e-06, "loss": 0.536, "step": 7466 }, { "epoch": 0.48, "grad_norm": 1.14736008644104, "learning_rate": 5.5336606740803185e-06, "loss": 0.5389, "step": 7467 }, { "epoch": 0.48, "grad_norm": 1.3531467914581299, "learning_rate": 5.532621317485297e-06, "loss": 0.5515, "step": 7468 }, { "epoch": 0.48, "grad_norm": 1.2944021224975586, "learning_rate": 5.531581937610772e-06, "loss": 0.5419, "step": 7469 }, { "epoch": 0.48, "grad_norm": 1.221036434173584, "learning_rate": 5.530542534502174e-06, "loss": 0.5473, "step": 7470 }, { "epoch": 0.48, "grad_norm": 1.139631748199463, "learning_rate": 5.529503108204928e-06, "loss": 0.5621, "step": 7471 }, { "epoch": 0.48, "grad_norm": 1.2799079418182373, "learning_rate": 5.528463658764468e-06, "loss": 0.5677, "step": 7472 }, { "epoch": 0.48, "grad_norm": 1.1192346811294556, "learning_rate": 5.527424186226226e-06, "loss": 0.5345, "step": 7473 }, { "epoch": 0.48, "grad_norm": 1.1312687397003174, "learning_rate": 5.5263846906356325e-06, "loss": 0.5639, "step": 7474 }, { "epoch": 0.48, "grad_norm": 1.1525782346725464, "learning_rate": 5.525345172038121e-06, "loss": 0.5273, "step": 7475 }, { "epoch": 0.48, "grad_norm": 1.320946216583252, "learning_rate": 5.524305630479131e-06, "loss": 0.4697, "step": 7476 }, { "epoch": 0.48, "grad_norm": 1.249483585357666, "learning_rate": 5.523266066004092e-06, "loss": 0.5061, "step": 7477 }, { "epoch": 0.48, "grad_norm": 1.2367006540298462, "learning_rate": 5.5222264786584436e-06, "loss": 0.5899, "step": 7478 }, { "epoch": 0.48, "grad_norm": 1.1431522369384766, "learning_rate": 5.521186868487623e-06, "loss": 0.5298, "step": 7479 }, { "epoch": 0.48, "grad_norm": 1.1639615297317505, "learning_rate": 5.52014723553707e-06, "loss": 0.5861, "step": 7480 }, { "epoch": 0.48, "grad_norm": 1.2568554878234863, "learning_rate": 5.519107579852222e-06, "loss": 0.4991, "step": 7481 }, { "epoch": 0.48, "grad_norm": 1.208962082862854, "learning_rate": 5.518067901478523e-06, "loss": 0.5426, "step": 7482 }, { "epoch": 0.48, "grad_norm": 1.035959005355835, "learning_rate": 5.517028200461411e-06, "loss": 0.4986, "step": 7483 }, { "epoch": 0.48, "grad_norm": 1.1493992805480957, "learning_rate": 5.51598847684633e-06, "loss": 0.5595, "step": 7484 }, { "epoch": 0.48, "grad_norm": 1.0599784851074219, "learning_rate": 5.5149487306787265e-06, "loss": 0.5124, "step": 7485 }, { "epoch": 0.48, "grad_norm": 1.2194594144821167, "learning_rate": 5.5139089620040395e-06, "loss": 0.5362, "step": 7486 }, { "epoch": 0.48, "grad_norm": 1.1730655431747437, "learning_rate": 5.512869170867718e-06, "loss": 0.5634, "step": 7487 }, { "epoch": 0.48, "grad_norm": 1.1683387756347656, "learning_rate": 5.511829357315211e-06, "loss": 0.5719, "step": 7488 }, { "epoch": 0.48, "grad_norm": 1.0607818365097046, "learning_rate": 5.510789521391961e-06, "loss": 0.5232, "step": 7489 }, { "epoch": 0.48, "grad_norm": 1.0947049856185913, "learning_rate": 5.50974966314342e-06, "loss": 0.523, "step": 7490 }, { "epoch": 0.48, "grad_norm": 1.107517957687378, "learning_rate": 5.508709782615036e-06, "loss": 0.5091, "step": 7491 }, { "epoch": 0.48, "grad_norm": 1.1836967468261719, "learning_rate": 5.5076698798522595e-06, "loss": 0.5524, "step": 7492 }, { "epoch": 0.48, "grad_norm": 1.216438889503479, "learning_rate": 5.506629954900543e-06, "loss": 0.5448, "step": 7493 }, { "epoch": 0.48, "grad_norm": 1.1675580739974976, "learning_rate": 5.505590007805337e-06, "loss": 0.5746, "step": 7494 }, { "epoch": 0.48, "grad_norm": 1.1233049631118774, "learning_rate": 5.504550038612099e-06, "loss": 0.5485, "step": 7495 }, { "epoch": 0.48, "grad_norm": 1.0697113275527954, "learning_rate": 5.503510047366277e-06, "loss": 0.5403, "step": 7496 }, { "epoch": 0.48, "grad_norm": 1.1263704299926758, "learning_rate": 5.502470034113333e-06, "loss": 0.4908, "step": 7497 }, { "epoch": 0.48, "grad_norm": 1.190281629562378, "learning_rate": 5.501429998898718e-06, "loss": 0.5342, "step": 7498 }, { "epoch": 0.48, "grad_norm": 1.2123337984085083, "learning_rate": 5.500389941767892e-06, "loss": 0.5151, "step": 7499 }, { "epoch": 0.48, "grad_norm": 1.3786234855651855, "learning_rate": 5.499349862766313e-06, "loss": 0.5433, "step": 7500 }, { "epoch": 0.48, "grad_norm": 1.240857720375061, "learning_rate": 5.49830976193944e-06, "loss": 0.5485, "step": 7501 }, { "epoch": 0.48, "grad_norm": 1.1043405532836914, "learning_rate": 5.497269639332732e-06, "loss": 0.5831, "step": 7502 }, { "epoch": 0.48, "grad_norm": 1.2037343978881836, "learning_rate": 5.4962294949916524e-06, "loss": 0.5439, "step": 7503 }, { "epoch": 0.48, "grad_norm": 1.2104079723358154, "learning_rate": 5.495189328961661e-06, "loss": 0.4816, "step": 7504 }, { "epoch": 0.48, "grad_norm": 1.255854845046997, "learning_rate": 5.494149141288222e-06, "loss": 0.5327, "step": 7505 }, { "epoch": 0.48, "grad_norm": 1.0737870931625366, "learning_rate": 5.4931089320168e-06, "loss": 0.4765, "step": 7506 }, { "epoch": 0.48, "grad_norm": 1.1565004587173462, "learning_rate": 5.492068701192856e-06, "loss": 0.5107, "step": 7507 }, { "epoch": 0.48, "grad_norm": 1.1343287229537964, "learning_rate": 5.491028448861861e-06, "loss": 0.5051, "step": 7508 }, { "epoch": 0.48, "grad_norm": 1.1426048278808594, "learning_rate": 5.489988175069279e-06, "loss": 0.5521, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.1920166015625, "learning_rate": 5.488947879860577e-06, "loss": 0.5248, "step": 7510 }, { "epoch": 0.48, "grad_norm": 1.1175310611724854, "learning_rate": 5.487907563281226e-06, "loss": 0.4617, "step": 7511 }, { "epoch": 0.48, "grad_norm": 1.179135799407959, "learning_rate": 5.486867225376693e-06, "loss": 0.562, "step": 7512 }, { "epoch": 0.48, "grad_norm": 1.1997991800308228, "learning_rate": 5.48582686619245e-06, "loss": 0.5386, "step": 7513 }, { "epoch": 0.49, "grad_norm": 1.2142462730407715, "learning_rate": 5.484786485773968e-06, "loss": 0.569, "step": 7514 }, { "epoch": 0.49, "grad_norm": 1.0824421644210815, "learning_rate": 5.48374608416672e-06, "loss": 0.5272, "step": 7515 }, { "epoch": 0.49, "grad_norm": 1.3244361877441406, "learning_rate": 5.482705661416179e-06, "loss": 0.607, "step": 7516 }, { "epoch": 0.49, "grad_norm": 1.1709526777267456, "learning_rate": 5.481665217567819e-06, "loss": 0.5411, "step": 7517 }, { "epoch": 0.49, "grad_norm": 1.1726189851760864, "learning_rate": 5.480624752667114e-06, "loss": 0.516, "step": 7518 }, { "epoch": 0.49, "grad_norm": 1.1831390857696533, "learning_rate": 5.4795842667595415e-06, "loss": 0.5106, "step": 7519 }, { "epoch": 0.49, "grad_norm": 1.1831670999526978, "learning_rate": 5.478543759890579e-06, "loss": 0.5294, "step": 7520 }, { "epoch": 0.49, "grad_norm": 1.2695212364196777, "learning_rate": 5.477503232105702e-06, "loss": 0.5813, "step": 7521 }, { "epoch": 0.49, "grad_norm": 1.099642276763916, "learning_rate": 5.4764626834503905e-06, "loss": 0.5206, "step": 7522 }, { "epoch": 0.49, "grad_norm": 1.0621463060379028, "learning_rate": 5.4754221139701265e-06, "loss": 0.5041, "step": 7523 }, { "epoch": 0.49, "grad_norm": 1.1110841035842896, "learning_rate": 5.474381523710387e-06, "loss": 0.5133, "step": 7524 }, { "epoch": 0.49, "grad_norm": 1.1977777481079102, "learning_rate": 5.473340912716655e-06, "loss": 0.583, "step": 7525 }, { "epoch": 0.49, "grad_norm": 1.2487170696258545, "learning_rate": 5.472300281034414e-06, "loss": 0.5797, "step": 7526 }, { "epoch": 0.49, "grad_norm": 1.2031081914901733, "learning_rate": 5.4712596287091446e-06, "loss": 0.6097, "step": 7527 }, { "epoch": 0.49, "grad_norm": 1.3186728954315186, "learning_rate": 5.470218955786334e-06, "loss": 0.5812, "step": 7528 }, { "epoch": 0.49, "grad_norm": 1.2135895490646362, "learning_rate": 5.469178262311465e-06, "loss": 0.5773, "step": 7529 }, { "epoch": 0.49, "grad_norm": 1.1062792539596558, "learning_rate": 5.468137548330026e-06, "loss": 0.4779, "step": 7530 }, { "epoch": 0.49, "grad_norm": 1.310847282409668, "learning_rate": 5.4670968138875015e-06, "loss": 0.56, "step": 7531 }, { "epoch": 0.49, "grad_norm": 1.1470617055892944, "learning_rate": 5.466056059029381e-06, "loss": 0.5352, "step": 7532 }, { "epoch": 0.49, "grad_norm": 1.1276906728744507, "learning_rate": 5.4650152838011515e-06, "loss": 0.5896, "step": 7533 }, { "epoch": 0.49, "grad_norm": 1.1057453155517578, "learning_rate": 5.463974488248305e-06, "loss": 0.5634, "step": 7534 }, { "epoch": 0.49, "grad_norm": 1.1040935516357422, "learning_rate": 5.46293367241633e-06, "loss": 0.5298, "step": 7535 }, { "epoch": 0.49, "grad_norm": 1.155350685119629, "learning_rate": 5.461892836350718e-06, "loss": 0.54, "step": 7536 }, { "epoch": 0.49, "grad_norm": 1.2865573167800903, "learning_rate": 5.460851980096964e-06, "loss": 0.5347, "step": 7537 }, { "epoch": 0.49, "grad_norm": 1.1265594959259033, "learning_rate": 5.459811103700557e-06, "loss": 0.5291, "step": 7538 }, { "epoch": 0.49, "grad_norm": 1.1297451257705688, "learning_rate": 5.458770207206995e-06, "loss": 0.5134, "step": 7539 }, { "epoch": 0.49, "grad_norm": 1.0785000324249268, "learning_rate": 5.457729290661769e-06, "loss": 0.5359, "step": 7540 }, { "epoch": 0.49, "grad_norm": 1.4019850492477417, "learning_rate": 5.456688354110377e-06, "loss": 0.5993, "step": 7541 }, { "epoch": 0.49, "grad_norm": 1.2276278734207153, "learning_rate": 5.455647397598316e-06, "loss": 0.5597, "step": 7542 }, { "epoch": 0.49, "grad_norm": 1.0616801977157593, "learning_rate": 5.454606421171082e-06, "loss": 0.5368, "step": 7543 }, { "epoch": 0.49, "grad_norm": 1.1965124607086182, "learning_rate": 5.453565424874174e-06, "loss": 0.4984, "step": 7544 }, { "epoch": 0.49, "grad_norm": 1.1826143264770508, "learning_rate": 5.452524408753091e-06, "loss": 0.5341, "step": 7545 }, { "epoch": 0.49, "grad_norm": 1.13352632522583, "learning_rate": 5.451483372853335e-06, "loss": 0.4864, "step": 7546 }, { "epoch": 0.49, "grad_norm": 1.082914113998413, "learning_rate": 5.450442317220406e-06, "loss": 0.5385, "step": 7547 }, { "epoch": 0.49, "grad_norm": 1.4130308628082275, "learning_rate": 5.449401241899804e-06, "loss": 0.5126, "step": 7548 }, { "epoch": 0.49, "grad_norm": 1.1435538530349731, "learning_rate": 5.448360146937034e-06, "loss": 0.5227, "step": 7549 }, { "epoch": 0.49, "grad_norm": 1.1469309329986572, "learning_rate": 5.4473190323776e-06, "loss": 0.5183, "step": 7550 }, { "epoch": 0.49, "grad_norm": 1.1054426431655884, "learning_rate": 5.4462778982670025e-06, "loss": 0.4654, "step": 7551 }, { "epoch": 0.49, "grad_norm": 1.1534759998321533, "learning_rate": 5.445236744650751e-06, "loss": 0.4828, "step": 7552 }, { "epoch": 0.49, "grad_norm": 1.0704281330108643, "learning_rate": 5.44419557157435e-06, "loss": 0.512, "step": 7553 }, { "epoch": 0.49, "grad_norm": 1.1473495960235596, "learning_rate": 5.443154379083306e-06, "loss": 0.5385, "step": 7554 }, { "epoch": 0.49, "grad_norm": 1.1790461540222168, "learning_rate": 5.442113167223129e-06, "loss": 0.5165, "step": 7555 }, { "epoch": 0.49, "grad_norm": 1.194181203842163, "learning_rate": 5.441071936039325e-06, "loss": 0.5319, "step": 7556 }, { "epoch": 0.49, "grad_norm": 1.0553873777389526, "learning_rate": 5.440030685577404e-06, "loss": 0.5267, "step": 7557 }, { "epoch": 0.49, "grad_norm": 1.1386165618896484, "learning_rate": 5.438989415882878e-06, "loss": 0.5772, "step": 7558 }, { "epoch": 0.49, "grad_norm": 1.1515640020370483, "learning_rate": 5.437948127001257e-06, "loss": 0.5111, "step": 7559 }, { "epoch": 0.49, "grad_norm": 1.19203519821167, "learning_rate": 5.436906818978052e-06, "loss": 0.5567, "step": 7560 }, { "epoch": 0.49, "grad_norm": 1.1427297592163086, "learning_rate": 5.435865491858781e-06, "loss": 0.5113, "step": 7561 }, { "epoch": 0.49, "grad_norm": 1.0752452611923218, "learning_rate": 5.43482414568895e-06, "loss": 0.5072, "step": 7562 }, { "epoch": 0.49, "grad_norm": 1.1580218076705933, "learning_rate": 5.433782780514079e-06, "loss": 0.5484, "step": 7563 }, { "epoch": 0.49, "grad_norm": 1.0958975553512573, "learning_rate": 5.432741396379681e-06, "loss": 0.5075, "step": 7564 }, { "epoch": 0.49, "grad_norm": 1.1717114448547363, "learning_rate": 5.431699993331274e-06, "loss": 0.5568, "step": 7565 }, { "epoch": 0.49, "grad_norm": 1.126702904701233, "learning_rate": 5.430658571414374e-06, "loss": 0.5372, "step": 7566 }, { "epoch": 0.49, "grad_norm": 1.1134731769561768, "learning_rate": 5.4296171306745e-06, "loss": 0.534, "step": 7567 }, { "epoch": 0.49, "grad_norm": 1.2368642091751099, "learning_rate": 5.4285756711571665e-06, "loss": 0.5442, "step": 7568 }, { "epoch": 0.49, "grad_norm": 1.0851322412490845, "learning_rate": 5.427534192907899e-06, "loss": 0.5238, "step": 7569 }, { "epoch": 0.49, "grad_norm": 1.1732951402664185, "learning_rate": 5.426492695972214e-06, "loss": 0.544, "step": 7570 }, { "epoch": 0.49, "grad_norm": 1.0686619281768799, "learning_rate": 5.425451180395633e-06, "loss": 0.5031, "step": 7571 }, { "epoch": 0.49, "grad_norm": 1.2761403322219849, "learning_rate": 5.424409646223679e-06, "loss": 0.5724, "step": 7572 }, { "epoch": 0.49, "grad_norm": 1.261584997177124, "learning_rate": 5.423368093501876e-06, "loss": 0.5605, "step": 7573 }, { "epoch": 0.49, "grad_norm": 1.1667840480804443, "learning_rate": 5.422326522275744e-06, "loss": 0.5462, "step": 7574 }, { "epoch": 0.49, "grad_norm": 1.0708638429641724, "learning_rate": 5.421284932590809e-06, "loss": 0.5406, "step": 7575 }, { "epoch": 0.49, "grad_norm": 1.372812032699585, "learning_rate": 5.420243324492599e-06, "loss": 0.5395, "step": 7576 }, { "epoch": 0.49, "grad_norm": 1.0707883834838867, "learning_rate": 5.419201698026635e-06, "loss": 0.5139, "step": 7577 }, { "epoch": 0.49, "grad_norm": 1.1148146390914917, "learning_rate": 5.418160053238447e-06, "loss": 0.5428, "step": 7578 }, { "epoch": 0.49, "grad_norm": 1.3387830257415771, "learning_rate": 5.417118390173562e-06, "loss": 0.5041, "step": 7579 }, { "epoch": 0.49, "grad_norm": 1.159958004951477, "learning_rate": 5.416076708877509e-06, "loss": 0.5007, "step": 7580 }, { "epoch": 0.49, "grad_norm": 1.083439588546753, "learning_rate": 5.415035009395817e-06, "loss": 0.5989, "step": 7581 }, { "epoch": 0.49, "grad_norm": 1.1043624877929688, "learning_rate": 5.413993291774015e-06, "loss": 0.547, "step": 7582 }, { "epoch": 0.49, "grad_norm": 1.1289608478546143, "learning_rate": 5.412951556057633e-06, "loss": 0.5417, "step": 7583 }, { "epoch": 0.49, "grad_norm": 1.199938416481018, "learning_rate": 5.411909802292206e-06, "loss": 0.5725, "step": 7584 }, { "epoch": 0.49, "grad_norm": 1.1412816047668457, "learning_rate": 5.410868030523262e-06, "loss": 0.5687, "step": 7585 }, { "epoch": 0.49, "grad_norm": 1.120273232460022, "learning_rate": 5.409826240796339e-06, "loss": 0.4875, "step": 7586 }, { "epoch": 0.49, "grad_norm": 1.184046745300293, "learning_rate": 5.408784433156966e-06, "loss": 0.5597, "step": 7587 }, { "epoch": 0.49, "grad_norm": 1.277559518814087, "learning_rate": 5.407742607650682e-06, "loss": 0.548, "step": 7588 }, { "epoch": 0.49, "grad_norm": 1.3164942264556885, "learning_rate": 5.4067007643230195e-06, "loss": 0.5473, "step": 7589 }, { "epoch": 0.49, "grad_norm": 1.2030270099639893, "learning_rate": 5.405658903219515e-06, "loss": 0.5677, "step": 7590 }, { "epoch": 0.49, "grad_norm": 1.2162764072418213, "learning_rate": 5.404617024385709e-06, "loss": 0.6023, "step": 7591 }, { "epoch": 0.49, "grad_norm": 1.2411948442459106, "learning_rate": 5.403575127867134e-06, "loss": 0.5613, "step": 7592 }, { "epoch": 0.49, "grad_norm": 1.0399229526519775, "learning_rate": 5.402533213709333e-06, "loss": 0.4868, "step": 7593 }, { "epoch": 0.49, "grad_norm": 1.0695172548294067, "learning_rate": 5.401491281957845e-06, "loss": 0.4918, "step": 7594 }, { "epoch": 0.49, "grad_norm": 1.1840404272079468, "learning_rate": 5.400449332658206e-06, "loss": 0.4945, "step": 7595 }, { "epoch": 0.49, "grad_norm": 1.1798686981201172, "learning_rate": 5.399407365855962e-06, "loss": 0.5594, "step": 7596 }, { "epoch": 0.49, "grad_norm": 1.1747539043426514, "learning_rate": 5.398365381596652e-06, "loss": 0.5282, "step": 7597 }, { "epoch": 0.49, "grad_norm": 1.1698263883590698, "learning_rate": 5.397323379925818e-06, "loss": 0.5074, "step": 7598 }, { "epoch": 0.49, "grad_norm": 1.0969332456588745, "learning_rate": 5.396281360889006e-06, "loss": 0.4978, "step": 7599 }, { "epoch": 0.49, "grad_norm": 1.355273723602295, "learning_rate": 5.395239324531756e-06, "loss": 0.5664, "step": 7600 }, { "epoch": 0.49, "grad_norm": 1.1648430824279785, "learning_rate": 5.394197270899616e-06, "loss": 0.5474, "step": 7601 }, { "epoch": 0.49, "grad_norm": 1.1862645149230957, "learning_rate": 5.3931552000381305e-06, "loss": 0.5215, "step": 7602 }, { "epoch": 0.49, "grad_norm": 1.1146520376205444, "learning_rate": 5.392113111992845e-06, "loss": 0.5222, "step": 7603 }, { "epoch": 0.49, "grad_norm": 1.1740812063217163, "learning_rate": 5.391071006809308e-06, "loss": 0.5101, "step": 7604 }, { "epoch": 0.49, "grad_norm": 1.080734133720398, "learning_rate": 5.390028884533066e-06, "loss": 0.5399, "step": 7605 }, { "epoch": 0.49, "grad_norm": 1.2208127975463867, "learning_rate": 5.388986745209667e-06, "loss": 0.5579, "step": 7606 }, { "epoch": 0.49, "grad_norm": 1.2010571956634521, "learning_rate": 5.387944588884661e-06, "loss": 0.5848, "step": 7607 }, { "epoch": 0.49, "grad_norm": 1.1374953985214233, "learning_rate": 5.386902415603599e-06, "loss": 0.5396, "step": 7608 }, { "epoch": 0.49, "grad_norm": 1.214041829109192, "learning_rate": 5.3858602254120294e-06, "loss": 0.5808, "step": 7609 }, { "epoch": 0.49, "grad_norm": 1.214555025100708, "learning_rate": 5.384818018355504e-06, "loss": 0.5517, "step": 7610 }, { "epoch": 0.49, "grad_norm": 1.1429259777069092, "learning_rate": 5.383775794479579e-06, "loss": 0.5226, "step": 7611 }, { "epoch": 0.49, "grad_norm": 1.1833561658859253, "learning_rate": 5.382733553829802e-06, "loss": 0.5351, "step": 7612 }, { "epoch": 0.49, "grad_norm": 1.1328636407852173, "learning_rate": 5.381691296451729e-06, "loss": 0.5461, "step": 7613 }, { "epoch": 0.49, "grad_norm": 1.253623127937317, "learning_rate": 5.380649022390915e-06, "loss": 0.5337, "step": 7614 }, { "epoch": 0.49, "grad_norm": 1.3033480644226074, "learning_rate": 5.379606731692914e-06, "loss": 0.545, "step": 7615 }, { "epoch": 0.49, "grad_norm": 1.1514121294021606, "learning_rate": 5.378564424403281e-06, "loss": 0.5354, "step": 7616 }, { "epoch": 0.49, "grad_norm": 1.2629382610321045, "learning_rate": 5.377522100567575e-06, "loss": 0.5497, "step": 7617 }, { "epoch": 0.49, "grad_norm": 1.3691564798355103, "learning_rate": 5.376479760231351e-06, "loss": 0.5661, "step": 7618 }, { "epoch": 0.49, "grad_norm": 1.2239465713500977, "learning_rate": 5.375437403440169e-06, "loss": 0.5329, "step": 7619 }, { "epoch": 0.49, "grad_norm": 1.1650367975234985, "learning_rate": 5.374395030239587e-06, "loss": 0.6094, "step": 7620 }, { "epoch": 0.49, "grad_norm": 1.3047343492507935, "learning_rate": 5.373352640675163e-06, "loss": 0.5556, "step": 7621 }, { "epoch": 0.49, "grad_norm": 1.2360669374465942, "learning_rate": 5.372310234792459e-06, "loss": 0.553, "step": 7622 }, { "epoch": 0.49, "grad_norm": 1.1202698945999146, "learning_rate": 5.371267812637035e-06, "loss": 0.5575, "step": 7623 }, { "epoch": 0.49, "grad_norm": 1.153233528137207, "learning_rate": 5.370225374254453e-06, "loss": 0.5134, "step": 7624 }, { "epoch": 0.49, "grad_norm": 1.1413947343826294, "learning_rate": 5.369182919690275e-06, "loss": 0.5489, "step": 7625 }, { "epoch": 0.49, "grad_norm": 1.2393964529037476, "learning_rate": 5.368140448990064e-06, "loss": 0.5491, "step": 7626 }, { "epoch": 0.49, "grad_norm": 1.2612650394439697, "learning_rate": 5.367097962199385e-06, "loss": 0.5611, "step": 7627 }, { "epoch": 0.49, "grad_norm": 1.2568209171295166, "learning_rate": 5.3660554593638e-06, "loss": 0.5963, "step": 7628 }, { "epoch": 0.49, "grad_norm": 1.2485072612762451, "learning_rate": 5.365012940528876e-06, "loss": 0.5191, "step": 7629 }, { "epoch": 0.49, "grad_norm": 1.1815074682235718, "learning_rate": 5.363970405740178e-06, "loss": 0.5276, "step": 7630 }, { "epoch": 0.49, "grad_norm": 1.302311658859253, "learning_rate": 5.3629278550432705e-06, "loss": 0.4968, "step": 7631 }, { "epoch": 0.49, "grad_norm": 1.1231929063796997, "learning_rate": 5.361885288483725e-06, "loss": 0.5017, "step": 7632 }, { "epoch": 0.49, "grad_norm": 1.1382217407226562, "learning_rate": 5.360842706107107e-06, "loss": 0.5041, "step": 7633 }, { "epoch": 0.49, "grad_norm": 1.153680682182312, "learning_rate": 5.359800107958985e-06, "loss": 0.51, "step": 7634 }, { "epoch": 0.49, "grad_norm": 1.124799132347107, "learning_rate": 5.358757494084928e-06, "loss": 0.5194, "step": 7635 }, { "epoch": 0.49, "grad_norm": 1.345058560371399, "learning_rate": 5.357714864530508e-06, "loss": 0.5631, "step": 7636 }, { "epoch": 0.49, "grad_norm": 1.137969732284546, "learning_rate": 5.356672219341293e-06, "loss": 0.5604, "step": 7637 }, { "epoch": 0.49, "grad_norm": 1.2086584568023682, "learning_rate": 5.355629558562856e-06, "loss": 0.5757, "step": 7638 }, { "epoch": 0.49, "grad_norm": 1.1892982721328735, "learning_rate": 5.3545868822407674e-06, "loss": 0.5378, "step": 7639 }, { "epoch": 0.49, "grad_norm": 1.1151397228240967, "learning_rate": 5.353544190420603e-06, "loss": 0.5089, "step": 7640 }, { "epoch": 0.49, "grad_norm": 1.2737196683883667, "learning_rate": 5.352501483147933e-06, "loss": 0.5585, "step": 7641 }, { "epoch": 0.49, "grad_norm": 1.130834937095642, "learning_rate": 5.351458760468332e-06, "loss": 0.529, "step": 7642 }, { "epoch": 0.49, "grad_norm": 1.2628551721572876, "learning_rate": 5.350416022427377e-06, "loss": 0.5745, "step": 7643 }, { "epoch": 0.49, "grad_norm": 1.1733088493347168, "learning_rate": 5.349373269070641e-06, "loss": 0.5303, "step": 7644 }, { "epoch": 0.49, "grad_norm": 1.117324948310852, "learning_rate": 5.348330500443701e-06, "loss": 0.4808, "step": 7645 }, { "epoch": 0.49, "grad_norm": 1.1062273979187012, "learning_rate": 5.347287716592133e-06, "loss": 0.5602, "step": 7646 }, { "epoch": 0.49, "grad_norm": 1.1009920835494995, "learning_rate": 5.346244917561515e-06, "loss": 0.5002, "step": 7647 }, { "epoch": 0.49, "grad_norm": 1.1694538593292236, "learning_rate": 5.345202103397424e-06, "loss": 0.5807, "step": 7648 }, { "epoch": 0.49, "grad_norm": 1.1766613721847534, "learning_rate": 5.344159274145441e-06, "loss": 0.5096, "step": 7649 }, { "epoch": 0.49, "grad_norm": 1.1559892892837524, "learning_rate": 5.343116429851145e-06, "loss": 0.5437, "step": 7650 }, { "epoch": 0.49, "grad_norm": 1.1181423664093018, "learning_rate": 5.342073570560114e-06, "loss": 0.5071, "step": 7651 }, { "epoch": 0.49, "grad_norm": 1.2396737337112427, "learning_rate": 5.34103069631793e-06, "loss": 0.5516, "step": 7652 }, { "epoch": 0.49, "grad_norm": 1.2803380489349365, "learning_rate": 5.339987807170174e-06, "loss": 0.5362, "step": 7653 }, { "epoch": 0.49, "grad_norm": 1.1374140977859497, "learning_rate": 5.338944903162428e-06, "loss": 0.5413, "step": 7654 }, { "epoch": 0.49, "grad_norm": 1.0542104244232178, "learning_rate": 5.337901984340275e-06, "loss": 0.5215, "step": 7655 }, { "epoch": 0.49, "grad_norm": 1.2445341348648071, "learning_rate": 5.336859050749297e-06, "loss": 0.5397, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.1120123863220215, "learning_rate": 5.335816102435079e-06, "loss": 0.5008, "step": 7657 }, { "epoch": 0.49, "grad_norm": 1.2559176683425903, "learning_rate": 5.334773139443208e-06, "loss": 0.5698, "step": 7658 }, { "epoch": 0.49, "grad_norm": 1.1421748399734497, "learning_rate": 5.3337301618192646e-06, "loss": 0.5259, "step": 7659 }, { "epoch": 0.49, "grad_norm": 1.117864727973938, "learning_rate": 5.332687169608836e-06, "loss": 0.5661, "step": 7660 }, { "epoch": 0.49, "grad_norm": 1.0668822526931763, "learning_rate": 5.331644162857513e-06, "loss": 0.5327, "step": 7661 }, { "epoch": 0.49, "grad_norm": 1.088028073310852, "learning_rate": 5.330601141610877e-06, "loss": 0.5003, "step": 7662 }, { "epoch": 0.49, "grad_norm": 1.279388189315796, "learning_rate": 5.329558105914518e-06, "loss": 0.5703, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.0780879259109497, "learning_rate": 5.328515055814025e-06, "loss": 0.54, "step": 7664 }, { "epoch": 0.49, "grad_norm": 1.1467276811599731, "learning_rate": 5.327471991354986e-06, "loss": 0.5106, "step": 7665 }, { "epoch": 0.49, "grad_norm": 1.0698986053466797, "learning_rate": 5.326428912582992e-06, "loss": 0.516, "step": 7666 }, { "epoch": 0.49, "grad_norm": 1.0738861560821533, "learning_rate": 5.325385819543633e-06, "loss": 0.4649, "step": 7667 }, { "epoch": 0.49, "grad_norm": 1.0663354396820068, "learning_rate": 5.324342712282497e-06, "loss": 0.5443, "step": 7668 }, { "epoch": 0.5, "grad_norm": 1.1535755395889282, "learning_rate": 5.323299590845179e-06, "loss": 0.5705, "step": 7669 }, { "epoch": 0.5, "grad_norm": 1.1505372524261475, "learning_rate": 5.322256455277271e-06, "loss": 0.5179, "step": 7670 }, { "epoch": 0.5, "grad_norm": 1.1854480504989624, "learning_rate": 5.321213305624364e-06, "loss": 0.5432, "step": 7671 }, { "epoch": 0.5, "grad_norm": 1.1520501375198364, "learning_rate": 5.320170141932052e-06, "loss": 0.5435, "step": 7672 }, { "epoch": 0.5, "grad_norm": 1.1068906784057617, "learning_rate": 5.31912696424593e-06, "loss": 0.5659, "step": 7673 }, { "epoch": 0.5, "grad_norm": 1.1782119274139404, "learning_rate": 5.31808377261159e-06, "loss": 0.5389, "step": 7674 }, { "epoch": 0.5, "grad_norm": 1.1200708150863647, "learning_rate": 5.317040567074631e-06, "loss": 0.49, "step": 7675 }, { "epoch": 0.5, "grad_norm": 1.3199434280395508, "learning_rate": 5.3159973476806446e-06, "loss": 0.5701, "step": 7676 }, { "epoch": 0.5, "grad_norm": 1.144039273262024, "learning_rate": 5.314954114475231e-06, "loss": 0.5041, "step": 7677 }, { "epoch": 0.5, "grad_norm": 1.1640132665634155, "learning_rate": 5.313910867503986e-06, "loss": 0.5395, "step": 7678 }, { "epoch": 0.5, "grad_norm": 1.3213038444519043, "learning_rate": 5.312867606812506e-06, "loss": 0.5583, "step": 7679 }, { "epoch": 0.5, "grad_norm": 1.19076406955719, "learning_rate": 5.31182433244639e-06, "loss": 0.5553, "step": 7680 }, { "epoch": 0.5, "grad_norm": 1.1752960681915283, "learning_rate": 5.310781044451238e-06, "loss": 0.5402, "step": 7681 }, { "epoch": 0.5, "grad_norm": 1.2068455219268799, "learning_rate": 5.309737742872647e-06, "loss": 0.5403, "step": 7682 }, { "epoch": 0.5, "grad_norm": 1.1109999418258667, "learning_rate": 5.308694427756219e-06, "loss": 0.5752, "step": 7683 }, { "epoch": 0.5, "grad_norm": 1.1735050678253174, "learning_rate": 5.307651099147555e-06, "loss": 0.5181, "step": 7684 }, { "epoch": 0.5, "grad_norm": 1.1827718019485474, "learning_rate": 5.306607757092254e-06, "loss": 0.5269, "step": 7685 }, { "epoch": 0.5, "grad_norm": 1.1320620775222778, "learning_rate": 5.305564401635919e-06, "loss": 0.5579, "step": 7686 }, { "epoch": 0.5, "grad_norm": 1.3520952463150024, "learning_rate": 5.304521032824153e-06, "loss": 0.5362, "step": 7687 }, { "epoch": 0.5, "grad_norm": 1.3297280073165894, "learning_rate": 5.303477650702556e-06, "loss": 0.4625, "step": 7688 }, { "epoch": 0.5, "grad_norm": 1.0874214172363281, "learning_rate": 5.302434255316737e-06, "loss": 0.5543, "step": 7689 }, { "epoch": 0.5, "grad_norm": 1.1298993825912476, "learning_rate": 5.301390846712295e-06, "loss": 0.5293, "step": 7690 }, { "epoch": 0.5, "grad_norm": 1.0955666303634644, "learning_rate": 5.300347424934837e-06, "loss": 0.5313, "step": 7691 }, { "epoch": 0.5, "grad_norm": 1.1799259185791016, "learning_rate": 5.299303990029969e-06, "loss": 0.5283, "step": 7692 }, { "epoch": 0.5, "grad_norm": 1.1845637559890747, "learning_rate": 5.298260542043295e-06, "loss": 0.5574, "step": 7693 }, { "epoch": 0.5, "grad_norm": 1.252231240272522, "learning_rate": 5.297217081020422e-06, "loss": 0.5356, "step": 7694 }, { "epoch": 0.5, "grad_norm": 1.1643908023834229, "learning_rate": 5.296173607006958e-06, "loss": 0.5649, "step": 7695 }, { "epoch": 0.5, "grad_norm": 1.0983246564865112, "learning_rate": 5.295130120048509e-06, "loss": 0.534, "step": 7696 }, { "epoch": 0.5, "grad_norm": 1.20734703540802, "learning_rate": 5.294086620190685e-06, "loss": 0.5697, "step": 7697 }, { "epoch": 0.5, "grad_norm": 1.3500770330429077, "learning_rate": 5.293043107479093e-06, "loss": 0.5086, "step": 7698 }, { "epoch": 0.5, "grad_norm": 1.1966341733932495, "learning_rate": 5.291999581959343e-06, "loss": 0.55, "step": 7699 }, { "epoch": 0.5, "grad_norm": 1.3095871210098267, "learning_rate": 5.290956043677043e-06, "loss": 0.5297, "step": 7700 }, { "epoch": 0.5, "grad_norm": 1.1187002658843994, "learning_rate": 5.289912492677807e-06, "loss": 0.5284, "step": 7701 }, { "epoch": 0.5, "grad_norm": 1.3143887519836426, "learning_rate": 5.2888689290072435e-06, "loss": 0.5367, "step": 7702 }, { "epoch": 0.5, "grad_norm": 1.129076361656189, "learning_rate": 5.287825352710964e-06, "loss": 0.5593, "step": 7703 }, { "epoch": 0.5, "grad_norm": 1.0785973072052002, "learning_rate": 5.286781763834581e-06, "loss": 0.5028, "step": 7704 }, { "epoch": 0.5, "grad_norm": 1.2011215686798096, "learning_rate": 5.285738162423708e-06, "loss": 0.5158, "step": 7705 }, { "epoch": 0.5, "grad_norm": 1.0963995456695557, "learning_rate": 5.284694548523956e-06, "loss": 0.5448, "step": 7706 }, { "epoch": 0.5, "grad_norm": 1.2299007177352905, "learning_rate": 5.283650922180939e-06, "loss": 0.5343, "step": 7707 }, { "epoch": 0.5, "grad_norm": 1.2024122476577759, "learning_rate": 5.2826072834402745e-06, "loss": 0.5742, "step": 7708 }, { "epoch": 0.5, "grad_norm": 1.164455533027649, "learning_rate": 5.281563632347573e-06, "loss": 0.5725, "step": 7709 }, { "epoch": 0.5, "grad_norm": 1.232858419418335, "learning_rate": 5.280519968948451e-06, "loss": 0.6036, "step": 7710 }, { "epoch": 0.5, "grad_norm": 1.1068354845046997, "learning_rate": 5.279476293288527e-06, "loss": 0.5436, "step": 7711 }, { "epoch": 0.5, "grad_norm": 1.1316955089569092, "learning_rate": 5.278432605413414e-06, "loss": 0.5431, "step": 7712 }, { "epoch": 0.5, "grad_norm": 1.09087336063385, "learning_rate": 5.277388905368729e-06, "loss": 0.4957, "step": 7713 }, { "epoch": 0.5, "grad_norm": 1.1370335817337036, "learning_rate": 5.2763451932000935e-06, "loss": 0.5015, "step": 7714 }, { "epoch": 0.5, "grad_norm": 1.0507025718688965, "learning_rate": 5.27530146895312e-06, "loss": 0.5301, "step": 7715 }, { "epoch": 0.5, "grad_norm": 1.2254458665847778, "learning_rate": 5.27425773267343e-06, "loss": 0.5338, "step": 7716 }, { "epoch": 0.5, "grad_norm": 1.198370099067688, "learning_rate": 5.273213984406643e-06, "loss": 0.5579, "step": 7717 }, { "epoch": 0.5, "grad_norm": 1.1375353336334229, "learning_rate": 5.272170224198377e-06, "loss": 0.5389, "step": 7718 }, { "epoch": 0.5, "grad_norm": 1.3397808074951172, "learning_rate": 5.271126452094254e-06, "loss": 0.5995, "step": 7719 }, { "epoch": 0.5, "grad_norm": 1.314558506011963, "learning_rate": 5.270082668139892e-06, "loss": 0.5209, "step": 7720 }, { "epoch": 0.5, "grad_norm": 1.272697925567627, "learning_rate": 5.2690388723809135e-06, "loss": 0.5382, "step": 7721 }, { "epoch": 0.5, "grad_norm": 1.167311191558838, "learning_rate": 5.267995064862941e-06, "loss": 0.5077, "step": 7722 }, { "epoch": 0.5, "grad_norm": 1.244244933128357, "learning_rate": 5.266951245631595e-06, "loss": 0.5438, "step": 7723 }, { "epoch": 0.5, "grad_norm": 1.140529990196228, "learning_rate": 5.2659074147324995e-06, "loss": 0.5449, "step": 7724 }, { "epoch": 0.5, "grad_norm": 1.3005108833312988, "learning_rate": 5.264863572211275e-06, "loss": 0.5424, "step": 7725 }, { "epoch": 0.5, "grad_norm": 1.2076908349990845, "learning_rate": 5.26381971811355e-06, "loss": 0.5714, "step": 7726 }, { "epoch": 0.5, "grad_norm": 1.282137393951416, "learning_rate": 5.262775852484942e-06, "loss": 0.5573, "step": 7727 }, { "epoch": 0.5, "grad_norm": 1.410156011581421, "learning_rate": 5.261731975371084e-06, "loss": 0.5437, "step": 7728 }, { "epoch": 0.5, "grad_norm": 1.1432827711105347, "learning_rate": 5.260688086817594e-06, "loss": 0.5219, "step": 7729 }, { "epoch": 0.5, "grad_norm": 1.308428406715393, "learning_rate": 5.259644186870099e-06, "loss": 0.5586, "step": 7730 }, { "epoch": 0.5, "grad_norm": 1.2948791980743408, "learning_rate": 5.258600275574229e-06, "loss": 0.5812, "step": 7731 }, { "epoch": 0.5, "grad_norm": 1.2543741464614868, "learning_rate": 5.257556352975607e-06, "loss": 0.5871, "step": 7732 }, { "epoch": 0.5, "grad_norm": 1.1764028072357178, "learning_rate": 5.25651241911986e-06, "loss": 0.5446, "step": 7733 }, { "epoch": 0.5, "grad_norm": 1.2863543033599854, "learning_rate": 5.25546847405262e-06, "loss": 0.5534, "step": 7734 }, { "epoch": 0.5, "grad_norm": 1.3736060857772827, "learning_rate": 5.254424517819509e-06, "loss": 0.5436, "step": 7735 }, { "epoch": 0.5, "grad_norm": 1.2238651514053345, "learning_rate": 5.253380550466161e-06, "loss": 0.539, "step": 7736 }, { "epoch": 0.5, "grad_norm": 1.3032926321029663, "learning_rate": 5.252336572038203e-06, "loss": 0.5468, "step": 7737 }, { "epoch": 0.5, "grad_norm": 1.1452652215957642, "learning_rate": 5.251292582581263e-06, "loss": 0.5002, "step": 7738 }, { "epoch": 0.5, "grad_norm": 1.2036973237991333, "learning_rate": 5.250248582140972e-06, "loss": 0.5773, "step": 7739 }, { "epoch": 0.5, "grad_norm": 1.160822868347168, "learning_rate": 5.249204570762963e-06, "loss": 0.5296, "step": 7740 }, { "epoch": 0.5, "grad_norm": 1.1997766494750977, "learning_rate": 5.248160548492864e-06, "loss": 0.5401, "step": 7741 }, { "epoch": 0.5, "grad_norm": 1.1246157884597778, "learning_rate": 5.247116515376308e-06, "loss": 0.4928, "step": 7742 }, { "epoch": 0.5, "grad_norm": 1.190597414970398, "learning_rate": 5.246072471458929e-06, "loss": 0.5828, "step": 7743 }, { "epoch": 0.5, "grad_norm": 1.1749919652938843, "learning_rate": 5.245028416786353e-06, "loss": 0.5057, "step": 7744 }, { "epoch": 0.5, "grad_norm": 1.215651273727417, "learning_rate": 5.243984351404219e-06, "loss": 0.5152, "step": 7745 }, { "epoch": 0.5, "grad_norm": 1.1775078773498535, "learning_rate": 5.242940275358159e-06, "loss": 0.5435, "step": 7746 }, { "epoch": 0.5, "grad_norm": 1.1864814758300781, "learning_rate": 5.241896188693805e-06, "loss": 0.5682, "step": 7747 }, { "epoch": 0.5, "grad_norm": 1.2912836074829102, "learning_rate": 5.240852091456794e-06, "loss": 0.5497, "step": 7748 }, { "epoch": 0.5, "grad_norm": 1.3571783304214478, "learning_rate": 5.2398079836927595e-06, "loss": 0.5997, "step": 7749 }, { "epoch": 0.5, "grad_norm": 1.0967847108840942, "learning_rate": 5.238763865447336e-06, "loss": 0.505, "step": 7750 }, { "epoch": 0.5, "grad_norm": 1.2204644680023193, "learning_rate": 5.23771973676616e-06, "loss": 0.5432, "step": 7751 }, { "epoch": 0.5, "grad_norm": 1.2159931659698486, "learning_rate": 5.236675597694869e-06, "loss": 0.5347, "step": 7752 }, { "epoch": 0.5, "grad_norm": 1.135953426361084, "learning_rate": 5.235631448279097e-06, "loss": 0.4929, "step": 7753 }, { "epoch": 0.5, "grad_norm": 1.1237037181854248, "learning_rate": 5.2345872885644825e-06, "loss": 0.5266, "step": 7754 }, { "epoch": 0.5, "grad_norm": 1.1232815980911255, "learning_rate": 5.2335431185966634e-06, "loss": 0.5606, "step": 7755 }, { "epoch": 0.5, "grad_norm": 1.107222318649292, "learning_rate": 5.232498938421276e-06, "loss": 0.5304, "step": 7756 }, { "epoch": 0.5, "grad_norm": 1.1825382709503174, "learning_rate": 5.231454748083959e-06, "loss": 0.5629, "step": 7757 }, { "epoch": 0.5, "grad_norm": 1.2357969284057617, "learning_rate": 5.2304105476303545e-06, "loss": 0.5904, "step": 7758 }, { "epoch": 0.5, "grad_norm": 1.1041454076766968, "learning_rate": 5.229366337106099e-06, "loss": 0.5301, "step": 7759 }, { "epoch": 0.5, "grad_norm": 1.1973230838775635, "learning_rate": 5.2283221165568324e-06, "loss": 0.5266, "step": 7760 }, { "epoch": 0.5, "grad_norm": 1.0809707641601562, "learning_rate": 5.227277886028195e-06, "loss": 0.5512, "step": 7761 }, { "epoch": 0.5, "grad_norm": 1.3195117712020874, "learning_rate": 5.2262336455658265e-06, "loss": 0.5662, "step": 7762 }, { "epoch": 0.5, "grad_norm": 1.3206850290298462, "learning_rate": 5.22518939521537e-06, "loss": 0.5729, "step": 7763 }, { "epoch": 0.5, "grad_norm": 1.0694944858551025, "learning_rate": 5.224145135022467e-06, "loss": 0.5383, "step": 7764 }, { "epoch": 0.5, "grad_norm": 1.271786093711853, "learning_rate": 5.223100865032757e-06, "loss": 0.5636, "step": 7765 }, { "epoch": 0.5, "grad_norm": 1.069655418395996, "learning_rate": 5.222056585291885e-06, "loss": 0.4833, "step": 7766 }, { "epoch": 0.5, "grad_norm": 1.1308927536010742, "learning_rate": 5.221012295845492e-06, "loss": 0.4977, "step": 7767 }, { "epoch": 0.5, "grad_norm": 1.1266168355941772, "learning_rate": 5.219967996739222e-06, "loss": 0.4994, "step": 7768 }, { "epoch": 0.5, "grad_norm": 1.0971968173980713, "learning_rate": 5.218923688018717e-06, "loss": 0.526, "step": 7769 }, { "epoch": 0.5, "grad_norm": 1.1781247854232788, "learning_rate": 5.217879369729624e-06, "loss": 0.5313, "step": 7770 }, { "epoch": 0.5, "grad_norm": 1.1731157302856445, "learning_rate": 5.216835041917586e-06, "loss": 0.5712, "step": 7771 }, { "epoch": 0.5, "grad_norm": 1.1592631340026855, "learning_rate": 5.215790704628247e-06, "loss": 0.5576, "step": 7772 }, { "epoch": 0.5, "grad_norm": 1.0690956115722656, "learning_rate": 5.214746357907254e-06, "loss": 0.4989, "step": 7773 }, { "epoch": 0.5, "grad_norm": 1.3169423341751099, "learning_rate": 5.21370200180025e-06, "loss": 0.523, "step": 7774 }, { "epoch": 0.5, "grad_norm": 1.1269420385360718, "learning_rate": 5.212657636352885e-06, "loss": 0.5219, "step": 7775 }, { "epoch": 0.5, "grad_norm": 1.180670142173767, "learning_rate": 5.211613261610801e-06, "loss": 0.5562, "step": 7776 }, { "epoch": 0.5, "grad_norm": 1.1348727941513062, "learning_rate": 5.21056887761965e-06, "loss": 0.5274, "step": 7777 }, { "epoch": 0.5, "grad_norm": 1.1822993755340576, "learning_rate": 5.209524484425075e-06, "loss": 0.5412, "step": 7778 }, { "epoch": 0.5, "grad_norm": 1.0746936798095703, "learning_rate": 5.208480082072724e-06, "loss": 0.5705, "step": 7779 }, { "epoch": 0.5, "grad_norm": 1.1167031526565552, "learning_rate": 5.207435670608249e-06, "loss": 0.5313, "step": 7780 }, { "epoch": 0.5, "grad_norm": 1.0916310548782349, "learning_rate": 5.206391250077295e-06, "loss": 0.4807, "step": 7781 }, { "epoch": 0.5, "grad_norm": 1.1030054092407227, "learning_rate": 5.205346820525512e-06, "loss": 0.4851, "step": 7782 }, { "epoch": 0.5, "grad_norm": 1.1558001041412354, "learning_rate": 5.204302381998548e-06, "loss": 0.571, "step": 7783 }, { "epoch": 0.5, "grad_norm": 1.101823329925537, "learning_rate": 5.203257934542056e-06, "loss": 0.5566, "step": 7784 }, { "epoch": 0.5, "grad_norm": 1.1533459424972534, "learning_rate": 5.202213478201684e-06, "loss": 0.5104, "step": 7785 }, { "epoch": 0.5, "grad_norm": 1.2702189683914185, "learning_rate": 5.20116901302308e-06, "loss": 0.5306, "step": 7786 }, { "epoch": 0.5, "grad_norm": 1.2374298572540283, "learning_rate": 5.200124539051902e-06, "loss": 0.5593, "step": 7787 }, { "epoch": 0.5, "grad_norm": 1.182329773902893, "learning_rate": 5.199080056333793e-06, "loss": 0.531, "step": 7788 }, { "epoch": 0.5, "grad_norm": 1.1431937217712402, "learning_rate": 5.198035564914408e-06, "loss": 0.5325, "step": 7789 }, { "epoch": 0.5, "grad_norm": 1.2384577989578247, "learning_rate": 5.196991064839403e-06, "loss": 0.5401, "step": 7790 }, { "epoch": 0.5, "grad_norm": 1.093711018562317, "learning_rate": 5.195946556154424e-06, "loss": 0.4866, "step": 7791 }, { "epoch": 0.5, "grad_norm": 1.0875180959701538, "learning_rate": 5.1949020389051275e-06, "loss": 0.5577, "step": 7792 }, { "epoch": 0.5, "grad_norm": 1.133819341659546, "learning_rate": 5.193857513137166e-06, "loss": 0.5427, "step": 7793 }, { "epoch": 0.5, "grad_norm": 1.1390849351882935, "learning_rate": 5.192812978896191e-06, "loss": 0.5484, "step": 7794 }, { "epoch": 0.5, "grad_norm": 1.1493046283721924, "learning_rate": 5.19176843622786e-06, "loss": 0.4716, "step": 7795 }, { "epoch": 0.5, "grad_norm": 1.1133166551589966, "learning_rate": 5.1907238851778255e-06, "loss": 0.5306, "step": 7796 }, { "epoch": 0.5, "grad_norm": 1.2264822721481323, "learning_rate": 5.189679325791741e-06, "loss": 0.4986, "step": 7797 }, { "epoch": 0.5, "grad_norm": 1.3358497619628906, "learning_rate": 5.1886347581152615e-06, "loss": 0.5684, "step": 7798 }, { "epoch": 0.5, "grad_norm": 1.2090849876403809, "learning_rate": 5.187590182194047e-06, "loss": 0.4826, "step": 7799 }, { "epoch": 0.5, "grad_norm": 1.2497836351394653, "learning_rate": 5.186545598073747e-06, "loss": 0.5452, "step": 7800 }, { "epoch": 0.5, "grad_norm": 1.1750930547714233, "learning_rate": 5.18550100580002e-06, "loss": 0.5143, "step": 7801 }, { "epoch": 0.5, "grad_norm": 1.3529410362243652, "learning_rate": 5.1844564054185234e-06, "loss": 0.5664, "step": 7802 }, { "epoch": 0.5, "grad_norm": 1.3044902086257935, "learning_rate": 5.183411796974913e-06, "loss": 0.5753, "step": 7803 }, { "epoch": 0.5, "grad_norm": 1.133779764175415, "learning_rate": 5.182367180514846e-06, "loss": 0.5397, "step": 7804 }, { "epoch": 0.5, "grad_norm": 1.2774778604507446, "learning_rate": 5.181322556083981e-06, "loss": 0.5581, "step": 7805 }, { "epoch": 0.5, "grad_norm": 1.3214441537857056, "learning_rate": 5.180277923727975e-06, "loss": 0.5368, "step": 7806 }, { "epoch": 0.5, "grad_norm": 1.3584380149841309, "learning_rate": 5.1792332834924845e-06, "loss": 0.5847, "step": 7807 }, { "epoch": 0.5, "grad_norm": 1.3040573596954346, "learning_rate": 5.17818863542317e-06, "loss": 0.5228, "step": 7808 }, { "epoch": 0.5, "grad_norm": 1.1384456157684326, "learning_rate": 5.1771439795656905e-06, "loss": 0.5465, "step": 7809 }, { "epoch": 0.5, "grad_norm": 1.0705375671386719, "learning_rate": 5.176099315965706e-06, "loss": 0.5066, "step": 7810 }, { "epoch": 0.5, "grad_norm": 1.1589736938476562, "learning_rate": 5.175054644668872e-06, "loss": 0.5842, "step": 7811 }, { "epoch": 0.5, "grad_norm": 1.1041481494903564, "learning_rate": 5.174009965720852e-06, "loss": 0.523, "step": 7812 }, { "epoch": 0.5, "grad_norm": 1.2177927494049072, "learning_rate": 5.172965279167307e-06, "loss": 0.5824, "step": 7813 }, { "epoch": 0.5, "grad_norm": 1.2533961534500122, "learning_rate": 5.171920585053894e-06, "loss": 0.5569, "step": 7814 }, { "epoch": 0.5, "grad_norm": 1.2161046266555786, "learning_rate": 5.1708758834262776e-06, "loss": 0.5175, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.1651859283447266, "learning_rate": 5.169831174330116e-06, "loss": 0.5026, "step": 7816 }, { "epoch": 0.5, "grad_norm": 1.1907507181167603, "learning_rate": 5.168786457811071e-06, "loss": 0.5317, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.2115758657455444, "learning_rate": 5.167741733914808e-06, "loss": 0.5176, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.1690185070037842, "learning_rate": 5.166697002686984e-06, "loss": 0.5412, "step": 7819 }, { "epoch": 0.5, "grad_norm": 1.1858314275741577, "learning_rate": 5.165652264173266e-06, "loss": 0.5433, "step": 7820 }, { "epoch": 0.5, "grad_norm": 1.2075093984603882, "learning_rate": 5.1646075184193135e-06, "loss": 0.535, "step": 7821 }, { "epoch": 0.5, "grad_norm": 1.1743098497390747, "learning_rate": 5.163562765470792e-06, "loss": 0.5244, "step": 7822 }, { "epoch": 0.5, "grad_norm": 1.2845944166183472, "learning_rate": 5.162518005373364e-06, "loss": 0.5279, "step": 7823 }, { "epoch": 0.51, "grad_norm": 1.1602458953857422, "learning_rate": 5.1614732381726915e-06, "loss": 0.5022, "step": 7824 }, { "epoch": 0.51, "grad_norm": 1.25445556640625, "learning_rate": 5.160428463914443e-06, "loss": 0.5491, "step": 7825 }, { "epoch": 0.51, "grad_norm": 1.0861307382583618, "learning_rate": 5.159383682644279e-06, "loss": 0.5134, "step": 7826 }, { "epoch": 0.51, "grad_norm": 1.0675790309906006, "learning_rate": 5.158338894407865e-06, "loss": 0.5755, "step": 7827 }, { "epoch": 0.51, "grad_norm": 1.0819531679153442, "learning_rate": 5.157294099250868e-06, "loss": 0.5707, "step": 7828 }, { "epoch": 0.51, "grad_norm": 1.1471906900405884, "learning_rate": 5.156249297218951e-06, "loss": 0.5274, "step": 7829 }, { "epoch": 0.51, "grad_norm": 1.1323813199996948, "learning_rate": 5.155204488357779e-06, "loss": 0.5726, "step": 7830 }, { "epoch": 0.51, "grad_norm": 1.1184446811676025, "learning_rate": 5.1541596727130204e-06, "loss": 0.4809, "step": 7831 }, { "epoch": 0.51, "grad_norm": 1.1657745838165283, "learning_rate": 5.15311485033034e-06, "loss": 0.5461, "step": 7832 }, { "epoch": 0.51, "grad_norm": 1.1665993928909302, "learning_rate": 5.152070021255404e-06, "loss": 0.5805, "step": 7833 }, { "epoch": 0.51, "grad_norm": 1.084879755973816, "learning_rate": 5.151025185533881e-06, "loss": 0.4901, "step": 7834 }, { "epoch": 0.51, "grad_norm": 1.2155433893203735, "learning_rate": 5.149980343211436e-06, "loss": 0.5552, "step": 7835 }, { "epoch": 0.51, "grad_norm": 1.0997713804244995, "learning_rate": 5.148935494333736e-06, "loss": 0.5107, "step": 7836 }, { "epoch": 0.51, "grad_norm": 1.155659556388855, "learning_rate": 5.147890638946453e-06, "loss": 0.5572, "step": 7837 }, { "epoch": 0.51, "grad_norm": 1.0397793054580688, "learning_rate": 5.14684577709525e-06, "loss": 0.4758, "step": 7838 }, { "epoch": 0.51, "grad_norm": 1.2862887382507324, "learning_rate": 5.1458009088257955e-06, "loss": 0.5131, "step": 7839 }, { "epoch": 0.51, "grad_norm": 1.1842467784881592, "learning_rate": 5.144756034183762e-06, "loss": 0.4467, "step": 7840 }, { "epoch": 0.51, "grad_norm": 1.124894142150879, "learning_rate": 5.143711153214814e-06, "loss": 0.5077, "step": 7841 }, { "epoch": 0.51, "grad_norm": 1.109727144241333, "learning_rate": 5.142666265964622e-06, "loss": 0.5061, "step": 7842 }, { "epoch": 0.51, "grad_norm": 1.0687440633773804, "learning_rate": 5.1416213724788574e-06, "loss": 0.5284, "step": 7843 }, { "epoch": 0.51, "grad_norm": 1.14252769947052, "learning_rate": 5.140576472803186e-06, "loss": 0.5276, "step": 7844 }, { "epoch": 0.51, "grad_norm": 1.2246335744857788, "learning_rate": 5.13953156698328e-06, "loss": 0.5908, "step": 7845 }, { "epoch": 0.51, "grad_norm": 1.168135404586792, "learning_rate": 5.138486655064812e-06, "loss": 0.4986, "step": 7846 }, { "epoch": 0.51, "grad_norm": 1.0348963737487793, "learning_rate": 5.137441737093446e-06, "loss": 0.5226, "step": 7847 }, { "epoch": 0.51, "grad_norm": 1.2862557172775269, "learning_rate": 5.1363968131148575e-06, "loss": 0.5745, "step": 7848 }, { "epoch": 0.51, "grad_norm": 1.1692252159118652, "learning_rate": 5.1353518831747175e-06, "loss": 0.5135, "step": 7849 }, { "epoch": 0.51, "grad_norm": 1.0253355503082275, "learning_rate": 5.134306947318694e-06, "loss": 0.5015, "step": 7850 }, { "epoch": 0.51, "grad_norm": 1.157023310661316, "learning_rate": 5.133262005592462e-06, "loss": 0.5734, "step": 7851 }, { "epoch": 0.51, "grad_norm": 1.2595820426940918, "learning_rate": 5.13221705804169e-06, "loss": 0.5407, "step": 7852 }, { "epoch": 0.51, "grad_norm": 1.2019740343093872, "learning_rate": 5.131172104712052e-06, "loss": 0.5263, "step": 7853 }, { "epoch": 0.51, "grad_norm": 1.087229609489441, "learning_rate": 5.130127145649221e-06, "loss": 0.4867, "step": 7854 }, { "epoch": 0.51, "grad_norm": 1.156124472618103, "learning_rate": 5.129082180898867e-06, "loss": 0.5696, "step": 7855 }, { "epoch": 0.51, "grad_norm": 1.138146996498108, "learning_rate": 5.128037210506664e-06, "loss": 0.5318, "step": 7856 }, { "epoch": 0.51, "grad_norm": 1.05597984790802, "learning_rate": 5.126992234518284e-06, "loss": 0.5077, "step": 7857 }, { "epoch": 0.51, "grad_norm": 1.0713354349136353, "learning_rate": 5.1259472529794015e-06, "loss": 0.4927, "step": 7858 }, { "epoch": 0.51, "grad_norm": 1.2333128452301025, "learning_rate": 5.124902265935691e-06, "loss": 0.5749, "step": 7859 }, { "epoch": 0.51, "grad_norm": 1.221390962600708, "learning_rate": 5.123857273432824e-06, "loss": 0.5509, "step": 7860 }, { "epoch": 0.51, "grad_norm": 1.2459924221038818, "learning_rate": 5.122812275516474e-06, "loss": 0.5563, "step": 7861 }, { "epoch": 0.51, "grad_norm": 1.2095069885253906, "learning_rate": 5.121767272232319e-06, "loss": 0.5591, "step": 7862 }, { "epoch": 0.51, "grad_norm": 1.2746222019195557, "learning_rate": 5.1207222636260276e-06, "loss": 0.578, "step": 7863 }, { "epoch": 0.51, "grad_norm": 1.1508021354675293, "learning_rate": 5.11967724974328e-06, "loss": 0.5511, "step": 7864 }, { "epoch": 0.51, "grad_norm": 1.162165641784668, "learning_rate": 5.118632230629747e-06, "loss": 0.5231, "step": 7865 }, { "epoch": 0.51, "grad_norm": 1.2290794849395752, "learning_rate": 5.117587206331105e-06, "loss": 0.4953, "step": 7866 }, { "epoch": 0.51, "grad_norm": 1.1853337287902832, "learning_rate": 5.116542176893032e-06, "loss": 0.5578, "step": 7867 }, { "epoch": 0.51, "grad_norm": 1.0877392292022705, "learning_rate": 5.115497142361197e-06, "loss": 0.5239, "step": 7868 }, { "epoch": 0.51, "grad_norm": 1.0747884511947632, "learning_rate": 5.114452102781283e-06, "loss": 0.4804, "step": 7869 }, { "epoch": 0.51, "grad_norm": 1.1615222692489624, "learning_rate": 5.113407058198962e-06, "loss": 0.5221, "step": 7870 }, { "epoch": 0.51, "grad_norm": 1.1594516038894653, "learning_rate": 5.112362008659911e-06, "loss": 0.5536, "step": 7871 }, { "epoch": 0.51, "grad_norm": 1.1213009357452393, "learning_rate": 5.1113169542098075e-06, "loss": 0.5701, "step": 7872 }, { "epoch": 0.51, "grad_norm": 1.1296504735946655, "learning_rate": 5.110271894894326e-06, "loss": 0.4999, "step": 7873 }, { "epoch": 0.51, "grad_norm": 1.051378846168518, "learning_rate": 5.109226830759144e-06, "loss": 0.4767, "step": 7874 }, { "epoch": 0.51, "grad_norm": 1.1025536060333252, "learning_rate": 5.108181761849941e-06, "loss": 0.5055, "step": 7875 }, { "epoch": 0.51, "grad_norm": 1.2313265800476074, "learning_rate": 5.107136688212391e-06, "loss": 0.5132, "step": 7876 }, { "epoch": 0.51, "grad_norm": 1.175279140472412, "learning_rate": 5.1060916098921735e-06, "loss": 0.5261, "step": 7877 }, { "epoch": 0.51, "grad_norm": 1.123937964439392, "learning_rate": 5.105046526934965e-06, "loss": 0.5472, "step": 7878 }, { "epoch": 0.51, "grad_norm": 1.2302724123001099, "learning_rate": 5.104001439386443e-06, "loss": 0.5844, "step": 7879 }, { "epoch": 0.51, "grad_norm": 1.0905956029891968, "learning_rate": 5.102956347292287e-06, "loss": 0.5245, "step": 7880 }, { "epoch": 0.51, "grad_norm": 1.2243013381958008, "learning_rate": 5.101911250698175e-06, "loss": 0.5869, "step": 7881 }, { "epoch": 0.51, "grad_norm": 1.1533323526382446, "learning_rate": 5.100866149649785e-06, "loss": 0.5557, "step": 7882 }, { "epoch": 0.51, "grad_norm": 1.0499260425567627, "learning_rate": 5.099821044192795e-06, "loss": 0.5695, "step": 7883 }, { "epoch": 0.51, "grad_norm": 1.2053430080413818, "learning_rate": 5.098775934372887e-06, "loss": 0.5508, "step": 7884 }, { "epoch": 0.51, "grad_norm": 1.1706863641738892, "learning_rate": 5.097730820235736e-06, "loss": 0.5121, "step": 7885 }, { "epoch": 0.51, "grad_norm": 1.3164957761764526, "learning_rate": 5.096685701827022e-06, "loss": 0.5502, "step": 7886 }, { "epoch": 0.51, "grad_norm": 1.2050776481628418, "learning_rate": 5.095640579192428e-06, "loss": 0.5497, "step": 7887 }, { "epoch": 0.51, "grad_norm": 1.2375651597976685, "learning_rate": 5.094595452377629e-06, "loss": 0.5451, "step": 7888 }, { "epoch": 0.51, "grad_norm": 1.1598767042160034, "learning_rate": 5.093550321428308e-06, "loss": 0.5894, "step": 7889 }, { "epoch": 0.51, "grad_norm": 1.2332489490509033, "learning_rate": 5.092505186390143e-06, "loss": 0.5775, "step": 7890 }, { "epoch": 0.51, "grad_norm": 1.1293983459472656, "learning_rate": 5.0914600473088146e-06, "loss": 0.5418, "step": 7891 }, { "epoch": 0.51, "grad_norm": 1.0705021619796753, "learning_rate": 5.0904149042300045e-06, "loss": 0.5391, "step": 7892 }, { "epoch": 0.51, "grad_norm": 1.2931467294692993, "learning_rate": 5.089369757199393e-06, "loss": 0.5341, "step": 7893 }, { "epoch": 0.51, "grad_norm": 1.2430609464645386, "learning_rate": 5.088324606262659e-06, "loss": 0.5571, "step": 7894 }, { "epoch": 0.51, "grad_norm": 1.117497444152832, "learning_rate": 5.087279451465484e-06, "loss": 0.488, "step": 7895 }, { "epoch": 0.51, "grad_norm": 1.1040143966674805, "learning_rate": 5.08623429285355e-06, "loss": 0.5439, "step": 7896 }, { "epoch": 0.51, "grad_norm": 1.1518943309783936, "learning_rate": 5.085189130472536e-06, "loss": 0.5609, "step": 7897 }, { "epoch": 0.51, "grad_norm": 1.1970558166503906, "learning_rate": 5.084143964368128e-06, "loss": 0.5579, "step": 7898 }, { "epoch": 0.51, "grad_norm": 1.076460838317871, "learning_rate": 5.083098794586001e-06, "loss": 0.5409, "step": 7899 }, { "epoch": 0.51, "grad_norm": 1.1624113321304321, "learning_rate": 5.082053621171842e-06, "loss": 0.4953, "step": 7900 }, { "epoch": 0.51, "grad_norm": 1.1238503456115723, "learning_rate": 5.081008444171332e-06, "loss": 0.5657, "step": 7901 }, { "epoch": 0.51, "grad_norm": 1.151694416999817, "learning_rate": 5.079963263630149e-06, "loss": 0.5296, "step": 7902 }, { "epoch": 0.51, "grad_norm": 1.0836559534072876, "learning_rate": 5.07891807959398e-06, "loss": 0.4943, "step": 7903 }, { "epoch": 0.51, "grad_norm": 1.0992660522460938, "learning_rate": 5.077872892108505e-06, "loss": 0.5035, "step": 7904 }, { "epoch": 0.51, "grad_norm": 1.3422142267227173, "learning_rate": 5.076827701219406e-06, "loss": 0.5434, "step": 7905 }, { "epoch": 0.51, "grad_norm": 1.2008804082870483, "learning_rate": 5.075782506972366e-06, "loss": 0.6389, "step": 7906 }, { "epoch": 0.51, "grad_norm": 1.234268307685852, "learning_rate": 5.0747373094130705e-06, "loss": 0.5238, "step": 7907 }, { "epoch": 0.51, "grad_norm": 1.1478403806686401, "learning_rate": 5.073692108587198e-06, "loss": 0.5416, "step": 7908 }, { "epoch": 0.51, "grad_norm": 1.1477307081222534, "learning_rate": 5.072646904540436e-06, "loss": 0.5241, "step": 7909 }, { "epoch": 0.51, "grad_norm": 1.1897927522659302, "learning_rate": 5.071601697318462e-06, "loss": 0.5217, "step": 7910 }, { "epoch": 0.51, "grad_norm": 1.2710164785385132, "learning_rate": 5.070556486966965e-06, "loss": 0.55, "step": 7911 }, { "epoch": 0.51, "grad_norm": 1.0778957605361938, "learning_rate": 5.069511273531625e-06, "loss": 0.5202, "step": 7912 }, { "epoch": 0.51, "grad_norm": 1.1506311893463135, "learning_rate": 5.068466057058127e-06, "loss": 0.4994, "step": 7913 }, { "epoch": 0.51, "grad_norm": 1.1184747219085693, "learning_rate": 5.067420837592154e-06, "loss": 0.5301, "step": 7914 }, { "epoch": 0.51, "grad_norm": 1.119537353515625, "learning_rate": 5.066375615179389e-06, "loss": 0.5284, "step": 7915 }, { "epoch": 0.51, "grad_norm": 1.2381110191345215, "learning_rate": 5.065330389865519e-06, "loss": 0.5403, "step": 7916 }, { "epoch": 0.51, "grad_norm": 1.2164466381072998, "learning_rate": 5.064285161696225e-06, "loss": 0.5496, "step": 7917 }, { "epoch": 0.51, "grad_norm": 1.1371663808822632, "learning_rate": 5.063239930717193e-06, "loss": 0.5279, "step": 7918 }, { "epoch": 0.51, "grad_norm": 1.1311156749725342, "learning_rate": 5.062194696974107e-06, "loss": 0.5692, "step": 7919 }, { "epoch": 0.51, "grad_norm": 1.3084834814071655, "learning_rate": 5.0611494605126506e-06, "loss": 0.5218, "step": 7920 }, { "epoch": 0.51, "grad_norm": 1.2040742635726929, "learning_rate": 5.060104221378509e-06, "loss": 0.5782, "step": 7921 }, { "epoch": 0.51, "grad_norm": 1.1938107013702393, "learning_rate": 5.059058979617368e-06, "loss": 0.5546, "step": 7922 }, { "epoch": 0.51, "grad_norm": 1.303686499595642, "learning_rate": 5.0580137352749105e-06, "loss": 0.5686, "step": 7923 }, { "epoch": 0.51, "grad_norm": 1.1102150678634644, "learning_rate": 5.0569684883968215e-06, "loss": 0.4939, "step": 7924 }, { "epoch": 0.51, "grad_norm": 1.0308175086975098, "learning_rate": 5.055923239028788e-06, "loss": 0.4518, "step": 7925 }, { "epoch": 0.51, "grad_norm": 1.3960767984390259, "learning_rate": 5.054877987216494e-06, "loss": 0.5519, "step": 7926 }, { "epoch": 0.51, "grad_norm": 1.2574938535690308, "learning_rate": 5.053832733005625e-06, "loss": 0.5066, "step": 7927 }, { "epoch": 0.51, "grad_norm": 1.1342564821243286, "learning_rate": 5.052787476441866e-06, "loss": 0.5014, "step": 7928 }, { "epoch": 0.51, "grad_norm": 1.3583768606185913, "learning_rate": 5.0517422175709015e-06, "loss": 0.5543, "step": 7929 }, { "epoch": 0.51, "grad_norm": 1.1602517366409302, "learning_rate": 5.050696956438419e-06, "loss": 0.5245, "step": 7930 }, { "epoch": 0.51, "grad_norm": 1.2659987211227417, "learning_rate": 5.049651693090104e-06, "loss": 0.5407, "step": 7931 }, { "epoch": 0.51, "grad_norm": 1.1699622869491577, "learning_rate": 5.0486064275716405e-06, "loss": 0.5475, "step": 7932 }, { "epoch": 0.51, "grad_norm": 1.0665109157562256, "learning_rate": 5.047561159928716e-06, "loss": 0.5105, "step": 7933 }, { "epoch": 0.51, "grad_norm": 1.1104710102081299, "learning_rate": 5.046515890207015e-06, "loss": 0.5071, "step": 7934 }, { "epoch": 0.51, "grad_norm": 1.4232484102249146, "learning_rate": 5.045470618452226e-06, "loss": 0.5774, "step": 7935 }, { "epoch": 0.51, "grad_norm": 1.2522753477096558, "learning_rate": 5.044425344710033e-06, "loss": 0.5385, "step": 7936 }, { "epoch": 0.51, "grad_norm": 1.1320523023605347, "learning_rate": 5.043380069026123e-06, "loss": 0.5181, "step": 7937 }, { "epoch": 0.51, "grad_norm": 1.3087772130966187, "learning_rate": 5.042334791446181e-06, "loss": 0.5596, "step": 7938 }, { "epoch": 0.51, "grad_norm": 1.2084324359893799, "learning_rate": 5.041289512015896e-06, "loss": 0.5996, "step": 7939 }, { "epoch": 0.51, "grad_norm": 1.1220135688781738, "learning_rate": 5.040244230780952e-06, "loss": 0.562, "step": 7940 }, { "epoch": 0.51, "grad_norm": 1.2238860130310059, "learning_rate": 5.039198947787036e-06, "loss": 0.5239, "step": 7941 }, { "epoch": 0.51, "grad_norm": 1.0966215133666992, "learning_rate": 5.038153663079837e-06, "loss": 0.4861, "step": 7942 }, { "epoch": 0.51, "grad_norm": 1.0901638269424438, "learning_rate": 5.0371083767050375e-06, "loss": 0.5058, "step": 7943 }, { "epoch": 0.51, "grad_norm": 1.2364239692687988, "learning_rate": 5.036063088708327e-06, "loss": 0.5595, "step": 7944 }, { "epoch": 0.51, "grad_norm": 1.17795991897583, "learning_rate": 5.035017799135393e-06, "loss": 0.5779, "step": 7945 }, { "epoch": 0.51, "grad_norm": 1.4046239852905273, "learning_rate": 5.0339725080319215e-06, "loss": 0.5695, "step": 7946 }, { "epoch": 0.51, "grad_norm": 1.251436710357666, "learning_rate": 5.032927215443598e-06, "loss": 0.5078, "step": 7947 }, { "epoch": 0.51, "grad_norm": 1.18096125125885, "learning_rate": 5.0318819214161126e-06, "loss": 0.5565, "step": 7948 }, { "epoch": 0.51, "grad_norm": 1.1384636163711548, "learning_rate": 5.030836625995149e-06, "loss": 0.54, "step": 7949 }, { "epoch": 0.51, "grad_norm": 1.2403874397277832, "learning_rate": 5.0297913292263965e-06, "loss": 0.5039, "step": 7950 }, { "epoch": 0.51, "grad_norm": 1.1689939498901367, "learning_rate": 5.028746031155544e-06, "loss": 0.5128, "step": 7951 }, { "epoch": 0.51, "grad_norm": 1.192336916923523, "learning_rate": 5.027700731828273e-06, "loss": 0.5561, "step": 7952 }, { "epoch": 0.51, "grad_norm": 1.0761829614639282, "learning_rate": 5.026655431290277e-06, "loss": 0.5297, "step": 7953 }, { "epoch": 0.51, "grad_norm": 1.1555906534194946, "learning_rate": 5.025610129587241e-06, "loss": 0.566, "step": 7954 }, { "epoch": 0.51, "grad_norm": 1.2136343717575073, "learning_rate": 5.024564826764851e-06, "loss": 0.5334, "step": 7955 }, { "epoch": 0.51, "grad_norm": 1.1500943899154663, "learning_rate": 5.023519522868796e-06, "loss": 0.5514, "step": 7956 }, { "epoch": 0.51, "grad_norm": 1.119268774986267, "learning_rate": 5.022474217944764e-06, "loss": 0.5538, "step": 7957 }, { "epoch": 0.51, "grad_norm": 1.1716351509094238, "learning_rate": 5.0214289120384415e-06, "loss": 0.5946, "step": 7958 }, { "epoch": 0.51, "grad_norm": 1.212432622909546, "learning_rate": 5.020383605195517e-06, "loss": 0.5123, "step": 7959 }, { "epoch": 0.51, "grad_norm": 1.2881815433502197, "learning_rate": 5.019338297461678e-06, "loss": 0.5277, "step": 7960 }, { "epoch": 0.51, "grad_norm": 1.1460622549057007, "learning_rate": 5.01829298888261e-06, "loss": 0.5874, "step": 7961 }, { "epoch": 0.51, "grad_norm": 1.1817296743392944, "learning_rate": 5.017247679504004e-06, "loss": 0.5735, "step": 7962 }, { "epoch": 0.51, "grad_norm": 1.1968015432357788, "learning_rate": 5.016202369371547e-06, "loss": 0.5342, "step": 7963 }, { "epoch": 0.51, "grad_norm": 1.1453280448913574, "learning_rate": 5.015157058530925e-06, "loss": 0.4967, "step": 7964 }, { "epoch": 0.51, "grad_norm": 1.1846084594726562, "learning_rate": 5.014111747027827e-06, "loss": 0.5398, "step": 7965 }, { "epoch": 0.51, "grad_norm": 1.182232141494751, "learning_rate": 5.0130664349079426e-06, "loss": 0.5569, "step": 7966 }, { "epoch": 0.51, "grad_norm": 1.2124745845794678, "learning_rate": 5.012021122216957e-06, "loss": 0.5441, "step": 7967 }, { "epoch": 0.51, "grad_norm": 1.1674742698669434, "learning_rate": 5.010975809000558e-06, "loss": 0.5754, "step": 7968 }, { "epoch": 0.51, "grad_norm": 1.1914502382278442, "learning_rate": 5.009930495304437e-06, "loss": 0.5179, "step": 7969 }, { "epoch": 0.51, "grad_norm": 1.2866954803466797, "learning_rate": 5.008885181174278e-06, "loss": 0.5737, "step": 7970 }, { "epoch": 0.51, "grad_norm": 1.142993450164795, "learning_rate": 5.0078398666557705e-06, "loss": 0.5335, "step": 7971 }, { "epoch": 0.51, "grad_norm": 1.1476200819015503, "learning_rate": 5.006794551794604e-06, "loss": 0.4846, "step": 7972 }, { "epoch": 0.51, "grad_norm": 1.0798923969268799, "learning_rate": 5.0057492366364644e-06, "loss": 0.5346, "step": 7973 }, { "epoch": 0.51, "grad_norm": 1.1260281801223755, "learning_rate": 5.00470392122704e-06, "loss": 0.5464, "step": 7974 }, { "epoch": 0.51, "grad_norm": 1.1811579465866089, "learning_rate": 5.003658605612021e-06, "loss": 0.5519, "step": 7975 }, { "epoch": 0.51, "grad_norm": 1.189693808555603, "learning_rate": 5.002613289837092e-06, "loss": 0.5713, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.1127758026123047, "learning_rate": 5.001567973947943e-06, "loss": 0.5748, "step": 7977 }, { "epoch": 0.51, "grad_norm": 1.0631788969039917, "learning_rate": 5.000522657990263e-06, "loss": 0.4835, "step": 7978 }, { "epoch": 0.52, "grad_norm": 1.1634212732315063, "learning_rate": 4.9994773420097395e-06, "loss": 0.5613, "step": 7979 }, { "epoch": 0.52, "grad_norm": 1.0601754188537598, "learning_rate": 4.998432026052059e-06, "loss": 0.5417, "step": 7980 }, { "epoch": 0.52, "grad_norm": 1.2033010721206665, "learning_rate": 4.99738671016291e-06, "loss": 0.5393, "step": 7981 }, { "epoch": 0.52, "grad_norm": 1.2284456491470337, "learning_rate": 4.996341394387981e-06, "loss": 0.5028, "step": 7982 }, { "epoch": 0.52, "grad_norm": 1.2693986892700195, "learning_rate": 4.9952960787729606e-06, "loss": 0.5836, "step": 7983 }, { "epoch": 0.52, "grad_norm": 1.0386279821395874, "learning_rate": 4.994250763363536e-06, "loss": 0.4715, "step": 7984 }, { "epoch": 0.52, "grad_norm": 1.3388015031814575, "learning_rate": 4.993205448205399e-06, "loss": 0.4644, "step": 7985 }, { "epoch": 0.52, "grad_norm": 1.3198546171188354, "learning_rate": 4.992160133344231e-06, "loss": 0.5798, "step": 7986 }, { "epoch": 0.52, "grad_norm": 1.158981442451477, "learning_rate": 4.991114818825724e-06, "loss": 0.5516, "step": 7987 }, { "epoch": 0.52, "grad_norm": 1.1682111024856567, "learning_rate": 4.990069504695565e-06, "loss": 0.5557, "step": 7988 }, { "epoch": 0.52, "grad_norm": 1.0854185819625854, "learning_rate": 4.989024190999443e-06, "loss": 0.5414, "step": 7989 }, { "epoch": 0.52, "grad_norm": 1.1425637006759644, "learning_rate": 4.987978877783044e-06, "loss": 0.5169, "step": 7990 }, { "epoch": 0.52, "grad_norm": 1.0998075008392334, "learning_rate": 4.98693356509206e-06, "loss": 0.5182, "step": 7991 }, { "epoch": 0.52, "grad_norm": 1.1531085968017578, "learning_rate": 4.985888252972174e-06, "loss": 0.5064, "step": 7992 }, { "epoch": 0.52, "grad_norm": 1.144025444984436, "learning_rate": 4.984842941469077e-06, "loss": 0.5819, "step": 7993 }, { "epoch": 0.52, "grad_norm": 1.1697629690170288, "learning_rate": 4.983797630628455e-06, "loss": 0.5067, "step": 7994 }, { "epoch": 0.52, "grad_norm": 1.304699420928955, "learning_rate": 4.982752320495997e-06, "loss": 0.5471, "step": 7995 }, { "epoch": 0.52, "grad_norm": 1.282717227935791, "learning_rate": 4.981707011117391e-06, "loss": 0.5451, "step": 7996 }, { "epoch": 0.52, "grad_norm": 1.2082620859146118, "learning_rate": 4.980661702538326e-06, "loss": 0.5301, "step": 7997 }, { "epoch": 0.52, "grad_norm": 1.1405360698699951, "learning_rate": 4.979616394804485e-06, "loss": 0.5359, "step": 7998 }, { "epoch": 0.52, "grad_norm": 1.28224515914917, "learning_rate": 4.978571087961559e-06, "loss": 0.5465, "step": 7999 }, { "epoch": 0.52, "grad_norm": 1.2004926204681396, "learning_rate": 4.977525782055238e-06, "loss": 0.547, "step": 8000 }, { "epoch": 0.52, "grad_norm": 1.056803584098816, "learning_rate": 4.9764804771312045e-06, "loss": 0.5205, "step": 8001 }, { "epoch": 0.52, "grad_norm": 1.2165778875350952, "learning_rate": 4.975435173235151e-06, "loss": 0.5854, "step": 8002 }, { "epoch": 0.52, "grad_norm": 1.2333409786224365, "learning_rate": 4.974389870412761e-06, "loss": 0.596, "step": 8003 }, { "epoch": 0.52, "grad_norm": 1.152775764465332, "learning_rate": 4.973344568709724e-06, "loss": 0.5272, "step": 8004 }, { "epoch": 0.52, "grad_norm": 1.1680090427398682, "learning_rate": 4.9722992681717284e-06, "loss": 0.5097, "step": 8005 }, { "epoch": 0.52, "grad_norm": 1.1033055782318115, "learning_rate": 4.971253968844458e-06, "loss": 0.5119, "step": 8006 }, { "epoch": 0.52, "grad_norm": 1.3170346021652222, "learning_rate": 4.9702086707736034e-06, "loss": 0.5623, "step": 8007 }, { "epoch": 0.52, "grad_norm": 1.0875848531723022, "learning_rate": 4.969163374004852e-06, "loss": 0.5515, "step": 8008 }, { "epoch": 0.52, "grad_norm": 1.151620626449585, "learning_rate": 4.96811807858389e-06, "loss": 0.543, "step": 8009 }, { "epoch": 0.52, "grad_norm": 1.0720423460006714, "learning_rate": 4.967072784556403e-06, "loss": 0.5088, "step": 8010 }, { "epoch": 0.52, "grad_norm": 1.144519567489624, "learning_rate": 4.96602749196808e-06, "loss": 0.5522, "step": 8011 }, { "epoch": 0.52, "grad_norm": 1.1481246948242188, "learning_rate": 4.964982200864608e-06, "loss": 0.4897, "step": 8012 }, { "epoch": 0.52, "grad_norm": 1.1825557947158813, "learning_rate": 4.963936911291673e-06, "loss": 0.492, "step": 8013 }, { "epoch": 0.52, "grad_norm": 1.1318374872207642, "learning_rate": 4.962891623294965e-06, "loss": 0.5211, "step": 8014 }, { "epoch": 0.52, "grad_norm": 1.0783578157424927, "learning_rate": 4.961846336920167e-06, "loss": 0.5179, "step": 8015 }, { "epoch": 0.52, "grad_norm": 1.208152174949646, "learning_rate": 4.960801052212966e-06, "loss": 0.5137, "step": 8016 }, { "epoch": 0.52, "grad_norm": 1.1784032583236694, "learning_rate": 4.95975576921905e-06, "loss": 0.5195, "step": 8017 }, { "epoch": 0.52, "grad_norm": 1.1918156147003174, "learning_rate": 4.958710487984106e-06, "loss": 0.5482, "step": 8018 }, { "epoch": 0.52, "grad_norm": 1.148669958114624, "learning_rate": 4.95766520855382e-06, "loss": 0.5408, "step": 8019 }, { "epoch": 0.52, "grad_norm": 1.127608299255371, "learning_rate": 4.95661993097388e-06, "loss": 0.528, "step": 8020 }, { "epoch": 0.52, "grad_norm": 1.23704195022583, "learning_rate": 4.9555746552899696e-06, "loss": 0.5011, "step": 8021 }, { "epoch": 0.52, "grad_norm": 1.1272329092025757, "learning_rate": 4.954529381547776e-06, "loss": 0.5553, "step": 8022 }, { "epoch": 0.52, "grad_norm": 1.1693298816680908, "learning_rate": 4.953484109792985e-06, "loss": 0.5096, "step": 8023 }, { "epoch": 0.52, "grad_norm": 1.1965320110321045, "learning_rate": 4.952438840071285e-06, "loss": 0.5692, "step": 8024 }, { "epoch": 0.52, "grad_norm": 1.2014251947402954, "learning_rate": 4.95139357242836e-06, "loss": 0.5248, "step": 8025 }, { "epoch": 0.52, "grad_norm": 1.185165524482727, "learning_rate": 4.950348306909898e-06, "loss": 0.5188, "step": 8026 }, { "epoch": 0.52, "grad_norm": 1.1560256481170654, "learning_rate": 4.949303043561582e-06, "loss": 0.5325, "step": 8027 }, { "epoch": 0.52, "grad_norm": 1.157517671585083, "learning_rate": 4.948257782429099e-06, "loss": 0.525, "step": 8028 }, { "epoch": 0.52, "grad_norm": 1.1206475496292114, "learning_rate": 4.947212523558136e-06, "loss": 0.6118, "step": 8029 }, { "epoch": 0.52, "grad_norm": 1.0966618061065674, "learning_rate": 4.946167266994376e-06, "loss": 0.4799, "step": 8030 }, { "epoch": 0.52, "grad_norm": 1.2237563133239746, "learning_rate": 4.945122012783506e-06, "loss": 0.5869, "step": 8031 }, { "epoch": 0.52, "grad_norm": 1.2203022241592407, "learning_rate": 4.944076760971214e-06, "loss": 0.5612, "step": 8032 }, { "epoch": 0.52, "grad_norm": 1.1289479732513428, "learning_rate": 4.943031511603179e-06, "loss": 0.4969, "step": 8033 }, { "epoch": 0.52, "grad_norm": 1.232291340827942, "learning_rate": 4.941986264725091e-06, "loss": 0.5311, "step": 8034 }, { "epoch": 0.52, "grad_norm": 1.1823241710662842, "learning_rate": 4.940941020382633e-06, "loss": 0.554, "step": 8035 }, { "epoch": 0.52, "grad_norm": 1.1521265506744385, "learning_rate": 4.9398957786214914e-06, "loss": 0.4538, "step": 8036 }, { "epoch": 0.52, "grad_norm": 1.1595228910446167, "learning_rate": 4.93885053948735e-06, "loss": 0.5318, "step": 8037 }, { "epoch": 0.52, "grad_norm": 1.2547328472137451, "learning_rate": 4.937805303025895e-06, "loss": 0.5479, "step": 8038 }, { "epoch": 0.52, "grad_norm": 1.0824640989303589, "learning_rate": 4.936760069282809e-06, "loss": 0.5192, "step": 8039 }, { "epoch": 0.52, "grad_norm": 1.2241911888122559, "learning_rate": 4.935714838303776e-06, "loss": 0.5014, "step": 8040 }, { "epoch": 0.52, "grad_norm": 1.2408641576766968, "learning_rate": 4.934669610134482e-06, "loss": 0.5623, "step": 8041 }, { "epoch": 0.52, "grad_norm": 1.2939774990081787, "learning_rate": 4.9336243848206115e-06, "loss": 0.5275, "step": 8042 }, { "epoch": 0.52, "grad_norm": 1.1179804801940918, "learning_rate": 4.932579162407849e-06, "loss": 0.4891, "step": 8043 }, { "epoch": 0.52, "grad_norm": 1.180972933769226, "learning_rate": 4.931533942941875e-06, "loss": 0.5067, "step": 8044 }, { "epoch": 0.52, "grad_norm": 1.0773169994354248, "learning_rate": 4.930488726468377e-06, "loss": 0.5029, "step": 8045 }, { "epoch": 0.52, "grad_norm": 1.0480151176452637, "learning_rate": 4.9294435130330355e-06, "loss": 0.5204, "step": 8046 }, { "epoch": 0.52, "grad_norm": 1.0881459712982178, "learning_rate": 4.928398302681539e-06, "loss": 0.5161, "step": 8047 }, { "epoch": 0.52, "grad_norm": 1.1145607233047485, "learning_rate": 4.9273530954595664e-06, "loss": 0.5766, "step": 8048 }, { "epoch": 0.52, "grad_norm": 1.1776846647262573, "learning_rate": 4.926307891412803e-06, "loss": 0.5235, "step": 8049 }, { "epoch": 0.52, "grad_norm": 1.1971967220306396, "learning_rate": 4.925262690586931e-06, "loss": 0.5916, "step": 8050 }, { "epoch": 0.52, "grad_norm": 1.315354824066162, "learning_rate": 4.9242174930276345e-06, "loss": 0.5637, "step": 8051 }, { "epoch": 0.52, "grad_norm": 1.2763997316360474, "learning_rate": 4.923172298780595e-06, "loss": 0.5709, "step": 8052 }, { "epoch": 0.52, "grad_norm": 1.0825657844543457, "learning_rate": 4.922127107891497e-06, "loss": 0.5152, "step": 8053 }, { "epoch": 0.52, "grad_norm": 1.2449181079864502, "learning_rate": 4.921081920406021e-06, "loss": 0.5209, "step": 8054 }, { "epoch": 0.52, "grad_norm": 1.1156026124954224, "learning_rate": 4.9200367363698525e-06, "loss": 0.5318, "step": 8055 }, { "epoch": 0.52, "grad_norm": 1.1292692422866821, "learning_rate": 4.918991555828672e-06, "loss": 0.5658, "step": 8056 }, { "epoch": 0.52, "grad_norm": 1.211144208908081, "learning_rate": 4.91794637882816e-06, "loss": 0.5535, "step": 8057 }, { "epoch": 0.52, "grad_norm": 1.1465901136398315, "learning_rate": 4.9169012054139995e-06, "loss": 0.5598, "step": 8058 }, { "epoch": 0.52, "grad_norm": 1.1969743967056274, "learning_rate": 4.915856035631874e-06, "loss": 0.5271, "step": 8059 }, { "epoch": 0.52, "grad_norm": 1.1161463260650635, "learning_rate": 4.914810869527464e-06, "loss": 0.5005, "step": 8060 }, { "epoch": 0.52, "grad_norm": 1.1545350551605225, "learning_rate": 4.9137657071464525e-06, "loss": 0.5185, "step": 8061 }, { "epoch": 0.52, "grad_norm": 1.064485788345337, "learning_rate": 4.9127205485345184e-06, "loss": 0.5248, "step": 8062 }, { "epoch": 0.52, "grad_norm": 1.186637282371521, "learning_rate": 4.911675393737343e-06, "loss": 0.5292, "step": 8063 }, { "epoch": 0.52, "grad_norm": 1.129244089126587, "learning_rate": 4.910630242800609e-06, "loss": 0.5777, "step": 8064 }, { "epoch": 0.52, "grad_norm": 1.1535934209823608, "learning_rate": 4.9095850957699955e-06, "loss": 0.573, "step": 8065 }, { "epoch": 0.52, "grad_norm": 1.2106873989105225, "learning_rate": 4.9085399526911846e-06, "loss": 0.5485, "step": 8066 }, { "epoch": 0.52, "grad_norm": 1.0749443769454956, "learning_rate": 4.907494813609858e-06, "loss": 0.5434, "step": 8067 }, { "epoch": 0.52, "grad_norm": 1.2070767879486084, "learning_rate": 4.906449678571694e-06, "loss": 0.5261, "step": 8068 }, { "epoch": 0.52, "grad_norm": 1.2578260898590088, "learning_rate": 4.905404547622372e-06, "loss": 0.5352, "step": 8069 }, { "epoch": 0.52, "grad_norm": 1.1068708896636963, "learning_rate": 4.904359420807574e-06, "loss": 0.5261, "step": 8070 }, { "epoch": 0.52, "grad_norm": 1.180233359336853, "learning_rate": 4.903314298172978e-06, "loss": 0.6084, "step": 8071 }, { "epoch": 0.52, "grad_norm": 1.070887804031372, "learning_rate": 4.902269179764265e-06, "loss": 0.5134, "step": 8072 }, { "epoch": 0.52, "grad_norm": 1.1202895641326904, "learning_rate": 4.901224065627116e-06, "loss": 0.5166, "step": 8073 }, { "epoch": 0.52, "grad_norm": 1.1602716445922852, "learning_rate": 4.900178955807206e-06, "loss": 0.5248, "step": 8074 }, { "epoch": 0.52, "grad_norm": 1.1545467376708984, "learning_rate": 4.899133850350217e-06, "loss": 0.5558, "step": 8075 }, { "epoch": 0.52, "grad_norm": 1.1013250350952148, "learning_rate": 4.898088749301826e-06, "loss": 0.567, "step": 8076 }, { "epoch": 0.52, "grad_norm": 1.1403826475143433, "learning_rate": 4.897043652707714e-06, "loss": 0.5626, "step": 8077 }, { "epoch": 0.52, "grad_norm": 1.1710419654846191, "learning_rate": 4.895998560613558e-06, "loss": 0.5794, "step": 8078 }, { "epoch": 0.52, "grad_norm": 1.021107792854309, "learning_rate": 4.8949534730650376e-06, "loss": 0.4985, "step": 8079 }, { "epoch": 0.52, "grad_norm": 1.259486198425293, "learning_rate": 4.893908390107829e-06, "loss": 0.5798, "step": 8080 }, { "epoch": 0.52, "grad_norm": 1.1368871927261353, "learning_rate": 4.892863311787611e-06, "loss": 0.5229, "step": 8081 }, { "epoch": 0.52, "grad_norm": 1.1507374048233032, "learning_rate": 4.89181823815006e-06, "loss": 0.5402, "step": 8082 }, { "epoch": 0.52, "grad_norm": 1.1110572814941406, "learning_rate": 4.890773169240856e-06, "loss": 0.5045, "step": 8083 }, { "epoch": 0.52, "grad_norm": 1.2732717990875244, "learning_rate": 4.889728105105674e-06, "loss": 0.5857, "step": 8084 }, { "epoch": 0.52, "grad_norm": 1.1284964084625244, "learning_rate": 4.888683045790194e-06, "loss": 0.5557, "step": 8085 }, { "epoch": 0.52, "grad_norm": 1.2133032083511353, "learning_rate": 4.88763799134009e-06, "loss": 0.5126, "step": 8086 }, { "epoch": 0.52, "grad_norm": 1.1771892309188843, "learning_rate": 4.886592941801039e-06, "loss": 0.5016, "step": 8087 }, { "epoch": 0.52, "grad_norm": 1.1400082111358643, "learning_rate": 4.885547897218718e-06, "loss": 0.5088, "step": 8088 }, { "epoch": 0.52, "grad_norm": 1.1331373453140259, "learning_rate": 4.8845028576388035e-06, "loss": 0.512, "step": 8089 }, { "epoch": 0.52, "grad_norm": 1.182565450668335, "learning_rate": 4.883457823106972e-06, "loss": 0.5641, "step": 8090 }, { "epoch": 0.52, "grad_norm": 1.1512490510940552, "learning_rate": 4.882412793668896e-06, "loss": 0.4985, "step": 8091 }, { "epoch": 0.52, "grad_norm": 1.1258251667022705, "learning_rate": 4.881367769370255e-06, "loss": 0.5507, "step": 8092 }, { "epoch": 0.52, "grad_norm": 1.1079968214035034, "learning_rate": 4.8803227502567215e-06, "loss": 0.5109, "step": 8093 }, { "epoch": 0.52, "grad_norm": 1.2340810298919678, "learning_rate": 4.879277736373973e-06, "loss": 0.5304, "step": 8094 }, { "epoch": 0.52, "grad_norm": 1.088278889656067, "learning_rate": 4.878232727767684e-06, "loss": 0.5035, "step": 8095 }, { "epoch": 0.52, "grad_norm": 1.0778921842575073, "learning_rate": 4.877187724483527e-06, "loss": 0.5163, "step": 8096 }, { "epoch": 0.52, "grad_norm": 1.0685818195343018, "learning_rate": 4.8761427265671775e-06, "loss": 0.4945, "step": 8097 }, { "epoch": 0.52, "grad_norm": 1.1153656244277954, "learning_rate": 4.875097734064311e-06, "loss": 0.5358, "step": 8098 }, { "epoch": 0.52, "grad_norm": 1.2786684036254883, "learning_rate": 4.874052747020599e-06, "loss": 0.5769, "step": 8099 }, { "epoch": 0.52, "grad_norm": 1.1152349710464478, "learning_rate": 4.873007765481717e-06, "loss": 0.5213, "step": 8100 }, { "epoch": 0.52, "grad_norm": 1.095079779624939, "learning_rate": 4.871962789493337e-06, "loss": 0.489, "step": 8101 }, { "epoch": 0.52, "grad_norm": 1.1407055854797363, "learning_rate": 4.8709178191011355e-06, "loss": 0.5455, "step": 8102 }, { "epoch": 0.52, "grad_norm": 1.0632764101028442, "learning_rate": 4.8698728543507815e-06, "loss": 0.4387, "step": 8103 }, { "epoch": 0.52, "grad_norm": 1.3286755084991455, "learning_rate": 4.86882789528795e-06, "loss": 0.5275, "step": 8104 }, { "epoch": 0.52, "grad_norm": 1.1947300434112549, "learning_rate": 4.867782941958311e-06, "loss": 0.5456, "step": 8105 }, { "epoch": 0.52, "grad_norm": 1.1891266107559204, "learning_rate": 4.86673799440754e-06, "loss": 0.5616, "step": 8106 }, { "epoch": 0.52, "grad_norm": 1.2462875843048096, "learning_rate": 4.865693052681306e-06, "loss": 0.5206, "step": 8107 }, { "epoch": 0.52, "grad_norm": 1.1284804344177246, "learning_rate": 4.864648116825286e-06, "loss": 0.5571, "step": 8108 }, { "epoch": 0.52, "grad_norm": 1.0678901672363281, "learning_rate": 4.863603186885144e-06, "loss": 0.5103, "step": 8109 }, { "epoch": 0.52, "grad_norm": 1.147396445274353, "learning_rate": 4.862558262906555e-06, "loss": 0.5429, "step": 8110 }, { "epoch": 0.52, "grad_norm": 1.2267422676086426, "learning_rate": 4.8615133449351905e-06, "loss": 0.4907, "step": 8111 }, { "epoch": 0.52, "grad_norm": 1.2537461519241333, "learning_rate": 4.860468433016719e-06, "loss": 0.5184, "step": 8112 }, { "epoch": 0.52, "grad_norm": 1.2856637239456177, "learning_rate": 4.859423527196814e-06, "loss": 0.5146, "step": 8113 }, { "epoch": 0.52, "grad_norm": 1.1927989721298218, "learning_rate": 4.858378627521145e-06, "loss": 0.5747, "step": 8114 }, { "epoch": 0.52, "grad_norm": 1.2912535667419434, "learning_rate": 4.8573337340353795e-06, "loss": 0.5811, "step": 8115 }, { "epoch": 0.52, "grad_norm": 1.1891772747039795, "learning_rate": 4.856288846785188e-06, "loss": 0.5579, "step": 8116 }, { "epoch": 0.52, "grad_norm": 1.209983468055725, "learning_rate": 4.85524396581624e-06, "loss": 0.5176, "step": 8117 }, { "epoch": 0.52, "grad_norm": 1.0497158765792847, "learning_rate": 4.8541990911742044e-06, "loss": 0.4968, "step": 8118 }, { "epoch": 0.52, "grad_norm": 1.2283934354782104, "learning_rate": 4.853154222904751e-06, "loss": 0.5397, "step": 8119 }, { "epoch": 0.52, "grad_norm": 1.2173794507980347, "learning_rate": 4.8521093610535495e-06, "loss": 0.5494, "step": 8120 }, { "epoch": 0.52, "grad_norm": 1.1005741357803345, "learning_rate": 4.851064505666265e-06, "loss": 0.4859, "step": 8121 }, { "epoch": 0.52, "grad_norm": 1.124204158782959, "learning_rate": 4.850019656788565e-06, "loss": 0.5316, "step": 8122 }, { "epoch": 0.52, "grad_norm": 1.277318000793457, "learning_rate": 4.8489748144661205e-06, "loss": 0.5504, "step": 8123 }, { "epoch": 0.52, "grad_norm": 1.0991110801696777, "learning_rate": 4.847929978744596e-06, "loss": 0.5635, "step": 8124 }, { "epoch": 0.52, "grad_norm": 1.1995604038238525, "learning_rate": 4.84688514966966e-06, "loss": 0.5433, "step": 8125 }, { "epoch": 0.52, "grad_norm": 1.113356113433838, "learning_rate": 4.845840327286982e-06, "loss": 0.5214, "step": 8126 }, { "epoch": 0.52, "grad_norm": 1.2388626337051392, "learning_rate": 4.844795511642223e-06, "loss": 0.5884, "step": 8127 }, { "epoch": 0.52, "grad_norm": 1.11895751953125, "learning_rate": 4.843750702781052e-06, "loss": 0.4773, "step": 8128 }, { "epoch": 0.52, "grad_norm": 1.141050100326538, "learning_rate": 4.842705900749134e-06, "loss": 0.5654, "step": 8129 }, { "epoch": 0.52, "grad_norm": 1.2353888750076294, "learning_rate": 4.841661105592135e-06, "loss": 0.5425, "step": 8130 }, { "epoch": 0.52, "grad_norm": 1.3133591413497925, "learning_rate": 4.840616317355724e-06, "loss": 0.5991, "step": 8131 }, { "epoch": 0.52, "grad_norm": 1.2167809009552002, "learning_rate": 4.839571536085559e-06, "loss": 0.5636, "step": 8132 }, { "epoch": 0.52, "grad_norm": 1.2489213943481445, "learning_rate": 4.838526761827309e-06, "loss": 0.5532, "step": 8133 }, { "epoch": 0.53, "grad_norm": 1.211348295211792, "learning_rate": 4.837481994626638e-06, "loss": 0.5524, "step": 8134 }, { "epoch": 0.53, "grad_norm": 1.1485633850097656, "learning_rate": 4.8364372345292086e-06, "loss": 0.5639, "step": 8135 }, { "epoch": 0.53, "grad_norm": 1.0956655740737915, "learning_rate": 4.835392481580688e-06, "loss": 0.5395, "step": 8136 }, { "epoch": 0.53, "grad_norm": 1.205067753791809, "learning_rate": 4.834347735826737e-06, "loss": 0.5306, "step": 8137 }, { "epoch": 0.53, "grad_norm": 1.2427043914794922, "learning_rate": 4.833302997313017e-06, "loss": 0.5117, "step": 8138 }, { "epoch": 0.53, "grad_norm": 1.221765160560608, "learning_rate": 4.832258266085194e-06, "loss": 0.5233, "step": 8139 }, { "epoch": 0.53, "grad_norm": 1.260284423828125, "learning_rate": 4.83121354218893e-06, "loss": 0.5277, "step": 8140 }, { "epoch": 0.53, "grad_norm": 1.204565405845642, "learning_rate": 4.830168825669886e-06, "loss": 0.5875, "step": 8141 }, { "epoch": 0.53, "grad_norm": 1.1563513278961182, "learning_rate": 4.829124116573724e-06, "loss": 0.5148, "step": 8142 }, { "epoch": 0.53, "grad_norm": 1.097246766090393, "learning_rate": 4.8280794149461075e-06, "loss": 0.4911, "step": 8143 }, { "epoch": 0.53, "grad_norm": 1.1957956552505493, "learning_rate": 4.827034720832695e-06, "loss": 0.5291, "step": 8144 }, { "epoch": 0.53, "grad_norm": 1.1308650970458984, "learning_rate": 4.825990034279149e-06, "loss": 0.4904, "step": 8145 }, { "epoch": 0.53, "grad_norm": 1.1819783449172974, "learning_rate": 4.824945355331129e-06, "loss": 0.5813, "step": 8146 }, { "epoch": 0.53, "grad_norm": 1.1003693342208862, "learning_rate": 4.823900684034296e-06, "loss": 0.4916, "step": 8147 }, { "epoch": 0.53, "grad_norm": 1.1535382270812988, "learning_rate": 4.82285602043431e-06, "loss": 0.5717, "step": 8148 }, { "epoch": 0.53, "grad_norm": 1.1476929187774658, "learning_rate": 4.821811364576831e-06, "loss": 0.5291, "step": 8149 }, { "epoch": 0.53, "grad_norm": 1.0988755226135254, "learning_rate": 4.820766716507518e-06, "loss": 0.5004, "step": 8150 }, { "epoch": 0.53, "grad_norm": 1.261107325553894, "learning_rate": 4.819722076272028e-06, "loss": 0.5511, "step": 8151 }, { "epoch": 0.53, "grad_norm": 1.1452308893203735, "learning_rate": 4.818677443916021e-06, "loss": 0.5615, "step": 8152 }, { "epoch": 0.53, "grad_norm": 1.2321391105651855, "learning_rate": 4.817632819485154e-06, "loss": 0.5593, "step": 8153 }, { "epoch": 0.53, "grad_norm": 1.1700726747512817, "learning_rate": 4.816588203025087e-06, "loss": 0.5366, "step": 8154 }, { "epoch": 0.53, "grad_norm": 1.0430941581726074, "learning_rate": 4.815543594581479e-06, "loss": 0.5066, "step": 8155 }, { "epoch": 0.53, "grad_norm": 1.2010185718536377, "learning_rate": 4.814498994199982e-06, "loss": 0.5303, "step": 8156 }, { "epoch": 0.53, "grad_norm": 1.1285237073898315, "learning_rate": 4.813454401926255e-06, "loss": 0.557, "step": 8157 }, { "epoch": 0.53, "grad_norm": 1.1879571676254272, "learning_rate": 4.8124098178059555e-06, "loss": 0.5024, "step": 8158 }, { "epoch": 0.53, "grad_norm": 1.1320900917053223, "learning_rate": 4.811365241884738e-06, "loss": 0.5525, "step": 8159 }, { "epoch": 0.53, "grad_norm": 1.2342571020126343, "learning_rate": 4.81032067420826e-06, "loss": 0.5137, "step": 8160 }, { "epoch": 0.53, "grad_norm": 1.0757826566696167, "learning_rate": 4.809276114822177e-06, "loss": 0.5304, "step": 8161 }, { "epoch": 0.53, "grad_norm": 1.2065194845199585, "learning_rate": 4.808231563772142e-06, "loss": 0.5157, "step": 8162 }, { "epoch": 0.53, "grad_norm": 1.1984323263168335, "learning_rate": 4.8071870211038095e-06, "loss": 0.536, "step": 8163 }, { "epoch": 0.53, "grad_norm": 1.1790776252746582, "learning_rate": 4.806142486862836e-06, "loss": 0.5505, "step": 8164 }, { "epoch": 0.53, "grad_norm": 1.1769795417785645, "learning_rate": 4.805097961094873e-06, "loss": 0.56, "step": 8165 }, { "epoch": 0.53, "grad_norm": 1.1358827352523804, "learning_rate": 4.804053443845576e-06, "loss": 0.5334, "step": 8166 }, { "epoch": 0.53, "grad_norm": 1.1890476942062378, "learning_rate": 4.8030089351605995e-06, "loss": 0.4985, "step": 8167 }, { "epoch": 0.53, "grad_norm": 1.1645351648330688, "learning_rate": 4.8019644350855925e-06, "loss": 0.5571, "step": 8168 }, { "epoch": 0.53, "grad_norm": 1.1778453588485718, "learning_rate": 4.8009199436662084e-06, "loss": 0.5172, "step": 8169 }, { "epoch": 0.53, "grad_norm": 1.1372579336166382, "learning_rate": 4.7998754609481e-06, "loss": 0.5283, "step": 8170 }, { "epoch": 0.53, "grad_norm": 1.1592153310775757, "learning_rate": 4.798830986976919e-06, "loss": 0.5544, "step": 8171 }, { "epoch": 0.53, "grad_norm": 1.1542644500732422, "learning_rate": 4.797786521798319e-06, "loss": 0.556, "step": 8172 }, { "epoch": 0.53, "grad_norm": 1.160913348197937, "learning_rate": 4.796742065457946e-06, "loss": 0.5124, "step": 8173 }, { "epoch": 0.53, "grad_norm": 1.283668875694275, "learning_rate": 4.795697618001453e-06, "loss": 0.5298, "step": 8174 }, { "epoch": 0.53, "grad_norm": 1.129912257194519, "learning_rate": 4.79465317947449e-06, "loss": 0.5192, "step": 8175 }, { "epoch": 0.53, "grad_norm": 1.2058688402175903, "learning_rate": 4.7936087499227065e-06, "loss": 0.5275, "step": 8176 }, { "epoch": 0.53, "grad_norm": 1.1232171058654785, "learning_rate": 4.792564329391752e-06, "loss": 0.4881, "step": 8177 }, { "epoch": 0.53, "grad_norm": 1.115092396736145, "learning_rate": 4.791519917927277e-06, "loss": 0.509, "step": 8178 }, { "epoch": 0.53, "grad_norm": 1.106917142868042, "learning_rate": 4.790475515574927e-06, "loss": 0.5593, "step": 8179 }, { "epoch": 0.53, "grad_norm": 1.2240461111068726, "learning_rate": 4.789431122380353e-06, "loss": 0.52, "step": 8180 }, { "epoch": 0.53, "grad_norm": 1.2879691123962402, "learning_rate": 4.7883867383891995e-06, "loss": 0.5403, "step": 8181 }, { "epoch": 0.53, "grad_norm": 1.0624521970748901, "learning_rate": 4.787342363647116e-06, "loss": 0.5126, "step": 8182 }, { "epoch": 0.53, "grad_norm": 1.1009708642959595, "learning_rate": 4.786297998199751e-06, "loss": 0.5194, "step": 8183 }, { "epoch": 0.53, "grad_norm": 1.267421007156372, "learning_rate": 4.785253642092748e-06, "loss": 0.5513, "step": 8184 }, { "epoch": 0.53, "grad_norm": 1.1172512769699097, "learning_rate": 4.7842092953717545e-06, "loss": 0.5457, "step": 8185 }, { "epoch": 0.53, "grad_norm": 1.1106741428375244, "learning_rate": 4.783164958082415e-06, "loss": 0.4928, "step": 8186 }, { "epoch": 0.53, "grad_norm": 1.0762842893600464, "learning_rate": 4.782120630270377e-06, "loss": 0.528, "step": 8187 }, { "epoch": 0.53, "grad_norm": 1.3020308017730713, "learning_rate": 4.7810763119812834e-06, "loss": 0.5953, "step": 8188 }, { "epoch": 0.53, "grad_norm": 1.1823352575302124, "learning_rate": 4.7800320032607795e-06, "loss": 0.5351, "step": 8189 }, { "epoch": 0.53, "grad_norm": 1.1215722560882568, "learning_rate": 4.77898770415451e-06, "loss": 0.5688, "step": 8190 }, { "epoch": 0.53, "grad_norm": 1.1437937021255493, "learning_rate": 4.777943414708116e-06, "loss": 0.519, "step": 8191 }, { "epoch": 0.53, "grad_norm": 1.1449319124221802, "learning_rate": 4.776899134967244e-06, "loss": 0.5131, "step": 8192 }, { "epoch": 0.53, "grad_norm": 1.0978341102600098, "learning_rate": 4.7758548649775345e-06, "loss": 0.524, "step": 8193 }, { "epoch": 0.53, "grad_norm": 1.3043230772018433, "learning_rate": 4.77481060478463e-06, "loss": 0.5184, "step": 8194 }, { "epoch": 0.53, "grad_norm": 1.1747500896453857, "learning_rate": 4.7737663544341735e-06, "loss": 0.5158, "step": 8195 }, { "epoch": 0.53, "grad_norm": 1.1156448125839233, "learning_rate": 4.772722113971808e-06, "loss": 0.5579, "step": 8196 }, { "epoch": 0.53, "grad_norm": 1.1345710754394531, "learning_rate": 4.77167788344317e-06, "loss": 0.5273, "step": 8197 }, { "epoch": 0.53, "grad_norm": 1.1183501482009888, "learning_rate": 4.770633662893903e-06, "loss": 0.5432, "step": 8198 }, { "epoch": 0.53, "grad_norm": 1.1124606132507324, "learning_rate": 4.769589452369646e-06, "loss": 0.4838, "step": 8199 }, { "epoch": 0.53, "grad_norm": 1.2354145050048828, "learning_rate": 4.76854525191604e-06, "loss": 0.5227, "step": 8200 }, { "epoch": 0.53, "grad_norm": 1.1900334358215332, "learning_rate": 4.767501061578724e-06, "loss": 0.5914, "step": 8201 }, { "epoch": 0.53, "grad_norm": 1.0571579933166504, "learning_rate": 4.76645688140334e-06, "loss": 0.5047, "step": 8202 }, { "epoch": 0.53, "grad_norm": 1.2029228210449219, "learning_rate": 4.76541271143552e-06, "loss": 0.535, "step": 8203 }, { "epoch": 0.53, "grad_norm": 1.1228770017623901, "learning_rate": 4.764368551720905e-06, "loss": 0.5453, "step": 8204 }, { "epoch": 0.53, "grad_norm": 1.25541353225708, "learning_rate": 4.763324402305133e-06, "loss": 0.4846, "step": 8205 }, { "epoch": 0.53, "grad_norm": 1.1225367784500122, "learning_rate": 4.76228026323384e-06, "loss": 0.5339, "step": 8206 }, { "epoch": 0.53, "grad_norm": 1.0758136510849, "learning_rate": 4.761236134552664e-06, "loss": 0.4999, "step": 8207 }, { "epoch": 0.53, "grad_norm": 1.2055548429489136, "learning_rate": 4.760192016307242e-06, "loss": 0.5824, "step": 8208 }, { "epoch": 0.53, "grad_norm": 1.1096210479736328, "learning_rate": 4.759147908543208e-06, "loss": 0.5621, "step": 8209 }, { "epoch": 0.53, "grad_norm": 1.1423454284667969, "learning_rate": 4.758103811306195e-06, "loss": 0.5016, "step": 8210 }, { "epoch": 0.53, "grad_norm": 1.136344075202942, "learning_rate": 4.757059724641842e-06, "loss": 0.5383, "step": 8211 }, { "epoch": 0.53, "grad_norm": 1.23481023311615, "learning_rate": 4.756015648595781e-06, "loss": 0.5343, "step": 8212 }, { "epoch": 0.53, "grad_norm": 1.3229209184646606, "learning_rate": 4.7549715832136464e-06, "loss": 0.5606, "step": 8213 }, { "epoch": 0.53, "grad_norm": 1.2347193956375122, "learning_rate": 4.753927528541075e-06, "loss": 0.5687, "step": 8214 }, { "epoch": 0.53, "grad_norm": 1.1615185737609863, "learning_rate": 4.752883484623693e-06, "loss": 0.5576, "step": 8215 }, { "epoch": 0.53, "grad_norm": 1.294939398765564, "learning_rate": 4.751839451507137e-06, "loss": 0.5872, "step": 8216 }, { "epoch": 0.53, "grad_norm": 1.2584744691848755, "learning_rate": 4.750795429237038e-06, "loss": 0.5127, "step": 8217 }, { "epoch": 0.53, "grad_norm": 1.1465506553649902, "learning_rate": 4.749751417859028e-06, "loss": 0.503, "step": 8218 }, { "epoch": 0.53, "grad_norm": 1.2610615491867065, "learning_rate": 4.74870741741874e-06, "loss": 0.5448, "step": 8219 }, { "epoch": 0.53, "grad_norm": 1.143346905708313, "learning_rate": 4.7476634279618e-06, "loss": 0.4898, "step": 8220 }, { "epoch": 0.53, "grad_norm": 1.311984658241272, "learning_rate": 4.746619449533841e-06, "loss": 0.5289, "step": 8221 }, { "epoch": 0.53, "grad_norm": 1.1409306526184082, "learning_rate": 4.7455754821804914e-06, "loss": 0.5738, "step": 8222 }, { "epoch": 0.53, "grad_norm": 1.1304432153701782, "learning_rate": 4.744531525947382e-06, "loss": 0.5235, "step": 8223 }, { "epoch": 0.53, "grad_norm": 1.234520435333252, "learning_rate": 4.74348758088014e-06, "loss": 0.503, "step": 8224 }, { "epoch": 0.53, "grad_norm": 1.1464388370513916, "learning_rate": 4.742443647024396e-06, "loss": 0.5252, "step": 8225 }, { "epoch": 0.53, "grad_norm": 1.1888355016708374, "learning_rate": 4.741399724425773e-06, "loss": 0.5753, "step": 8226 }, { "epoch": 0.53, "grad_norm": 1.2219429016113281, "learning_rate": 4.740355813129902e-06, "loss": 0.502, "step": 8227 }, { "epoch": 0.53, "grad_norm": 1.217713475227356, "learning_rate": 4.7393119131824085e-06, "loss": 0.5261, "step": 8228 }, { "epoch": 0.53, "grad_norm": 1.1703346967697144, "learning_rate": 4.738268024628918e-06, "loss": 0.5577, "step": 8229 }, { "epoch": 0.53, "grad_norm": 1.1690936088562012, "learning_rate": 4.7372241475150585e-06, "loss": 0.5003, "step": 8230 }, { "epoch": 0.53, "grad_norm": 1.2735185623168945, "learning_rate": 4.7361802818864534e-06, "loss": 0.5413, "step": 8231 }, { "epoch": 0.53, "grad_norm": 1.2563886642456055, "learning_rate": 4.735136427788726e-06, "loss": 0.5402, "step": 8232 }, { "epoch": 0.53, "grad_norm": 1.2898409366607666, "learning_rate": 4.734092585267502e-06, "loss": 0.5758, "step": 8233 }, { "epoch": 0.53, "grad_norm": 1.218141794204712, "learning_rate": 4.733048754368407e-06, "loss": 0.4994, "step": 8234 }, { "epoch": 0.53, "grad_norm": 1.112335205078125, "learning_rate": 4.73200493513706e-06, "loss": 0.5215, "step": 8235 }, { "epoch": 0.53, "grad_norm": 1.3300228118896484, "learning_rate": 4.7309611276190864e-06, "loss": 0.5512, "step": 8236 }, { "epoch": 0.53, "grad_norm": 1.1667726039886475, "learning_rate": 4.729917331860109e-06, "loss": 0.5565, "step": 8237 }, { "epoch": 0.53, "grad_norm": 0.9906101226806641, "learning_rate": 4.728873547905747e-06, "loss": 0.4873, "step": 8238 }, { "epoch": 0.53, "grad_norm": 1.1489875316619873, "learning_rate": 4.727829775801624e-06, "loss": 0.4918, "step": 8239 }, { "epoch": 0.53, "grad_norm": 1.0757410526275635, "learning_rate": 4.726786015593358e-06, "loss": 0.5235, "step": 8240 }, { "epoch": 0.53, "grad_norm": 1.174072504043579, "learning_rate": 4.72574226732657e-06, "loss": 0.5342, "step": 8241 }, { "epoch": 0.53, "grad_norm": 1.0565721988677979, "learning_rate": 4.7246985310468804e-06, "loss": 0.5158, "step": 8242 }, { "epoch": 0.53, "grad_norm": 1.23117995262146, "learning_rate": 4.723654806799909e-06, "loss": 0.5197, "step": 8243 }, { "epoch": 0.53, "grad_norm": 1.208422064781189, "learning_rate": 4.722611094631272e-06, "loss": 0.5599, "step": 8244 }, { "epoch": 0.53, "grad_norm": 1.2153661251068115, "learning_rate": 4.721567394586588e-06, "loss": 0.5068, "step": 8245 }, { "epoch": 0.53, "grad_norm": 1.13162100315094, "learning_rate": 4.720523706711475e-06, "loss": 0.4908, "step": 8246 }, { "epoch": 0.53, "grad_norm": 1.2955883741378784, "learning_rate": 4.719480031051549e-06, "loss": 0.5956, "step": 8247 }, { "epoch": 0.53, "grad_norm": 1.1690852642059326, "learning_rate": 4.718436367652428e-06, "loss": 0.5692, "step": 8248 }, { "epoch": 0.53, "grad_norm": 1.2878738641738892, "learning_rate": 4.717392716559729e-06, "loss": 0.5135, "step": 8249 }, { "epoch": 0.53, "grad_norm": 1.2215079069137573, "learning_rate": 4.716349077819062e-06, "loss": 0.5466, "step": 8250 }, { "epoch": 0.53, "grad_norm": 1.0548107624053955, "learning_rate": 4.715305451476046e-06, "loss": 0.477, "step": 8251 }, { "epoch": 0.53, "grad_norm": 1.1105678081512451, "learning_rate": 4.7142618375762935e-06, "loss": 0.5011, "step": 8252 }, { "epoch": 0.53, "grad_norm": 1.2153472900390625, "learning_rate": 4.713218236165419e-06, "loss": 0.5498, "step": 8253 }, { "epoch": 0.53, "grad_norm": 1.1851391792297363, "learning_rate": 4.712174647289036e-06, "loss": 0.5775, "step": 8254 }, { "epoch": 0.53, "grad_norm": 1.1438103914260864, "learning_rate": 4.711131070992759e-06, "loss": 0.5217, "step": 8255 }, { "epoch": 0.53, "grad_norm": 1.2148864269256592, "learning_rate": 4.710087507322195e-06, "loss": 0.5299, "step": 8256 }, { "epoch": 0.53, "grad_norm": 1.1815664768218994, "learning_rate": 4.7090439563229575e-06, "loss": 0.4795, "step": 8257 }, { "epoch": 0.53, "grad_norm": 1.2366224527359009, "learning_rate": 4.7080004180406585e-06, "loss": 0.5261, "step": 8258 }, { "epoch": 0.53, "grad_norm": 1.1426153182983398, "learning_rate": 4.706956892520908e-06, "loss": 0.5637, "step": 8259 }, { "epoch": 0.53, "grad_norm": 1.155806064605713, "learning_rate": 4.705913379809318e-06, "loss": 0.5776, "step": 8260 }, { "epoch": 0.53, "grad_norm": 1.2718857526779175, "learning_rate": 4.704869879951493e-06, "loss": 0.5501, "step": 8261 }, { "epoch": 0.53, "grad_norm": 1.1251739263534546, "learning_rate": 4.703826392993044e-06, "loss": 0.5211, "step": 8262 }, { "epoch": 0.53, "grad_norm": 1.2397834062576294, "learning_rate": 4.7027829189795785e-06, "loss": 0.4898, "step": 8263 }, { "epoch": 0.53, "grad_norm": 1.136488437652588, "learning_rate": 4.7017394579567065e-06, "loss": 0.4922, "step": 8264 }, { "epoch": 0.53, "grad_norm": 1.315308928489685, "learning_rate": 4.700696009970031e-06, "loss": 0.5618, "step": 8265 }, { "epoch": 0.53, "grad_norm": 1.0516505241394043, "learning_rate": 4.699652575065165e-06, "loss": 0.5204, "step": 8266 }, { "epoch": 0.53, "grad_norm": 1.3035876750946045, "learning_rate": 4.698609153287707e-06, "loss": 0.5791, "step": 8267 }, { "epoch": 0.53, "grad_norm": 1.1269851922988892, "learning_rate": 4.697565744683265e-06, "loss": 0.4991, "step": 8268 }, { "epoch": 0.53, "grad_norm": 1.342598795890808, "learning_rate": 4.6965223492974444e-06, "loss": 0.5586, "step": 8269 }, { "epoch": 0.53, "grad_norm": 1.2005852460861206, "learning_rate": 4.695478967175849e-06, "loss": 0.5056, "step": 8270 }, { "epoch": 0.53, "grad_norm": 1.1888083219528198, "learning_rate": 4.694435598364082e-06, "loss": 0.5317, "step": 8271 }, { "epoch": 0.53, "grad_norm": 1.202805995941162, "learning_rate": 4.6933922429077485e-06, "loss": 0.5631, "step": 8272 }, { "epoch": 0.53, "grad_norm": 1.1762337684631348, "learning_rate": 4.692348900852448e-06, "loss": 0.5725, "step": 8273 }, { "epoch": 0.53, "grad_norm": 1.1172667741775513, "learning_rate": 4.691305572243783e-06, "loss": 0.4928, "step": 8274 }, { "epoch": 0.53, "grad_norm": 1.1221903562545776, "learning_rate": 4.6902622571273535e-06, "loss": 0.5056, "step": 8275 }, { "epoch": 0.53, "grad_norm": 1.325547456741333, "learning_rate": 4.689218955548764e-06, "loss": 0.5063, "step": 8276 }, { "epoch": 0.53, "grad_norm": 1.1881279945373535, "learning_rate": 4.688175667553611e-06, "loss": 0.5514, "step": 8277 }, { "epoch": 0.53, "grad_norm": 1.219326138496399, "learning_rate": 4.6871323931874955e-06, "loss": 0.5226, "step": 8278 }, { "epoch": 0.53, "grad_norm": 1.2732856273651123, "learning_rate": 4.686089132496016e-06, "loss": 0.5546, "step": 8279 }, { "epoch": 0.53, "grad_norm": 1.1927014589309692, "learning_rate": 4.6850458855247695e-06, "loss": 0.4976, "step": 8280 }, { "epoch": 0.53, "grad_norm": 1.0663071870803833, "learning_rate": 4.684002652319356e-06, "loss": 0.4887, "step": 8281 }, { "epoch": 0.53, "grad_norm": 1.0754941701889038, "learning_rate": 4.682959432925371e-06, "loss": 0.5198, "step": 8282 }, { "epoch": 0.53, "grad_norm": 1.13671875, "learning_rate": 4.68191622738841e-06, "loss": 0.4956, "step": 8283 }, { "epoch": 0.53, "grad_norm": 1.2025537490844727, "learning_rate": 4.680873035754072e-06, "loss": 0.5347, "step": 8284 }, { "epoch": 0.53, "grad_norm": 1.244797706604004, "learning_rate": 4.67982985806795e-06, "loss": 0.5212, "step": 8285 }, { "epoch": 0.53, "grad_norm": 1.2850801944732666, "learning_rate": 4.678786694375637e-06, "loss": 0.5378, "step": 8286 }, { "epoch": 0.53, "grad_norm": 1.1709136962890625, "learning_rate": 4.6777435447227305e-06, "loss": 0.5396, "step": 8287 }, { "epoch": 0.53, "grad_norm": 1.1260002851486206, "learning_rate": 4.67670040915482e-06, "loss": 0.5154, "step": 8288 }, { "epoch": 0.54, "grad_norm": 1.0244226455688477, "learning_rate": 4.675657287717503e-06, "loss": 0.4587, "step": 8289 }, { "epoch": 0.54, "grad_norm": 1.3668960332870483, "learning_rate": 4.67461418045637e-06, "loss": 0.5657, "step": 8290 }, { "epoch": 0.54, "grad_norm": 1.0423706769943237, "learning_rate": 4.673571087417009e-06, "loss": 0.5165, "step": 8291 }, { "epoch": 0.54, "grad_norm": 1.1765934228897095, "learning_rate": 4.6725280086450144e-06, "loss": 0.5488, "step": 8292 }, { "epoch": 0.54, "grad_norm": 1.1717753410339355, "learning_rate": 4.671484944185976e-06, "loss": 0.5727, "step": 8293 }, { "epoch": 0.54, "grad_norm": 1.1505095958709717, "learning_rate": 4.6704418940854825e-06, "loss": 0.5262, "step": 8294 }, { "epoch": 0.54, "grad_norm": 1.158112645149231, "learning_rate": 4.669398858389123e-06, "loss": 0.5386, "step": 8295 }, { "epoch": 0.54, "grad_norm": 1.1334713697433472, "learning_rate": 4.668355837142489e-06, "loss": 0.5747, "step": 8296 }, { "epoch": 0.54, "grad_norm": 1.239405870437622, "learning_rate": 4.667312830391164e-06, "loss": 0.5161, "step": 8297 }, { "epoch": 0.54, "grad_norm": 1.2535912990570068, "learning_rate": 4.666269838180737e-06, "loss": 0.5765, "step": 8298 }, { "epoch": 0.54, "grad_norm": 1.292375087738037, "learning_rate": 4.665226860556793e-06, "loss": 0.5846, "step": 8299 }, { "epoch": 0.54, "grad_norm": 1.1716728210449219, "learning_rate": 4.664183897564921e-06, "loss": 0.4985, "step": 8300 }, { "epoch": 0.54, "grad_norm": 1.169847846031189, "learning_rate": 4.663140949250705e-06, "loss": 0.5969, "step": 8301 }, { "epoch": 0.54, "grad_norm": 1.1423566341400146, "learning_rate": 4.662098015659728e-06, "loss": 0.5637, "step": 8302 }, { "epoch": 0.54, "grad_norm": 1.1393506526947021, "learning_rate": 4.661055096837575e-06, "loss": 0.5196, "step": 8303 }, { "epoch": 0.54, "grad_norm": 1.12614905834198, "learning_rate": 4.6600121928298285e-06, "loss": 0.5406, "step": 8304 }, { "epoch": 0.54, "grad_norm": 1.233603596687317, "learning_rate": 4.6589693036820715e-06, "loss": 0.5105, "step": 8305 }, { "epoch": 0.54, "grad_norm": 1.267814040184021, "learning_rate": 4.657926429439887e-06, "loss": 0.5374, "step": 8306 }, { "epoch": 0.54, "grad_norm": 1.2065272331237793, "learning_rate": 4.656883570148858e-06, "loss": 0.5566, "step": 8307 }, { "epoch": 0.54, "grad_norm": 1.2310470342636108, "learning_rate": 4.65584072585456e-06, "loss": 0.5401, "step": 8308 }, { "epoch": 0.54, "grad_norm": 1.0907819271087646, "learning_rate": 4.6547978966025766e-06, "loss": 0.4784, "step": 8309 }, { "epoch": 0.54, "grad_norm": 1.1346148252487183, "learning_rate": 4.653755082438487e-06, "loss": 0.4921, "step": 8310 }, { "epoch": 0.54, "grad_norm": 1.2563928365707397, "learning_rate": 4.652712283407868e-06, "loss": 0.5521, "step": 8311 }, { "epoch": 0.54, "grad_norm": 1.2134348154067993, "learning_rate": 4.6516694995563e-06, "loss": 0.5743, "step": 8312 }, { "epoch": 0.54, "grad_norm": 1.1666793823242188, "learning_rate": 4.650626730929362e-06, "loss": 0.5318, "step": 8313 }, { "epoch": 0.54, "grad_norm": 1.123459815979004, "learning_rate": 4.649583977572626e-06, "loss": 0.487, "step": 8314 }, { "epoch": 0.54, "grad_norm": 1.1834063529968262, "learning_rate": 4.648541239531669e-06, "loss": 0.511, "step": 8315 }, { "epoch": 0.54, "grad_norm": 1.2307713031768799, "learning_rate": 4.647498516852068e-06, "loss": 0.5337, "step": 8316 }, { "epoch": 0.54, "grad_norm": 1.278768539428711, "learning_rate": 4.646455809579398e-06, "loss": 0.5635, "step": 8317 }, { "epoch": 0.54, "grad_norm": 1.1093112230300903, "learning_rate": 4.645413117759232e-06, "loss": 0.5029, "step": 8318 }, { "epoch": 0.54, "grad_norm": 1.1398804187774658, "learning_rate": 4.644370441437147e-06, "loss": 0.5736, "step": 8319 }, { "epoch": 0.54, "grad_norm": 1.1424874067306519, "learning_rate": 4.643327780658709e-06, "loss": 0.5032, "step": 8320 }, { "epoch": 0.54, "grad_norm": 1.0941201448440552, "learning_rate": 4.642285135469494e-06, "loss": 0.5023, "step": 8321 }, { "epoch": 0.54, "grad_norm": 1.1732964515686035, "learning_rate": 4.641242505915073e-06, "loss": 0.5372, "step": 8322 }, { "epoch": 0.54, "grad_norm": 1.0603901147842407, "learning_rate": 4.640199892041017e-06, "loss": 0.4403, "step": 8323 }, { "epoch": 0.54, "grad_norm": 1.2444994449615479, "learning_rate": 4.639157293892894e-06, "loss": 0.5651, "step": 8324 }, { "epoch": 0.54, "grad_norm": 1.100546956062317, "learning_rate": 4.638114711516277e-06, "loss": 0.5247, "step": 8325 }, { "epoch": 0.54, "grad_norm": 1.0937292575836182, "learning_rate": 4.63707214495673e-06, "loss": 0.5588, "step": 8326 }, { "epoch": 0.54, "grad_norm": 1.179907202720642, "learning_rate": 4.6360295942598245e-06, "loss": 0.5388, "step": 8327 }, { "epoch": 0.54, "grad_norm": 1.1535475254058838, "learning_rate": 4.634987059471127e-06, "loss": 0.5611, "step": 8328 }, { "epoch": 0.54, "grad_norm": 1.253113031387329, "learning_rate": 4.633944540636201e-06, "loss": 0.5028, "step": 8329 }, { "epoch": 0.54, "grad_norm": 1.1411452293395996, "learning_rate": 4.632902037800616e-06, "loss": 0.5079, "step": 8330 }, { "epoch": 0.54, "grad_norm": 1.063599944114685, "learning_rate": 4.6318595510099365e-06, "loss": 0.5714, "step": 8331 }, { "epoch": 0.54, "grad_norm": 1.0681318044662476, "learning_rate": 4.630817080309726e-06, "loss": 0.5178, "step": 8332 }, { "epoch": 0.54, "grad_norm": 1.10597562789917, "learning_rate": 4.629774625745548e-06, "loss": 0.5248, "step": 8333 }, { "epoch": 0.54, "grad_norm": 1.0838189125061035, "learning_rate": 4.628732187362966e-06, "loss": 0.44, "step": 8334 }, { "epoch": 0.54, "grad_norm": 1.2146979570388794, "learning_rate": 4.6276897652075415e-06, "loss": 0.5288, "step": 8335 }, { "epoch": 0.54, "grad_norm": 1.217522144317627, "learning_rate": 4.626647359324837e-06, "loss": 0.5105, "step": 8336 }, { "epoch": 0.54, "grad_norm": 1.288231611251831, "learning_rate": 4.625604969760415e-06, "loss": 0.5603, "step": 8337 }, { "epoch": 0.54, "grad_norm": 1.2168678045272827, "learning_rate": 4.624562596559832e-06, "loss": 0.574, "step": 8338 }, { "epoch": 0.54, "grad_norm": 1.2297332286834717, "learning_rate": 4.6235202397686495e-06, "loss": 0.5511, "step": 8339 }, { "epoch": 0.54, "grad_norm": 1.229080080986023, "learning_rate": 4.622477899432426e-06, "loss": 0.5137, "step": 8340 }, { "epoch": 0.54, "grad_norm": 1.1628926992416382, "learning_rate": 4.62143557559672e-06, "loss": 0.5819, "step": 8341 }, { "epoch": 0.54, "grad_norm": 1.0811055898666382, "learning_rate": 4.620393268307087e-06, "loss": 0.4885, "step": 8342 }, { "epoch": 0.54, "grad_norm": 1.2390354871749878, "learning_rate": 4.6193509776090875e-06, "loss": 0.5383, "step": 8343 }, { "epoch": 0.54, "grad_norm": 1.1510083675384521, "learning_rate": 4.618308703548273e-06, "loss": 0.5502, "step": 8344 }, { "epoch": 0.54, "grad_norm": 1.1245081424713135, "learning_rate": 4.6172664461702e-06, "loss": 0.5113, "step": 8345 }, { "epoch": 0.54, "grad_norm": 1.1638424396514893, "learning_rate": 4.6162242055204225e-06, "loss": 0.5615, "step": 8346 }, { "epoch": 0.54, "grad_norm": 1.1189730167388916, "learning_rate": 4.6151819816444956e-06, "loss": 0.5019, "step": 8347 }, { "epoch": 0.54, "grad_norm": 1.1689287424087524, "learning_rate": 4.614139774587973e-06, "loss": 0.5024, "step": 8348 }, { "epoch": 0.54, "grad_norm": 1.1670292615890503, "learning_rate": 4.6130975843964034e-06, "loss": 0.5297, "step": 8349 }, { "epoch": 0.54, "grad_norm": 1.102510690689087, "learning_rate": 4.612055411115341e-06, "loss": 0.5518, "step": 8350 }, { "epoch": 0.54, "grad_norm": 1.118552565574646, "learning_rate": 4.611013254790335e-06, "loss": 0.5305, "step": 8351 }, { "epoch": 0.54, "grad_norm": 1.3008455038070679, "learning_rate": 4.609971115466936e-06, "loss": 0.5616, "step": 8352 }, { "epoch": 0.54, "grad_norm": 1.3330659866333008, "learning_rate": 4.608928993190693e-06, "loss": 0.5835, "step": 8353 }, { "epoch": 0.54, "grad_norm": 1.1180094480514526, "learning_rate": 4.607886888007157e-06, "loss": 0.5345, "step": 8354 }, { "epoch": 0.54, "grad_norm": 1.1525280475616455, "learning_rate": 4.606844799961871e-06, "loss": 0.5281, "step": 8355 }, { "epoch": 0.54, "grad_norm": 1.1808685064315796, "learning_rate": 4.605802729100385e-06, "loss": 0.5093, "step": 8356 }, { "epoch": 0.54, "grad_norm": 1.135818600654602, "learning_rate": 4.604760675468245e-06, "loss": 0.5063, "step": 8357 }, { "epoch": 0.54, "grad_norm": 1.0834746360778809, "learning_rate": 4.603718639110995e-06, "loss": 0.5496, "step": 8358 }, { "epoch": 0.54, "grad_norm": 1.1470340490341187, "learning_rate": 4.602676620074183e-06, "loss": 0.5241, "step": 8359 }, { "epoch": 0.54, "grad_norm": 1.1832196712493896, "learning_rate": 4.601634618403351e-06, "loss": 0.5983, "step": 8360 }, { "epoch": 0.54, "grad_norm": 1.3565772771835327, "learning_rate": 4.60059263414404e-06, "loss": 0.5494, "step": 8361 }, { "epoch": 0.54, "grad_norm": 1.09331476688385, "learning_rate": 4.599550667341795e-06, "loss": 0.4856, "step": 8362 }, { "epoch": 0.54, "grad_norm": 1.2515835762023926, "learning_rate": 4.5985087180421574e-06, "loss": 0.551, "step": 8363 }, { "epoch": 0.54, "grad_norm": 1.2215042114257812, "learning_rate": 4.597466786290668e-06, "loss": 0.5143, "step": 8364 }, { "epoch": 0.54, "grad_norm": 1.0296509265899658, "learning_rate": 4.596424872132867e-06, "loss": 0.4767, "step": 8365 }, { "epoch": 0.54, "grad_norm": 1.2913646697998047, "learning_rate": 4.595382975614294e-06, "loss": 0.5566, "step": 8366 }, { "epoch": 0.54, "grad_norm": 1.1535762548446655, "learning_rate": 4.594341096780486e-06, "loss": 0.4973, "step": 8367 }, { "epoch": 0.54, "grad_norm": 1.0948950052261353, "learning_rate": 4.593299235676982e-06, "loss": 0.5284, "step": 8368 }, { "epoch": 0.54, "grad_norm": 1.1965126991271973, "learning_rate": 4.5922573923493194e-06, "loss": 0.5496, "step": 8369 }, { "epoch": 0.54, "grad_norm": 1.2149102687835693, "learning_rate": 4.591215566843035e-06, "loss": 0.5395, "step": 8370 }, { "epoch": 0.54, "grad_norm": 1.2152396440505981, "learning_rate": 4.590173759203663e-06, "loss": 0.538, "step": 8371 }, { "epoch": 0.54, "grad_norm": 1.1576285362243652, "learning_rate": 4.58913196947674e-06, "loss": 0.5428, "step": 8372 }, { "epoch": 0.54, "grad_norm": 1.157044768333435, "learning_rate": 4.588090197707796e-06, "loss": 0.5268, "step": 8373 }, { "epoch": 0.54, "grad_norm": 1.122775912284851, "learning_rate": 4.587048443942369e-06, "loss": 0.4678, "step": 8374 }, { "epoch": 0.54, "grad_norm": 1.1679075956344604, "learning_rate": 4.5860067082259875e-06, "loss": 0.5676, "step": 8375 }, { "epoch": 0.54, "grad_norm": 1.1046288013458252, "learning_rate": 4.584964990604185e-06, "loss": 0.5162, "step": 8376 }, { "epoch": 0.54, "grad_norm": 1.0995173454284668, "learning_rate": 4.583923291122491e-06, "loss": 0.4841, "step": 8377 }, { "epoch": 0.54, "grad_norm": 1.2065616846084595, "learning_rate": 4.5828816098264385e-06, "loss": 0.5053, "step": 8378 }, { "epoch": 0.54, "grad_norm": 1.2834886312484741, "learning_rate": 4.581839946761554e-06, "loss": 0.5717, "step": 8379 }, { "epoch": 0.54, "grad_norm": 1.1838468313217163, "learning_rate": 4.580798301973366e-06, "loss": 0.5623, "step": 8380 }, { "epoch": 0.54, "grad_norm": 1.082749843597412, "learning_rate": 4.579756675507403e-06, "loss": 0.547, "step": 8381 }, { "epoch": 0.54, "grad_norm": 1.1397632360458374, "learning_rate": 4.578715067409191e-06, "loss": 0.531, "step": 8382 }, { "epoch": 0.54, "grad_norm": 1.219382405281067, "learning_rate": 4.577673477724257e-06, "loss": 0.5056, "step": 8383 }, { "epoch": 0.54, "grad_norm": 1.1131423711776733, "learning_rate": 4.576631906498127e-06, "loss": 0.5322, "step": 8384 }, { "epoch": 0.54, "grad_norm": 1.3825793266296387, "learning_rate": 4.575590353776322e-06, "loss": 0.5553, "step": 8385 }, { "epoch": 0.54, "grad_norm": 1.1560815572738647, "learning_rate": 4.574548819604368e-06, "loss": 0.5387, "step": 8386 }, { "epoch": 0.54, "grad_norm": 1.1629343032836914, "learning_rate": 4.5735073040277875e-06, "loss": 0.554, "step": 8387 }, { "epoch": 0.54, "grad_norm": 1.1559693813323975, "learning_rate": 4.572465807092102e-06, "loss": 0.5648, "step": 8388 }, { "epoch": 0.54, "grad_norm": 1.134514331817627, "learning_rate": 4.571424328842835e-06, "loss": 0.5474, "step": 8389 }, { "epoch": 0.54, "grad_norm": 1.1509077548980713, "learning_rate": 4.570382869325503e-06, "loss": 0.5413, "step": 8390 }, { "epoch": 0.54, "grad_norm": 1.1033843755722046, "learning_rate": 4.569341428585627e-06, "loss": 0.5403, "step": 8391 }, { "epoch": 0.54, "grad_norm": 1.20550537109375, "learning_rate": 4.568300006668728e-06, "loss": 0.538, "step": 8392 }, { "epoch": 0.54, "grad_norm": 1.0559414625167847, "learning_rate": 4.56725860362032e-06, "loss": 0.5023, "step": 8393 }, { "epoch": 0.54, "grad_norm": 1.2213796377182007, "learning_rate": 4.566217219485922e-06, "loss": 0.5221, "step": 8394 }, { "epoch": 0.54, "grad_norm": 1.1545311212539673, "learning_rate": 4.565175854311052e-06, "loss": 0.5648, "step": 8395 }, { "epoch": 0.54, "grad_norm": 1.1635041236877441, "learning_rate": 4.564134508141223e-06, "loss": 0.5435, "step": 8396 }, { "epoch": 0.54, "grad_norm": 1.044000506401062, "learning_rate": 4.5630931810219484e-06, "loss": 0.479, "step": 8397 }, { "epoch": 0.54, "grad_norm": 1.1878873109817505, "learning_rate": 4.562051872998745e-06, "loss": 0.5209, "step": 8398 }, { "epoch": 0.54, "grad_norm": 1.2192407846450806, "learning_rate": 4.561010584117123e-06, "loss": 0.607, "step": 8399 }, { "epoch": 0.54, "grad_norm": 1.1515883207321167, "learning_rate": 4.559969314422596e-06, "loss": 0.5666, "step": 8400 }, { "epoch": 0.54, "grad_norm": 1.220941424369812, "learning_rate": 4.558928063960678e-06, "loss": 0.5149, "step": 8401 }, { "epoch": 0.54, "grad_norm": 1.180591344833374, "learning_rate": 4.557886832776874e-06, "loss": 0.5255, "step": 8402 }, { "epoch": 0.54, "grad_norm": 1.2679169178009033, "learning_rate": 4.556845620916695e-06, "loss": 0.5681, "step": 8403 }, { "epoch": 0.54, "grad_norm": 1.0882713794708252, "learning_rate": 4.555804428425651e-06, "loss": 0.5125, "step": 8404 }, { "epoch": 0.54, "grad_norm": 1.0666981935501099, "learning_rate": 4.55476325534925e-06, "loss": 0.4903, "step": 8405 }, { "epoch": 0.54, "grad_norm": 1.2805111408233643, "learning_rate": 4.553722101732998e-06, "loss": 0.5408, "step": 8406 }, { "epoch": 0.54, "grad_norm": 1.1378858089447021, "learning_rate": 4.552680967622403e-06, "loss": 0.526, "step": 8407 }, { "epoch": 0.54, "grad_norm": 1.393305778503418, "learning_rate": 4.551639853062968e-06, "loss": 0.6202, "step": 8408 }, { "epoch": 0.54, "grad_norm": 1.2431700229644775, "learning_rate": 4.550598758100197e-06, "loss": 0.5754, "step": 8409 }, { "epoch": 0.54, "grad_norm": 1.1717854738235474, "learning_rate": 4.549557682779595e-06, "loss": 0.5574, "step": 8410 }, { "epoch": 0.54, "grad_norm": 1.1606478691101074, "learning_rate": 4.5485166271466655e-06, "loss": 0.5178, "step": 8411 }, { "epoch": 0.54, "grad_norm": 1.0263675451278687, "learning_rate": 4.547475591246909e-06, "loss": 0.5103, "step": 8412 }, { "epoch": 0.54, "grad_norm": 1.2166345119476318, "learning_rate": 4.546434575125828e-06, "loss": 0.5556, "step": 8413 }, { "epoch": 0.54, "grad_norm": 1.113924264907837, "learning_rate": 4.54539357882892e-06, "loss": 0.5306, "step": 8414 }, { "epoch": 0.54, "grad_norm": 1.3059396743774414, "learning_rate": 4.544352602401686e-06, "loss": 0.534, "step": 8415 }, { "epoch": 0.54, "grad_norm": 1.344445824623108, "learning_rate": 4.543311645889624e-06, "loss": 0.552, "step": 8416 }, { "epoch": 0.54, "grad_norm": 1.1969083547592163, "learning_rate": 4.542270709338233e-06, "loss": 0.5413, "step": 8417 }, { "epoch": 0.54, "grad_norm": 1.1036865711212158, "learning_rate": 4.541229792793007e-06, "loss": 0.4992, "step": 8418 }, { "epoch": 0.54, "grad_norm": 1.3176935911178589, "learning_rate": 4.540188896299444e-06, "loss": 0.5385, "step": 8419 }, { "epoch": 0.54, "grad_norm": 1.253082275390625, "learning_rate": 4.539148019903037e-06, "loss": 0.527, "step": 8420 }, { "epoch": 0.54, "grad_norm": 1.1550796031951904, "learning_rate": 4.5381071636492826e-06, "loss": 0.5515, "step": 8421 }, { "epoch": 0.54, "grad_norm": 1.0877645015716553, "learning_rate": 4.5370663275836714e-06, "loss": 0.5366, "step": 8422 }, { "epoch": 0.54, "grad_norm": 1.40719473361969, "learning_rate": 4.536025511751696e-06, "loss": 0.5541, "step": 8423 }, { "epoch": 0.54, "grad_norm": 1.3241182565689087, "learning_rate": 4.5349847161988484e-06, "loss": 0.5509, "step": 8424 }, { "epoch": 0.54, "grad_norm": 1.2214794158935547, "learning_rate": 4.533943940970621e-06, "loss": 0.5175, "step": 8425 }, { "epoch": 0.54, "grad_norm": 1.1353956460952759, "learning_rate": 4.532903186112501e-06, "loss": 0.5184, "step": 8426 }, { "epoch": 0.54, "grad_norm": 1.22821843624115, "learning_rate": 4.531862451669976e-06, "loss": 0.544, "step": 8427 }, { "epoch": 0.54, "grad_norm": 1.1249263286590576, "learning_rate": 4.5308217376885356e-06, "loss": 0.5509, "step": 8428 }, { "epoch": 0.54, "grad_norm": 1.2248510122299194, "learning_rate": 4.529781044213667e-06, "loss": 0.5254, "step": 8429 }, { "epoch": 0.54, "grad_norm": 1.091289758682251, "learning_rate": 4.528740371290858e-06, "loss": 0.4999, "step": 8430 }, { "epoch": 0.54, "grad_norm": 1.1929551362991333, "learning_rate": 4.527699718965589e-06, "loss": 0.4906, "step": 8431 }, { "epoch": 0.54, "grad_norm": 1.1844017505645752, "learning_rate": 4.526659087283347e-06, "loss": 0.533, "step": 8432 }, { "epoch": 0.54, "grad_norm": 1.464318871498108, "learning_rate": 4.525618476289614e-06, "loss": 0.5971, "step": 8433 }, { "epoch": 0.54, "grad_norm": 1.1113008260726929, "learning_rate": 4.524577886029875e-06, "loss": 0.5366, "step": 8434 }, { "epoch": 0.54, "grad_norm": 1.1193798780441284, "learning_rate": 4.523537316549609e-06, "loss": 0.5522, "step": 8435 }, { "epoch": 0.54, "grad_norm": 1.4876885414123535, "learning_rate": 4.5224967678943e-06, "loss": 0.5745, "step": 8436 }, { "epoch": 0.54, "grad_norm": 1.1615643501281738, "learning_rate": 4.5214562401094234e-06, "loss": 0.5106, "step": 8437 }, { "epoch": 0.54, "grad_norm": 1.409088134765625, "learning_rate": 4.520415733240459e-06, "loss": 0.5411, "step": 8438 }, { "epoch": 0.54, "grad_norm": 1.3240488767623901, "learning_rate": 4.519375247332887e-06, "loss": 0.5454, "step": 8439 }, { "epoch": 0.54, "grad_norm": 1.1820844411849976, "learning_rate": 4.518334782432183e-06, "loss": 0.5307, "step": 8440 }, { "epoch": 0.54, "grad_norm": 1.2618862390518188, "learning_rate": 4.517294338583822e-06, "loss": 0.5341, "step": 8441 }, { "epoch": 0.54, "grad_norm": 1.1331310272216797, "learning_rate": 4.516253915833282e-06, "loss": 0.5337, "step": 8442 }, { "epoch": 0.54, "grad_norm": 1.0759705305099487, "learning_rate": 4.515213514226033e-06, "loss": 0.5242, "step": 8443 }, { "epoch": 0.55, "grad_norm": 1.0464017391204834, "learning_rate": 4.514173133807552e-06, "loss": 0.5298, "step": 8444 }, { "epoch": 0.55, "grad_norm": 1.3657927513122559, "learning_rate": 4.513132774623308e-06, "loss": 0.5572, "step": 8445 }, { "epoch": 0.55, "grad_norm": 1.1696728467941284, "learning_rate": 4.512092436718776e-06, "loss": 0.5077, "step": 8446 }, { "epoch": 0.55, "grad_norm": 1.2905628681182861, "learning_rate": 4.511052120139424e-06, "loss": 0.5435, "step": 8447 }, { "epoch": 0.55, "grad_norm": 1.1115787029266357, "learning_rate": 4.510011824930724e-06, "loss": 0.5284, "step": 8448 }, { "epoch": 0.55, "grad_norm": 1.2492579221725464, "learning_rate": 4.508971551138142e-06, "loss": 0.5406, "step": 8449 }, { "epoch": 0.55, "grad_norm": 1.0881065130233765, "learning_rate": 4.507931298807145e-06, "loss": 0.4838, "step": 8450 }, { "epoch": 0.55, "grad_norm": 1.0761933326721191, "learning_rate": 4.506891067983203e-06, "loss": 0.5012, "step": 8451 }, { "epoch": 0.55, "grad_norm": 1.153408169746399, "learning_rate": 4.5058508587117785e-06, "loss": 0.5481, "step": 8452 }, { "epoch": 0.55, "grad_norm": 1.2258144617080688, "learning_rate": 4.504810671038339e-06, "loss": 0.522, "step": 8453 }, { "epoch": 0.55, "grad_norm": 1.2414369583129883, "learning_rate": 4.50377050500835e-06, "loss": 0.5593, "step": 8454 }, { "epoch": 0.55, "grad_norm": 1.17026686668396, "learning_rate": 4.502730360667269e-06, "loss": 0.5759, "step": 8455 }, { "epoch": 0.55, "grad_norm": 1.1560945510864258, "learning_rate": 4.501690238060561e-06, "loss": 0.4807, "step": 8456 }, { "epoch": 0.55, "grad_norm": 1.3497107028961182, "learning_rate": 4.500650137233688e-06, "loss": 0.5772, "step": 8457 }, { "epoch": 0.55, "grad_norm": 1.1542603969573975, "learning_rate": 4.499610058232108e-06, "loss": 0.5603, "step": 8458 }, { "epoch": 0.55, "grad_norm": 1.1115413904190063, "learning_rate": 4.4985700011012835e-06, "loss": 0.5435, "step": 8459 }, { "epoch": 0.55, "grad_norm": 1.16513991355896, "learning_rate": 4.49752996588667e-06, "loss": 0.5463, "step": 8460 }, { "epoch": 0.55, "grad_norm": 1.1819988489151, "learning_rate": 4.496489952633724e-06, "loss": 0.5644, "step": 8461 }, { "epoch": 0.55, "grad_norm": 1.0789568424224854, "learning_rate": 4.4954499613879035e-06, "loss": 0.5121, "step": 8462 }, { "epoch": 0.55, "grad_norm": 1.030063509941101, "learning_rate": 4.494409992194664e-06, "loss": 0.4838, "step": 8463 }, { "epoch": 0.55, "grad_norm": 1.2103590965270996, "learning_rate": 4.493370045099458e-06, "loss": 0.5722, "step": 8464 }, { "epoch": 0.55, "grad_norm": 1.1295832395553589, "learning_rate": 4.492330120147741e-06, "loss": 0.527, "step": 8465 }, { "epoch": 0.55, "grad_norm": 1.3014538288116455, "learning_rate": 4.4912902173849655e-06, "loss": 0.547, "step": 8466 }, { "epoch": 0.55, "grad_norm": 1.1860641241073608, "learning_rate": 4.490250336856581e-06, "loss": 0.5023, "step": 8467 }, { "epoch": 0.55, "grad_norm": 1.3332468271255493, "learning_rate": 4.48921047860804e-06, "loss": 0.548, "step": 8468 }, { "epoch": 0.55, "grad_norm": 1.2762013673782349, "learning_rate": 4.48817064268479e-06, "loss": 0.5407, "step": 8469 }, { "epoch": 0.55, "grad_norm": 0.9977941513061523, "learning_rate": 4.487130829132282e-06, "loss": 0.5037, "step": 8470 }, { "epoch": 0.55, "grad_norm": 1.1558774709701538, "learning_rate": 4.486091037995961e-06, "loss": 0.5296, "step": 8471 }, { "epoch": 0.55, "grad_norm": 1.176344394683838, "learning_rate": 4.485051269321275e-06, "loss": 0.4964, "step": 8472 }, { "epoch": 0.55, "grad_norm": 1.1440106630325317, "learning_rate": 4.4840115231536706e-06, "loss": 0.522, "step": 8473 }, { "epoch": 0.55, "grad_norm": 1.183984637260437, "learning_rate": 4.48297179953859e-06, "loss": 0.5643, "step": 8474 }, { "epoch": 0.55, "grad_norm": 1.1575251817703247, "learning_rate": 4.481932098521479e-06, "loss": 0.5127, "step": 8475 }, { "epoch": 0.55, "grad_norm": 1.147772192955017, "learning_rate": 4.480892420147778e-06, "loss": 0.4747, "step": 8476 }, { "epoch": 0.55, "grad_norm": 1.2863582372665405, "learning_rate": 4.4798527644629326e-06, "loss": 0.5381, "step": 8477 }, { "epoch": 0.55, "grad_norm": 1.1762901544570923, "learning_rate": 4.478813131512379e-06, "loss": 0.5512, "step": 8478 }, { "epoch": 0.55, "grad_norm": 1.1417592763900757, "learning_rate": 4.477773521341558e-06, "loss": 0.5163, "step": 8479 }, { "epoch": 0.55, "grad_norm": 1.1690384149551392, "learning_rate": 4.47673393399591e-06, "loss": 0.5377, "step": 8480 }, { "epoch": 0.55, "grad_norm": 1.0419799089431763, "learning_rate": 4.475694369520871e-06, "loss": 0.5122, "step": 8481 }, { "epoch": 0.55, "grad_norm": 1.2157166004180908, "learning_rate": 4.474654827961878e-06, "loss": 0.5326, "step": 8482 }, { "epoch": 0.55, "grad_norm": 1.1460254192352295, "learning_rate": 4.47361530936437e-06, "loss": 0.476, "step": 8483 }, { "epoch": 0.55, "grad_norm": 1.0801465511322021, "learning_rate": 4.472575813773777e-06, "loss": 0.5099, "step": 8484 }, { "epoch": 0.55, "grad_norm": 1.1863726377487183, "learning_rate": 4.4715363412355335e-06, "loss": 0.5324, "step": 8485 }, { "epoch": 0.55, "grad_norm": 1.132084846496582, "learning_rate": 4.470496891795073e-06, "loss": 0.5236, "step": 8486 }, { "epoch": 0.55, "grad_norm": 1.2371011972427368, "learning_rate": 4.469457465497828e-06, "loss": 0.5762, "step": 8487 }, { "epoch": 0.55, "grad_norm": 1.056060791015625, "learning_rate": 4.4684180623892285e-06, "loss": 0.5099, "step": 8488 }, { "epoch": 0.55, "grad_norm": 1.2055937051773071, "learning_rate": 4.467378682514705e-06, "loss": 0.5386, "step": 8489 }, { "epoch": 0.55, "grad_norm": 1.1710623502731323, "learning_rate": 4.466339325919684e-06, "loss": 0.5599, "step": 8490 }, { "epoch": 0.55, "grad_norm": 1.1613140106201172, "learning_rate": 4.465299992649593e-06, "loss": 0.4939, "step": 8491 }, { "epoch": 0.55, "grad_norm": 1.2723759412765503, "learning_rate": 4.4642606827498606e-06, "loss": 0.5409, "step": 8492 }, { "epoch": 0.55, "grad_norm": 1.135810136795044, "learning_rate": 4.463221396265912e-06, "loss": 0.4808, "step": 8493 }, { "epoch": 0.55, "grad_norm": 1.2953906059265137, "learning_rate": 4.46218213324317e-06, "loss": 0.5492, "step": 8494 }, { "epoch": 0.55, "grad_norm": 1.3496791124343872, "learning_rate": 4.461142893727063e-06, "loss": 0.5484, "step": 8495 }, { "epoch": 0.55, "grad_norm": 1.1828889846801758, "learning_rate": 4.460103677763006e-06, "loss": 0.5571, "step": 8496 }, { "epoch": 0.55, "grad_norm": 1.218188762664795, "learning_rate": 4.459064485396426e-06, "loss": 0.5046, "step": 8497 }, { "epoch": 0.55, "grad_norm": 1.154309868812561, "learning_rate": 4.45802531667274e-06, "loss": 0.5421, "step": 8498 }, { "epoch": 0.55, "grad_norm": 1.1952362060546875, "learning_rate": 4.456986171637371e-06, "loss": 0.5157, "step": 8499 }, { "epoch": 0.55, "grad_norm": 1.198786973953247, "learning_rate": 4.4559470503357336e-06, "loss": 0.5222, "step": 8500 }, { "epoch": 0.55, "grad_norm": 1.1408642530441284, "learning_rate": 4.45490795281325e-06, "loss": 0.5255, "step": 8501 }, { "epoch": 0.55, "grad_norm": 1.192805290222168, "learning_rate": 4.453868879115332e-06, "loss": 0.5447, "step": 8502 }, { "epoch": 0.55, "grad_norm": 1.2649770975112915, "learning_rate": 4.452829829287395e-06, "loss": 0.5984, "step": 8503 }, { "epoch": 0.55, "grad_norm": 1.1478209495544434, "learning_rate": 4.451790803374855e-06, "loss": 0.5463, "step": 8504 }, { "epoch": 0.55, "grad_norm": 1.085081934928894, "learning_rate": 4.450751801423125e-06, "loss": 0.4758, "step": 8505 }, { "epoch": 0.55, "grad_norm": 1.096448302268982, "learning_rate": 4.449712823477617e-06, "loss": 0.5114, "step": 8506 }, { "epoch": 0.55, "grad_norm": 1.2062368392944336, "learning_rate": 4.448673869583742e-06, "loss": 0.5226, "step": 8507 }, { "epoch": 0.55, "grad_norm": 1.1179002523422241, "learning_rate": 4.447634939786909e-06, "loss": 0.5281, "step": 8508 }, { "epoch": 0.55, "grad_norm": 1.1235811710357666, "learning_rate": 4.446596034132527e-06, "loss": 0.5061, "step": 8509 }, { "epoch": 0.55, "grad_norm": 1.1443963050842285, "learning_rate": 4.445557152666006e-06, "loss": 0.5324, "step": 8510 }, { "epoch": 0.55, "grad_norm": 1.1285715103149414, "learning_rate": 4.44451829543275e-06, "loss": 0.5149, "step": 8511 }, { "epoch": 0.55, "grad_norm": 1.1491644382476807, "learning_rate": 4.443479462478166e-06, "loss": 0.5026, "step": 8512 }, { "epoch": 0.55, "grad_norm": 1.0830602645874023, "learning_rate": 4.4424406538476614e-06, "loss": 0.5048, "step": 8513 }, { "epoch": 0.55, "grad_norm": 1.142780065536499, "learning_rate": 4.441401869586635e-06, "loss": 0.5518, "step": 8514 }, { "epoch": 0.55, "grad_norm": 1.2097036838531494, "learning_rate": 4.440363109740493e-06, "loss": 0.5032, "step": 8515 }, { "epoch": 0.55, "grad_norm": 1.3292218446731567, "learning_rate": 4.439324374354634e-06, "loss": 0.4968, "step": 8516 }, { "epoch": 0.55, "grad_norm": 1.1954458951950073, "learning_rate": 4.43828566347446e-06, "loss": 0.5417, "step": 8517 }, { "epoch": 0.55, "grad_norm": 1.14804208278656, "learning_rate": 4.437246977145372e-06, "loss": 0.472, "step": 8518 }, { "epoch": 0.55, "grad_norm": 1.2404474020004272, "learning_rate": 4.436208315412767e-06, "loss": 0.5132, "step": 8519 }, { "epoch": 0.55, "grad_norm": 1.2690608501434326, "learning_rate": 4.435169678322041e-06, "loss": 0.512, "step": 8520 }, { "epoch": 0.55, "grad_norm": 1.1807544231414795, "learning_rate": 4.43413106591859e-06, "loss": 0.5792, "step": 8521 }, { "epoch": 0.55, "grad_norm": 1.0972795486450195, "learning_rate": 4.4330924782478105e-06, "loss": 0.5099, "step": 8522 }, { "epoch": 0.55, "grad_norm": 1.1924992799758911, "learning_rate": 4.432053915355097e-06, "loss": 0.5448, "step": 8523 }, { "epoch": 0.55, "grad_norm": 1.255406379699707, "learning_rate": 4.431015377285843e-06, "loss": 0.5008, "step": 8524 }, { "epoch": 0.55, "grad_norm": 1.1200392246246338, "learning_rate": 4.429976864085436e-06, "loss": 0.4753, "step": 8525 }, { "epoch": 0.55, "grad_norm": 1.1115468740463257, "learning_rate": 4.428938375799271e-06, "loss": 0.5107, "step": 8526 }, { "epoch": 0.55, "grad_norm": 1.1564204692840576, "learning_rate": 4.427899912472735e-06, "loss": 0.5364, "step": 8527 }, { "epoch": 0.55, "grad_norm": 1.2323541641235352, "learning_rate": 4.426861474151219e-06, "loss": 0.5715, "step": 8528 }, { "epoch": 0.55, "grad_norm": 1.2094662189483643, "learning_rate": 4.425823060880108e-06, "loss": 0.5693, "step": 8529 }, { "epoch": 0.55, "grad_norm": 1.3023293018341064, "learning_rate": 4.424784672704793e-06, "loss": 0.5537, "step": 8530 }, { "epoch": 0.55, "grad_norm": 1.1187561750411987, "learning_rate": 4.423746309670653e-06, "loss": 0.56, "step": 8531 }, { "epoch": 0.55, "grad_norm": 1.180472493171692, "learning_rate": 4.422707971823075e-06, "loss": 0.4797, "step": 8532 }, { "epoch": 0.55, "grad_norm": 1.1637492179870605, "learning_rate": 4.421669659207442e-06, "loss": 0.5285, "step": 8533 }, { "epoch": 0.55, "grad_norm": 1.257311224937439, "learning_rate": 4.420631371869136e-06, "loss": 0.5354, "step": 8534 }, { "epoch": 0.55, "grad_norm": 1.1644442081451416, "learning_rate": 4.4195931098535385e-06, "loss": 0.5119, "step": 8535 }, { "epoch": 0.55, "grad_norm": 1.1562798023223877, "learning_rate": 4.418554873206031e-06, "loss": 0.597, "step": 8536 }, { "epoch": 0.55, "grad_norm": 1.0806646347045898, "learning_rate": 4.417516661971987e-06, "loss": 0.5258, "step": 8537 }, { "epoch": 0.55, "grad_norm": 1.0732454061508179, "learning_rate": 4.416478476196788e-06, "loss": 0.5532, "step": 8538 }, { "epoch": 0.55, "grad_norm": 1.210947036743164, "learning_rate": 4.415440315925808e-06, "loss": 0.5286, "step": 8539 }, { "epoch": 0.55, "grad_norm": 1.1797064542770386, "learning_rate": 4.414402181204424e-06, "loss": 0.5015, "step": 8540 }, { "epoch": 0.55, "grad_norm": 1.1596052646636963, "learning_rate": 4.41336407207801e-06, "loss": 0.5116, "step": 8541 }, { "epoch": 0.55, "grad_norm": 1.1619514226913452, "learning_rate": 4.412325988591941e-06, "loss": 0.5022, "step": 8542 }, { "epoch": 0.55, "grad_norm": 1.1765780448913574, "learning_rate": 4.411287930791585e-06, "loss": 0.491, "step": 8543 }, { "epoch": 0.55, "grad_norm": 1.2793995141983032, "learning_rate": 4.410249898722314e-06, "loss": 0.5641, "step": 8544 }, { "epoch": 0.55, "grad_norm": 1.0894973278045654, "learning_rate": 4.409211892429499e-06, "loss": 0.5406, "step": 8545 }, { "epoch": 0.55, "grad_norm": 1.1569037437438965, "learning_rate": 4.408173911958507e-06, "loss": 0.5453, "step": 8546 }, { "epoch": 0.55, "grad_norm": 1.158223032951355, "learning_rate": 4.4071359573547065e-06, "loss": 0.517, "step": 8547 }, { "epoch": 0.55, "grad_norm": 1.1181343793869019, "learning_rate": 4.406098028663467e-06, "loss": 0.5677, "step": 8548 }, { "epoch": 0.55, "grad_norm": 1.2123892307281494, "learning_rate": 4.405060125930148e-06, "loss": 0.5223, "step": 8549 }, { "epoch": 0.55, "grad_norm": 1.147066593170166, "learning_rate": 4.404022249200115e-06, "loss": 0.5125, "step": 8550 }, { "epoch": 0.55, "grad_norm": 1.2440825700759888, "learning_rate": 4.402984398518732e-06, "loss": 0.4809, "step": 8551 }, { "epoch": 0.55, "grad_norm": 1.1139471530914307, "learning_rate": 4.401946573931362e-06, "loss": 0.5023, "step": 8552 }, { "epoch": 0.55, "grad_norm": 1.1542778015136719, "learning_rate": 4.4009087754833626e-06, "loss": 0.5292, "step": 8553 }, { "epoch": 0.55, "grad_norm": 1.118143081665039, "learning_rate": 4.399871003220097e-06, "loss": 0.5423, "step": 8554 }, { "epoch": 0.55, "grad_norm": 1.0642818212509155, "learning_rate": 4.39883325718692e-06, "loss": 0.4922, "step": 8555 }, { "epoch": 0.55, "grad_norm": 1.250462532043457, "learning_rate": 4.397795537429191e-06, "loss": 0.5769, "step": 8556 }, { "epoch": 0.55, "grad_norm": 1.1846871376037598, "learning_rate": 4.396757843992266e-06, "loss": 0.5416, "step": 8557 }, { "epoch": 0.55, "grad_norm": 1.1248217821121216, "learning_rate": 4.395720176921497e-06, "loss": 0.4938, "step": 8558 }, { "epoch": 0.55, "grad_norm": 1.2114688158035278, "learning_rate": 4.394682536262242e-06, "loss": 0.5781, "step": 8559 }, { "epoch": 0.55, "grad_norm": 1.1638528108596802, "learning_rate": 4.39364492205985e-06, "loss": 0.5137, "step": 8560 }, { "epoch": 0.55, "grad_norm": 1.1369374990463257, "learning_rate": 4.3926073343596746e-06, "loss": 0.4913, "step": 8561 }, { "epoch": 0.55, "grad_norm": 1.2527331113815308, "learning_rate": 4.3915697732070665e-06, "loss": 0.544, "step": 8562 }, { "epoch": 0.55, "grad_norm": 1.2558479309082031, "learning_rate": 4.390532238647373e-06, "loss": 0.5437, "step": 8563 }, { "epoch": 0.55, "grad_norm": 1.0751792192459106, "learning_rate": 4.389494730725943e-06, "loss": 0.5233, "step": 8564 }, { "epoch": 0.55, "grad_norm": 1.1917036771774292, "learning_rate": 4.388457249488124e-06, "loss": 0.5268, "step": 8565 }, { "epoch": 0.55, "grad_norm": 1.075443148612976, "learning_rate": 4.387419794979261e-06, "loss": 0.4886, "step": 8566 }, { "epoch": 0.55, "grad_norm": 1.0668716430664062, "learning_rate": 4.386382367244696e-06, "loss": 0.5009, "step": 8567 }, { "epoch": 0.55, "grad_norm": 1.2066677808761597, "learning_rate": 4.385344966329776e-06, "loss": 0.5403, "step": 8568 }, { "epoch": 0.55, "grad_norm": 1.1856359243392944, "learning_rate": 4.384307592279842e-06, "loss": 0.5406, "step": 8569 }, { "epoch": 0.55, "grad_norm": 1.297877311706543, "learning_rate": 4.3832702451402335e-06, "loss": 0.5594, "step": 8570 }, { "epoch": 0.55, "grad_norm": 1.1954360008239746, "learning_rate": 4.382232924956294e-06, "loss": 0.4907, "step": 8571 }, { "epoch": 0.55, "grad_norm": 1.2675974369049072, "learning_rate": 4.381195631773358e-06, "loss": 0.5205, "step": 8572 }, { "epoch": 0.55, "grad_norm": 1.2392104864120483, "learning_rate": 4.380158365636763e-06, "loss": 0.5572, "step": 8573 }, { "epoch": 0.55, "grad_norm": 1.2904679775238037, "learning_rate": 4.379121126591847e-06, "loss": 0.5694, "step": 8574 }, { "epoch": 0.55, "grad_norm": 1.2555006742477417, "learning_rate": 4.378083914683945e-06, "loss": 0.5535, "step": 8575 }, { "epoch": 0.55, "grad_norm": 1.2174896001815796, "learning_rate": 4.377046729958391e-06, "loss": 0.5233, "step": 8576 }, { "epoch": 0.55, "grad_norm": 1.1458556652069092, "learning_rate": 4.376009572460519e-06, "loss": 0.5046, "step": 8577 }, { "epoch": 0.55, "grad_norm": 1.1738709211349487, "learning_rate": 4.374972442235656e-06, "loss": 0.476, "step": 8578 }, { "epoch": 0.55, "grad_norm": 1.2763652801513672, "learning_rate": 4.373935339329135e-06, "loss": 0.567, "step": 8579 }, { "epoch": 0.55, "grad_norm": 1.353430151939392, "learning_rate": 4.372898263786286e-06, "loss": 0.5408, "step": 8580 }, { "epoch": 0.55, "grad_norm": 1.2434229850769043, "learning_rate": 4.371861215652435e-06, "loss": 0.5373, "step": 8581 }, { "epoch": 0.55, "grad_norm": 1.0887271165847778, "learning_rate": 4.37082419497291e-06, "loss": 0.517, "step": 8582 }, { "epoch": 0.55, "grad_norm": 1.1459232568740845, "learning_rate": 4.3697872017930385e-06, "loss": 0.5041, "step": 8583 }, { "epoch": 0.55, "grad_norm": 1.1889307498931885, "learning_rate": 4.368750236158139e-06, "loss": 0.5485, "step": 8584 }, { "epoch": 0.55, "grad_norm": 1.0980416536331177, "learning_rate": 4.367713298113539e-06, "loss": 0.5096, "step": 8585 }, { "epoch": 0.55, "grad_norm": 1.2244949340820312, "learning_rate": 4.36667638770456e-06, "loss": 0.5605, "step": 8586 }, { "epoch": 0.55, "grad_norm": 1.3030381202697754, "learning_rate": 4.365639504976522e-06, "loss": 0.5685, "step": 8587 }, { "epoch": 0.55, "grad_norm": 1.1517174243927002, "learning_rate": 4.364602649974744e-06, "loss": 0.5156, "step": 8588 }, { "epoch": 0.55, "grad_norm": 1.1016881465911865, "learning_rate": 4.363565822744547e-06, "loss": 0.5367, "step": 8589 }, { "epoch": 0.55, "grad_norm": 1.2045538425445557, "learning_rate": 4.362529023331244e-06, "loss": 0.5453, "step": 8590 }, { "epoch": 0.55, "grad_norm": 1.1485645771026611, "learning_rate": 4.3614922517801525e-06, "loss": 0.5199, "step": 8591 }, { "epoch": 0.55, "grad_norm": 1.180361270904541, "learning_rate": 4.3604555081365874e-06, "loss": 0.5592, "step": 8592 }, { "epoch": 0.55, "grad_norm": 1.1652339696884155, "learning_rate": 4.359418792445862e-06, "loss": 0.5563, "step": 8593 }, { "epoch": 0.55, "grad_norm": 1.164707064628601, "learning_rate": 4.358382104753288e-06, "loss": 0.5301, "step": 8594 }, { "epoch": 0.55, "grad_norm": 1.2388471364974976, "learning_rate": 4.357345445104179e-06, "loss": 0.5448, "step": 8595 }, { "epoch": 0.55, "grad_norm": 1.2384655475616455, "learning_rate": 4.3563088135438415e-06, "loss": 0.5438, "step": 8596 }, { "epoch": 0.55, "grad_norm": 1.1302123069763184, "learning_rate": 4.355272210117584e-06, "loss": 0.5103, "step": 8597 }, { "epoch": 0.55, "grad_norm": 1.1310516595840454, "learning_rate": 4.354235634870715e-06, "loss": 0.4993, "step": 8598 }, { "epoch": 0.56, "grad_norm": 1.0821136236190796, "learning_rate": 4.353199087848541e-06, "loss": 0.5094, "step": 8599 }, { "epoch": 0.56, "grad_norm": 1.098109483718872, "learning_rate": 4.3521625690963674e-06, "loss": 0.4987, "step": 8600 }, { "epoch": 0.56, "grad_norm": 1.1578965187072754, "learning_rate": 4.351126078659494e-06, "loss": 0.5738, "step": 8601 }, { "epoch": 0.56, "grad_norm": 1.1095993518829346, "learning_rate": 4.350089616583226e-06, "loss": 0.5276, "step": 8602 }, { "epoch": 0.56, "grad_norm": 1.1505515575408936, "learning_rate": 4.349053182912864e-06, "loss": 0.5532, "step": 8603 }, { "epoch": 0.56, "grad_norm": 1.169366717338562, "learning_rate": 4.34801677769371e-06, "loss": 0.5163, "step": 8604 }, { "epoch": 0.56, "grad_norm": 1.0509265661239624, "learning_rate": 4.346980400971058e-06, "loss": 0.52, "step": 8605 }, { "epoch": 0.56, "grad_norm": 1.1111665964126587, "learning_rate": 4.345944052790209e-06, "loss": 0.537, "step": 8606 }, { "epoch": 0.56, "grad_norm": 1.209438443183899, "learning_rate": 4.344907733196458e-06, "loss": 0.5369, "step": 8607 }, { "epoch": 0.56, "grad_norm": 1.1597994565963745, "learning_rate": 4.3438714422351e-06, "loss": 0.5049, "step": 8608 }, { "epoch": 0.56, "grad_norm": 1.1446726322174072, "learning_rate": 4.342835179951428e-06, "loss": 0.5213, "step": 8609 }, { "epoch": 0.56, "grad_norm": 1.1437641382217407, "learning_rate": 4.341798946390734e-06, "loss": 0.5327, "step": 8610 }, { "epoch": 0.56, "grad_norm": 1.2306193113327026, "learning_rate": 4.34076274159831e-06, "loss": 0.5153, "step": 8611 }, { "epoch": 0.56, "grad_norm": 1.1632258892059326, "learning_rate": 4.339726565619448e-06, "loss": 0.5323, "step": 8612 }, { "epoch": 0.56, "grad_norm": 1.1953504085540771, "learning_rate": 4.338690418499433e-06, "loss": 0.5204, "step": 8613 }, { "epoch": 0.56, "grad_norm": 1.0462534427642822, "learning_rate": 4.3376543002835526e-06, "loss": 0.4799, "step": 8614 }, { "epoch": 0.56, "grad_norm": 1.237694263458252, "learning_rate": 4.336618211017094e-06, "loss": 0.5356, "step": 8615 }, { "epoch": 0.56, "grad_norm": 1.152794599533081, "learning_rate": 4.335582150745342e-06, "loss": 0.529, "step": 8616 }, { "epoch": 0.56, "grad_norm": 1.1540718078613281, "learning_rate": 4.3345461195135805e-06, "loss": 0.5336, "step": 8617 }, { "epoch": 0.56, "grad_norm": 1.2467602491378784, "learning_rate": 4.3335101173670925e-06, "loss": 0.5338, "step": 8618 }, { "epoch": 0.56, "grad_norm": 1.18288254737854, "learning_rate": 4.332474144351156e-06, "loss": 0.5433, "step": 8619 }, { "epoch": 0.56, "grad_norm": 1.0887012481689453, "learning_rate": 4.3314382005110525e-06, "loss": 0.54, "step": 8620 }, { "epoch": 0.56, "grad_norm": 1.194789171218872, "learning_rate": 4.330402285892059e-06, "loss": 0.5742, "step": 8621 }, { "epoch": 0.56, "grad_norm": 1.2607191801071167, "learning_rate": 4.3293664005394555e-06, "loss": 0.5566, "step": 8622 }, { "epoch": 0.56, "grad_norm": 1.15231192111969, "learning_rate": 4.3283305444985166e-06, "loss": 0.5045, "step": 8623 }, { "epoch": 0.56, "grad_norm": 1.10970139503479, "learning_rate": 4.327294717814518e-06, "loss": 0.4985, "step": 8624 }, { "epoch": 0.56, "grad_norm": 1.2877999544143677, "learning_rate": 4.32625892053273e-06, "loss": 0.5406, "step": 8625 }, { "epoch": 0.56, "grad_norm": 1.146820306777954, "learning_rate": 4.325223152698426e-06, "loss": 0.4795, "step": 8626 }, { "epoch": 0.56, "grad_norm": 1.2077882289886475, "learning_rate": 4.3241874143568765e-06, "loss": 0.5214, "step": 8627 }, { "epoch": 0.56, "grad_norm": 1.1192766427993774, "learning_rate": 4.323151705553352e-06, "loss": 0.545, "step": 8628 }, { "epoch": 0.56, "grad_norm": 1.1426712274551392, "learning_rate": 4.322116026333121e-06, "loss": 0.4908, "step": 8629 }, { "epoch": 0.56, "grad_norm": 1.051571011543274, "learning_rate": 4.32108037674145e-06, "loss": 0.5173, "step": 8630 }, { "epoch": 0.56, "grad_norm": 1.074641227722168, "learning_rate": 4.3200447568236035e-06, "loss": 0.5201, "step": 8631 }, { "epoch": 0.56, "grad_norm": 1.2131468057632446, "learning_rate": 4.319009166624846e-06, "loss": 0.5928, "step": 8632 }, { "epoch": 0.56, "grad_norm": 1.101639986038208, "learning_rate": 4.31797360619044e-06, "loss": 0.4857, "step": 8633 }, { "epoch": 0.56, "grad_norm": 1.2095297574996948, "learning_rate": 4.316938075565649e-06, "loss": 0.5337, "step": 8634 }, { "epoch": 0.56, "grad_norm": 1.304341197013855, "learning_rate": 4.315902574795732e-06, "loss": 0.558, "step": 8635 }, { "epoch": 0.56, "grad_norm": 1.216068983078003, "learning_rate": 4.314867103925951e-06, "loss": 0.5246, "step": 8636 }, { "epoch": 0.56, "grad_norm": 1.3047720193862915, "learning_rate": 4.313831663001559e-06, "loss": 0.5605, "step": 8637 }, { "epoch": 0.56, "grad_norm": 1.1314277648925781, "learning_rate": 4.312796252067814e-06, "loss": 0.5426, "step": 8638 }, { "epoch": 0.56, "grad_norm": 1.202954649925232, "learning_rate": 4.311760871169972e-06, "loss": 0.5491, "step": 8639 }, { "epoch": 0.56, "grad_norm": 1.2387722730636597, "learning_rate": 4.310725520353287e-06, "loss": 0.5521, "step": 8640 }, { "epoch": 0.56, "grad_norm": 1.2463666200637817, "learning_rate": 4.309690199663011e-06, "loss": 0.5534, "step": 8641 }, { "epoch": 0.56, "grad_norm": 1.1499969959259033, "learning_rate": 4.308654909144398e-06, "loss": 0.5326, "step": 8642 }, { "epoch": 0.56, "grad_norm": 1.1710706949234009, "learning_rate": 4.307619648842692e-06, "loss": 0.5141, "step": 8643 }, { "epoch": 0.56, "grad_norm": 1.1343618631362915, "learning_rate": 4.306584418803145e-06, "loss": 0.5297, "step": 8644 }, { "epoch": 0.56, "grad_norm": 1.1982117891311646, "learning_rate": 4.305549219071005e-06, "loss": 0.5459, "step": 8645 }, { "epoch": 0.56, "grad_norm": 1.1755586862564087, "learning_rate": 4.304514049691517e-06, "loss": 0.5175, "step": 8646 }, { "epoch": 0.56, "grad_norm": 1.152316927909851, "learning_rate": 4.303478910709927e-06, "loss": 0.5633, "step": 8647 }, { "epoch": 0.56, "grad_norm": 1.2361035346984863, "learning_rate": 4.302443802171474e-06, "loss": 0.5798, "step": 8648 }, { "epoch": 0.56, "grad_norm": 1.195069432258606, "learning_rate": 4.3014087241214034e-06, "loss": 0.5414, "step": 8649 }, { "epoch": 0.56, "grad_norm": 1.2313685417175293, "learning_rate": 4.3003736766049545e-06, "loss": 0.536, "step": 8650 }, { "epoch": 0.56, "grad_norm": 1.1521235704421997, "learning_rate": 4.299338659667369e-06, "loss": 0.5499, "step": 8651 }, { "epoch": 0.56, "grad_norm": 1.0948411226272583, "learning_rate": 4.298303673353881e-06, "loss": 0.4789, "step": 8652 }, { "epoch": 0.56, "grad_norm": 1.0942896604537964, "learning_rate": 4.297268717709731e-06, "loss": 0.5186, "step": 8653 }, { "epoch": 0.56, "grad_norm": 1.0805096626281738, "learning_rate": 4.296233792780151e-06, "loss": 0.4778, "step": 8654 }, { "epoch": 0.56, "grad_norm": 1.1454713344573975, "learning_rate": 4.295198898610376e-06, "loss": 0.5405, "step": 8655 }, { "epoch": 0.56, "grad_norm": 1.3036177158355713, "learning_rate": 4.294164035245639e-06, "loss": 0.5262, "step": 8656 }, { "epoch": 0.56, "grad_norm": 1.0799951553344727, "learning_rate": 4.293129202731171e-06, "loss": 0.5244, "step": 8657 }, { "epoch": 0.56, "grad_norm": 1.2188903093338013, "learning_rate": 4.292094401112201e-06, "loss": 0.458, "step": 8658 }, { "epoch": 0.56, "grad_norm": 1.146559238433838, "learning_rate": 4.29105963043396e-06, "loss": 0.5518, "step": 8659 }, { "epoch": 0.56, "grad_norm": 1.1716692447662354, "learning_rate": 4.290024890741674e-06, "loss": 0.5209, "step": 8660 }, { "epoch": 0.56, "grad_norm": 1.114412784576416, "learning_rate": 4.2889901820805665e-06, "loss": 0.5009, "step": 8661 }, { "epoch": 0.56, "grad_norm": 1.151930809020996, "learning_rate": 4.287955504495863e-06, "loss": 0.5529, "step": 8662 }, { "epoch": 0.56, "grad_norm": 1.2781200408935547, "learning_rate": 4.286920858032788e-06, "loss": 0.5257, "step": 8663 }, { "epoch": 0.56, "grad_norm": 1.1945593357086182, "learning_rate": 4.2858862427365635e-06, "loss": 0.521, "step": 8664 }, { "epoch": 0.56, "grad_norm": 1.1857283115386963, "learning_rate": 4.284851658652409e-06, "loss": 0.5169, "step": 8665 }, { "epoch": 0.56, "grad_norm": 1.1451380252838135, "learning_rate": 4.283817105825543e-06, "loss": 0.5305, "step": 8666 }, { "epoch": 0.56, "grad_norm": 1.1767007112503052, "learning_rate": 4.282782584301183e-06, "loss": 0.54, "step": 8667 }, { "epoch": 0.56, "grad_norm": 1.2318915128707886, "learning_rate": 4.281748094124545e-06, "loss": 0.5365, "step": 8668 }, { "epoch": 0.56, "grad_norm": 1.192844271659851, "learning_rate": 4.280713635340846e-06, "loss": 0.5066, "step": 8669 }, { "epoch": 0.56, "grad_norm": 1.117649793624878, "learning_rate": 4.279679207995298e-06, "loss": 0.4907, "step": 8670 }, { "epoch": 0.56, "grad_norm": 1.17880117893219, "learning_rate": 4.278644812133115e-06, "loss": 0.55, "step": 8671 }, { "epoch": 0.56, "grad_norm": 1.3067628145217896, "learning_rate": 4.277610447799504e-06, "loss": 0.5893, "step": 8672 }, { "epoch": 0.56, "grad_norm": 1.34046471118927, "learning_rate": 4.2765761150396764e-06, "loss": 0.5767, "step": 8673 }, { "epoch": 0.56, "grad_norm": 1.122747540473938, "learning_rate": 4.27554181389884e-06, "loss": 0.4805, "step": 8674 }, { "epoch": 0.56, "grad_norm": 1.1913659572601318, "learning_rate": 4.2745075444222025e-06, "loss": 0.5875, "step": 8675 }, { "epoch": 0.56, "grad_norm": 1.1767399311065674, "learning_rate": 4.273473306654968e-06, "loss": 0.5483, "step": 8676 }, { "epoch": 0.56, "grad_norm": 1.2756779193878174, "learning_rate": 4.272439100642344e-06, "loss": 0.5502, "step": 8677 }, { "epoch": 0.56, "grad_norm": 1.099600911140442, "learning_rate": 4.271404926429527e-06, "loss": 0.571, "step": 8678 }, { "epoch": 0.56, "grad_norm": 1.1229238510131836, "learning_rate": 4.2703707840617205e-06, "loss": 0.5185, "step": 8679 }, { "epoch": 0.56, "grad_norm": 1.1568951606750488, "learning_rate": 4.269336673584124e-06, "loss": 0.5206, "step": 8680 }, { "epoch": 0.56, "grad_norm": 1.1503033638000488, "learning_rate": 4.268302595041938e-06, "loss": 0.5533, "step": 8681 }, { "epoch": 0.56, "grad_norm": 1.1598783731460571, "learning_rate": 4.267268548480356e-06, "loss": 0.523, "step": 8682 }, { "epoch": 0.56, "grad_norm": 1.1689543724060059, "learning_rate": 4.2662345339445795e-06, "loss": 0.5319, "step": 8683 }, { "epoch": 0.56, "grad_norm": 1.1632479429244995, "learning_rate": 4.2652005514797944e-06, "loss": 0.5254, "step": 8684 }, { "epoch": 0.56, "grad_norm": 1.1814560890197754, "learning_rate": 4.264166601131199e-06, "loss": 0.5207, "step": 8685 }, { "epoch": 0.56, "grad_norm": 1.2131285667419434, "learning_rate": 4.2631326829439814e-06, "loss": 0.5564, "step": 8686 }, { "epoch": 0.56, "grad_norm": 1.1245427131652832, "learning_rate": 4.262098796963334e-06, "loss": 0.5397, "step": 8687 }, { "epoch": 0.56, "grad_norm": 1.1536158323287964, "learning_rate": 4.261064943234446e-06, "loss": 0.5377, "step": 8688 }, { "epoch": 0.56, "grad_norm": 1.3004990816116333, "learning_rate": 4.2600311218025015e-06, "loss": 0.5243, "step": 8689 }, { "epoch": 0.56, "grad_norm": 1.2547004222869873, "learning_rate": 4.258997332712686e-06, "loss": 0.5564, "step": 8690 }, { "epoch": 0.56, "grad_norm": 1.1883639097213745, "learning_rate": 4.257963576010187e-06, "loss": 0.5063, "step": 8691 }, { "epoch": 0.56, "grad_norm": 1.1335291862487793, "learning_rate": 4.256929851740185e-06, "loss": 0.4857, "step": 8692 }, { "epoch": 0.56, "grad_norm": 1.253747582435608, "learning_rate": 4.255896159947862e-06, "loss": 0.5614, "step": 8693 }, { "epoch": 0.56, "grad_norm": 1.1795766353607178, "learning_rate": 4.2548625006784e-06, "loss": 0.6138, "step": 8694 }, { "epoch": 0.56, "grad_norm": 1.135840654373169, "learning_rate": 4.253828873976974e-06, "loss": 0.5332, "step": 8695 }, { "epoch": 0.56, "grad_norm": 1.2517796754837036, "learning_rate": 4.252795279888762e-06, "loss": 0.5243, "step": 8696 }, { "epoch": 0.56, "grad_norm": 1.3018484115600586, "learning_rate": 4.251761718458942e-06, "loss": 0.5145, "step": 8697 }, { "epoch": 0.56, "grad_norm": 1.2767624855041504, "learning_rate": 4.2507281897326855e-06, "loss": 0.5639, "step": 8698 }, { "epoch": 0.56, "grad_norm": 1.273512601852417, "learning_rate": 4.2496946937551666e-06, "loss": 0.5334, "step": 8699 }, { "epoch": 0.56, "grad_norm": 1.1315443515777588, "learning_rate": 4.248661230571558e-06, "loss": 0.4819, "step": 8700 }, { "epoch": 0.56, "grad_norm": 1.2267177104949951, "learning_rate": 4.2476278002270275e-06, "loss": 0.5163, "step": 8701 }, { "epoch": 0.56, "grad_norm": 1.297521948814392, "learning_rate": 4.246594402766746e-06, "loss": 0.5702, "step": 8702 }, { "epoch": 0.56, "grad_norm": 1.2235671281814575, "learning_rate": 4.245561038235878e-06, "loss": 0.5471, "step": 8703 }, { "epoch": 0.56, "grad_norm": 1.294386863708496, "learning_rate": 4.244527706679591e-06, "loss": 0.5601, "step": 8704 }, { "epoch": 0.56, "grad_norm": 1.1817619800567627, "learning_rate": 4.243494408143049e-06, "loss": 0.5965, "step": 8705 }, { "epoch": 0.56, "grad_norm": 1.2458800077438354, "learning_rate": 4.242461142671415e-06, "loss": 0.556, "step": 8706 }, { "epoch": 0.56, "grad_norm": 1.202564001083374, "learning_rate": 4.24142791030985e-06, "loss": 0.4813, "step": 8707 }, { "epoch": 0.56, "grad_norm": 1.200769305229187, "learning_rate": 4.240394711103513e-06, "loss": 0.5184, "step": 8708 }, { "epoch": 0.56, "grad_norm": 1.2057058811187744, "learning_rate": 4.239361545097563e-06, "loss": 0.5536, "step": 8709 }, { "epoch": 0.56, "grad_norm": 1.1680384874343872, "learning_rate": 4.238328412337158e-06, "loss": 0.5349, "step": 8710 }, { "epoch": 0.56, "grad_norm": 1.2766093015670776, "learning_rate": 4.237295312867452e-06, "loss": 0.5368, "step": 8711 }, { "epoch": 0.56, "grad_norm": 1.2062846422195435, "learning_rate": 4.2362622467336034e-06, "loss": 0.5215, "step": 8712 }, { "epoch": 0.56, "grad_norm": 1.1326099634170532, "learning_rate": 4.235229213980759e-06, "loss": 0.5412, "step": 8713 }, { "epoch": 0.56, "grad_norm": 1.246423602104187, "learning_rate": 4.234196214654072e-06, "loss": 0.5423, "step": 8714 }, { "epoch": 0.56, "grad_norm": 1.0718703269958496, "learning_rate": 4.233163248798693e-06, "loss": 0.5158, "step": 8715 }, { "epoch": 0.56, "grad_norm": 1.0968748331069946, "learning_rate": 4.23213031645977e-06, "loss": 0.4764, "step": 8716 }, { "epoch": 0.56, "grad_norm": 1.0971931219100952, "learning_rate": 4.23109741768245e-06, "loss": 0.5513, "step": 8717 }, { "epoch": 0.56, "grad_norm": 1.190636157989502, "learning_rate": 4.23006455251188e-06, "loss": 0.551, "step": 8718 }, { "epoch": 0.56, "grad_norm": 1.143750548362732, "learning_rate": 4.2290317209932e-06, "loss": 0.5638, "step": 8719 }, { "epoch": 0.56, "grad_norm": 1.1884335279464722, "learning_rate": 4.227998923171553e-06, "loss": 0.5488, "step": 8720 }, { "epoch": 0.56, "grad_norm": 1.250800609588623, "learning_rate": 4.226966159092083e-06, "loss": 0.5524, "step": 8721 }, { "epoch": 0.56, "grad_norm": 1.1982052326202393, "learning_rate": 4.225933428799927e-06, "loss": 0.5373, "step": 8722 }, { "epoch": 0.56, "grad_norm": 1.141332745552063, "learning_rate": 4.224900732340223e-06, "loss": 0.5664, "step": 8723 }, { "epoch": 0.56, "grad_norm": 1.1016812324523926, "learning_rate": 4.223868069758112e-06, "loss": 0.5183, "step": 8724 }, { "epoch": 0.56, "grad_norm": 1.163130521774292, "learning_rate": 4.222835441098722e-06, "loss": 0.5581, "step": 8725 }, { "epoch": 0.56, "grad_norm": 1.0805065631866455, "learning_rate": 4.22180284640719e-06, "loss": 0.498, "step": 8726 }, { "epoch": 0.56, "grad_norm": 1.1809746026992798, "learning_rate": 4.220770285728648e-06, "loss": 0.5256, "step": 8727 }, { "epoch": 0.56, "grad_norm": 1.252345085144043, "learning_rate": 4.219737759108226e-06, "loss": 0.5722, "step": 8728 }, { "epoch": 0.56, "grad_norm": 1.1291112899780273, "learning_rate": 4.218705266591056e-06, "loss": 0.5653, "step": 8729 }, { "epoch": 0.56, "grad_norm": 1.0688467025756836, "learning_rate": 4.2176728082222605e-06, "loss": 0.4959, "step": 8730 }, { "epoch": 0.56, "grad_norm": 1.0686599016189575, "learning_rate": 4.216640384046968e-06, "loss": 0.5038, "step": 8731 }, { "epoch": 0.56, "grad_norm": 1.074247121810913, "learning_rate": 4.215607994110304e-06, "loss": 0.5332, "step": 8732 }, { "epoch": 0.56, "grad_norm": 1.143463134765625, "learning_rate": 4.2145756384573895e-06, "loss": 0.5115, "step": 8733 }, { "epoch": 0.56, "grad_norm": 1.159955382347107, "learning_rate": 4.213543317133349e-06, "loss": 0.5668, "step": 8734 }, { "epoch": 0.56, "grad_norm": 1.0988595485687256, "learning_rate": 4.2125110301833025e-06, "loss": 0.5045, "step": 8735 }, { "epoch": 0.56, "grad_norm": 1.2456327676773071, "learning_rate": 4.211478777652364e-06, "loss": 0.5529, "step": 8736 }, { "epoch": 0.56, "grad_norm": 1.2277106046676636, "learning_rate": 4.210446559585655e-06, "loss": 0.5627, "step": 8737 }, { "epoch": 0.56, "grad_norm": 1.2076842784881592, "learning_rate": 4.2094143760282896e-06, "loss": 0.5701, "step": 8738 }, { "epoch": 0.56, "grad_norm": 1.3094758987426758, "learning_rate": 4.2083822270253824e-06, "loss": 0.5071, "step": 8739 }, { "epoch": 0.56, "grad_norm": 1.1580607891082764, "learning_rate": 4.207350112622046e-06, "loss": 0.5283, "step": 8740 }, { "epoch": 0.56, "grad_norm": 1.1434298753738403, "learning_rate": 4.206318032863391e-06, "loss": 0.5366, "step": 8741 }, { "epoch": 0.56, "grad_norm": 1.1527210474014282, "learning_rate": 4.2052859877945265e-06, "loss": 0.5269, "step": 8742 }, { "epoch": 0.56, "grad_norm": 1.2008421421051025, "learning_rate": 4.2042539774605606e-06, "loss": 0.5305, "step": 8743 }, { "epoch": 0.56, "grad_norm": 1.08388090133667, "learning_rate": 4.203222001906602e-06, "loss": 0.4972, "step": 8744 }, { "epoch": 0.56, "grad_norm": 1.219023585319519, "learning_rate": 4.202190061177752e-06, "loss": 0.4595, "step": 8745 }, { "epoch": 0.56, "grad_norm": 1.2001079320907593, "learning_rate": 4.201158155319118e-06, "loss": 0.5392, "step": 8746 }, { "epoch": 0.56, "grad_norm": 1.1633929014205933, "learning_rate": 4.200126284375799e-06, "loss": 0.5871, "step": 8747 }, { "epoch": 0.56, "grad_norm": 1.065144419670105, "learning_rate": 4.199094448392896e-06, "loss": 0.4483, "step": 8748 }, { "epoch": 0.56, "grad_norm": 1.0743907690048218, "learning_rate": 4.19806264741551e-06, "loss": 0.4991, "step": 8749 }, { "epoch": 0.56, "grad_norm": 1.1921015977859497, "learning_rate": 4.197030881488734e-06, "loss": 0.533, "step": 8750 }, { "epoch": 0.56, "grad_norm": 1.0749095678329468, "learning_rate": 4.1959991506576675e-06, "loss": 0.5367, "step": 8751 }, { "epoch": 0.56, "grad_norm": 1.2139766216278076, "learning_rate": 4.194967454967404e-06, "loss": 0.5586, "step": 8752 }, { "epoch": 0.56, "grad_norm": 1.1078968048095703, "learning_rate": 4.193935794463038e-06, "loss": 0.5252, "step": 8753 }, { "epoch": 0.57, "grad_norm": 1.188858985900879, "learning_rate": 4.192904169189657e-06, "loss": 0.534, "step": 8754 }, { "epoch": 0.57, "grad_norm": 1.294562578201294, "learning_rate": 4.191872579192351e-06, "loss": 0.5603, "step": 8755 }, { "epoch": 0.57, "grad_norm": 1.1729532480239868, "learning_rate": 4.190841024516211e-06, "loss": 0.5569, "step": 8756 }, { "epoch": 0.57, "grad_norm": 1.2607158422470093, "learning_rate": 4.189809505206321e-06, "loss": 0.4802, "step": 8757 }, { "epoch": 0.57, "grad_norm": 1.1976332664489746, "learning_rate": 4.188778021307768e-06, "loss": 0.5694, "step": 8758 }, { "epoch": 0.57, "grad_norm": 1.216447353363037, "learning_rate": 4.187746572865637e-06, "loss": 0.5348, "step": 8759 }, { "epoch": 0.57, "grad_norm": 1.1435726881027222, "learning_rate": 4.186715159925005e-06, "loss": 0.4924, "step": 8760 }, { "epoch": 0.57, "grad_norm": 1.2365479469299316, "learning_rate": 4.185683782530955e-06, "loss": 0.517, "step": 8761 }, { "epoch": 0.57, "grad_norm": 1.1244019269943237, "learning_rate": 4.184652440728567e-06, "loss": 0.486, "step": 8762 }, { "epoch": 0.57, "grad_norm": 1.2367068529129028, "learning_rate": 4.183621134562916e-06, "loss": 0.4982, "step": 8763 }, { "epoch": 0.57, "grad_norm": 1.2273321151733398, "learning_rate": 4.18258986407908e-06, "loss": 0.5471, "step": 8764 }, { "epoch": 0.57, "grad_norm": 1.1786822080612183, "learning_rate": 4.181558629322133e-06, "loss": 0.5291, "step": 8765 }, { "epoch": 0.57, "grad_norm": 1.3649486303329468, "learning_rate": 4.180527430337145e-06, "loss": 0.5455, "step": 8766 }, { "epoch": 0.57, "grad_norm": 1.2702157497406006, "learning_rate": 4.179496267169189e-06, "loss": 0.5651, "step": 8767 }, { "epoch": 0.57, "grad_norm": 1.1522846221923828, "learning_rate": 4.178465139863334e-06, "loss": 0.5285, "step": 8768 }, { "epoch": 0.57, "grad_norm": 1.1881827116012573, "learning_rate": 4.177434048464648e-06, "loss": 0.5104, "step": 8769 }, { "epoch": 0.57, "grad_norm": 1.21103835105896, "learning_rate": 4.176402993018198e-06, "loss": 0.4796, "step": 8770 }, { "epoch": 0.57, "grad_norm": 1.1658655405044556, "learning_rate": 4.17537197356905e-06, "loss": 0.5233, "step": 8771 }, { "epoch": 0.57, "grad_norm": 1.1624938249588013, "learning_rate": 4.174340990162264e-06, "loss": 0.539, "step": 8772 }, { "epoch": 0.57, "grad_norm": 1.1500276327133179, "learning_rate": 4.173310042842902e-06, "loss": 0.5684, "step": 8773 }, { "epoch": 0.57, "grad_norm": 1.1657384634017944, "learning_rate": 4.172279131656027e-06, "loss": 0.5009, "step": 8774 }, { "epoch": 0.57, "grad_norm": 1.1003361940383911, "learning_rate": 4.171248256646693e-06, "loss": 0.5399, "step": 8775 }, { "epoch": 0.57, "grad_norm": 1.1992725133895874, "learning_rate": 4.170217417859963e-06, "loss": 0.5025, "step": 8776 }, { "epoch": 0.57, "grad_norm": 1.0640449523925781, "learning_rate": 4.169186615340887e-06, "loss": 0.5286, "step": 8777 }, { "epoch": 0.57, "grad_norm": 1.107447862625122, "learning_rate": 4.16815584913452e-06, "loss": 0.5136, "step": 8778 }, { "epoch": 0.57, "grad_norm": 1.0754492282867432, "learning_rate": 4.167125119285915e-06, "loss": 0.5427, "step": 8779 }, { "epoch": 0.57, "grad_norm": 1.1585774421691895, "learning_rate": 4.166094425840122e-06, "loss": 0.5341, "step": 8780 }, { "epoch": 0.57, "grad_norm": 1.2016791105270386, "learning_rate": 4.1650637688421905e-06, "loss": 0.5185, "step": 8781 }, { "epoch": 0.57, "grad_norm": 1.1566308736801147, "learning_rate": 4.164033148337169e-06, "loss": 0.5512, "step": 8782 }, { "epoch": 0.57, "grad_norm": 1.1682058572769165, "learning_rate": 4.163002564370099e-06, "loss": 0.5525, "step": 8783 }, { "epoch": 0.57, "grad_norm": 1.2405891418457031, "learning_rate": 4.161972016986029e-06, "loss": 0.5313, "step": 8784 }, { "epoch": 0.57, "grad_norm": 1.0277996063232422, "learning_rate": 4.160941506229999e-06, "loss": 0.5281, "step": 8785 }, { "epoch": 0.57, "grad_norm": 1.2703934907913208, "learning_rate": 4.1599110321470525e-06, "loss": 0.5144, "step": 8786 }, { "epoch": 0.57, "grad_norm": 1.0810414552688599, "learning_rate": 4.158880594782226e-06, "loss": 0.5556, "step": 8787 }, { "epoch": 0.57, "grad_norm": 1.1559537649154663, "learning_rate": 4.15785019418056e-06, "loss": 0.5367, "step": 8788 }, { "epoch": 0.57, "grad_norm": 1.1474930047988892, "learning_rate": 4.156819830387088e-06, "loss": 0.5266, "step": 8789 }, { "epoch": 0.57, "grad_norm": 1.1715202331542969, "learning_rate": 4.155789503446845e-06, "loss": 0.5317, "step": 8790 }, { "epoch": 0.57, "grad_norm": 1.2197577953338623, "learning_rate": 4.154759213404867e-06, "loss": 0.5548, "step": 8791 }, { "epoch": 0.57, "grad_norm": 1.1950385570526123, "learning_rate": 4.153728960306182e-06, "loss": 0.52, "step": 8792 }, { "epoch": 0.57, "grad_norm": 1.098013997077942, "learning_rate": 4.152698744195819e-06, "loss": 0.5429, "step": 8793 }, { "epoch": 0.57, "grad_norm": 1.213615894317627, "learning_rate": 4.1516685651188106e-06, "loss": 0.5276, "step": 8794 }, { "epoch": 0.57, "grad_norm": 1.304598331451416, "learning_rate": 4.15063842312018e-06, "loss": 0.5763, "step": 8795 }, { "epoch": 0.57, "grad_norm": 1.0434235334396362, "learning_rate": 4.149608318244952e-06, "loss": 0.4707, "step": 8796 }, { "epoch": 0.57, "grad_norm": 1.2566291093826294, "learning_rate": 4.148578250538149e-06, "loss": 0.5846, "step": 8797 }, { "epoch": 0.57, "grad_norm": 1.2335994243621826, "learning_rate": 4.147548220044795e-06, "loss": 0.5251, "step": 8798 }, { "epoch": 0.57, "grad_norm": 1.194514513015747, "learning_rate": 4.146518226809908e-06, "loss": 0.5705, "step": 8799 }, { "epoch": 0.57, "grad_norm": 1.1696240901947021, "learning_rate": 4.145488270878511e-06, "loss": 0.51, "step": 8800 }, { "epoch": 0.57, "grad_norm": 1.162543535232544, "learning_rate": 4.144458352295614e-06, "loss": 0.4997, "step": 8801 }, { "epoch": 0.57, "grad_norm": 1.0763814449310303, "learning_rate": 4.143428471106235e-06, "loss": 0.5623, "step": 8802 }, { "epoch": 0.57, "grad_norm": 1.113853096961975, "learning_rate": 4.142398627355388e-06, "loss": 0.5386, "step": 8803 }, { "epoch": 0.57, "grad_norm": 1.1592601537704468, "learning_rate": 4.141368821088084e-06, "loss": 0.4769, "step": 8804 }, { "epoch": 0.57, "grad_norm": 1.1791722774505615, "learning_rate": 4.140339052349333e-06, "loss": 0.4892, "step": 8805 }, { "epoch": 0.57, "grad_norm": 1.231791377067566, "learning_rate": 4.139309321184146e-06, "loss": 0.5755, "step": 8806 }, { "epoch": 0.57, "grad_norm": 1.2748240232467651, "learning_rate": 4.1382796276375275e-06, "loss": 0.5465, "step": 8807 }, { "epoch": 0.57, "grad_norm": 1.1196190118789673, "learning_rate": 4.137249971754482e-06, "loss": 0.5307, "step": 8808 }, { "epoch": 0.57, "grad_norm": 1.1247949600219727, "learning_rate": 4.136220353580015e-06, "loss": 0.5116, "step": 8809 }, { "epoch": 0.57, "grad_norm": 1.2583565711975098, "learning_rate": 4.135190773159127e-06, "loss": 0.5607, "step": 8810 }, { "epoch": 0.57, "grad_norm": 1.1869621276855469, "learning_rate": 4.134161230536819e-06, "loss": 0.5543, "step": 8811 }, { "epoch": 0.57, "grad_norm": 1.1902464628219604, "learning_rate": 4.133131725758093e-06, "loss": 0.5004, "step": 8812 }, { "epoch": 0.57, "grad_norm": 1.2315162420272827, "learning_rate": 4.1321022588679396e-06, "loss": 0.5097, "step": 8813 }, { "epoch": 0.57, "grad_norm": 1.1356521844863892, "learning_rate": 4.131072829911358e-06, "loss": 0.572, "step": 8814 }, { "epoch": 0.57, "grad_norm": 1.2103887796401978, "learning_rate": 4.13004343893334e-06, "loss": 0.5194, "step": 8815 }, { "epoch": 0.57, "grad_norm": 1.1788270473480225, "learning_rate": 4.1290140859788795e-06, "loss": 0.509, "step": 8816 }, { "epoch": 0.57, "grad_norm": 1.6880561113357544, "learning_rate": 4.127984771092968e-06, "loss": 0.5541, "step": 8817 }, { "epoch": 0.57, "grad_norm": 1.1138341426849365, "learning_rate": 4.126955494320591e-06, "loss": 0.5056, "step": 8818 }, { "epoch": 0.57, "grad_norm": 1.2503631114959717, "learning_rate": 4.125926255706736e-06, "loss": 0.5422, "step": 8819 }, { "epoch": 0.57, "grad_norm": 1.275237798690796, "learning_rate": 4.12489705529639e-06, "loss": 0.546, "step": 8820 }, { "epoch": 0.57, "grad_norm": 1.2554949522018433, "learning_rate": 4.123867893134535e-06, "loss": 0.5095, "step": 8821 }, { "epoch": 0.57, "grad_norm": 1.136305332183838, "learning_rate": 4.122838769266154e-06, "loss": 0.4912, "step": 8822 }, { "epoch": 0.57, "grad_norm": 1.2130385637283325, "learning_rate": 4.12180968373623e-06, "loss": 0.5099, "step": 8823 }, { "epoch": 0.57, "grad_norm": 1.3008774518966675, "learning_rate": 4.120780636589737e-06, "loss": 0.6011, "step": 8824 }, { "epoch": 0.57, "grad_norm": 1.1468168497085571, "learning_rate": 4.1197516278716535e-06, "loss": 0.5714, "step": 8825 }, { "epoch": 0.57, "grad_norm": 1.17313551902771, "learning_rate": 4.118722657626955e-06, "loss": 0.5132, "step": 8826 }, { "epoch": 0.57, "grad_norm": 1.1508355140686035, "learning_rate": 4.117693725900616e-06, "loss": 0.5291, "step": 8827 }, { "epoch": 0.57, "grad_norm": 1.195776104927063, "learning_rate": 4.116664832737607e-06, "loss": 0.5473, "step": 8828 }, { "epoch": 0.57, "grad_norm": 1.2760947942733765, "learning_rate": 4.115635978182902e-06, "loss": 0.4849, "step": 8829 }, { "epoch": 0.57, "grad_norm": 1.2555842399597168, "learning_rate": 4.114607162281465e-06, "loss": 0.5625, "step": 8830 }, { "epoch": 0.57, "grad_norm": 1.2172781229019165, "learning_rate": 4.113578385078264e-06, "loss": 0.4925, "step": 8831 }, { "epoch": 0.57, "grad_norm": 1.1932637691497803, "learning_rate": 4.112549646618264e-06, "loss": 0.5304, "step": 8832 }, { "epoch": 0.57, "grad_norm": 1.1829622983932495, "learning_rate": 4.111520946946431e-06, "loss": 0.5135, "step": 8833 }, { "epoch": 0.57, "grad_norm": 1.2020840644836426, "learning_rate": 4.110492286107725e-06, "loss": 0.5654, "step": 8834 }, { "epoch": 0.57, "grad_norm": 1.125261902809143, "learning_rate": 4.109463664147106e-06, "loss": 0.5038, "step": 8835 }, { "epoch": 0.57, "grad_norm": 1.1474716663360596, "learning_rate": 4.1084350811095315e-06, "loss": 0.4846, "step": 8836 }, { "epoch": 0.57, "grad_norm": 1.2795913219451904, "learning_rate": 4.10740653703996e-06, "loss": 0.5679, "step": 8837 }, { "epoch": 0.57, "grad_norm": 1.247016429901123, "learning_rate": 4.106378031983347e-06, "loss": 0.5336, "step": 8838 }, { "epoch": 0.57, "grad_norm": 1.119641661643982, "learning_rate": 4.105349565984643e-06, "loss": 0.5274, "step": 8839 }, { "epoch": 0.57, "grad_norm": 1.208724021911621, "learning_rate": 4.1043211390888025e-06, "loss": 0.5382, "step": 8840 }, { "epoch": 0.57, "grad_norm": 1.1715590953826904, "learning_rate": 4.103292751340774e-06, "loss": 0.5197, "step": 8841 }, { "epoch": 0.57, "grad_norm": 1.2437095642089844, "learning_rate": 4.102264402785506e-06, "loss": 0.5274, "step": 8842 }, { "epoch": 0.57, "grad_norm": 1.2524508237838745, "learning_rate": 4.101236093467944e-06, "loss": 0.5113, "step": 8843 }, { "epoch": 0.57, "grad_norm": 1.2438061237335205, "learning_rate": 4.100207823433033e-06, "loss": 0.5443, "step": 8844 }, { "epoch": 0.57, "grad_norm": 1.2077854871749878, "learning_rate": 4.099179592725717e-06, "loss": 0.5461, "step": 8845 }, { "epoch": 0.57, "grad_norm": 1.1817519664764404, "learning_rate": 4.098151401390936e-06, "loss": 0.5804, "step": 8846 }, { "epoch": 0.57, "grad_norm": 1.187147855758667, "learning_rate": 4.097123249473634e-06, "loss": 0.5802, "step": 8847 }, { "epoch": 0.57, "grad_norm": 1.2940771579742432, "learning_rate": 4.096095137018742e-06, "loss": 0.5434, "step": 8848 }, { "epoch": 0.57, "grad_norm": 1.1996159553527832, "learning_rate": 4.095067064071201e-06, "loss": 0.5275, "step": 8849 }, { "epoch": 0.57, "grad_norm": 1.348047137260437, "learning_rate": 4.094039030675942e-06, "loss": 0.5692, "step": 8850 }, { "epoch": 0.57, "grad_norm": 1.2320064306259155, "learning_rate": 4.0930110368779e-06, "loss": 0.5235, "step": 8851 }, { "epoch": 0.57, "grad_norm": 1.304330825805664, "learning_rate": 4.091983082722006e-06, "loss": 0.5494, "step": 8852 }, { "epoch": 0.57, "grad_norm": 1.058408260345459, "learning_rate": 4.090955168253191e-06, "loss": 0.4987, "step": 8853 }, { "epoch": 0.57, "grad_norm": 1.2045875787734985, "learning_rate": 4.089927293516378e-06, "loss": 0.5166, "step": 8854 }, { "epoch": 0.57, "grad_norm": 1.1581006050109863, "learning_rate": 4.088899458556494e-06, "loss": 0.5625, "step": 8855 }, { "epoch": 0.57, "grad_norm": 1.1022354364395142, "learning_rate": 4.087871663418465e-06, "loss": 0.5131, "step": 8856 }, { "epoch": 0.57, "grad_norm": 1.1273679733276367, "learning_rate": 4.086843908147212e-06, "loss": 0.5076, "step": 8857 }, { "epoch": 0.57, "grad_norm": 1.1330878734588623, "learning_rate": 4.085816192787659e-06, "loss": 0.5233, "step": 8858 }, { "epoch": 0.57, "grad_norm": 1.247796893119812, "learning_rate": 4.084788517384718e-06, "loss": 0.5778, "step": 8859 }, { "epoch": 0.57, "grad_norm": 1.1268037557601929, "learning_rate": 4.08376088198331e-06, "loss": 0.4864, "step": 8860 }, { "epoch": 0.57, "grad_norm": 1.2398627996444702, "learning_rate": 4.08273328662835e-06, "loss": 0.5892, "step": 8861 }, { "epoch": 0.57, "grad_norm": 1.0698546171188354, "learning_rate": 4.081705731364751e-06, "loss": 0.5703, "step": 8862 }, { "epoch": 0.57, "grad_norm": 1.226583480834961, "learning_rate": 4.080678216237426e-06, "loss": 0.5185, "step": 8863 }, { "epoch": 0.57, "grad_norm": 1.3451616764068604, "learning_rate": 4.079650741291287e-06, "loss": 0.546, "step": 8864 }, { "epoch": 0.57, "grad_norm": 1.2170113325119019, "learning_rate": 4.078623306571236e-06, "loss": 0.5292, "step": 8865 }, { "epoch": 0.57, "grad_norm": 1.2903845310211182, "learning_rate": 4.077595912122184e-06, "loss": 0.5523, "step": 8866 }, { "epoch": 0.57, "grad_norm": 1.1362875699996948, "learning_rate": 4.076568557989034e-06, "loss": 0.5251, "step": 8867 }, { "epoch": 0.57, "grad_norm": 1.0893131494522095, "learning_rate": 4.07554124421669e-06, "loss": 0.5308, "step": 8868 }, { "epoch": 0.57, "grad_norm": 1.2602453231811523, "learning_rate": 4.074513970850054e-06, "loss": 0.5077, "step": 8869 }, { "epoch": 0.57, "grad_norm": 1.144572377204895, "learning_rate": 4.073486737934026e-06, "loss": 0.5198, "step": 8870 }, { "epoch": 0.57, "grad_norm": 1.3197511434555054, "learning_rate": 4.0724595455135e-06, "loss": 0.567, "step": 8871 }, { "epoch": 0.57, "grad_norm": 1.1921693086624146, "learning_rate": 4.071432393633375e-06, "loss": 0.5685, "step": 8872 }, { "epoch": 0.57, "grad_norm": 1.2508305311203003, "learning_rate": 4.070405282338543e-06, "loss": 0.5501, "step": 8873 }, { "epoch": 0.57, "grad_norm": 1.2004255056381226, "learning_rate": 4.069378211673899e-06, "loss": 0.4955, "step": 8874 }, { "epoch": 0.57, "grad_norm": 1.3416032791137695, "learning_rate": 4.068351181684333e-06, "loss": 0.524, "step": 8875 }, { "epoch": 0.57, "grad_norm": 1.2338480949401855, "learning_rate": 4.067324192414732e-06, "loss": 0.554, "step": 8876 }, { "epoch": 0.57, "grad_norm": 1.0727442502975464, "learning_rate": 4.066297243909985e-06, "loss": 0.488, "step": 8877 }, { "epoch": 0.57, "grad_norm": 1.0784462690353394, "learning_rate": 4.065270336214976e-06, "loss": 0.5302, "step": 8878 }, { "epoch": 0.57, "grad_norm": 1.1655032634735107, "learning_rate": 4.064243469374588e-06, "loss": 0.5336, "step": 8879 }, { "epoch": 0.57, "grad_norm": 1.274442434310913, "learning_rate": 4.063216643433705e-06, "loss": 0.5818, "step": 8880 }, { "epoch": 0.57, "grad_norm": 1.183596134185791, "learning_rate": 4.0621898584372055e-06, "loss": 0.515, "step": 8881 }, { "epoch": 0.57, "grad_norm": 1.210790991783142, "learning_rate": 4.061163114429968e-06, "loss": 0.5225, "step": 8882 }, { "epoch": 0.57, "grad_norm": 1.3985384702682495, "learning_rate": 4.060136411456868e-06, "loss": 0.487, "step": 8883 }, { "epoch": 0.57, "grad_norm": 1.109775185585022, "learning_rate": 4.05910974956278e-06, "loss": 0.4729, "step": 8884 }, { "epoch": 0.57, "grad_norm": 1.2546095848083496, "learning_rate": 4.058083128792579e-06, "loss": 0.5059, "step": 8885 }, { "epoch": 0.57, "grad_norm": 1.2190687656402588, "learning_rate": 4.0570565491911325e-06, "loss": 0.505, "step": 8886 }, { "epoch": 0.57, "grad_norm": 1.1882925033569336, "learning_rate": 4.056030010803312e-06, "loss": 0.5497, "step": 8887 }, { "epoch": 0.57, "grad_norm": 1.1430925130844116, "learning_rate": 4.055003513673985e-06, "loss": 0.4908, "step": 8888 }, { "epoch": 0.57, "grad_norm": 1.2600603103637695, "learning_rate": 4.053977057848016e-06, "loss": 0.5387, "step": 8889 }, { "epoch": 0.57, "grad_norm": 1.1730035543441772, "learning_rate": 4.052950643370268e-06, "loss": 0.498, "step": 8890 }, { "epoch": 0.57, "grad_norm": 1.0533698797225952, "learning_rate": 4.0519242702856045e-06, "loss": 0.4674, "step": 8891 }, { "epoch": 0.57, "grad_norm": 1.2176743745803833, "learning_rate": 4.050897938638884e-06, "loss": 0.522, "step": 8892 }, { "epoch": 0.57, "grad_norm": 1.2281616926193237, "learning_rate": 4.049871648474967e-06, "loss": 0.5616, "step": 8893 }, { "epoch": 0.57, "grad_norm": 1.1362420320510864, "learning_rate": 4.048845399838711e-06, "loss": 0.5245, "step": 8894 }, { "epoch": 0.57, "grad_norm": 1.1176193952560425, "learning_rate": 4.047819192774966e-06, "loss": 0.5053, "step": 8895 }, { "epoch": 0.57, "grad_norm": 1.2823213338851929, "learning_rate": 4.046793027328588e-06, "loss": 0.5965, "step": 8896 }, { "epoch": 0.57, "grad_norm": 1.0863139629364014, "learning_rate": 4.045766903544427e-06, "loss": 0.5213, "step": 8897 }, { "epoch": 0.57, "grad_norm": 1.2399436235427856, "learning_rate": 4.044740821467332e-06, "loss": 0.518, "step": 8898 }, { "epoch": 0.57, "grad_norm": 1.1945756673812866, "learning_rate": 4.043714781142154e-06, "loss": 0.5523, "step": 8899 }, { "epoch": 0.57, "grad_norm": 1.1775668859481812, "learning_rate": 4.042688782613734e-06, "loss": 0.5197, "step": 8900 }, { "epoch": 0.57, "grad_norm": 1.1936719417572021, "learning_rate": 4.0416628259269165e-06, "loss": 0.534, "step": 8901 }, { "epoch": 0.57, "grad_norm": 1.170468807220459, "learning_rate": 4.0406369111265455e-06, "loss": 0.5337, "step": 8902 }, { "epoch": 0.57, "grad_norm": 1.2480442523956299, "learning_rate": 4.039611038257459e-06, "loss": 0.5347, "step": 8903 }, { "epoch": 0.57, "grad_norm": 1.0982128381729126, "learning_rate": 4.038585207364496e-06, "loss": 0.4993, "step": 8904 }, { "epoch": 0.57, "grad_norm": 1.089728832244873, "learning_rate": 4.0375594184924955e-06, "loss": 0.5342, "step": 8905 }, { "epoch": 0.57, "grad_norm": 1.1561685800552368, "learning_rate": 4.036533671686288e-06, "loss": 0.5448, "step": 8906 }, { "epoch": 0.57, "grad_norm": 1.1213041543960571, "learning_rate": 4.035507966990707e-06, "loss": 0.5169, "step": 8907 }, { "epoch": 0.57, "grad_norm": 1.180601716041565, "learning_rate": 4.034482304450585e-06, "loss": 0.4818, "step": 8908 }, { "epoch": 0.58, "grad_norm": 1.147265076637268, "learning_rate": 4.03345668411075e-06, "loss": 0.5488, "step": 8909 }, { "epoch": 0.58, "grad_norm": 1.077701449394226, "learning_rate": 4.032431106016031e-06, "loss": 0.4805, "step": 8910 }, { "epoch": 0.58, "grad_norm": 1.2402548789978027, "learning_rate": 4.031405570211252e-06, "loss": 0.5381, "step": 8911 }, { "epoch": 0.58, "grad_norm": 1.0955146551132202, "learning_rate": 4.030380076741236e-06, "loss": 0.5224, "step": 8912 }, { "epoch": 0.58, "grad_norm": 1.3217910528182983, "learning_rate": 4.029354625650804e-06, "loss": 0.5803, "step": 8913 }, { "epoch": 0.58, "grad_norm": 1.277965784072876, "learning_rate": 4.0283292169847784e-06, "loss": 0.4913, "step": 8914 }, { "epoch": 0.58, "grad_norm": 1.1250941753387451, "learning_rate": 4.027303850787975e-06, "loss": 0.5428, "step": 8915 }, { "epoch": 0.58, "grad_norm": 1.372191071510315, "learning_rate": 4.02627852710521e-06, "loss": 0.546, "step": 8916 }, { "epoch": 0.58, "grad_norm": 1.1555589437484741, "learning_rate": 4.025253245981301e-06, "loss": 0.5305, "step": 8917 }, { "epoch": 0.58, "grad_norm": 1.1080703735351562, "learning_rate": 4.024228007461056e-06, "loss": 0.5583, "step": 8918 }, { "epoch": 0.58, "grad_norm": 1.1816495656967163, "learning_rate": 4.023202811589288e-06, "loss": 0.5259, "step": 8919 }, { "epoch": 0.58, "grad_norm": 1.186283826828003, "learning_rate": 4.022177658410804e-06, "loss": 0.5472, "step": 8920 }, { "epoch": 0.58, "grad_norm": 1.2211865186691284, "learning_rate": 4.021152547970411e-06, "loss": 0.5183, "step": 8921 }, { "epoch": 0.58, "grad_norm": 3.0033233165740967, "learning_rate": 4.020127480312916e-06, "loss": 0.5275, "step": 8922 }, { "epoch": 0.58, "grad_norm": 1.133056402206421, "learning_rate": 4.019102455483122e-06, "loss": 0.5362, "step": 8923 }, { "epoch": 0.58, "grad_norm": 1.2681020498275757, "learning_rate": 4.0180774735258275e-06, "loss": 0.5229, "step": 8924 }, { "epoch": 0.58, "grad_norm": 1.1427799463272095, "learning_rate": 4.017052534485833e-06, "loss": 0.5766, "step": 8925 }, { "epoch": 0.58, "grad_norm": 1.181527018547058, "learning_rate": 4.016027638407935e-06, "loss": 0.5632, "step": 8926 }, { "epoch": 0.58, "grad_norm": 1.1332802772521973, "learning_rate": 4.015002785336933e-06, "loss": 0.4946, "step": 8927 }, { "epoch": 0.58, "grad_norm": 1.1602288484573364, "learning_rate": 4.013977975317616e-06, "loss": 0.5431, "step": 8928 }, { "epoch": 0.58, "grad_norm": 1.194914698600769, "learning_rate": 4.012953208394778e-06, "loss": 0.5162, "step": 8929 }, { "epoch": 0.58, "grad_norm": 1.1404380798339844, "learning_rate": 4.011928484613209e-06, "loss": 0.5175, "step": 8930 }, { "epoch": 0.58, "grad_norm": 1.2054225206375122, "learning_rate": 4.010903804017697e-06, "loss": 0.5795, "step": 8931 }, { "epoch": 0.58, "grad_norm": 1.1005775928497314, "learning_rate": 4.009879166653027e-06, "loss": 0.5155, "step": 8932 }, { "epoch": 0.58, "grad_norm": 1.1484569311141968, "learning_rate": 4.008854572563985e-06, "loss": 0.5599, "step": 8933 }, { "epoch": 0.58, "grad_norm": 1.2391985654830933, "learning_rate": 4.007830021795352e-06, "loss": 0.5459, "step": 8934 }, { "epoch": 0.58, "grad_norm": 1.1005562543869019, "learning_rate": 4.006805514391908e-06, "loss": 0.5132, "step": 8935 }, { "epoch": 0.58, "grad_norm": 1.1830090284347534, "learning_rate": 4.0057810503984345e-06, "loss": 0.5416, "step": 8936 }, { "epoch": 0.58, "grad_norm": 1.3817590475082397, "learning_rate": 4.004756629859705e-06, "loss": 0.5247, "step": 8937 }, { "epoch": 0.58, "grad_norm": 1.1990339756011963, "learning_rate": 4.0037322528204945e-06, "loss": 0.5387, "step": 8938 }, { "epoch": 0.58, "grad_norm": 1.1877795457839966, "learning_rate": 4.002707919325578e-06, "loss": 0.5706, "step": 8939 }, { "epoch": 0.58, "grad_norm": 1.1907227039337158, "learning_rate": 4.001683629419725e-06, "loss": 0.5491, "step": 8940 }, { "epoch": 0.58, "grad_norm": 1.2207845449447632, "learning_rate": 4.000659383147707e-06, "loss": 0.5124, "step": 8941 }, { "epoch": 0.58, "grad_norm": 1.2632527351379395, "learning_rate": 3.999635180554286e-06, "loss": 0.5531, "step": 8942 }, { "epoch": 0.58, "grad_norm": 1.2253473997116089, "learning_rate": 3.998611021684231e-06, "loss": 0.5783, "step": 8943 }, { "epoch": 0.58, "grad_norm": 1.0789357423782349, "learning_rate": 3.997586906582304e-06, "loss": 0.5145, "step": 8944 }, { "epoch": 0.58, "grad_norm": 1.1759412288665771, "learning_rate": 3.996562835293268e-06, "loss": 0.5221, "step": 8945 }, { "epoch": 0.58, "grad_norm": 1.220801830291748, "learning_rate": 3.995538807861883e-06, "loss": 0.5498, "step": 8946 }, { "epoch": 0.58, "grad_norm": 1.2178300619125366, "learning_rate": 3.9945148243329035e-06, "loss": 0.578, "step": 8947 }, { "epoch": 0.58, "grad_norm": 1.1122971773147583, "learning_rate": 3.9934908847510865e-06, "loss": 0.5097, "step": 8948 }, { "epoch": 0.58, "grad_norm": 1.1821340322494507, "learning_rate": 3.992466989161186e-06, "loss": 0.569, "step": 8949 }, { "epoch": 0.58, "grad_norm": 1.2460758686065674, "learning_rate": 3.991443137607954e-06, "loss": 0.5405, "step": 8950 }, { "epoch": 0.58, "grad_norm": 1.1372075080871582, "learning_rate": 3.990419330136141e-06, "loss": 0.5431, "step": 8951 }, { "epoch": 0.58, "grad_norm": 1.0929251909255981, "learning_rate": 3.989395566790496e-06, "loss": 0.5209, "step": 8952 }, { "epoch": 0.58, "grad_norm": 1.1476362943649292, "learning_rate": 3.988371847615761e-06, "loss": 0.5202, "step": 8953 }, { "epoch": 0.58, "grad_norm": 1.3168877363204956, "learning_rate": 3.987348172656682e-06, "loss": 0.5359, "step": 8954 }, { "epoch": 0.58, "grad_norm": 1.2392593622207642, "learning_rate": 3.986324541958003e-06, "loss": 0.5441, "step": 8955 }, { "epoch": 0.58, "grad_norm": 1.1874990463256836, "learning_rate": 3.985300955564462e-06, "loss": 0.5594, "step": 8956 }, { "epoch": 0.58, "grad_norm": 1.195036768913269, "learning_rate": 3.984277413520799e-06, "loss": 0.5546, "step": 8957 }, { "epoch": 0.58, "grad_norm": 1.224365234375, "learning_rate": 3.983253915871751e-06, "loss": 0.5848, "step": 8958 }, { "epoch": 0.58, "grad_norm": 1.1763074398040771, "learning_rate": 3.98223046266205e-06, "loss": 0.5114, "step": 8959 }, { "epoch": 0.58, "grad_norm": 1.2595409154891968, "learning_rate": 3.981207053936429e-06, "loss": 0.5358, "step": 8960 }, { "epoch": 0.58, "grad_norm": 1.1193726062774658, "learning_rate": 3.980183689739619e-06, "loss": 0.5188, "step": 8961 }, { "epoch": 0.58, "grad_norm": 1.0939065217971802, "learning_rate": 3.979160370116349e-06, "loss": 0.4971, "step": 8962 }, { "epoch": 0.58, "grad_norm": 1.1805033683776855, "learning_rate": 3.9781370951113455e-06, "loss": 0.5364, "step": 8963 }, { "epoch": 0.58, "grad_norm": 1.2369359731674194, "learning_rate": 3.9771138647693345e-06, "loss": 0.5286, "step": 8964 }, { "epoch": 0.58, "grad_norm": 1.172436237335205, "learning_rate": 3.976090679135036e-06, "loss": 0.4822, "step": 8965 }, { "epoch": 0.58, "grad_norm": 1.1951496601104736, "learning_rate": 3.975067538253172e-06, "loss": 0.5495, "step": 8966 }, { "epoch": 0.58, "grad_norm": 1.158929467201233, "learning_rate": 3.974044442168462e-06, "loss": 0.5547, "step": 8967 }, { "epoch": 0.58, "grad_norm": 1.0765372514724731, "learning_rate": 3.973021390925621e-06, "loss": 0.4699, "step": 8968 }, { "epoch": 0.58, "grad_norm": 1.1658796072006226, "learning_rate": 3.971998384569368e-06, "loss": 0.5298, "step": 8969 }, { "epoch": 0.58, "grad_norm": 1.2078559398651123, "learning_rate": 3.970975423144413e-06, "loss": 0.5602, "step": 8970 }, { "epoch": 0.58, "grad_norm": 1.1984481811523438, "learning_rate": 3.969952506695466e-06, "loss": 0.5014, "step": 8971 }, { "epoch": 0.58, "grad_norm": 1.1141483783721924, "learning_rate": 3.9689296352672375e-06, "loss": 0.5088, "step": 8972 }, { "epoch": 0.58, "grad_norm": 1.1248836517333984, "learning_rate": 3.9679068089044345e-06, "loss": 0.6288, "step": 8973 }, { "epoch": 0.58, "grad_norm": 1.2291206121444702, "learning_rate": 3.966884027651763e-06, "loss": 0.5395, "step": 8974 }, { "epoch": 0.58, "grad_norm": 1.1410722732543945, "learning_rate": 3.965861291553924e-06, "loss": 0.5319, "step": 8975 }, { "epoch": 0.58, "grad_norm": 1.1293615102767944, "learning_rate": 3.96483860065562e-06, "loss": 0.5117, "step": 8976 }, { "epoch": 0.58, "grad_norm": 1.186735987663269, "learning_rate": 3.9638159550015505e-06, "loss": 0.5194, "step": 8977 }, { "epoch": 0.58, "grad_norm": 1.2578575611114502, "learning_rate": 3.962793354636413e-06, "loss": 0.5223, "step": 8978 }, { "epoch": 0.58, "grad_norm": 1.1677706241607666, "learning_rate": 3.9617707996049005e-06, "loss": 0.4811, "step": 8979 }, { "epoch": 0.58, "grad_norm": 1.1352006196975708, "learning_rate": 3.960748289951708e-06, "loss": 0.5107, "step": 8980 }, { "epoch": 0.58, "grad_norm": 1.1503808498382568, "learning_rate": 3.959725825721526e-06, "loss": 0.5239, "step": 8981 }, { "epoch": 0.58, "grad_norm": 1.1635311841964722, "learning_rate": 3.958703406959045e-06, "loss": 0.4798, "step": 8982 }, { "epoch": 0.58, "grad_norm": 1.2114121913909912, "learning_rate": 3.9576810337089525e-06, "loss": 0.5482, "step": 8983 }, { "epoch": 0.58, "grad_norm": 1.170684814453125, "learning_rate": 3.9566587060159315e-06, "loss": 0.5305, "step": 8984 }, { "epoch": 0.58, "grad_norm": 1.2523263692855835, "learning_rate": 3.955636423924666e-06, "loss": 0.5175, "step": 8985 }, { "epoch": 0.58, "grad_norm": 1.093878149986267, "learning_rate": 3.954614187479838e-06, "loss": 0.5521, "step": 8986 }, { "epoch": 0.58, "grad_norm": 1.1468454599380493, "learning_rate": 3.953591996726129e-06, "loss": 0.5542, "step": 8987 }, { "epoch": 0.58, "grad_norm": 1.2259330749511719, "learning_rate": 3.952569851708212e-06, "loss": 0.5271, "step": 8988 }, { "epoch": 0.58, "grad_norm": 1.1986972093582153, "learning_rate": 3.951547752470765e-06, "loss": 0.5185, "step": 8989 }, { "epoch": 0.58, "grad_norm": 1.2116825580596924, "learning_rate": 3.9505256990584595e-06, "loss": 0.5587, "step": 8990 }, { "epoch": 0.58, "grad_norm": 1.3065745830535889, "learning_rate": 3.949503691515969e-06, "loss": 0.5247, "step": 8991 }, { "epoch": 0.58, "grad_norm": 1.1442549228668213, "learning_rate": 3.948481729887961e-06, "loss": 0.5699, "step": 8992 }, { "epoch": 0.58, "grad_norm": 1.0429385900497437, "learning_rate": 3.947459814219107e-06, "loss": 0.5058, "step": 8993 }, { "epoch": 0.58, "grad_norm": 1.254270315170288, "learning_rate": 3.946437944554066e-06, "loss": 0.5295, "step": 8994 }, { "epoch": 0.58, "grad_norm": 1.1146037578582764, "learning_rate": 3.945416120937503e-06, "loss": 0.5088, "step": 8995 }, { "epoch": 0.58, "grad_norm": 1.0997732877731323, "learning_rate": 3.9443943434140824e-06, "loss": 0.5126, "step": 8996 }, { "epoch": 0.58, "grad_norm": 1.130861520767212, "learning_rate": 3.943372612028461e-06, "loss": 0.5091, "step": 8997 }, { "epoch": 0.58, "grad_norm": 1.4154542684555054, "learning_rate": 3.942350926825296e-06, "loss": 0.5204, "step": 8998 }, { "epoch": 0.58, "grad_norm": 1.19434654712677, "learning_rate": 3.941329287849246e-06, "loss": 0.4971, "step": 8999 }, { "epoch": 0.58, "grad_norm": 1.2862122058868408, "learning_rate": 3.940307695144959e-06, "loss": 0.5718, "step": 9000 }, { "epoch": 0.58, "grad_norm": 1.1852655410766602, "learning_rate": 3.939286148757089e-06, "loss": 0.4949, "step": 9001 }, { "epoch": 0.58, "grad_norm": 1.1923675537109375, "learning_rate": 3.938264648730284e-06, "loss": 0.5118, "step": 9002 }, { "epoch": 0.58, "grad_norm": 1.1064908504486084, "learning_rate": 3.937243195109193e-06, "loss": 0.5198, "step": 9003 }, { "epoch": 0.58, "grad_norm": 1.122179627418518, "learning_rate": 3.936221787938459e-06, "loss": 0.5445, "step": 9004 }, { "epoch": 0.58, "grad_norm": 1.070783019065857, "learning_rate": 3.9352004272627284e-06, "loss": 0.5328, "step": 9005 }, { "epoch": 0.58, "grad_norm": 1.1610137224197388, "learning_rate": 3.9341791131266385e-06, "loss": 0.5561, "step": 9006 }, { "epoch": 0.58, "grad_norm": 1.2159398794174194, "learning_rate": 3.933157845574829e-06, "loss": 0.5572, "step": 9007 }, { "epoch": 0.58, "grad_norm": 1.1700491905212402, "learning_rate": 3.932136624651937e-06, "loss": 0.5138, "step": 9008 }, { "epoch": 0.58, "grad_norm": 1.1216623783111572, "learning_rate": 3.931115450402599e-06, "loss": 0.5319, "step": 9009 }, { "epoch": 0.58, "grad_norm": 1.2900924682617188, "learning_rate": 3.930094322871446e-06, "loss": 0.5519, "step": 9010 }, { "epoch": 0.58, "grad_norm": 1.257706642150879, "learning_rate": 3.929073242103114e-06, "loss": 0.5333, "step": 9011 }, { "epoch": 0.58, "grad_norm": 1.2414218187332153, "learning_rate": 3.928052208142224e-06, "loss": 0.5023, "step": 9012 }, { "epoch": 0.58, "grad_norm": 1.0998852252960205, "learning_rate": 3.927031221033406e-06, "loss": 0.5316, "step": 9013 }, { "epoch": 0.58, "grad_norm": 1.1393121480941772, "learning_rate": 3.926010280821285e-06, "loss": 0.5109, "step": 9014 }, { "epoch": 0.58, "grad_norm": 1.1948386430740356, "learning_rate": 3.9249893875504844e-06, "loss": 0.5312, "step": 9015 }, { "epoch": 0.58, "grad_norm": 1.2196067571640015, "learning_rate": 3.923968541265625e-06, "loss": 0.5262, "step": 9016 }, { "epoch": 0.58, "grad_norm": 1.115932583808899, "learning_rate": 3.922947742011323e-06, "loss": 0.5448, "step": 9017 }, { "epoch": 0.58, "grad_norm": 1.4028689861297607, "learning_rate": 3.921926989832197e-06, "loss": 0.5306, "step": 9018 }, { "epoch": 0.58, "grad_norm": 1.1790388822555542, "learning_rate": 3.92090628477286e-06, "loss": 0.4963, "step": 9019 }, { "epoch": 0.58, "grad_norm": 1.1823581457138062, "learning_rate": 3.9198856268779265e-06, "loss": 0.5183, "step": 9020 }, { "epoch": 0.58, "grad_norm": 1.2443456649780273, "learning_rate": 3.918865016192005e-06, "loss": 0.5341, "step": 9021 }, { "epoch": 0.58, "grad_norm": 1.2630665302276611, "learning_rate": 3.917844452759704e-06, "loss": 0.5219, "step": 9022 }, { "epoch": 0.58, "grad_norm": 1.1437642574310303, "learning_rate": 3.91682393662563e-06, "loss": 0.5836, "step": 9023 }, { "epoch": 0.58, "grad_norm": 1.1677253246307373, "learning_rate": 3.915803467834387e-06, "loss": 0.4712, "step": 9024 }, { "epoch": 0.58, "grad_norm": 1.1738637685775757, "learning_rate": 3.914783046430579e-06, "loss": 0.5197, "step": 9025 }, { "epoch": 0.58, "grad_norm": 1.2375725507736206, "learning_rate": 3.913762672458802e-06, "loss": 0.5726, "step": 9026 }, { "epoch": 0.58, "grad_norm": 1.1332364082336426, "learning_rate": 3.912742345963656e-06, "loss": 0.5565, "step": 9027 }, { "epoch": 0.58, "grad_norm": 1.200108528137207, "learning_rate": 3.911722066989738e-06, "loss": 0.523, "step": 9028 }, { "epoch": 0.58, "grad_norm": 1.2328500747680664, "learning_rate": 3.91070183558164e-06, "loss": 0.5202, "step": 9029 }, { "epoch": 0.58, "grad_norm": 1.1427946090698242, "learning_rate": 3.909681651783956e-06, "loss": 0.5121, "step": 9030 }, { "epoch": 0.58, "grad_norm": 1.137742280960083, "learning_rate": 3.908661515641271e-06, "loss": 0.5214, "step": 9031 }, { "epoch": 0.58, "grad_norm": 1.103413701057434, "learning_rate": 3.907641427198177e-06, "loss": 0.5264, "step": 9032 }, { "epoch": 0.58, "grad_norm": 1.1601052284240723, "learning_rate": 3.906621386499257e-06, "loss": 0.5876, "step": 9033 }, { "epoch": 0.58, "grad_norm": 1.183941125869751, "learning_rate": 3.905601393589098e-06, "loss": 0.5705, "step": 9034 }, { "epoch": 0.58, "grad_norm": 1.071471929550171, "learning_rate": 3.9045814485122765e-06, "loss": 0.4933, "step": 9035 }, { "epoch": 0.58, "grad_norm": 1.2360234260559082, "learning_rate": 3.903561551313373e-06, "loss": 0.5502, "step": 9036 }, { "epoch": 0.58, "grad_norm": 1.2387003898620605, "learning_rate": 3.9025417020369656e-06, "loss": 0.531, "step": 9037 }, { "epoch": 0.58, "grad_norm": 1.0935876369476318, "learning_rate": 3.901521900727629e-06, "loss": 0.4456, "step": 9038 }, { "epoch": 0.58, "grad_norm": 1.154579520225525, "learning_rate": 3.900502147429936e-06, "loss": 0.5488, "step": 9039 }, { "epoch": 0.58, "grad_norm": 1.0771472454071045, "learning_rate": 3.899482442188459e-06, "loss": 0.502, "step": 9040 }, { "epoch": 0.58, "grad_norm": 1.10692298412323, "learning_rate": 3.898462785047763e-06, "loss": 0.5471, "step": 9041 }, { "epoch": 0.58, "grad_norm": 1.1369543075561523, "learning_rate": 3.897443176052418e-06, "loss": 0.5153, "step": 9042 }, { "epoch": 0.58, "grad_norm": 1.1105430126190186, "learning_rate": 3.896423615246986e-06, "loss": 0.5563, "step": 9043 }, { "epoch": 0.58, "grad_norm": 1.2915047407150269, "learning_rate": 3.89540410267603e-06, "loss": 0.5334, "step": 9044 }, { "epoch": 0.58, "grad_norm": 1.0892924070358276, "learning_rate": 3.894384638384112e-06, "loss": 0.5387, "step": 9045 }, { "epoch": 0.58, "grad_norm": 1.1790127754211426, "learning_rate": 3.89336522241579e-06, "loss": 0.5416, "step": 9046 }, { "epoch": 0.58, "grad_norm": 1.1752101182937622, "learning_rate": 3.892345854815618e-06, "loss": 0.5173, "step": 9047 }, { "epoch": 0.58, "grad_norm": 1.2862415313720703, "learning_rate": 3.89132653562815e-06, "loss": 0.5717, "step": 9048 }, { "epoch": 0.58, "grad_norm": 1.128713607788086, "learning_rate": 3.890307264897939e-06, "loss": 0.485, "step": 9049 }, { "epoch": 0.58, "grad_norm": 1.1762501001358032, "learning_rate": 3.8892880426695344e-06, "loss": 0.5117, "step": 9050 }, { "epoch": 0.58, "grad_norm": 1.3853341341018677, "learning_rate": 3.8882688689874835e-06, "loss": 0.5763, "step": 9051 }, { "epoch": 0.58, "grad_norm": 1.2440634965896606, "learning_rate": 3.887249743896335e-06, "loss": 0.5347, "step": 9052 }, { "epoch": 0.58, "grad_norm": 1.1879769563674927, "learning_rate": 3.886230667440626e-06, "loss": 0.5767, "step": 9053 }, { "epoch": 0.58, "grad_norm": 1.1674580574035645, "learning_rate": 3.885211639664902e-06, "loss": 0.5406, "step": 9054 }, { "epoch": 0.58, "grad_norm": 1.1059390306472778, "learning_rate": 3.8841926606137e-06, "loss": 0.5216, "step": 9055 }, { "epoch": 0.58, "grad_norm": 1.225748896598816, "learning_rate": 3.883173730331559e-06, "loss": 0.5215, "step": 9056 }, { "epoch": 0.58, "grad_norm": 1.1616259813308716, "learning_rate": 3.882154848863013e-06, "loss": 0.5313, "step": 9057 }, { "epoch": 0.58, "grad_norm": 1.0739266872406006, "learning_rate": 3.881136016252596e-06, "loss": 0.4989, "step": 9058 }, { "epoch": 0.58, "grad_norm": 1.2375777959823608, "learning_rate": 3.880117232544835e-06, "loss": 0.5156, "step": 9059 }, { "epoch": 0.58, "grad_norm": 1.2496941089630127, "learning_rate": 3.879098497784259e-06, "loss": 0.4633, "step": 9060 }, { "epoch": 0.58, "grad_norm": 1.3052237033843994, "learning_rate": 3.878079812015398e-06, "loss": 0.5527, "step": 9061 }, { "epoch": 0.58, "grad_norm": 1.1807762384414673, "learning_rate": 3.877061175282773e-06, "loss": 0.5369, "step": 9062 }, { "epoch": 0.59, "grad_norm": 1.2065703868865967, "learning_rate": 3.876042587630907e-06, "loss": 0.5203, "step": 9063 }, { "epoch": 0.59, "grad_norm": 1.2046754360198975, "learning_rate": 3.87502404910432e-06, "loss": 0.5324, "step": 9064 }, { "epoch": 0.59, "grad_norm": 1.1030083894729614, "learning_rate": 3.874005559747529e-06, "loss": 0.531, "step": 9065 }, { "epoch": 0.59, "grad_norm": 1.132737159729004, "learning_rate": 3.87298711960505e-06, "loss": 0.5019, "step": 9066 }, { "epoch": 0.59, "grad_norm": 1.3290857076644897, "learning_rate": 3.8719687287213955e-06, "loss": 0.5721, "step": 9067 }, { "epoch": 0.59, "grad_norm": 1.2008005380630493, "learning_rate": 3.870950387141078e-06, "loss": 0.5188, "step": 9068 }, { "epoch": 0.59, "grad_norm": 1.1669069528579712, "learning_rate": 3.869932094908606e-06, "loss": 0.5343, "step": 9069 }, { "epoch": 0.59, "grad_norm": 1.2172125577926636, "learning_rate": 3.868913852068488e-06, "loss": 0.5341, "step": 9070 }, { "epoch": 0.59, "grad_norm": 1.2727030515670776, "learning_rate": 3.867895658665225e-06, "loss": 0.5015, "step": 9071 }, { "epoch": 0.59, "grad_norm": 1.1249873638153076, "learning_rate": 3.866877514743324e-06, "loss": 0.4882, "step": 9072 }, { "epoch": 0.59, "grad_norm": 1.3399666547775269, "learning_rate": 3.865859420347281e-06, "loss": 0.5337, "step": 9073 }, { "epoch": 0.59, "grad_norm": 1.2585740089416504, "learning_rate": 3.864841375521598e-06, "loss": 0.5347, "step": 9074 }, { "epoch": 0.59, "grad_norm": 1.22527015209198, "learning_rate": 3.86382338031077e-06, "loss": 0.5519, "step": 9075 }, { "epoch": 0.59, "grad_norm": 1.0702621936798096, "learning_rate": 3.862805434759291e-06, "loss": 0.501, "step": 9076 }, { "epoch": 0.59, "grad_norm": 1.2394667863845825, "learning_rate": 3.861787538911652e-06, "loss": 0.5227, "step": 9077 }, { "epoch": 0.59, "grad_norm": 1.0818551778793335, "learning_rate": 3.860769692812342e-06, "loss": 0.475, "step": 9078 }, { "epoch": 0.59, "grad_norm": 1.2908188104629517, "learning_rate": 3.85975189650585e-06, "loss": 0.4849, "step": 9079 }, { "epoch": 0.59, "grad_norm": 1.123705267906189, "learning_rate": 3.85873415003666e-06, "loss": 0.5053, "step": 9080 }, { "epoch": 0.59, "grad_norm": 1.1656827926635742, "learning_rate": 3.857716453449259e-06, "loss": 0.4897, "step": 9081 }, { "epoch": 0.59, "grad_norm": 1.1540112495422363, "learning_rate": 3.856698806788123e-06, "loss": 0.5614, "step": 9082 }, { "epoch": 0.59, "grad_norm": 1.1789358854293823, "learning_rate": 3.85568121009773e-06, "loss": 0.5802, "step": 9083 }, { "epoch": 0.59, "grad_norm": 1.1842437982559204, "learning_rate": 3.854663663422561e-06, "loss": 0.5433, "step": 9084 }, { "epoch": 0.59, "grad_norm": 1.2402838468551636, "learning_rate": 3.8536461668070875e-06, "loss": 0.5133, "step": 9085 }, { "epoch": 0.59, "grad_norm": 1.121996283531189, "learning_rate": 3.852628720295782e-06, "loss": 0.504, "step": 9086 }, { "epoch": 0.59, "grad_norm": 1.1940689086914062, "learning_rate": 3.851611323933118e-06, "loss": 0.5307, "step": 9087 }, { "epoch": 0.59, "grad_norm": 1.2353894710540771, "learning_rate": 3.8505939777635575e-06, "loss": 0.5213, "step": 9088 }, { "epoch": 0.59, "grad_norm": 1.1832953691482544, "learning_rate": 3.8495766818315695e-06, "loss": 0.5868, "step": 9089 }, { "epoch": 0.59, "grad_norm": 1.1872297525405884, "learning_rate": 3.848559436181615e-06, "loss": 0.4983, "step": 9090 }, { "epoch": 0.59, "grad_norm": 1.1498968601226807, "learning_rate": 3.847542240858158e-06, "loss": 0.529, "step": 9091 }, { "epoch": 0.59, "grad_norm": 1.2862883806228638, "learning_rate": 3.846525095905656e-06, "loss": 0.5594, "step": 9092 }, { "epoch": 0.59, "grad_norm": 1.2138839960098267, "learning_rate": 3.845508001368569e-06, "loss": 0.473, "step": 9093 }, { "epoch": 0.59, "grad_norm": 1.065991759300232, "learning_rate": 3.844490957291345e-06, "loss": 0.4978, "step": 9094 }, { "epoch": 0.59, "grad_norm": 1.2376176118850708, "learning_rate": 3.84347396371844e-06, "loss": 0.5456, "step": 9095 }, { "epoch": 0.59, "grad_norm": 1.1881674528121948, "learning_rate": 3.842457020694306e-06, "loss": 0.5337, "step": 9096 }, { "epoch": 0.59, "grad_norm": 1.2204489707946777, "learning_rate": 3.8414401282633875e-06, "loss": 0.5561, "step": 9097 }, { "epoch": 0.59, "grad_norm": 1.1755101680755615, "learning_rate": 3.840423286470133e-06, "loss": 0.5139, "step": 9098 }, { "epoch": 0.59, "grad_norm": 1.1758675575256348, "learning_rate": 3.839406495358986e-06, "loss": 0.5423, "step": 9099 }, { "epoch": 0.59, "grad_norm": 1.154664158821106, "learning_rate": 3.838389754974385e-06, "loss": 0.5235, "step": 9100 }, { "epoch": 0.59, "grad_norm": 1.156902551651001, "learning_rate": 3.837373065360771e-06, "loss": 0.5171, "step": 9101 }, { "epoch": 0.59, "grad_norm": 1.0974845886230469, "learning_rate": 3.836356426562579e-06, "loss": 0.4485, "step": 9102 }, { "epoch": 0.59, "grad_norm": 1.426481008529663, "learning_rate": 3.835339838624248e-06, "loss": 0.4724, "step": 9103 }, { "epoch": 0.59, "grad_norm": 1.0784964561462402, "learning_rate": 3.834323301590206e-06, "loss": 0.5502, "step": 9104 }, { "epoch": 0.59, "grad_norm": 1.2557228803634644, "learning_rate": 3.8333068155048884e-06, "loss": 0.557, "step": 9105 }, { "epoch": 0.59, "grad_norm": 1.2233422994613647, "learning_rate": 3.832290380412717e-06, "loss": 0.52, "step": 9106 }, { "epoch": 0.59, "grad_norm": 1.104117512702942, "learning_rate": 3.831273996358121e-06, "loss": 0.5165, "step": 9107 }, { "epoch": 0.59, "grad_norm": 1.1012232303619385, "learning_rate": 3.830257663385522e-06, "loss": 0.5157, "step": 9108 }, { "epoch": 0.59, "grad_norm": 1.2167553901672363, "learning_rate": 3.829241381539344e-06, "loss": 0.5359, "step": 9109 }, { "epoch": 0.59, "grad_norm": 1.247133493423462, "learning_rate": 3.828225150864003e-06, "loss": 0.5461, "step": 9110 }, { "epoch": 0.59, "grad_norm": 1.1317332983016968, "learning_rate": 3.8272089714039195e-06, "loss": 0.5335, "step": 9111 }, { "epoch": 0.59, "grad_norm": 1.128605842590332, "learning_rate": 3.826192843203505e-06, "loss": 0.5522, "step": 9112 }, { "epoch": 0.59, "grad_norm": 1.1665242910385132, "learning_rate": 3.825176766307172e-06, "loss": 0.5686, "step": 9113 }, { "epoch": 0.59, "grad_norm": 1.1894831657409668, "learning_rate": 3.824160740759334e-06, "loss": 0.5615, "step": 9114 }, { "epoch": 0.59, "grad_norm": 1.2603846788406372, "learning_rate": 3.823144766604394e-06, "loss": 0.5091, "step": 9115 }, { "epoch": 0.59, "grad_norm": 1.2105695009231567, "learning_rate": 3.822128843886761e-06, "loss": 0.5377, "step": 9116 }, { "epoch": 0.59, "grad_norm": 1.303139567375183, "learning_rate": 3.821112972650837e-06, "loss": 0.4863, "step": 9117 }, { "epoch": 0.59, "grad_norm": 1.323448657989502, "learning_rate": 3.820097152941024e-06, "loss": 0.5478, "step": 9118 }, { "epoch": 0.59, "grad_norm": 1.2450382709503174, "learning_rate": 3.819081384801719e-06, "loss": 0.5416, "step": 9119 }, { "epoch": 0.59, "grad_norm": 1.1279653310775757, "learning_rate": 3.818065668277321e-06, "loss": 0.4844, "step": 9120 }, { "epoch": 0.59, "grad_norm": 1.255370020866394, "learning_rate": 3.8170500034122216e-06, "loss": 0.5061, "step": 9121 }, { "epoch": 0.59, "grad_norm": 1.1902544498443604, "learning_rate": 3.816034390250817e-06, "loss": 0.5265, "step": 9122 }, { "epoch": 0.59, "grad_norm": 1.2386481761932373, "learning_rate": 3.815018828837494e-06, "loss": 0.5012, "step": 9123 }, { "epoch": 0.59, "grad_norm": 1.166502833366394, "learning_rate": 3.8140033192166385e-06, "loss": 0.5112, "step": 9124 }, { "epoch": 0.59, "grad_norm": 1.3639479875564575, "learning_rate": 3.8129878614326387e-06, "loss": 0.5626, "step": 9125 }, { "epoch": 0.59, "grad_norm": 1.3441203832626343, "learning_rate": 3.8119724555298778e-06, "loss": 0.5755, "step": 9126 }, { "epoch": 0.59, "grad_norm": 1.2958877086639404, "learning_rate": 3.810957101552735e-06, "loss": 0.5332, "step": 9127 }, { "epoch": 0.59, "grad_norm": 1.3796122074127197, "learning_rate": 3.809941799545591e-06, "loss": 0.5194, "step": 9128 }, { "epoch": 0.59, "grad_norm": 1.2306585311889648, "learning_rate": 3.8089265495528196e-06, "loss": 0.5134, "step": 9129 }, { "epoch": 0.59, "grad_norm": 1.2038894891738892, "learning_rate": 3.8079113516187955e-06, "loss": 0.5141, "step": 9130 }, { "epoch": 0.59, "grad_norm": 1.073378324508667, "learning_rate": 3.80689620578789e-06, "loss": 0.4793, "step": 9131 }, { "epoch": 0.59, "grad_norm": 1.2794440984725952, "learning_rate": 3.805881112104474e-06, "loss": 0.5432, "step": 9132 }, { "epoch": 0.59, "grad_norm": 1.173536777496338, "learning_rate": 3.8048660706129128e-06, "loss": 0.5303, "step": 9133 }, { "epoch": 0.59, "grad_norm": 1.3006937503814697, "learning_rate": 3.8038510813575746e-06, "loss": 0.5427, "step": 9134 }, { "epoch": 0.59, "grad_norm": 1.1964364051818848, "learning_rate": 3.802836144382818e-06, "loss": 0.487, "step": 9135 }, { "epoch": 0.59, "grad_norm": 1.1718990802764893, "learning_rate": 3.801821259733004e-06, "loss": 0.466, "step": 9136 }, { "epoch": 0.59, "grad_norm": 1.1511430740356445, "learning_rate": 3.8008064274524916e-06, "loss": 0.5122, "step": 9137 }, { "epoch": 0.59, "grad_norm": 1.133608341217041, "learning_rate": 3.799791647585636e-06, "loss": 0.5483, "step": 9138 }, { "epoch": 0.59, "grad_norm": 1.360677719116211, "learning_rate": 3.7987769201767915e-06, "loss": 0.562, "step": 9139 }, { "epoch": 0.59, "grad_norm": 1.147467851638794, "learning_rate": 3.7977622452703107e-06, "loss": 0.5608, "step": 9140 }, { "epoch": 0.59, "grad_norm": 1.0931135416030884, "learning_rate": 3.7967476229105377e-06, "loss": 0.5731, "step": 9141 }, { "epoch": 0.59, "grad_norm": 1.2192615270614624, "learning_rate": 3.7957330531418224e-06, "loss": 0.5624, "step": 9142 }, { "epoch": 0.59, "grad_norm": 1.04554283618927, "learning_rate": 3.7947185360085078e-06, "loss": 0.4783, "step": 9143 }, { "epoch": 0.59, "grad_norm": 1.095155954360962, "learning_rate": 3.793704071554936e-06, "loss": 0.4959, "step": 9144 }, { "epoch": 0.59, "grad_norm": 1.241575837135315, "learning_rate": 3.7926896598254476e-06, "loss": 0.5616, "step": 9145 }, { "epoch": 0.59, "grad_norm": 1.1842283010482788, "learning_rate": 3.7916753008643813e-06, "loss": 0.5464, "step": 9146 }, { "epoch": 0.59, "grad_norm": 1.2208213806152344, "learning_rate": 3.790660994716068e-06, "loss": 0.5406, "step": 9147 }, { "epoch": 0.59, "grad_norm": 1.2769569158554077, "learning_rate": 3.7896467414248422e-06, "loss": 0.4974, "step": 9148 }, { "epoch": 0.59, "grad_norm": 1.2540228366851807, "learning_rate": 3.7886325410350344e-06, "loss": 0.56, "step": 9149 }, { "epoch": 0.59, "grad_norm": 1.2385777235031128, "learning_rate": 3.7876183935909733e-06, "loss": 0.5152, "step": 9150 }, { "epoch": 0.59, "grad_norm": 1.1755768060684204, "learning_rate": 3.7866042991369838e-06, "loss": 0.533, "step": 9151 }, { "epoch": 0.59, "grad_norm": 1.172377347946167, "learning_rate": 3.7855902577173924e-06, "loss": 0.5002, "step": 9152 }, { "epoch": 0.59, "grad_norm": 1.3116414546966553, "learning_rate": 3.7845762693765154e-06, "loss": 0.5635, "step": 9153 }, { "epoch": 0.59, "grad_norm": 1.2392853498458862, "learning_rate": 3.7835623341586734e-06, "loss": 0.5171, "step": 9154 }, { "epoch": 0.59, "grad_norm": 1.3473745584487915, "learning_rate": 3.782548452108184e-06, "loss": 0.5697, "step": 9155 }, { "epoch": 0.59, "grad_norm": 1.1373684406280518, "learning_rate": 3.781534623269361e-06, "loss": 0.5214, "step": 9156 }, { "epoch": 0.59, "grad_norm": 1.188713550567627, "learning_rate": 3.7805208476865164e-06, "loss": 0.5017, "step": 9157 }, { "epoch": 0.59, "grad_norm": 1.1823307275772095, "learning_rate": 3.7795071254039584e-06, "loss": 0.5245, "step": 9158 }, { "epoch": 0.59, "grad_norm": 1.221656322479248, "learning_rate": 3.7784934564659946e-06, "loss": 0.5764, "step": 9159 }, { "epoch": 0.59, "grad_norm": 1.2107264995574951, "learning_rate": 3.7774798409169305e-06, "loss": 0.5282, "step": 9160 }, { "epoch": 0.59, "grad_norm": 1.116315245628357, "learning_rate": 3.776466278801069e-06, "loss": 0.4909, "step": 9161 }, { "epoch": 0.59, "grad_norm": 1.2430800199508667, "learning_rate": 3.7754527701627096e-06, "loss": 0.5159, "step": 9162 }, { "epoch": 0.59, "grad_norm": 1.3239389657974243, "learning_rate": 3.7744393150461504e-06, "loss": 0.5094, "step": 9163 }, { "epoch": 0.59, "grad_norm": 1.108270287513733, "learning_rate": 3.7734259134956863e-06, "loss": 0.5024, "step": 9164 }, { "epoch": 0.59, "grad_norm": 1.392482042312622, "learning_rate": 3.7724125655556115e-06, "loss": 0.4986, "step": 9165 }, { "epoch": 0.59, "grad_norm": 1.2504849433898926, "learning_rate": 3.7713992712702154e-06, "loss": 0.5426, "step": 9166 }, { "epoch": 0.59, "grad_norm": 1.1664986610412598, "learning_rate": 3.7703860306837875e-06, "loss": 0.5164, "step": 9167 }, { "epoch": 0.59, "grad_norm": 1.1870532035827637, "learning_rate": 3.7693728438406134e-06, "loss": 0.5257, "step": 9168 }, { "epoch": 0.59, "grad_norm": 1.1798032522201538, "learning_rate": 3.7683597107849784e-06, "loss": 0.4623, "step": 9169 }, { "epoch": 0.59, "grad_norm": 1.248815655708313, "learning_rate": 3.767346631561163e-06, "loss": 0.4755, "step": 9170 }, { "epoch": 0.59, "grad_norm": 1.2396228313446045, "learning_rate": 3.7663336062134447e-06, "loss": 0.5459, "step": 9171 }, { "epoch": 0.59, "grad_norm": 1.2820874452590942, "learning_rate": 3.7653206347861015e-06, "loss": 0.5513, "step": 9172 }, { "epoch": 0.59, "grad_norm": 1.1802057027816772, "learning_rate": 3.7643077173234082e-06, "loss": 0.5569, "step": 9173 }, { "epoch": 0.59, "grad_norm": 1.1788233518600464, "learning_rate": 3.7632948538696363e-06, "loss": 0.5064, "step": 9174 }, { "epoch": 0.59, "grad_norm": 1.1336077451705933, "learning_rate": 3.7622820444690577e-06, "loss": 0.5117, "step": 9175 }, { "epoch": 0.59, "grad_norm": 1.1816630363464355, "learning_rate": 3.761269289165935e-06, "loss": 0.5182, "step": 9176 }, { "epoch": 0.59, "grad_norm": 1.0722532272338867, "learning_rate": 3.7602565880045366e-06, "loss": 0.4988, "step": 9177 }, { "epoch": 0.59, "grad_norm": 1.1983158588409424, "learning_rate": 3.7592439410291235e-06, "loss": 0.5053, "step": 9178 }, { "epoch": 0.59, "grad_norm": 1.192935585975647, "learning_rate": 3.7582313482839573e-06, "loss": 0.5397, "step": 9179 }, { "epoch": 0.59, "grad_norm": 1.2483848333358765, "learning_rate": 3.7572188098132945e-06, "loss": 0.5481, "step": 9180 }, { "epoch": 0.59, "grad_norm": 1.1542974710464478, "learning_rate": 3.756206325661393e-06, "loss": 0.5053, "step": 9181 }, { "epoch": 0.59, "grad_norm": 1.3020340204238892, "learning_rate": 3.755193895872502e-06, "loss": 0.5536, "step": 9182 }, { "epoch": 0.59, "grad_norm": 1.2464793920516968, "learning_rate": 3.7541815204908745e-06, "loss": 0.544, "step": 9183 }, { "epoch": 0.59, "grad_norm": 1.1819928884506226, "learning_rate": 3.753169199560758e-06, "loss": 0.5039, "step": 9184 }, { "epoch": 0.59, "grad_norm": 1.142942190170288, "learning_rate": 3.752156933126399e-06, "loss": 0.5406, "step": 9185 }, { "epoch": 0.59, "grad_norm": 1.1413439512252808, "learning_rate": 3.751144721232041e-06, "loss": 0.5359, "step": 9186 }, { "epoch": 0.59, "grad_norm": 1.1360583305358887, "learning_rate": 3.7501325639219276e-06, "loss": 0.5213, "step": 9187 }, { "epoch": 0.59, "grad_norm": 1.1422909498214722, "learning_rate": 3.7491204612402933e-06, "loss": 0.4701, "step": 9188 }, { "epoch": 0.59, "grad_norm": 1.1726382970809937, "learning_rate": 3.7481084132313756e-06, "loss": 0.5945, "step": 9189 }, { "epoch": 0.59, "grad_norm": 1.185188889503479, "learning_rate": 3.7470964199394094e-06, "loss": 0.5521, "step": 9190 }, { "epoch": 0.59, "grad_norm": 1.0859259366989136, "learning_rate": 3.746084481408626e-06, "loss": 0.478, "step": 9191 }, { "epoch": 0.59, "grad_norm": 1.2324490547180176, "learning_rate": 3.745072597683255e-06, "loss": 0.5454, "step": 9192 }, { "epoch": 0.59, "grad_norm": 1.1934235095977783, "learning_rate": 3.7440607688075255e-06, "loss": 0.5695, "step": 9193 }, { "epoch": 0.59, "grad_norm": 1.1870018243789673, "learning_rate": 3.7430489948256564e-06, "loss": 0.569, "step": 9194 }, { "epoch": 0.59, "grad_norm": 1.16300368309021, "learning_rate": 3.7420372757818734e-06, "loss": 0.5004, "step": 9195 }, { "epoch": 0.59, "grad_norm": 1.177708625793457, "learning_rate": 3.7410256117203957e-06, "loss": 0.5518, "step": 9196 }, { "epoch": 0.59, "grad_norm": 1.2263901233673096, "learning_rate": 3.7400140026854398e-06, "loss": 0.5369, "step": 9197 }, { "epoch": 0.59, "grad_norm": 1.1323250532150269, "learning_rate": 3.7390024487212224e-06, "loss": 0.5303, "step": 9198 }, { "epoch": 0.59, "grad_norm": 1.2962974309921265, "learning_rate": 3.7379909498719545e-06, "loss": 0.5175, "step": 9199 }, { "epoch": 0.59, "grad_norm": 1.1527900695800781, "learning_rate": 3.736979506181845e-06, "loss": 0.5642, "step": 9200 }, { "epoch": 0.59, "grad_norm": 1.2045029401779175, "learning_rate": 3.7359681176951025e-06, "loss": 0.4944, "step": 9201 }, { "epoch": 0.59, "grad_norm": 1.2270030975341797, "learning_rate": 3.7349567844559326e-06, "loss": 0.5666, "step": 9202 }, { "epoch": 0.59, "grad_norm": 1.1272016763687134, "learning_rate": 3.7339455065085383e-06, "loss": 0.542, "step": 9203 }, { "epoch": 0.59, "grad_norm": 1.1681017875671387, "learning_rate": 3.7329342838971204e-06, "loss": 0.5106, "step": 9204 }, { "epoch": 0.59, "grad_norm": 1.1255944967269897, "learning_rate": 3.7319231166658744e-06, "loss": 0.5107, "step": 9205 }, { "epoch": 0.59, "grad_norm": 1.0960092544555664, "learning_rate": 3.730912004858997e-06, "loss": 0.4889, "step": 9206 }, { "epoch": 0.59, "grad_norm": 1.1423134803771973, "learning_rate": 3.7299009485206827e-06, "loss": 0.4956, "step": 9207 }, { "epoch": 0.59, "grad_norm": 1.278969645500183, "learning_rate": 3.72888994769512e-06, "loss": 0.5442, "step": 9208 }, { "epoch": 0.59, "grad_norm": 1.1973985433578491, "learning_rate": 3.7278790024264986e-06, "loss": 0.5193, "step": 9209 }, { "epoch": 0.59, "grad_norm": 1.1321115493774414, "learning_rate": 3.7268681127590044e-06, "loss": 0.5201, "step": 9210 }, { "epoch": 0.59, "grad_norm": 1.052529215812683, "learning_rate": 3.7258572787368196e-06, "loss": 0.4322, "step": 9211 }, { "epoch": 0.59, "grad_norm": 1.1489067077636719, "learning_rate": 3.7248465004041266e-06, "loss": 0.5556, "step": 9212 }, { "epoch": 0.59, "grad_norm": 1.114022970199585, "learning_rate": 3.7238357778051026e-06, "loss": 0.4902, "step": 9213 }, { "epoch": 0.59, "grad_norm": 1.101251482963562, "learning_rate": 3.7228251109839236e-06, "loss": 0.5023, "step": 9214 }, { "epoch": 0.59, "grad_norm": 1.2238223552703857, "learning_rate": 3.7218144999847637e-06, "loss": 0.5519, "step": 9215 }, { "epoch": 0.59, "grad_norm": 1.1693928241729736, "learning_rate": 3.720803944851796e-06, "loss": 0.5167, "step": 9216 }, { "epoch": 0.59, "grad_norm": 1.1283191442489624, "learning_rate": 3.7197934456291873e-06, "loss": 0.5414, "step": 9217 }, { "epoch": 0.6, "grad_norm": 1.1383463144302368, "learning_rate": 3.7187830023611027e-06, "loss": 0.4986, "step": 9218 }, { "epoch": 0.6, "grad_norm": 1.144818663597107, "learning_rate": 3.7177726150917083e-06, "loss": 0.4767, "step": 9219 }, { "epoch": 0.6, "grad_norm": 1.1432757377624512, "learning_rate": 3.716762283865164e-06, "loss": 0.5, "step": 9220 }, { "epoch": 0.6, "grad_norm": 1.209390640258789, "learning_rate": 3.7157520087256295e-06, "loss": 0.4983, "step": 9221 }, { "epoch": 0.6, "grad_norm": 1.0998109579086304, "learning_rate": 3.7147417897172633e-06, "loss": 0.5303, "step": 9222 }, { "epoch": 0.6, "grad_norm": 1.170918345451355, "learning_rate": 3.7137316268842154e-06, "loss": 0.4879, "step": 9223 }, { "epoch": 0.6, "grad_norm": 1.1672260761260986, "learning_rate": 3.7127215202706395e-06, "loss": 0.584, "step": 9224 }, { "epoch": 0.6, "grad_norm": 1.1890919208526611, "learning_rate": 3.7117114699206845e-06, "loss": 0.553, "step": 9225 }, { "epoch": 0.6, "grad_norm": 1.1724028587341309, "learning_rate": 3.710701475878498e-06, "loss": 0.6121, "step": 9226 }, { "epoch": 0.6, "grad_norm": 1.2423995733261108, "learning_rate": 3.7096915381882237e-06, "loss": 0.492, "step": 9227 }, { "epoch": 0.6, "grad_norm": 1.237856149673462, "learning_rate": 3.7086816568940044e-06, "loss": 0.5837, "step": 9228 }, { "epoch": 0.6, "grad_norm": 1.0431791543960571, "learning_rate": 3.707671832039977e-06, "loss": 0.5011, "step": 9229 }, { "epoch": 0.6, "grad_norm": 1.2601556777954102, "learning_rate": 3.706662063670279e-06, "loss": 0.5157, "step": 9230 }, { "epoch": 0.6, "grad_norm": 1.2091132402420044, "learning_rate": 3.7056523518290454e-06, "loss": 0.4872, "step": 9231 }, { "epoch": 0.6, "grad_norm": 1.1670029163360596, "learning_rate": 3.7046426965604075e-06, "loss": 0.5176, "step": 9232 }, { "epoch": 0.6, "grad_norm": 1.1346149444580078, "learning_rate": 3.7036330979084967e-06, "loss": 0.489, "step": 9233 }, { "epoch": 0.6, "grad_norm": 1.302747130393982, "learning_rate": 3.7026235559174395e-06, "loss": 0.5995, "step": 9234 }, { "epoch": 0.6, "grad_norm": 1.1809970140457153, "learning_rate": 3.7016140706313575e-06, "loss": 0.5809, "step": 9235 }, { "epoch": 0.6, "grad_norm": 1.3403313159942627, "learning_rate": 3.7006046420943746e-06, "loss": 0.5713, "step": 9236 }, { "epoch": 0.6, "grad_norm": 1.2833307981491089, "learning_rate": 3.6995952703506103e-06, "loss": 0.5711, "step": 9237 }, { "epoch": 0.6, "grad_norm": 1.0985866785049438, "learning_rate": 3.6985859554441816e-06, "loss": 0.5087, "step": 9238 }, { "epoch": 0.6, "grad_norm": 1.241489052772522, "learning_rate": 3.697576697419204e-06, "loss": 0.5633, "step": 9239 }, { "epoch": 0.6, "grad_norm": 1.2518310546875, "learning_rate": 3.6965674963197894e-06, "loss": 0.5271, "step": 9240 }, { "epoch": 0.6, "grad_norm": 1.1973834037780762, "learning_rate": 3.695558352190045e-06, "loss": 0.537, "step": 9241 }, { "epoch": 0.6, "grad_norm": 1.2207955121994019, "learning_rate": 3.69454926507408e-06, "loss": 0.5076, "step": 9242 }, { "epoch": 0.6, "grad_norm": 1.1980680227279663, "learning_rate": 3.693540235015998e-06, "loss": 0.5044, "step": 9243 }, { "epoch": 0.6, "grad_norm": 1.227472186088562, "learning_rate": 3.6925312620599017e-06, "loss": 0.4974, "step": 9244 }, { "epoch": 0.6, "grad_norm": 1.1881589889526367, "learning_rate": 3.6915223462498926e-06, "loss": 0.5273, "step": 9245 }, { "epoch": 0.6, "grad_norm": 1.150442123413086, "learning_rate": 3.690513487630064e-06, "loss": 0.5647, "step": 9246 }, { "epoch": 0.6, "grad_norm": 1.2384635210037231, "learning_rate": 3.689504686244513e-06, "loss": 0.4902, "step": 9247 }, { "epoch": 0.6, "grad_norm": 1.1530921459197998, "learning_rate": 3.68849594213733e-06, "loss": 0.4922, "step": 9248 }, { "epoch": 0.6, "grad_norm": 1.1442270278930664, "learning_rate": 3.6874872553526057e-06, "loss": 0.5062, "step": 9249 }, { "epoch": 0.6, "grad_norm": 1.1745491027832031, "learning_rate": 3.6864786259344286e-06, "loss": 0.4807, "step": 9250 }, { "epoch": 0.6, "grad_norm": 1.2218360900878906, "learning_rate": 3.6854700539268817e-06, "loss": 0.5264, "step": 9251 }, { "epoch": 0.6, "grad_norm": 1.278935432434082, "learning_rate": 3.6844615393740463e-06, "loss": 0.4943, "step": 9252 }, { "epoch": 0.6, "grad_norm": 1.191319465637207, "learning_rate": 3.6834530823200025e-06, "loss": 0.5327, "step": 9253 }, { "epoch": 0.6, "grad_norm": 1.1334201097488403, "learning_rate": 3.682444682808829e-06, "loss": 0.4998, "step": 9254 }, { "epoch": 0.6, "grad_norm": 1.1184258460998535, "learning_rate": 3.681436340884598e-06, "loss": 0.5592, "step": 9255 }, { "epoch": 0.6, "grad_norm": 1.0278515815734863, "learning_rate": 3.6804280565913832e-06, "loss": 0.4834, "step": 9256 }, { "epoch": 0.6, "grad_norm": 1.0645796060562134, "learning_rate": 3.6794198299732537e-06, "loss": 0.5002, "step": 9257 }, { "epoch": 0.6, "grad_norm": 1.1791470050811768, "learning_rate": 3.6784116610742755e-06, "loss": 0.566, "step": 9258 }, { "epoch": 0.6, "grad_norm": 1.1709723472595215, "learning_rate": 3.6774035499385153e-06, "loss": 0.467, "step": 9259 }, { "epoch": 0.6, "grad_norm": 1.2035623788833618, "learning_rate": 3.6763954966100317e-06, "loss": 0.5086, "step": 9260 }, { "epoch": 0.6, "grad_norm": 1.188712477684021, "learning_rate": 3.6753875011328866e-06, "loss": 0.5421, "step": 9261 }, { "epoch": 0.6, "grad_norm": 1.1459804773330688, "learning_rate": 3.674379563551136e-06, "loss": 0.5242, "step": 9262 }, { "epoch": 0.6, "grad_norm": 1.2224929332733154, "learning_rate": 3.673371683908837e-06, "loss": 0.5454, "step": 9263 }, { "epoch": 0.6, "grad_norm": 1.170608639717102, "learning_rate": 3.6723638622500367e-06, "loss": 0.5246, "step": 9264 }, { "epoch": 0.6, "grad_norm": 1.0575411319732666, "learning_rate": 3.6713560986187863e-06, "loss": 0.5116, "step": 9265 }, { "epoch": 0.6, "grad_norm": 1.1102097034454346, "learning_rate": 3.6703483930591334e-06, "loss": 0.4753, "step": 9266 }, { "epoch": 0.6, "grad_norm": 1.1410927772521973, "learning_rate": 3.669340745615121e-06, "loss": 0.5202, "step": 9267 }, { "epoch": 0.6, "grad_norm": 1.1723612546920776, "learning_rate": 3.6683331563307923e-06, "loss": 0.4751, "step": 9268 }, { "epoch": 0.6, "grad_norm": 1.1942923069000244, "learning_rate": 3.667325625250187e-06, "loss": 0.5392, "step": 9269 }, { "epoch": 0.6, "grad_norm": 1.1417126655578613, "learning_rate": 3.6663181524173384e-06, "loss": 0.5276, "step": 9270 }, { "epoch": 0.6, "grad_norm": 1.2427253723144531, "learning_rate": 3.6653107378762824e-06, "loss": 0.5619, "step": 9271 }, { "epoch": 0.6, "grad_norm": 1.114059567451477, "learning_rate": 3.6643033816710505e-06, "loss": 0.5149, "step": 9272 }, { "epoch": 0.6, "grad_norm": 1.248180866241455, "learning_rate": 3.663296083845672e-06, "loss": 0.5691, "step": 9273 }, { "epoch": 0.6, "grad_norm": 1.1908533573150635, "learning_rate": 3.662288844444173e-06, "loss": 0.5033, "step": 9274 }, { "epoch": 0.6, "grad_norm": 1.160499095916748, "learning_rate": 3.6612816635105784e-06, "loss": 0.5686, "step": 9275 }, { "epoch": 0.6, "grad_norm": 1.0850878953933716, "learning_rate": 3.6602745410889073e-06, "loss": 0.4816, "step": 9276 }, { "epoch": 0.6, "grad_norm": 1.4115327596664429, "learning_rate": 3.6592674772231783e-06, "loss": 0.5022, "step": 9277 }, { "epoch": 0.6, "grad_norm": 1.2690609693527222, "learning_rate": 3.65826047195741e-06, "loss": 0.5437, "step": 9278 }, { "epoch": 0.6, "grad_norm": 1.1726632118225098, "learning_rate": 3.6572535253356143e-06, "loss": 0.495, "step": 9279 }, { "epoch": 0.6, "grad_norm": 1.1632024049758911, "learning_rate": 3.6562466374018023e-06, "loss": 0.5062, "step": 9280 }, { "epoch": 0.6, "grad_norm": 1.2031790018081665, "learning_rate": 3.655239808199985e-06, "loss": 0.5485, "step": 9281 }, { "epoch": 0.6, "grad_norm": 1.2103289365768433, "learning_rate": 3.654233037774165e-06, "loss": 0.4754, "step": 9282 }, { "epoch": 0.6, "grad_norm": 1.1438478231430054, "learning_rate": 3.653226326168346e-06, "loss": 0.5177, "step": 9283 }, { "epoch": 0.6, "grad_norm": 1.0759453773498535, "learning_rate": 3.652219673426529e-06, "loss": 0.5123, "step": 9284 }, { "epoch": 0.6, "grad_norm": 1.2350715398788452, "learning_rate": 3.651213079592714e-06, "loss": 0.5312, "step": 9285 }, { "epoch": 0.6, "grad_norm": 1.1340899467468262, "learning_rate": 3.6502065447108968e-06, "loss": 0.5275, "step": 9286 }, { "epoch": 0.6, "grad_norm": 1.3675116300582886, "learning_rate": 3.6492000688250672e-06, "loss": 0.567, "step": 9287 }, { "epoch": 0.6, "grad_norm": 1.1536091566085815, "learning_rate": 3.648193651979217e-06, "loss": 0.505, "step": 9288 }, { "epoch": 0.6, "grad_norm": 1.1587339639663696, "learning_rate": 3.647187294217335e-06, "loss": 0.5347, "step": 9289 }, { "epoch": 0.6, "grad_norm": 1.34734046459198, "learning_rate": 3.646180995583406e-06, "loss": 0.5541, "step": 9290 }, { "epoch": 0.6, "grad_norm": 1.0734472274780273, "learning_rate": 3.645174756121412e-06, "loss": 0.4672, "step": 9291 }, { "epoch": 0.6, "grad_norm": 1.217214822769165, "learning_rate": 3.644168575875337e-06, "loss": 0.5714, "step": 9292 }, { "epoch": 0.6, "grad_norm": 1.2143806219100952, "learning_rate": 3.643162454889153e-06, "loss": 0.6037, "step": 9293 }, { "epoch": 0.6, "grad_norm": 1.284562349319458, "learning_rate": 3.6421563932068375e-06, "loss": 0.5638, "step": 9294 }, { "epoch": 0.6, "grad_norm": 1.0418314933776855, "learning_rate": 3.641150390872363e-06, "loss": 0.4596, "step": 9295 }, { "epoch": 0.6, "grad_norm": 1.0799206495285034, "learning_rate": 3.6401444479296988e-06, "loss": 0.4836, "step": 9296 }, { "epoch": 0.6, "grad_norm": 1.0645575523376465, "learning_rate": 3.6391385644228127e-06, "loss": 0.4958, "step": 9297 }, { "epoch": 0.6, "grad_norm": 1.0913516283035278, "learning_rate": 3.63813274039567e-06, "loss": 0.5434, "step": 9298 }, { "epoch": 0.6, "grad_norm": 1.2007681131362915, "learning_rate": 3.63712697589223e-06, "loss": 0.5477, "step": 9299 }, { "epoch": 0.6, "grad_norm": 1.0466320514678955, "learning_rate": 3.6361212709564536e-06, "loss": 0.4943, "step": 9300 }, { "epoch": 0.6, "grad_norm": 1.1148148775100708, "learning_rate": 3.635115625632298e-06, "loss": 0.5305, "step": 9301 }, { "epoch": 0.6, "grad_norm": 1.2301865816116333, "learning_rate": 3.6341100399637174e-06, "loss": 0.5248, "step": 9302 }, { "epoch": 0.6, "grad_norm": 1.1994407176971436, "learning_rate": 3.633104513994662e-06, "loss": 0.5075, "step": 9303 }, { "epoch": 0.6, "grad_norm": 1.1752955913543701, "learning_rate": 3.632099047769083e-06, "loss": 0.507, "step": 9304 }, { "epoch": 0.6, "grad_norm": 1.1809937953948975, "learning_rate": 3.631093641330924e-06, "loss": 0.5118, "step": 9305 }, { "epoch": 0.6, "grad_norm": 1.2186204195022583, "learning_rate": 3.6300882947241313e-06, "loss": 0.4757, "step": 9306 }, { "epoch": 0.6, "grad_norm": 1.2410404682159424, "learning_rate": 3.629083007992644e-06, "loss": 0.5343, "step": 9307 }, { "epoch": 0.6, "grad_norm": 1.2061889171600342, "learning_rate": 3.628077781180401e-06, "loss": 0.4964, "step": 9308 }, { "epoch": 0.6, "grad_norm": 1.1165231466293335, "learning_rate": 3.6270726143313385e-06, "loss": 0.5824, "step": 9309 }, { "epoch": 0.6, "grad_norm": 1.219838261604309, "learning_rate": 3.6260675074893926e-06, "loss": 0.5256, "step": 9310 }, { "epoch": 0.6, "grad_norm": 1.131621241569519, "learning_rate": 3.6250624606984884e-06, "loss": 0.5258, "step": 9311 }, { "epoch": 0.6, "grad_norm": 1.3000895977020264, "learning_rate": 3.624057474002557e-06, "loss": 0.5037, "step": 9312 }, { "epoch": 0.6, "grad_norm": 1.1783134937286377, "learning_rate": 3.6230525474455237e-06, "loss": 0.528, "step": 9313 }, { "epoch": 0.6, "grad_norm": 1.0910727977752686, "learning_rate": 3.6220476810713103e-06, "loss": 0.5246, "step": 9314 }, { "epoch": 0.6, "grad_norm": 1.2332909107208252, "learning_rate": 3.621042874923838e-06, "loss": 0.5455, "step": 9315 }, { "epoch": 0.6, "grad_norm": 1.223137617111206, "learning_rate": 3.6200381290470254e-06, "loss": 0.5267, "step": 9316 }, { "epoch": 0.6, "grad_norm": 1.0933974981307983, "learning_rate": 3.6190334434847848e-06, "loss": 0.4819, "step": 9317 }, { "epoch": 0.6, "grad_norm": 1.1698830127716064, "learning_rate": 3.6180288182810287e-06, "loss": 0.5398, "step": 9318 }, { "epoch": 0.6, "grad_norm": 1.0965673923492432, "learning_rate": 3.617024253479667e-06, "loss": 0.468, "step": 9319 }, { "epoch": 0.6, "grad_norm": 1.2223721742630005, "learning_rate": 3.616019749124608e-06, "loss": 0.497, "step": 9320 }, { "epoch": 0.6, "grad_norm": 1.177095890045166, "learning_rate": 3.6150153052597546e-06, "loss": 0.5528, "step": 9321 }, { "epoch": 0.6, "grad_norm": 1.2932848930358887, "learning_rate": 3.614010921929011e-06, "loss": 0.5483, "step": 9322 }, { "epoch": 0.6, "grad_norm": 1.1785167455673218, "learning_rate": 3.613006599176272e-06, "loss": 0.4799, "step": 9323 }, { "epoch": 0.6, "grad_norm": 1.1536266803741455, "learning_rate": 3.612002337045436e-06, "loss": 0.5342, "step": 9324 }, { "epoch": 0.6, "grad_norm": 1.1542272567749023, "learning_rate": 3.610998135580397e-06, "loss": 0.5492, "step": 9325 }, { "epoch": 0.6, "grad_norm": 1.161359429359436, "learning_rate": 3.6099939948250463e-06, "loss": 0.5681, "step": 9326 }, { "epoch": 0.6, "grad_norm": 1.1461739540100098, "learning_rate": 3.608989914823271e-06, "loss": 0.5353, "step": 9327 }, { "epoch": 0.6, "grad_norm": 1.1948257684707642, "learning_rate": 3.60798589561896e-06, "loss": 0.5153, "step": 9328 }, { "epoch": 0.6, "grad_norm": 1.1480389833450317, "learning_rate": 3.6069819372559924e-06, "loss": 0.4796, "step": 9329 }, { "epoch": 0.6, "grad_norm": 1.1735081672668457, "learning_rate": 3.6059780397782496e-06, "loss": 0.4898, "step": 9330 }, { "epoch": 0.6, "grad_norm": 1.2041780948638916, "learning_rate": 3.60497420322961e-06, "loss": 0.483, "step": 9331 }, { "epoch": 0.6, "grad_norm": 1.2185819149017334, "learning_rate": 3.603970427653949e-06, "loss": 0.5446, "step": 9332 }, { "epoch": 0.6, "grad_norm": 1.1863551139831543, "learning_rate": 3.602966713095141e-06, "loss": 0.534, "step": 9333 }, { "epoch": 0.6, "grad_norm": 1.125137209892273, "learning_rate": 3.601963059597052e-06, "loss": 0.5003, "step": 9334 }, { "epoch": 0.6, "grad_norm": 1.0636262893676758, "learning_rate": 3.6009594672035496e-06, "loss": 0.449, "step": 9335 }, { "epoch": 0.6, "grad_norm": 1.285043478012085, "learning_rate": 3.5999559359585e-06, "loss": 0.5451, "step": 9336 }, { "epoch": 0.6, "grad_norm": 1.2000681161880493, "learning_rate": 3.598952465905764e-06, "loss": 0.5099, "step": 9337 }, { "epoch": 0.6, "grad_norm": 1.236519694328308, "learning_rate": 3.5979490570892017e-06, "loss": 0.5123, "step": 9338 }, { "epoch": 0.6, "grad_norm": 1.2188714742660522, "learning_rate": 3.59694570955267e-06, "loss": 0.5217, "step": 9339 }, { "epoch": 0.6, "grad_norm": 1.1032872200012207, "learning_rate": 3.5959424233400198e-06, "loss": 0.5506, "step": 9340 }, { "epoch": 0.6, "grad_norm": 1.1833785772323608, "learning_rate": 3.5949391984951032e-06, "loss": 0.5456, "step": 9341 }, { "epoch": 0.6, "grad_norm": 1.102731704711914, "learning_rate": 3.59393603506177e-06, "loss": 0.511, "step": 9342 }, { "epoch": 0.6, "grad_norm": 1.3855730295181274, "learning_rate": 3.5929329330838654e-06, "loss": 0.5522, "step": 9343 }, { "epoch": 0.6, "grad_norm": 1.1284581422805786, "learning_rate": 3.5919298926052308e-06, "loss": 0.5239, "step": 9344 }, { "epoch": 0.6, "grad_norm": 1.2822333574295044, "learning_rate": 3.590926913669709e-06, "loss": 0.5296, "step": 9345 }, { "epoch": 0.6, "grad_norm": 1.0963736772537231, "learning_rate": 3.589923996321135e-06, "loss": 0.5337, "step": 9346 }, { "epoch": 0.6, "grad_norm": 1.1536884307861328, "learning_rate": 3.588921140603346e-06, "loss": 0.5375, "step": 9347 }, { "epoch": 0.6, "grad_norm": 1.1127904653549194, "learning_rate": 3.587918346560174e-06, "loss": 0.484, "step": 9348 }, { "epoch": 0.6, "grad_norm": 1.2733454704284668, "learning_rate": 3.586915614235447e-06, "loss": 0.5788, "step": 9349 }, { "epoch": 0.6, "grad_norm": 1.091952919960022, "learning_rate": 3.5859129436729917e-06, "loss": 0.4837, "step": 9350 }, { "epoch": 0.6, "grad_norm": 1.2182964086532593, "learning_rate": 3.5849103349166347e-06, "loss": 0.5823, "step": 9351 }, { "epoch": 0.6, "grad_norm": 1.1050676107406616, "learning_rate": 3.583907788010196e-06, "loss": 0.5208, "step": 9352 }, { "epoch": 0.6, "grad_norm": 1.3044865131378174, "learning_rate": 3.5829053029974935e-06, "loss": 0.596, "step": 9353 }, { "epoch": 0.6, "grad_norm": 1.1580727100372314, "learning_rate": 3.5819028799223443e-06, "loss": 0.5489, "step": 9354 }, { "epoch": 0.6, "grad_norm": 1.1933022737503052, "learning_rate": 3.580900518828561e-06, "loss": 0.5374, "step": 9355 }, { "epoch": 0.6, "grad_norm": 1.266505241394043, "learning_rate": 3.5798982197599552e-06, "loss": 0.5067, "step": 9356 }, { "epoch": 0.6, "grad_norm": 1.149000644683838, "learning_rate": 3.578895982760336e-06, "loss": 0.5147, "step": 9357 }, { "epoch": 0.6, "grad_norm": 1.0832778215408325, "learning_rate": 3.577893807873505e-06, "loss": 0.491, "step": 9358 }, { "epoch": 0.6, "grad_norm": 1.1701914072036743, "learning_rate": 3.5768916951432664e-06, "loss": 0.5332, "step": 9359 }, { "epoch": 0.6, "grad_norm": 1.2306427955627441, "learning_rate": 3.57588964461342e-06, "loss": 0.5666, "step": 9360 }, { "epoch": 0.6, "grad_norm": 1.2693238258361816, "learning_rate": 3.5748876563277636e-06, "loss": 0.5464, "step": 9361 }, { "epoch": 0.6, "grad_norm": 1.1045979261398315, "learning_rate": 3.57388573033009e-06, "loss": 0.5363, "step": 9362 }, { "epoch": 0.6, "grad_norm": 1.236465334892273, "learning_rate": 3.572883866664194e-06, "loss": 0.5495, "step": 9363 }, { "epoch": 0.6, "grad_norm": 1.197928547859192, "learning_rate": 3.5718820653738605e-06, "loss": 0.5037, "step": 9364 }, { "epoch": 0.6, "grad_norm": 1.2230703830718994, "learning_rate": 3.5708803265028775e-06, "loss": 0.5286, "step": 9365 }, { "epoch": 0.6, "grad_norm": 1.1463161706924438, "learning_rate": 3.569878650095028e-06, "loss": 0.4874, "step": 9366 }, { "epoch": 0.6, "grad_norm": 1.2423374652862549, "learning_rate": 3.568877036194093e-06, "loss": 0.4889, "step": 9367 }, { "epoch": 0.6, "grad_norm": 1.128282904624939, "learning_rate": 3.56787548484385e-06, "loss": 0.4999, "step": 9368 }, { "epoch": 0.6, "grad_norm": 1.145796537399292, "learning_rate": 3.5668739960880772e-06, "loss": 0.537, "step": 9369 }, { "epoch": 0.6, "grad_norm": 1.249971628189087, "learning_rate": 3.565872569970542e-06, "loss": 0.5374, "step": 9370 }, { "epoch": 0.6, "grad_norm": 1.1765260696411133, "learning_rate": 3.5648712065350172e-06, "loss": 0.531, "step": 9371 }, { "epoch": 0.6, "grad_norm": 1.097870945930481, "learning_rate": 3.563869905825269e-06, "loss": 0.5382, "step": 9372 }, { "epoch": 0.61, "grad_norm": 1.1754099130630493, "learning_rate": 3.562868667885062e-06, "loss": 0.5127, "step": 9373 }, { "epoch": 0.61, "grad_norm": 1.3220396041870117, "learning_rate": 3.5618674927581597e-06, "loss": 0.5363, "step": 9374 }, { "epoch": 0.61, "grad_norm": 1.181707501411438, "learning_rate": 3.5608663804883163e-06, "loss": 0.4933, "step": 9375 }, { "epoch": 0.61, "grad_norm": 1.1539760828018188, "learning_rate": 3.559865331119291e-06, "loss": 0.5028, "step": 9376 }, { "epoch": 0.61, "grad_norm": 1.2121376991271973, "learning_rate": 3.558864344694837e-06, "loss": 0.5233, "step": 9377 }, { "epoch": 0.61, "grad_norm": 1.1389650106430054, "learning_rate": 3.5578634212587025e-06, "loss": 0.4963, "step": 9378 }, { "epoch": 0.61, "grad_norm": 1.0881532430648804, "learning_rate": 3.556862560854639e-06, "loss": 0.4935, "step": 9379 }, { "epoch": 0.61, "grad_norm": 1.1157214641571045, "learning_rate": 3.5558617635263908e-06, "loss": 0.5359, "step": 9380 }, { "epoch": 0.61, "grad_norm": 1.148011326789856, "learning_rate": 3.5548610293176967e-06, "loss": 0.5172, "step": 9381 }, { "epoch": 0.61, "grad_norm": 1.0717288255691528, "learning_rate": 3.553860358272299e-06, "loss": 0.4972, "step": 9382 }, { "epoch": 0.61, "grad_norm": 1.2193710803985596, "learning_rate": 3.552859750433934e-06, "loss": 0.5208, "step": 9383 }, { "epoch": 0.61, "grad_norm": 1.1011055707931519, "learning_rate": 3.5518592058463354e-06, "loss": 0.4939, "step": 9384 }, { "epoch": 0.61, "grad_norm": 1.35318922996521, "learning_rate": 3.5508587245532344e-06, "loss": 0.5948, "step": 9385 }, { "epoch": 0.61, "grad_norm": 1.2809866666793823, "learning_rate": 3.5498583065983625e-06, "loss": 0.5089, "step": 9386 }, { "epoch": 0.61, "grad_norm": 1.2381727695465088, "learning_rate": 3.54885795202544e-06, "loss": 0.5362, "step": 9387 }, { "epoch": 0.61, "grad_norm": 1.1269997358322144, "learning_rate": 3.5478576608781924e-06, "loss": 0.4734, "step": 9388 }, { "epoch": 0.61, "grad_norm": 1.0881444215774536, "learning_rate": 3.54685743320034e-06, "loss": 0.5365, "step": 9389 }, { "epoch": 0.61, "grad_norm": 1.1585266590118408, "learning_rate": 3.5458572690356013e-06, "loss": 0.5083, "step": 9390 }, { "epoch": 0.61, "grad_norm": 1.1514291763305664, "learning_rate": 3.5448571684276883e-06, "loss": 0.5251, "step": 9391 }, { "epoch": 0.61, "grad_norm": 1.2741734981536865, "learning_rate": 3.543857131420315e-06, "loss": 0.5057, "step": 9392 }, { "epoch": 0.61, "grad_norm": 1.2153491973876953, "learning_rate": 3.542857158057189e-06, "loss": 0.5588, "step": 9393 }, { "epoch": 0.61, "grad_norm": 1.1119966506958008, "learning_rate": 3.5418572483820168e-06, "loss": 0.4938, "step": 9394 }, { "epoch": 0.61, "grad_norm": 1.1687681674957275, "learning_rate": 3.5408574024385036e-06, "loss": 0.5766, "step": 9395 }, { "epoch": 0.61, "grad_norm": 1.1735292673110962, "learning_rate": 3.5398576202703477e-06, "loss": 0.5571, "step": 9396 }, { "epoch": 0.61, "grad_norm": 1.2167823314666748, "learning_rate": 3.5388579019212476e-06, "loss": 0.5599, "step": 9397 }, { "epoch": 0.61, "grad_norm": 1.1253010034561157, "learning_rate": 3.5378582474349e-06, "loss": 0.4895, "step": 9398 }, { "epoch": 0.61, "grad_norm": 1.1631041765213013, "learning_rate": 3.5368586568549962e-06, "loss": 0.5292, "step": 9399 }, { "epoch": 0.61, "grad_norm": 1.2662241458892822, "learning_rate": 3.5358591302252244e-06, "loss": 0.522, "step": 9400 }, { "epoch": 0.61, "grad_norm": 1.2309011220932007, "learning_rate": 3.534859667589272e-06, "loss": 0.5675, "step": 9401 }, { "epoch": 0.61, "grad_norm": 1.111854910850525, "learning_rate": 3.533860268990824e-06, "loss": 0.5253, "step": 9402 }, { "epoch": 0.61, "grad_norm": 1.1861320734024048, "learning_rate": 3.532860934473561e-06, "loss": 0.5877, "step": 9403 }, { "epoch": 0.61, "grad_norm": 1.1875503063201904, "learning_rate": 3.531861664081163e-06, "loss": 0.5258, "step": 9404 }, { "epoch": 0.61, "grad_norm": 1.2300052642822266, "learning_rate": 3.5308624578573024e-06, "loss": 0.5595, "step": 9405 }, { "epoch": 0.61, "grad_norm": 1.1643216609954834, "learning_rate": 3.529863315845653e-06, "loss": 0.5424, "step": 9406 }, { "epoch": 0.61, "grad_norm": 1.1519521474838257, "learning_rate": 3.5288642380898845e-06, "loss": 0.5382, "step": 9407 }, { "epoch": 0.61, "grad_norm": 1.2975218296051025, "learning_rate": 3.527865224633665e-06, "loss": 0.4821, "step": 9408 }, { "epoch": 0.61, "grad_norm": 1.0812910795211792, "learning_rate": 3.5268662755206583e-06, "loss": 0.5075, "step": 9409 }, { "epoch": 0.61, "grad_norm": 1.0939242839813232, "learning_rate": 3.5258673907945284e-06, "loss": 0.4796, "step": 9410 }, { "epoch": 0.61, "grad_norm": 1.2204514741897583, "learning_rate": 3.5248685704989295e-06, "loss": 0.5333, "step": 9411 }, { "epoch": 0.61, "grad_norm": 1.1273822784423828, "learning_rate": 3.5238698146775186e-06, "loss": 0.5197, "step": 9412 }, { "epoch": 0.61, "grad_norm": 1.2294398546218872, "learning_rate": 3.5228711233739504e-06, "loss": 0.5689, "step": 9413 }, { "epoch": 0.61, "grad_norm": 1.204681634902954, "learning_rate": 3.521872496631874e-06, "loss": 0.5736, "step": 9414 }, { "epoch": 0.61, "grad_norm": 1.247950792312622, "learning_rate": 3.5208739344949393e-06, "loss": 0.5648, "step": 9415 }, { "epoch": 0.61, "grad_norm": 1.2313083410263062, "learning_rate": 3.5198754370067865e-06, "loss": 0.4969, "step": 9416 }, { "epoch": 0.61, "grad_norm": 1.257850170135498, "learning_rate": 3.5188770042110598e-06, "loss": 0.5741, "step": 9417 }, { "epoch": 0.61, "grad_norm": 1.1267786026000977, "learning_rate": 3.5178786361513985e-06, "loss": 0.5361, "step": 9418 }, { "epoch": 0.61, "grad_norm": 1.2479767799377441, "learning_rate": 3.516880332871437e-06, "loss": 0.5583, "step": 9419 }, { "epoch": 0.61, "grad_norm": 1.0068756341934204, "learning_rate": 3.5158820944148104e-06, "loss": 0.4524, "step": 9420 }, { "epoch": 0.61, "grad_norm": 1.2401018142700195, "learning_rate": 3.514883920825151e-06, "loss": 0.5055, "step": 9421 }, { "epoch": 0.61, "grad_norm": 1.1709604263305664, "learning_rate": 3.51388581214608e-06, "loss": 0.5019, "step": 9422 }, { "epoch": 0.61, "grad_norm": 1.029836654663086, "learning_rate": 3.512887768421227e-06, "loss": 0.5198, "step": 9423 }, { "epoch": 0.61, "grad_norm": 1.0851032733917236, "learning_rate": 3.511889789694213e-06, "loss": 0.5348, "step": 9424 }, { "epoch": 0.61, "grad_norm": 1.1726710796356201, "learning_rate": 3.510891876008656e-06, "loss": 0.492, "step": 9425 }, { "epoch": 0.61, "grad_norm": 1.1624740362167358, "learning_rate": 3.509894027408174e-06, "loss": 0.5454, "step": 9426 }, { "epoch": 0.61, "grad_norm": 1.2278356552124023, "learning_rate": 3.508896243936382e-06, "loss": 0.5636, "step": 9427 }, { "epoch": 0.61, "grad_norm": 1.2248055934906006, "learning_rate": 3.507898525636885e-06, "loss": 0.5356, "step": 9428 }, { "epoch": 0.61, "grad_norm": 1.1234560012817383, "learning_rate": 3.506900872553294e-06, "loss": 0.4761, "step": 9429 }, { "epoch": 0.61, "grad_norm": 1.286818504333496, "learning_rate": 3.5059032847292134e-06, "loss": 0.5057, "step": 9430 }, { "epoch": 0.61, "grad_norm": 1.2452634572982788, "learning_rate": 3.504905762208246e-06, "loss": 0.5074, "step": 9431 }, { "epoch": 0.61, "grad_norm": 1.2235512733459473, "learning_rate": 3.5039083050339906e-06, "loss": 0.5502, "step": 9432 }, { "epoch": 0.61, "grad_norm": 1.1370283365249634, "learning_rate": 3.5029109132500438e-06, "loss": 0.5442, "step": 9433 }, { "epoch": 0.61, "grad_norm": 1.1274354457855225, "learning_rate": 3.5019135868999977e-06, "loss": 0.512, "step": 9434 }, { "epoch": 0.61, "grad_norm": 1.2356959581375122, "learning_rate": 3.500916326027443e-06, "loss": 0.5064, "step": 9435 }, { "epoch": 0.61, "grad_norm": 1.0302575826644897, "learning_rate": 3.499919130675968e-06, "loss": 0.5434, "step": 9436 }, { "epoch": 0.61, "grad_norm": 1.0994377136230469, "learning_rate": 3.4989220008891587e-06, "loss": 0.5291, "step": 9437 }, { "epoch": 0.61, "grad_norm": 1.1123489141464233, "learning_rate": 3.497924936710595e-06, "loss": 0.5175, "step": 9438 }, { "epoch": 0.61, "grad_norm": 1.124207854270935, "learning_rate": 3.4969279381838585e-06, "loss": 0.5401, "step": 9439 }, { "epoch": 0.61, "grad_norm": 1.3475713729858398, "learning_rate": 3.495931005352522e-06, "loss": 0.5325, "step": 9440 }, { "epoch": 0.61, "grad_norm": 1.1110708713531494, "learning_rate": 3.494934138260162e-06, "loss": 0.541, "step": 9441 }, { "epoch": 0.61, "grad_norm": 1.1604994535446167, "learning_rate": 3.4939373369503464e-06, "loss": 0.5139, "step": 9442 }, { "epoch": 0.61, "grad_norm": 1.2235760688781738, "learning_rate": 3.4929406014666447e-06, "loss": 0.5105, "step": 9443 }, { "epoch": 0.61, "grad_norm": 1.2401503324508667, "learning_rate": 3.4919439318526206e-06, "loss": 0.5013, "step": 9444 }, { "epoch": 0.61, "grad_norm": 1.2065482139587402, "learning_rate": 3.4909473281518375e-06, "loss": 0.5076, "step": 9445 }, { "epoch": 0.61, "grad_norm": 1.180794596672058, "learning_rate": 3.489950790407853e-06, "loss": 0.5332, "step": 9446 }, { "epoch": 0.61, "grad_norm": 1.1086238622665405, "learning_rate": 3.4889543186642225e-06, "loss": 0.4759, "step": 9447 }, { "epoch": 0.61, "grad_norm": 1.237149953842163, "learning_rate": 3.4879579129645013e-06, "loss": 0.5709, "step": 9448 }, { "epoch": 0.61, "grad_norm": 1.1368050575256348, "learning_rate": 3.486961573352238e-06, "loss": 0.4644, "step": 9449 }, { "epoch": 0.61, "grad_norm": 1.0804795026779175, "learning_rate": 3.4859652998709796e-06, "loss": 0.51, "step": 9450 }, { "epoch": 0.61, "grad_norm": 1.1600581407546997, "learning_rate": 3.484969092564275e-06, "loss": 0.5696, "step": 9451 }, { "epoch": 0.61, "grad_norm": 1.0981942415237427, "learning_rate": 3.4839729514756592e-06, "loss": 0.5024, "step": 9452 }, { "epoch": 0.61, "grad_norm": 1.1031383275985718, "learning_rate": 3.4829768766486755e-06, "loss": 0.512, "step": 9453 }, { "epoch": 0.61, "grad_norm": 1.1398085355758667, "learning_rate": 3.4819808681268584e-06, "loss": 0.4856, "step": 9454 }, { "epoch": 0.61, "grad_norm": 1.3056426048278809, "learning_rate": 3.4809849259537405e-06, "loss": 0.5385, "step": 9455 }, { "epoch": 0.61, "grad_norm": 1.2784618139266968, "learning_rate": 3.479989050172855e-06, "loss": 0.5108, "step": 9456 }, { "epoch": 0.61, "grad_norm": 1.165406584739685, "learning_rate": 3.4789932408277237e-06, "loss": 0.4968, "step": 9457 }, { "epoch": 0.61, "grad_norm": 1.2137638330459595, "learning_rate": 3.4779974979618734e-06, "loss": 0.5194, "step": 9458 }, { "epoch": 0.61, "grad_norm": 1.2384096384048462, "learning_rate": 3.4770018216188267e-06, "loss": 0.5122, "step": 9459 }, { "epoch": 0.61, "grad_norm": 1.1772725582122803, "learning_rate": 3.4760062118421003e-06, "loss": 0.5635, "step": 9460 }, { "epoch": 0.61, "grad_norm": 1.1740779876708984, "learning_rate": 3.4750106686752105e-06, "loss": 0.5124, "step": 9461 }, { "epoch": 0.61, "grad_norm": 1.1699109077453613, "learning_rate": 3.474015192161673e-06, "loss": 0.5646, "step": 9462 }, { "epoch": 0.61, "grad_norm": 1.1003187894821167, "learning_rate": 3.4730197823449906e-06, "loss": 0.5426, "step": 9463 }, { "epoch": 0.61, "grad_norm": 1.2240614891052246, "learning_rate": 3.472024439268674e-06, "loss": 0.5684, "step": 9464 }, { "epoch": 0.61, "grad_norm": 1.1936925649642944, "learning_rate": 3.4710291629762283e-06, "loss": 0.5029, "step": 9465 }, { "epoch": 0.61, "grad_norm": 1.1574487686157227, "learning_rate": 3.4700339535111514e-06, "loss": 0.5277, "step": 9466 }, { "epoch": 0.61, "grad_norm": 1.2232215404510498, "learning_rate": 3.4690388109169446e-06, "loss": 0.5289, "step": 9467 }, { "epoch": 0.61, "grad_norm": 1.1098747253417969, "learning_rate": 3.4680437352371028e-06, "loss": 0.539, "step": 9468 }, { "epoch": 0.61, "grad_norm": 1.2099210023880005, "learning_rate": 3.467048726515115e-06, "loss": 0.4741, "step": 9469 }, { "epoch": 0.61, "grad_norm": 1.2661470174789429, "learning_rate": 3.466053784794472e-06, "loss": 0.5075, "step": 9470 }, { "epoch": 0.61, "grad_norm": 1.1797465085983276, "learning_rate": 3.4650589101186603e-06, "loss": 0.5302, "step": 9471 }, { "epoch": 0.61, "grad_norm": 1.1523807048797607, "learning_rate": 3.4640641025311638e-06, "loss": 0.5599, "step": 9472 }, { "epoch": 0.61, "grad_norm": 1.2357819080352783, "learning_rate": 3.4630693620754617e-06, "loss": 0.5056, "step": 9473 }, { "epoch": 0.61, "grad_norm": 1.1690987348556519, "learning_rate": 3.4620746887950356e-06, "loss": 0.5564, "step": 9474 }, { "epoch": 0.61, "grad_norm": 1.0651813745498657, "learning_rate": 3.4610800827333545e-06, "loss": 0.5224, "step": 9475 }, { "epoch": 0.61, "grad_norm": 1.1212607622146606, "learning_rate": 3.460085543933893e-06, "loss": 0.4871, "step": 9476 }, { "epoch": 0.61, "grad_norm": 1.2455520629882812, "learning_rate": 3.459091072440118e-06, "loss": 0.5629, "step": 9477 }, { "epoch": 0.61, "grad_norm": 1.2688666582107544, "learning_rate": 3.4580966682954986e-06, "loss": 0.5795, "step": 9478 }, { "epoch": 0.61, "grad_norm": 1.0698858499526978, "learning_rate": 3.4571023315434953e-06, "loss": 0.5047, "step": 9479 }, { "epoch": 0.61, "grad_norm": 1.1814745664596558, "learning_rate": 3.456108062227569e-06, "loss": 0.5349, "step": 9480 }, { "epoch": 0.61, "grad_norm": 1.2348175048828125, "learning_rate": 3.4551138603911743e-06, "loss": 0.5441, "step": 9481 }, { "epoch": 0.61, "grad_norm": 1.1997162103652954, "learning_rate": 3.454119726077767e-06, "loss": 0.5597, "step": 9482 }, { "epoch": 0.61, "grad_norm": 1.2199190855026245, "learning_rate": 3.453125659330798e-06, "loss": 0.5498, "step": 9483 }, { "epoch": 0.61, "grad_norm": 1.1404540538787842, "learning_rate": 3.4521316601937173e-06, "loss": 0.4979, "step": 9484 }, { "epoch": 0.61, "grad_norm": 1.14205801486969, "learning_rate": 3.451137728709967e-06, "loss": 0.524, "step": 9485 }, { "epoch": 0.61, "grad_norm": 1.1853106021881104, "learning_rate": 3.450143864922991e-06, "loss": 0.5488, "step": 9486 }, { "epoch": 0.61, "grad_norm": 1.201845407485962, "learning_rate": 3.449150068876227e-06, "loss": 0.5916, "step": 9487 }, { "epoch": 0.61, "grad_norm": 1.1506662368774414, "learning_rate": 3.4481563406131137e-06, "loss": 0.5542, "step": 9488 }, { "epoch": 0.61, "grad_norm": 1.0817068815231323, "learning_rate": 3.4471626801770815e-06, "loss": 0.496, "step": 9489 }, { "epoch": 0.61, "grad_norm": 1.154130458831787, "learning_rate": 3.4461690876115615e-06, "loss": 0.5418, "step": 9490 }, { "epoch": 0.61, "grad_norm": 1.3358126878738403, "learning_rate": 3.4451755629599824e-06, "loss": 0.5277, "step": 9491 }, { "epoch": 0.61, "grad_norm": 1.1235687732696533, "learning_rate": 3.44418210626577e-06, "loss": 0.5668, "step": 9492 }, { "epoch": 0.61, "grad_norm": 1.2578102350234985, "learning_rate": 3.4431887175723422e-06, "loss": 0.518, "step": 9493 }, { "epoch": 0.61, "grad_norm": 1.1760380268096924, "learning_rate": 3.4421953969231186e-06, "loss": 0.5233, "step": 9494 }, { "epoch": 0.61, "grad_norm": 1.1902302503585815, "learning_rate": 3.4412021443615153e-06, "loss": 0.5161, "step": 9495 }, { "epoch": 0.61, "grad_norm": 1.1245516538619995, "learning_rate": 3.4402089599309435e-06, "loss": 0.4908, "step": 9496 }, { "epoch": 0.61, "grad_norm": 1.0751128196716309, "learning_rate": 3.4392158436748146e-06, "loss": 0.499, "step": 9497 }, { "epoch": 0.61, "grad_norm": 1.3632426261901855, "learning_rate": 3.438222795636536e-06, "loss": 0.5156, "step": 9498 }, { "epoch": 0.61, "grad_norm": 1.1423054933547974, "learning_rate": 3.4372298158595074e-06, "loss": 0.5953, "step": 9499 }, { "epoch": 0.61, "grad_norm": 1.1308972835540771, "learning_rate": 3.436236904387132e-06, "loss": 0.523, "step": 9500 }, { "epoch": 0.61, "grad_norm": 1.1117331981658936, "learning_rate": 3.435244061262806e-06, "loss": 0.5042, "step": 9501 }, { "epoch": 0.61, "grad_norm": 1.218308448791504, "learning_rate": 3.434251286529926e-06, "loss": 0.5399, "step": 9502 }, { "epoch": 0.61, "grad_norm": 1.2058249711990356, "learning_rate": 3.433258580231884e-06, "loss": 0.5248, "step": 9503 }, { "epoch": 0.61, "grad_norm": 1.1754730939865112, "learning_rate": 3.432265942412066e-06, "loss": 0.5468, "step": 9504 }, { "epoch": 0.61, "grad_norm": 1.1119768619537354, "learning_rate": 3.431273373113858e-06, "loss": 0.4696, "step": 9505 }, { "epoch": 0.61, "grad_norm": 1.1530545949935913, "learning_rate": 3.4302808723806436e-06, "loss": 0.517, "step": 9506 }, { "epoch": 0.61, "grad_norm": 1.1966264247894287, "learning_rate": 3.4292884402558026e-06, "loss": 0.5683, "step": 9507 }, { "epoch": 0.61, "grad_norm": 1.2100114822387695, "learning_rate": 3.428296076782711e-06, "loss": 0.5589, "step": 9508 }, { "epoch": 0.61, "grad_norm": 1.1489537954330444, "learning_rate": 3.4273037820047457e-06, "loss": 0.5147, "step": 9509 }, { "epoch": 0.61, "grad_norm": 1.2472745180130005, "learning_rate": 3.4263115559652713e-06, "loss": 0.5366, "step": 9510 }, { "epoch": 0.61, "grad_norm": 1.2474713325500488, "learning_rate": 3.4253193987076595e-06, "loss": 0.5177, "step": 9511 }, { "epoch": 0.61, "grad_norm": 1.1253399848937988, "learning_rate": 3.424327310275274e-06, "loss": 0.5151, "step": 9512 }, { "epoch": 0.61, "grad_norm": 1.139752745628357, "learning_rate": 3.4233352907114757e-06, "loss": 0.5166, "step": 9513 }, { "epoch": 0.61, "grad_norm": 1.2174307107925415, "learning_rate": 3.422343340059625e-06, "loss": 0.526, "step": 9514 }, { "epoch": 0.61, "grad_norm": 1.1944234371185303, "learning_rate": 3.421351458363078e-06, "loss": 0.4853, "step": 9515 }, { "epoch": 0.61, "grad_norm": 1.0782911777496338, "learning_rate": 3.420359645665184e-06, "loss": 0.534, "step": 9516 }, { "epoch": 0.61, "grad_norm": 1.121679425239563, "learning_rate": 3.419367902009294e-06, "loss": 0.5233, "step": 9517 }, { "epoch": 0.61, "grad_norm": 1.0718121528625488, "learning_rate": 3.418376227438755e-06, "loss": 0.499, "step": 9518 }, { "epoch": 0.61, "grad_norm": 1.1349200010299683, "learning_rate": 3.417384621996911e-06, "loss": 0.5545, "step": 9519 }, { "epoch": 0.61, "grad_norm": 1.1592414379119873, "learning_rate": 3.416393085727101e-06, "loss": 0.49, "step": 9520 }, { "epoch": 0.61, "grad_norm": 1.241986632347107, "learning_rate": 3.4154016186726662e-06, "loss": 0.5525, "step": 9521 }, { "epoch": 0.61, "grad_norm": 1.194227695465088, "learning_rate": 3.414410220876936e-06, "loss": 0.5212, "step": 9522 }, { "epoch": 0.61, "grad_norm": 1.1411899328231812, "learning_rate": 3.4134188923832444e-06, "loss": 0.5253, "step": 9523 }, { "epoch": 0.61, "grad_norm": 1.216695785522461, "learning_rate": 3.4124276332349194e-06, "loss": 0.5393, "step": 9524 }, { "epoch": 0.61, "grad_norm": 1.3173854351043701, "learning_rate": 3.4114364434752865e-06, "loss": 0.5321, "step": 9525 }, { "epoch": 0.61, "grad_norm": 1.1743637323379517, "learning_rate": 3.41044532314767e-06, "loss": 0.5486, "step": 9526 }, { "epoch": 0.61, "grad_norm": 1.1671605110168457, "learning_rate": 3.409454272295386e-06, "loss": 0.5275, "step": 9527 }, { "epoch": 0.62, "grad_norm": 1.2057257890701294, "learning_rate": 3.4084632909617522e-06, "loss": 0.5793, "step": 9528 }, { "epoch": 0.62, "grad_norm": 1.2247123718261719, "learning_rate": 3.407472379190081e-06, "loss": 0.5924, "step": 9529 }, { "epoch": 0.62, "grad_norm": 1.3907063007354736, "learning_rate": 3.406481537023684e-06, "loss": 0.5358, "step": 9530 }, { "epoch": 0.62, "grad_norm": 1.3204195499420166, "learning_rate": 3.4054907645058678e-06, "loss": 0.5413, "step": 9531 }, { "epoch": 0.62, "grad_norm": 1.1602095365524292, "learning_rate": 3.4045000616799352e-06, "loss": 0.4736, "step": 9532 }, { "epoch": 0.62, "grad_norm": 1.1330420970916748, "learning_rate": 3.40350942858919e-06, "loss": 0.5351, "step": 9533 }, { "epoch": 0.62, "grad_norm": 1.1072207689285278, "learning_rate": 3.4025188652769283e-06, "loss": 0.5396, "step": 9534 }, { "epoch": 0.62, "grad_norm": 1.1955387592315674, "learning_rate": 3.4015283717864456e-06, "loss": 0.5305, "step": 9535 }, { "epoch": 0.62, "grad_norm": 1.1740585565567017, "learning_rate": 3.4005379481610327e-06, "loss": 0.5393, "step": 9536 }, { "epoch": 0.62, "grad_norm": 1.193377137184143, "learning_rate": 3.39954759444398e-06, "loss": 0.5538, "step": 9537 }, { "epoch": 0.62, "grad_norm": 1.1594890356063843, "learning_rate": 3.398557310678572e-06, "loss": 0.5484, "step": 9538 }, { "epoch": 0.62, "grad_norm": 1.1522241830825806, "learning_rate": 3.397567096908094e-06, "loss": 0.5713, "step": 9539 }, { "epoch": 0.62, "grad_norm": 1.109031081199646, "learning_rate": 3.3965769531758232e-06, "loss": 0.4972, "step": 9540 }, { "epoch": 0.62, "grad_norm": 1.184904932975769, "learning_rate": 3.3955868795250356e-06, "loss": 0.4808, "step": 9541 }, { "epoch": 0.62, "grad_norm": 1.1625723838806152, "learning_rate": 3.3945968759990066e-06, "loss": 0.5492, "step": 9542 }, { "epoch": 0.62, "grad_norm": 1.1032146215438843, "learning_rate": 3.3936069426410066e-06, "loss": 0.5147, "step": 9543 }, { "epoch": 0.62, "grad_norm": 1.1705807447433472, "learning_rate": 3.392617079494304e-06, "loss": 0.5385, "step": 9544 }, { "epoch": 0.62, "grad_norm": 1.1636167764663696, "learning_rate": 3.39162728660216e-06, "loss": 0.5036, "step": 9545 }, { "epoch": 0.62, "grad_norm": 1.1831822395324707, "learning_rate": 3.3906375640078373e-06, "loss": 0.5067, "step": 9546 }, { "epoch": 0.62, "grad_norm": 1.1948050260543823, "learning_rate": 3.3896479117545945e-06, "loss": 0.5291, "step": 9547 }, { "epoch": 0.62, "grad_norm": 1.1677838563919067, "learning_rate": 3.3886583298856866e-06, "loss": 0.517, "step": 9548 }, { "epoch": 0.62, "grad_norm": 1.1986194849014282, "learning_rate": 3.387668818444366e-06, "loss": 0.4923, "step": 9549 }, { "epoch": 0.62, "grad_norm": 1.1716028451919556, "learning_rate": 3.386679377473884e-06, "loss": 0.5257, "step": 9550 }, { "epoch": 0.62, "grad_norm": 1.200661301612854, "learning_rate": 3.3856900070174814e-06, "loss": 0.5152, "step": 9551 }, { "epoch": 0.62, "grad_norm": 1.0936660766601562, "learning_rate": 3.384700707118404e-06, "loss": 0.4491, "step": 9552 }, { "epoch": 0.62, "grad_norm": 1.178515076637268, "learning_rate": 3.383711477819891e-06, "loss": 0.5126, "step": 9553 }, { "epoch": 0.62, "grad_norm": 1.1713097095489502, "learning_rate": 3.382722319165179e-06, "loss": 0.5463, "step": 9554 }, { "epoch": 0.62, "grad_norm": 1.1288074254989624, "learning_rate": 3.381733231197502e-06, "loss": 0.516, "step": 9555 }, { "epoch": 0.62, "grad_norm": 1.1843657493591309, "learning_rate": 3.3807442139600922e-06, "loss": 0.5181, "step": 9556 }, { "epoch": 0.62, "grad_norm": 1.1953151226043701, "learning_rate": 3.379755267496173e-06, "loss": 0.5636, "step": 9557 }, { "epoch": 0.62, "grad_norm": 1.2375295162200928, "learning_rate": 3.378766391848971e-06, "loss": 0.5384, "step": 9558 }, { "epoch": 0.62, "grad_norm": 1.1127104759216309, "learning_rate": 3.377777587061707e-06, "loss": 0.5071, "step": 9559 }, { "epoch": 0.62, "grad_norm": 1.1645182371139526, "learning_rate": 3.3767888531775992e-06, "loss": 0.5156, "step": 9560 }, { "epoch": 0.62, "grad_norm": 1.1822099685668945, "learning_rate": 3.3758001902398628e-06, "loss": 0.5365, "step": 9561 }, { "epoch": 0.62, "grad_norm": 1.1844645738601685, "learning_rate": 3.3748115982917116e-06, "loss": 0.537, "step": 9562 }, { "epoch": 0.62, "grad_norm": 1.2016185522079468, "learning_rate": 3.3738230773763502e-06, "loss": 0.5378, "step": 9563 }, { "epoch": 0.62, "grad_norm": 1.2431319952011108, "learning_rate": 3.372834627536987e-06, "loss": 0.5229, "step": 9564 }, { "epoch": 0.62, "grad_norm": 1.1863988637924194, "learning_rate": 3.3718462488168236e-06, "loss": 0.5366, "step": 9565 }, { "epoch": 0.62, "grad_norm": 1.1032168865203857, "learning_rate": 3.3708579412590604e-06, "loss": 0.4966, "step": 9566 }, { "epoch": 0.62, "grad_norm": 1.35708749294281, "learning_rate": 3.369869704906893e-06, "loss": 0.5479, "step": 9567 }, { "epoch": 0.62, "grad_norm": 1.1628386974334717, "learning_rate": 3.3688815398035167e-06, "loss": 0.5628, "step": 9568 }, { "epoch": 0.62, "grad_norm": 1.2018383741378784, "learning_rate": 3.3678934459921185e-06, "loss": 0.5158, "step": 9569 }, { "epoch": 0.62, "grad_norm": 1.1922255754470825, "learning_rate": 3.3669054235158873e-06, "loss": 0.5116, "step": 9570 }, { "epoch": 0.62, "grad_norm": 1.1571003198623657, "learning_rate": 3.3659174724180054e-06, "loss": 0.5176, "step": 9571 }, { "epoch": 0.62, "grad_norm": 1.0809104442596436, "learning_rate": 3.364929592741656e-06, "loss": 0.5515, "step": 9572 }, { "epoch": 0.62, "grad_norm": 1.1594505310058594, "learning_rate": 3.363941784530016e-06, "loss": 0.5398, "step": 9573 }, { "epoch": 0.62, "grad_norm": 1.1662873029708862, "learning_rate": 3.362954047826259e-06, "loss": 0.5127, "step": 9574 }, { "epoch": 0.62, "grad_norm": 1.1279898881912231, "learning_rate": 3.3619663826735568e-06, "loss": 0.5117, "step": 9575 }, { "epoch": 0.62, "grad_norm": 1.299241542816162, "learning_rate": 3.3609787891150767e-06, "loss": 0.5372, "step": 9576 }, { "epoch": 0.62, "grad_norm": 1.1689218282699585, "learning_rate": 3.3599912671939873e-06, "loss": 0.5545, "step": 9577 }, { "epoch": 0.62, "grad_norm": 1.0791220664978027, "learning_rate": 3.3590038169534468e-06, "loss": 0.5393, "step": 9578 }, { "epoch": 0.62, "grad_norm": 1.2293833494186401, "learning_rate": 3.3580164384366154e-06, "loss": 0.5815, "step": 9579 }, { "epoch": 0.62, "grad_norm": 1.2537815570831299, "learning_rate": 3.3570291316866503e-06, "loss": 0.5238, "step": 9580 }, { "epoch": 0.62, "grad_norm": 1.14780855178833, "learning_rate": 3.3560418967467024e-06, "loss": 0.5651, "step": 9581 }, { "epoch": 0.62, "grad_norm": 1.26405930519104, "learning_rate": 3.355054733659922e-06, "loss": 0.5455, "step": 9582 }, { "epoch": 0.62, "grad_norm": 1.2445799112319946, "learning_rate": 3.354067642469454e-06, "loss": 0.4602, "step": 9583 }, { "epoch": 0.62, "grad_norm": 1.1687084436416626, "learning_rate": 3.3530806232184437e-06, "loss": 0.5648, "step": 9584 }, { "epoch": 0.62, "grad_norm": 1.247697114944458, "learning_rate": 3.352093675950031e-06, "loss": 0.5129, "step": 9585 }, { "epoch": 0.62, "grad_norm": 1.1762479543685913, "learning_rate": 3.351106800707352e-06, "loss": 0.5285, "step": 9586 }, { "epoch": 0.62, "grad_norm": 1.1778653860092163, "learning_rate": 3.350119997533539e-06, "loss": 0.5389, "step": 9587 }, { "epoch": 0.62, "grad_norm": 1.170353889465332, "learning_rate": 3.3491332664717245e-06, "loss": 0.5095, "step": 9588 }, { "epoch": 0.62, "grad_norm": 1.2461620569229126, "learning_rate": 3.348146607565036e-06, "loss": 0.5308, "step": 9589 }, { "epoch": 0.62, "grad_norm": 1.2146531343460083, "learning_rate": 3.347160020856597e-06, "loss": 0.5424, "step": 9590 }, { "epoch": 0.62, "grad_norm": 1.085013747215271, "learning_rate": 3.346173506389531e-06, "loss": 0.4884, "step": 9591 }, { "epoch": 0.62, "grad_norm": 1.1287362575531006, "learning_rate": 3.345187064206953e-06, "loss": 0.5064, "step": 9592 }, { "epoch": 0.62, "grad_norm": 1.2678955793380737, "learning_rate": 3.3442006943519776e-06, "loss": 0.5335, "step": 9593 }, { "epoch": 0.62, "grad_norm": 1.233667254447937, "learning_rate": 3.343214396867719e-06, "loss": 0.5679, "step": 9594 }, { "epoch": 0.62, "grad_norm": 1.2630846500396729, "learning_rate": 3.342228171797284e-06, "loss": 0.4945, "step": 9595 }, { "epoch": 0.62, "grad_norm": 1.0905402898788452, "learning_rate": 3.341242019183778e-06, "loss": 0.4644, "step": 9596 }, { "epoch": 0.62, "grad_norm": 1.1823517084121704, "learning_rate": 3.3402559390703067e-06, "loss": 0.568, "step": 9597 }, { "epoch": 0.62, "grad_norm": 1.2018928527832031, "learning_rate": 3.339269931499963e-06, "loss": 0.5174, "step": 9598 }, { "epoch": 0.62, "grad_norm": 1.209037184715271, "learning_rate": 3.338283996515847e-06, "loss": 0.5254, "step": 9599 }, { "epoch": 0.62, "grad_norm": 1.1968632936477661, "learning_rate": 3.3372981341610498e-06, "loss": 0.5255, "step": 9600 }, { "epoch": 0.62, "grad_norm": 1.2152118682861328, "learning_rate": 3.3363123444786617e-06, "loss": 0.551, "step": 9601 }, { "epoch": 0.62, "grad_norm": 1.1095380783081055, "learning_rate": 3.3353266275117678e-06, "loss": 0.5013, "step": 9602 }, { "epoch": 0.62, "grad_norm": 1.1378830671310425, "learning_rate": 3.3343409833034547e-06, "loss": 0.5137, "step": 9603 }, { "epoch": 0.62, "grad_norm": 1.1917363405227661, "learning_rate": 3.3333554118967977e-06, "loss": 0.4862, "step": 9604 }, { "epoch": 0.62, "grad_norm": 1.1436374187469482, "learning_rate": 3.3323699133348754e-06, "loss": 0.5164, "step": 9605 }, { "epoch": 0.62, "grad_norm": 1.3320503234863281, "learning_rate": 3.3313844876607617e-06, "loss": 0.5358, "step": 9606 }, { "epoch": 0.62, "grad_norm": 1.1621005535125732, "learning_rate": 3.3303991349175268e-06, "loss": 0.5967, "step": 9607 }, { "epoch": 0.62, "grad_norm": 1.2085113525390625, "learning_rate": 3.3294138551482386e-06, "loss": 0.5235, "step": 9608 }, { "epoch": 0.62, "grad_norm": 1.0998727083206177, "learning_rate": 3.328428648395963e-06, "loss": 0.5106, "step": 9609 }, { "epoch": 0.62, "grad_norm": 1.159085988998413, "learning_rate": 3.3274435147037554e-06, "loss": 0.5336, "step": 9610 }, { "epoch": 0.62, "grad_norm": 1.1049422025680542, "learning_rate": 3.3264584541146765e-06, "loss": 0.5265, "step": 9611 }, { "epoch": 0.62, "grad_norm": 1.1247782707214355, "learning_rate": 3.3254734666717813e-06, "loss": 0.5211, "step": 9612 }, { "epoch": 0.62, "grad_norm": 1.0439097881317139, "learning_rate": 3.32448855241812e-06, "loss": 0.469, "step": 9613 }, { "epoch": 0.62, "grad_norm": 1.1350650787353516, "learning_rate": 3.3235037113967422e-06, "loss": 0.5466, "step": 9614 }, { "epoch": 0.62, "grad_norm": 1.1748487949371338, "learning_rate": 3.3225189436506934e-06, "loss": 0.5165, "step": 9615 }, { "epoch": 0.62, "grad_norm": 1.153861165046692, "learning_rate": 3.3215342492230114e-06, "loss": 0.5189, "step": 9616 }, { "epoch": 0.62, "grad_norm": 1.2780147790908813, "learning_rate": 3.320549628156737e-06, "loss": 0.5512, "step": 9617 }, { "epoch": 0.62, "grad_norm": 1.1817537546157837, "learning_rate": 3.3195650804949047e-06, "loss": 0.5079, "step": 9618 }, { "epoch": 0.62, "grad_norm": 1.2009536027908325, "learning_rate": 3.318580606280549e-06, "loss": 0.5343, "step": 9619 }, { "epoch": 0.62, "grad_norm": 1.2499847412109375, "learning_rate": 3.3175962055566958e-06, "loss": 0.4871, "step": 9620 }, { "epoch": 0.62, "grad_norm": 1.171879768371582, "learning_rate": 3.316611878366372e-06, "loss": 0.5514, "step": 9621 }, { "epoch": 0.62, "grad_norm": 1.1630921363830566, "learning_rate": 3.3156276247525993e-06, "loss": 0.5294, "step": 9622 }, { "epoch": 0.62, "grad_norm": 1.1356154680252075, "learning_rate": 3.3146434447583973e-06, "loss": 0.5534, "step": 9623 }, { "epoch": 0.62, "grad_norm": 1.291545033454895, "learning_rate": 3.3136593384267833e-06, "loss": 0.5143, "step": 9624 }, { "epoch": 0.62, "grad_norm": 1.2006527185440063, "learning_rate": 3.3126753058007677e-06, "loss": 0.5244, "step": 9625 }, { "epoch": 0.62, "grad_norm": 1.2725574970245361, "learning_rate": 3.3116913469233614e-06, "loss": 0.5233, "step": 9626 }, { "epoch": 0.62, "grad_norm": 1.1014987230300903, "learning_rate": 3.3107074618375714e-06, "loss": 0.465, "step": 9627 }, { "epoch": 0.62, "grad_norm": 1.1261024475097656, "learning_rate": 3.309723650586398e-06, "loss": 0.5286, "step": 9628 }, { "epoch": 0.62, "grad_norm": 1.161658763885498, "learning_rate": 3.3087399132128437e-06, "loss": 0.5246, "step": 9629 }, { "epoch": 0.62, "grad_norm": 1.1001259088516235, "learning_rate": 3.307756249759905e-06, "loss": 0.4842, "step": 9630 }, { "epoch": 0.62, "grad_norm": 1.2026875019073486, "learning_rate": 3.306772660270573e-06, "loss": 0.5372, "step": 9631 }, { "epoch": 0.62, "grad_norm": 1.1788361072540283, "learning_rate": 3.3057891447878408e-06, "loss": 0.5713, "step": 9632 }, { "epoch": 0.62, "grad_norm": 1.1489813327789307, "learning_rate": 3.3048057033546943e-06, "loss": 0.5498, "step": 9633 }, { "epoch": 0.62, "grad_norm": 1.1707043647766113, "learning_rate": 3.303822336014116e-06, "loss": 0.5526, "step": 9634 }, { "epoch": 0.62, "grad_norm": 1.1000239849090576, "learning_rate": 3.3028390428090866e-06, "loss": 0.5271, "step": 9635 }, { "epoch": 0.62, "grad_norm": 1.3534929752349854, "learning_rate": 3.3018558237825844e-06, "loss": 0.5103, "step": 9636 }, { "epoch": 0.62, "grad_norm": 1.2155840396881104, "learning_rate": 3.3008726789775826e-06, "loss": 0.484, "step": 9637 }, { "epoch": 0.62, "grad_norm": 1.1549240350723267, "learning_rate": 3.2998896084370545e-06, "loss": 0.5726, "step": 9638 }, { "epoch": 0.62, "grad_norm": 1.0483322143554688, "learning_rate": 3.2989066122039627e-06, "loss": 0.5124, "step": 9639 }, { "epoch": 0.62, "grad_norm": 1.0954781770706177, "learning_rate": 3.297923690321274e-06, "loss": 0.5186, "step": 9640 }, { "epoch": 0.62, "grad_norm": 1.224653959274292, "learning_rate": 3.2969408428319505e-06, "loss": 0.5932, "step": 9641 }, { "epoch": 0.62, "grad_norm": 1.156097173690796, "learning_rate": 3.2959580697789473e-06, "loss": 0.5525, "step": 9642 }, { "epoch": 0.62, "grad_norm": 1.2513961791992188, "learning_rate": 3.294975371205221e-06, "loss": 0.4889, "step": 9643 }, { "epoch": 0.62, "grad_norm": 1.0848127603530884, "learning_rate": 3.293992747153725e-06, "loss": 0.5347, "step": 9644 }, { "epoch": 0.62, "grad_norm": 1.2216132879257202, "learning_rate": 3.293010197667401e-06, "loss": 0.4618, "step": 9645 }, { "epoch": 0.62, "grad_norm": 1.1123555898666382, "learning_rate": 3.2920277227891984e-06, "loss": 0.4767, "step": 9646 }, { "epoch": 0.62, "grad_norm": 1.165496587753296, "learning_rate": 3.291045322562057e-06, "loss": 0.4908, "step": 9647 }, { "epoch": 0.62, "grad_norm": 1.1352850198745728, "learning_rate": 3.2900629970289156e-06, "loss": 0.5564, "step": 9648 }, { "epoch": 0.62, "grad_norm": 1.2133973836898804, "learning_rate": 3.2890807462327077e-06, "loss": 0.5145, "step": 9649 }, { "epoch": 0.62, "grad_norm": 1.2267656326293945, "learning_rate": 3.28809857021637e-06, "loss": 0.5664, "step": 9650 }, { "epoch": 0.62, "grad_norm": 1.2708784341812134, "learning_rate": 3.287116469022824e-06, "loss": 0.5577, "step": 9651 }, { "epoch": 0.62, "grad_norm": 1.2952017784118652, "learning_rate": 3.286134442694998e-06, "loss": 0.5683, "step": 9652 }, { "epoch": 0.62, "grad_norm": 1.1523839235305786, "learning_rate": 3.285152491275814e-06, "loss": 0.5451, "step": 9653 }, { "epoch": 0.62, "grad_norm": 1.1620792150497437, "learning_rate": 3.284170614808189e-06, "loss": 0.5082, "step": 9654 }, { "epoch": 0.62, "grad_norm": 1.2095656394958496, "learning_rate": 3.283188813335041e-06, "loss": 0.5133, "step": 9655 }, { "epoch": 0.62, "grad_norm": 1.3102390766143799, "learning_rate": 3.2822070868992815e-06, "loss": 0.5413, "step": 9656 }, { "epoch": 0.62, "grad_norm": 1.1824394464492798, "learning_rate": 3.281225435543817e-06, "loss": 0.5261, "step": 9657 }, { "epoch": 0.62, "grad_norm": 1.2098791599273682, "learning_rate": 3.280243859311554e-06, "loss": 0.554, "step": 9658 }, { "epoch": 0.62, "grad_norm": 1.1885896921157837, "learning_rate": 3.279262358245394e-06, "loss": 0.4787, "step": 9659 }, { "epoch": 0.62, "grad_norm": 1.2682348489761353, "learning_rate": 3.2782809323882383e-06, "loss": 0.525, "step": 9660 }, { "epoch": 0.62, "grad_norm": 1.169588565826416, "learning_rate": 3.27729958178298e-06, "loss": 0.5468, "step": 9661 }, { "epoch": 0.62, "grad_norm": 1.1315301656723022, "learning_rate": 3.2763183064725148e-06, "loss": 0.5718, "step": 9662 }, { "epoch": 0.62, "grad_norm": 1.163426160812378, "learning_rate": 3.275337106499727e-06, "loss": 0.5223, "step": 9663 }, { "epoch": 0.62, "grad_norm": 1.1478073596954346, "learning_rate": 3.2743559819075046e-06, "loss": 0.5442, "step": 9664 }, { "epoch": 0.62, "grad_norm": 1.0935213565826416, "learning_rate": 3.27337493273873e-06, "loss": 0.5243, "step": 9665 }, { "epoch": 0.62, "grad_norm": 1.2765593528747559, "learning_rate": 3.272393959036283e-06, "loss": 0.5617, "step": 9666 }, { "epoch": 0.62, "grad_norm": 1.2442673444747925, "learning_rate": 3.2714130608430384e-06, "loss": 0.5429, "step": 9667 }, { "epoch": 0.62, "grad_norm": 1.2309609651565552, "learning_rate": 3.27043223820187e-06, "loss": 0.5504, "step": 9668 }, { "epoch": 0.62, "grad_norm": 1.069898247718811, "learning_rate": 3.2694514911556454e-06, "loss": 0.4761, "step": 9669 }, { "epoch": 0.62, "grad_norm": 1.2352241277694702, "learning_rate": 3.2684708197472303e-06, "loss": 0.5597, "step": 9670 }, { "epoch": 0.62, "grad_norm": 1.1294969320297241, "learning_rate": 3.2674902240194896e-06, "loss": 0.5338, "step": 9671 }, { "epoch": 0.62, "grad_norm": 1.2685221433639526, "learning_rate": 3.2665097040152805e-06, "loss": 0.5202, "step": 9672 }, { "epoch": 0.62, "grad_norm": 1.2167484760284424, "learning_rate": 3.265529259777461e-06, "loss": 0.5088, "step": 9673 }, { "epoch": 0.62, "grad_norm": 1.3619153499603271, "learning_rate": 3.264548891348881e-06, "loss": 0.5301, "step": 9674 }, { "epoch": 0.62, "grad_norm": 1.2607653141021729, "learning_rate": 3.263568598772393e-06, "loss": 0.5525, "step": 9675 }, { "epoch": 0.62, "grad_norm": 1.1174449920654297, "learning_rate": 3.26258838209084e-06, "loss": 0.5431, "step": 9676 }, { "epoch": 0.62, "grad_norm": 1.171568512916565, "learning_rate": 3.261608241347066e-06, "loss": 0.5386, "step": 9677 }, { "epoch": 0.62, "grad_norm": 1.113199234008789, "learning_rate": 3.2606281765839114e-06, "loss": 0.5347, "step": 9678 }, { "epoch": 0.62, "grad_norm": 1.256089210510254, "learning_rate": 3.259648187844212e-06, "loss": 0.5575, "step": 9679 }, { "epoch": 0.62, "grad_norm": 1.1521703004837036, "learning_rate": 3.258668275170801e-06, "loss": 0.4995, "step": 9680 }, { "epoch": 0.62, "grad_norm": 1.221764326095581, "learning_rate": 3.2576884386065056e-06, "loss": 0.5405, "step": 9681 }, { "epoch": 0.62, "grad_norm": 1.0577924251556396, "learning_rate": 3.2567086781941527e-06, "loss": 0.5205, "step": 9682 }, { "epoch": 0.63, "grad_norm": 1.2784631252288818, "learning_rate": 3.2557289939765655e-06, "loss": 0.5171, "step": 9683 }, { "epoch": 0.63, "grad_norm": 1.2684210538864136, "learning_rate": 3.254749385996565e-06, "loss": 0.4803, "step": 9684 }, { "epoch": 0.63, "grad_norm": 1.273568034172058, "learning_rate": 3.2537698542969674e-06, "loss": 0.5495, "step": 9685 }, { "epoch": 0.63, "grad_norm": 1.1963027715682983, "learning_rate": 3.252790398920582e-06, "loss": 0.513, "step": 9686 }, { "epoch": 0.63, "grad_norm": 1.2772367000579834, "learning_rate": 3.2518110199102205e-06, "loss": 0.4783, "step": 9687 }, { "epoch": 0.63, "grad_norm": 1.2098653316497803, "learning_rate": 3.250831717308689e-06, "loss": 0.5082, "step": 9688 }, { "epoch": 0.63, "grad_norm": 1.1258500814437866, "learning_rate": 3.2498524911587904e-06, "loss": 0.5307, "step": 9689 }, { "epoch": 0.63, "grad_norm": 1.1735048294067383, "learning_rate": 3.2488733415033236e-06, "loss": 0.533, "step": 9690 }, { "epoch": 0.63, "grad_norm": 1.1887845993041992, "learning_rate": 3.2478942683850872e-06, "loss": 0.5282, "step": 9691 }, { "epoch": 0.63, "grad_norm": 1.212890863418579, "learning_rate": 3.24691527184687e-06, "loss": 0.5393, "step": 9692 }, { "epoch": 0.63, "grad_norm": 1.1625802516937256, "learning_rate": 3.245936351931463e-06, "loss": 0.5283, "step": 9693 }, { "epoch": 0.63, "grad_norm": 1.2488845586776733, "learning_rate": 3.2449575086816526e-06, "loss": 0.527, "step": 9694 }, { "epoch": 0.63, "grad_norm": 1.123345136642456, "learning_rate": 3.243978742140221e-06, "loss": 0.4915, "step": 9695 }, { "epoch": 0.63, "grad_norm": 1.2521909475326538, "learning_rate": 3.243000052349948e-06, "loss": 0.5386, "step": 9696 }, { "epoch": 0.63, "grad_norm": 1.2962591648101807, "learning_rate": 3.242021439353613e-06, "loss": 0.5073, "step": 9697 }, { "epoch": 0.63, "grad_norm": 1.07191002368927, "learning_rate": 3.241042903193982e-06, "loss": 0.4885, "step": 9698 }, { "epoch": 0.63, "grad_norm": 1.0865904092788696, "learning_rate": 3.240064443913828e-06, "loss": 0.502, "step": 9699 }, { "epoch": 0.63, "grad_norm": 1.138656497001648, "learning_rate": 3.239086061555916e-06, "loss": 0.5035, "step": 9700 }, { "epoch": 0.63, "grad_norm": 1.218191385269165, "learning_rate": 3.238107756163009e-06, "loss": 0.5576, "step": 9701 }, { "epoch": 0.63, "grad_norm": 1.1811383962631226, "learning_rate": 3.2371295277778667e-06, "loss": 0.5212, "step": 9702 }, { "epoch": 0.63, "grad_norm": 1.2564313411712646, "learning_rate": 3.2361513764432462e-06, "loss": 0.5432, "step": 9703 }, { "epoch": 0.63, "grad_norm": 1.2655917406082153, "learning_rate": 3.2351733022018965e-06, "loss": 0.5547, "step": 9704 }, { "epoch": 0.63, "grad_norm": 1.343853235244751, "learning_rate": 3.234195305096568e-06, "loss": 0.537, "step": 9705 }, { "epoch": 0.63, "grad_norm": 1.1440203189849854, "learning_rate": 3.2332173851700076e-06, "loss": 0.4953, "step": 9706 }, { "epoch": 0.63, "grad_norm": 1.1400326490402222, "learning_rate": 3.2322395424649565e-06, "loss": 0.5036, "step": 9707 }, { "epoch": 0.63, "grad_norm": 1.278496265411377, "learning_rate": 3.231261777024154e-06, "loss": 0.4901, "step": 9708 }, { "epoch": 0.63, "grad_norm": 1.0801830291748047, "learning_rate": 3.230284088890338e-06, "loss": 0.4669, "step": 9709 }, { "epoch": 0.63, "grad_norm": 1.1580843925476074, "learning_rate": 3.2293064781062367e-06, "loss": 0.5226, "step": 9710 }, { "epoch": 0.63, "grad_norm": 1.3702408075332642, "learning_rate": 3.2283289447145803e-06, "loss": 0.567, "step": 9711 }, { "epoch": 0.63, "grad_norm": 1.1720649003982544, "learning_rate": 3.227351488758095e-06, "loss": 0.4836, "step": 9712 }, { "epoch": 0.63, "grad_norm": 1.1895633935928345, "learning_rate": 3.2263741102795035e-06, "loss": 0.5348, "step": 9713 }, { "epoch": 0.63, "grad_norm": 1.102447509765625, "learning_rate": 3.225396809321524e-06, "loss": 0.4853, "step": 9714 }, { "epoch": 0.63, "grad_norm": 1.2383956909179688, "learning_rate": 3.2244195859268702e-06, "loss": 0.5305, "step": 9715 }, { "epoch": 0.63, "grad_norm": 1.159489393234253, "learning_rate": 3.2234424401382554e-06, "loss": 0.5391, "step": 9716 }, { "epoch": 0.63, "grad_norm": 1.146249771118164, "learning_rate": 3.222465371998388e-06, "loss": 0.5215, "step": 9717 }, { "epoch": 0.63, "grad_norm": 1.1950876712799072, "learning_rate": 3.221488381549973e-06, "loss": 0.5402, "step": 9718 }, { "epoch": 0.63, "grad_norm": 1.082685947418213, "learning_rate": 3.2205114688357124e-06, "loss": 0.4809, "step": 9719 }, { "epoch": 0.63, "grad_norm": 1.2207082509994507, "learning_rate": 3.219534633898305e-06, "loss": 0.5582, "step": 9720 }, { "epoch": 0.63, "grad_norm": 1.1547070741653442, "learning_rate": 3.218557876780445e-06, "loss": 0.542, "step": 9721 }, { "epoch": 0.63, "grad_norm": 1.165406584739685, "learning_rate": 3.217581197524824e-06, "loss": 0.5328, "step": 9722 }, { "epoch": 0.63, "grad_norm": 1.1884808540344238, "learning_rate": 3.2166045961741288e-06, "loss": 0.4874, "step": 9723 }, { "epoch": 0.63, "grad_norm": 1.153231143951416, "learning_rate": 3.2156280727710464e-06, "loss": 0.5305, "step": 9724 }, { "epoch": 0.63, "grad_norm": 1.2206661701202393, "learning_rate": 3.2146516273582567e-06, "loss": 0.5123, "step": 9725 }, { "epoch": 0.63, "grad_norm": 1.2978652715682983, "learning_rate": 3.2136752599784395e-06, "loss": 0.5805, "step": 9726 }, { "epoch": 0.63, "grad_norm": 1.1500352621078491, "learning_rate": 3.2126989706742675e-06, "loss": 0.5373, "step": 9727 }, { "epoch": 0.63, "grad_norm": 1.2481259107589722, "learning_rate": 3.2117227594884117e-06, "loss": 0.5276, "step": 9728 }, { "epoch": 0.63, "grad_norm": 1.2309883832931519, "learning_rate": 3.210746626463539e-06, "loss": 0.5239, "step": 9729 }, { "epoch": 0.63, "grad_norm": 1.1889219284057617, "learning_rate": 3.209770571642316e-06, "loss": 0.4907, "step": 9730 }, { "epoch": 0.63, "grad_norm": 1.226213812828064, "learning_rate": 3.2087945950674027e-06, "loss": 0.5833, "step": 9731 }, { "epoch": 0.63, "grad_norm": 1.1825523376464844, "learning_rate": 3.2078186967814574e-06, "loss": 0.5129, "step": 9732 }, { "epoch": 0.63, "grad_norm": 1.3629306554794312, "learning_rate": 3.206842876827132e-06, "loss": 0.5076, "step": 9733 }, { "epoch": 0.63, "grad_norm": 1.140170931816101, "learning_rate": 3.205867135247077e-06, "loss": 0.4969, "step": 9734 }, { "epoch": 0.63, "grad_norm": 1.1776319742202759, "learning_rate": 3.204891472083941e-06, "loss": 0.5111, "step": 9735 }, { "epoch": 0.63, "grad_norm": 1.2009913921356201, "learning_rate": 3.203915887380368e-06, "loss": 0.5329, "step": 9736 }, { "epoch": 0.63, "grad_norm": 1.2826975584030151, "learning_rate": 3.202940381178997e-06, "loss": 0.5442, "step": 9737 }, { "epoch": 0.63, "grad_norm": 1.1982061862945557, "learning_rate": 3.201964953522467e-06, "loss": 0.5478, "step": 9738 }, { "epoch": 0.63, "grad_norm": 1.0630159378051758, "learning_rate": 3.200989604453408e-06, "loss": 0.5432, "step": 9739 }, { "epoch": 0.63, "grad_norm": 1.2003052234649658, "learning_rate": 3.200014334014453e-06, "loss": 0.5397, "step": 9740 }, { "epoch": 0.63, "grad_norm": 1.1861635446548462, "learning_rate": 3.199039142248226e-06, "loss": 0.5365, "step": 9741 }, { "epoch": 0.63, "grad_norm": 1.2159135341644287, "learning_rate": 3.1980640291973525e-06, "loss": 0.4852, "step": 9742 }, { "epoch": 0.63, "grad_norm": 1.1285408735275269, "learning_rate": 3.1970889949044507e-06, "loss": 0.4593, "step": 9743 }, { "epoch": 0.63, "grad_norm": 1.1884050369262695, "learning_rate": 3.19611403941214e-06, "loss": 0.5267, "step": 9744 }, { "epoch": 0.63, "grad_norm": 1.3761498928070068, "learning_rate": 3.195139162763028e-06, "loss": 0.5637, "step": 9745 }, { "epoch": 0.63, "grad_norm": 1.2105518579483032, "learning_rate": 3.194164364999727e-06, "loss": 0.5474, "step": 9746 }, { "epoch": 0.63, "grad_norm": 1.106553554534912, "learning_rate": 3.1931896461648416e-06, "loss": 0.4816, "step": 9747 }, { "epoch": 0.63, "grad_norm": 1.2243432998657227, "learning_rate": 3.192215006300976e-06, "loss": 0.5402, "step": 9748 }, { "epoch": 0.63, "grad_norm": 1.165465235710144, "learning_rate": 3.1912404454507284e-06, "loss": 0.5622, "step": 9749 }, { "epoch": 0.63, "grad_norm": 1.2950119972229004, "learning_rate": 3.190265963656696e-06, "loss": 0.4999, "step": 9750 }, { "epoch": 0.63, "grad_norm": 1.2175188064575195, "learning_rate": 3.189291560961467e-06, "loss": 0.565, "step": 9751 }, { "epoch": 0.63, "grad_norm": 1.138983130455017, "learning_rate": 3.1883172374076323e-06, "loss": 0.5468, "step": 9752 }, { "epoch": 0.63, "grad_norm": 1.2215118408203125, "learning_rate": 3.1873429930377764e-06, "loss": 0.544, "step": 9753 }, { "epoch": 0.63, "grad_norm": 1.3621702194213867, "learning_rate": 3.1863688278944827e-06, "loss": 0.4921, "step": 9754 }, { "epoch": 0.63, "grad_norm": 1.3039592504501343, "learning_rate": 3.1853947420203286e-06, "loss": 0.4997, "step": 9755 }, { "epoch": 0.63, "grad_norm": 1.225714087486267, "learning_rate": 3.184420735457888e-06, "loss": 0.5126, "step": 9756 }, { "epoch": 0.63, "grad_norm": 1.0735702514648438, "learning_rate": 3.1834468082497327e-06, "loss": 0.4935, "step": 9757 }, { "epoch": 0.63, "grad_norm": 1.178045630455017, "learning_rate": 3.182472960438431e-06, "loss": 0.5129, "step": 9758 }, { "epoch": 0.63, "grad_norm": 1.1095261573791504, "learning_rate": 3.1814991920665463e-06, "loss": 0.4805, "step": 9759 }, { "epoch": 0.63, "grad_norm": 1.2411860227584839, "learning_rate": 3.180525503176641e-06, "loss": 0.5056, "step": 9760 }, { "epoch": 0.63, "grad_norm": 1.201629400253296, "learning_rate": 3.179551893811272e-06, "loss": 0.5477, "step": 9761 }, { "epoch": 0.63, "grad_norm": 1.1532459259033203, "learning_rate": 3.178578364012991e-06, "loss": 0.4721, "step": 9762 }, { "epoch": 0.63, "grad_norm": 1.113511323928833, "learning_rate": 3.1776049138243513e-06, "loss": 0.4866, "step": 9763 }, { "epoch": 0.63, "grad_norm": 1.3661620616912842, "learning_rate": 3.1766315432879e-06, "loss": 0.552, "step": 9764 }, { "epoch": 0.63, "grad_norm": 1.2805815935134888, "learning_rate": 3.175658252446178e-06, "loss": 0.5447, "step": 9765 }, { "epoch": 0.63, "grad_norm": 1.1374180316925049, "learning_rate": 3.1746850413417275e-06, "loss": 0.5607, "step": 9766 }, { "epoch": 0.63, "grad_norm": 1.085795521736145, "learning_rate": 3.1737119100170855e-06, "loss": 0.5077, "step": 9767 }, { "epoch": 0.63, "grad_norm": 1.1499062776565552, "learning_rate": 3.1727388585147824e-06, "loss": 0.4804, "step": 9768 }, { "epoch": 0.63, "grad_norm": 1.18626070022583, "learning_rate": 3.1717658868773508e-06, "loss": 0.4969, "step": 9769 }, { "epoch": 0.63, "grad_norm": 1.2682725191116333, "learning_rate": 3.170792995147314e-06, "loss": 0.5507, "step": 9770 }, { "epoch": 0.63, "grad_norm": 1.226978063583374, "learning_rate": 3.169820183367197e-06, "loss": 0.5195, "step": 9771 }, { "epoch": 0.63, "grad_norm": 1.1173880100250244, "learning_rate": 3.168847451579517e-06, "loss": 0.4743, "step": 9772 }, { "epoch": 0.63, "grad_norm": 1.1686269044876099, "learning_rate": 3.167874799826792e-06, "loss": 0.4944, "step": 9773 }, { "epoch": 0.63, "grad_norm": 1.1461353302001953, "learning_rate": 3.1669022281515314e-06, "loss": 0.5556, "step": 9774 }, { "epoch": 0.63, "grad_norm": 1.1320666074752808, "learning_rate": 3.165929736596245e-06, "loss": 0.5224, "step": 9775 }, { "epoch": 0.63, "grad_norm": 1.2642086744308472, "learning_rate": 3.1649573252034377e-06, "loss": 0.5294, "step": 9776 }, { "epoch": 0.63, "grad_norm": 1.1840589046478271, "learning_rate": 3.163984994015611e-06, "loss": 0.5366, "step": 9777 }, { "epoch": 0.63, "grad_norm": 1.3409160375595093, "learning_rate": 3.1630127430752633e-06, "loss": 0.5755, "step": 9778 }, { "epoch": 0.63, "grad_norm": 1.134690761566162, "learning_rate": 3.1620405724248914e-06, "loss": 0.5518, "step": 9779 }, { "epoch": 0.63, "grad_norm": 1.1791166067123413, "learning_rate": 3.1610684821069814e-06, "loss": 0.4775, "step": 9780 }, { "epoch": 0.63, "grad_norm": 1.0781549215316772, "learning_rate": 3.160096472164024e-06, "loss": 0.4815, "step": 9781 }, { "epoch": 0.63, "grad_norm": 1.2226707935333252, "learning_rate": 3.159124542638503e-06, "loss": 0.5538, "step": 9782 }, { "epoch": 0.63, "grad_norm": 1.2239428758621216, "learning_rate": 3.1581526935728983e-06, "loss": 0.6017, "step": 9783 }, { "epoch": 0.63, "grad_norm": 1.231569766998291, "learning_rate": 3.1571809250096873e-06, "loss": 0.552, "step": 9784 }, { "epoch": 0.63, "grad_norm": 1.1482112407684326, "learning_rate": 3.156209236991346e-06, "loss": 0.541, "step": 9785 }, { "epoch": 0.63, "grad_norm": 1.240989327430725, "learning_rate": 3.1552376295603397e-06, "loss": 0.5174, "step": 9786 }, { "epoch": 0.63, "grad_norm": 1.0547120571136475, "learning_rate": 3.154266102759137e-06, "loss": 0.4694, "step": 9787 }, { "epoch": 0.63, "grad_norm": 1.1153883934020996, "learning_rate": 3.153294656630201e-06, "loss": 0.4747, "step": 9788 }, { "epoch": 0.63, "grad_norm": 1.126861333847046, "learning_rate": 3.1523232912159906e-06, "loss": 0.4977, "step": 9789 }, { "epoch": 0.63, "grad_norm": 1.1605879068374634, "learning_rate": 3.1513520065589627e-06, "loss": 0.523, "step": 9790 }, { "epoch": 0.63, "grad_norm": 1.2922766208648682, "learning_rate": 3.1503808027015703e-06, "loss": 0.5749, "step": 9791 }, { "epoch": 0.63, "grad_norm": 1.240456223487854, "learning_rate": 3.1494096796862592e-06, "loss": 0.5202, "step": 9792 }, { "epoch": 0.63, "grad_norm": 1.1634079217910767, "learning_rate": 3.148438637555477e-06, "loss": 0.5482, "step": 9793 }, { "epoch": 0.63, "grad_norm": 1.1843762397766113, "learning_rate": 3.1474676763516644e-06, "loss": 0.5743, "step": 9794 }, { "epoch": 0.63, "grad_norm": 1.039075255393982, "learning_rate": 3.14649679611726e-06, "loss": 0.5196, "step": 9795 }, { "epoch": 0.63, "grad_norm": 1.1138906478881836, "learning_rate": 3.145525996894698e-06, "loss": 0.5345, "step": 9796 }, { "epoch": 0.63, "grad_norm": 1.1509809494018555, "learning_rate": 3.1445552787264135e-06, "loss": 0.5817, "step": 9797 }, { "epoch": 0.63, "grad_norm": 1.159018874168396, "learning_rate": 3.1435846416548276e-06, "loss": 0.5098, "step": 9798 }, { "epoch": 0.63, "grad_norm": 1.136932134628296, "learning_rate": 3.1426140857223674e-06, "loss": 0.5415, "step": 9799 }, { "epoch": 0.63, "grad_norm": 1.1183757781982422, "learning_rate": 3.141643610971454e-06, "loss": 0.5311, "step": 9800 }, { "epoch": 0.63, "grad_norm": 1.0668559074401855, "learning_rate": 3.140673217444503e-06, "loss": 0.5024, "step": 9801 }, { "epoch": 0.63, "grad_norm": 1.1890686750411987, "learning_rate": 3.139702905183931e-06, "loss": 0.4993, "step": 9802 }, { "epoch": 0.63, "grad_norm": 1.1506285667419434, "learning_rate": 3.138732674232143e-06, "loss": 0.5383, "step": 9803 }, { "epoch": 0.63, "grad_norm": 1.2979230880737305, "learning_rate": 3.1377625246315485e-06, "loss": 0.5306, "step": 9804 }, { "epoch": 0.63, "grad_norm": 1.104572057723999, "learning_rate": 3.136792456424549e-06, "loss": 0.5156, "step": 9805 }, { "epoch": 0.63, "grad_norm": 1.154567003250122, "learning_rate": 3.1358224696535437e-06, "loss": 0.5197, "step": 9806 }, { "epoch": 0.63, "grad_norm": 1.1825509071350098, "learning_rate": 3.13485256436093e-06, "loss": 0.5308, "step": 9807 }, { "epoch": 0.63, "grad_norm": 1.1774921417236328, "learning_rate": 3.133882740589098e-06, "loss": 0.4998, "step": 9808 }, { "epoch": 0.63, "grad_norm": 1.1101816892623901, "learning_rate": 3.1329129983804364e-06, "loss": 0.5008, "step": 9809 }, { "epoch": 0.63, "grad_norm": 1.0601050853729248, "learning_rate": 3.1319433377773306e-06, "loss": 0.5004, "step": 9810 }, { "epoch": 0.63, "grad_norm": 1.1390239000320435, "learning_rate": 3.1309737588221624e-06, "loss": 0.4795, "step": 9811 }, { "epoch": 0.63, "grad_norm": 1.1635398864746094, "learning_rate": 3.1300042615573085e-06, "loss": 0.5253, "step": 9812 }, { "epoch": 0.63, "grad_norm": 1.1560391187667847, "learning_rate": 3.129034846025143e-06, "loss": 0.551, "step": 9813 }, { "epoch": 0.63, "grad_norm": 1.338160514831543, "learning_rate": 3.1280655122680385e-06, "loss": 0.5625, "step": 9814 }, { "epoch": 0.63, "grad_norm": 1.1953229904174805, "learning_rate": 3.1270962603283605e-06, "loss": 0.5115, "step": 9815 }, { "epoch": 0.63, "grad_norm": 1.21277916431427, "learning_rate": 3.126127090248473e-06, "loss": 0.506, "step": 9816 }, { "epoch": 0.63, "grad_norm": 1.0799599885940552, "learning_rate": 3.125158002070735e-06, "loss": 0.49, "step": 9817 }, { "epoch": 0.63, "grad_norm": 1.2076220512390137, "learning_rate": 3.124188995837503e-06, "loss": 0.5411, "step": 9818 }, { "epoch": 0.63, "grad_norm": 1.139580488204956, "learning_rate": 3.123220071591131e-06, "loss": 0.5295, "step": 9819 }, { "epoch": 0.63, "grad_norm": 1.0920974016189575, "learning_rate": 3.1222512293739694e-06, "loss": 0.5553, "step": 9820 }, { "epoch": 0.63, "grad_norm": 1.119795322418213, "learning_rate": 3.12128246922836e-06, "loss": 0.5121, "step": 9821 }, { "epoch": 0.63, "grad_norm": 1.1284786462783813, "learning_rate": 3.1203137911966465e-06, "loss": 0.5339, "step": 9822 }, { "epoch": 0.63, "grad_norm": 1.2109423875808716, "learning_rate": 3.1193451953211674e-06, "loss": 0.4682, "step": 9823 }, { "epoch": 0.63, "grad_norm": 1.2320199012756348, "learning_rate": 3.1183766816442584e-06, "loss": 0.5309, "step": 9824 }, { "epoch": 0.63, "grad_norm": 1.1750606298446655, "learning_rate": 3.117408250208249e-06, "loss": 0.5175, "step": 9825 }, { "epoch": 0.63, "grad_norm": 1.0432870388031006, "learning_rate": 3.1164399010554702e-06, "loss": 0.4691, "step": 9826 }, { "epoch": 0.63, "grad_norm": 1.1853593587875366, "learning_rate": 3.1154716342282422e-06, "loss": 0.5099, "step": 9827 }, { "epoch": 0.63, "grad_norm": 1.1662214994430542, "learning_rate": 3.1145034497688862e-06, "loss": 0.4794, "step": 9828 }, { "epoch": 0.63, "grad_norm": 1.1516295671463013, "learning_rate": 3.1135353477197204e-06, "loss": 0.5131, "step": 9829 }, { "epoch": 0.63, "grad_norm": 1.080297589302063, "learning_rate": 3.112567328123057e-06, "loss": 0.4641, "step": 9830 }, { "epoch": 0.63, "grad_norm": 1.0898034572601318, "learning_rate": 3.1115993910212063e-06, "loss": 0.5274, "step": 9831 }, { "epoch": 0.63, "grad_norm": 1.1638283729553223, "learning_rate": 3.1106315364564753e-06, "loss": 0.5058, "step": 9832 }, { "epoch": 0.63, "grad_norm": 1.2314212322235107, "learning_rate": 3.109663764471164e-06, "loss": 0.5212, "step": 9833 }, { "epoch": 0.63, "grad_norm": 1.1995035409927368, "learning_rate": 3.108696075107572e-06, "loss": 0.5228, "step": 9834 }, { "epoch": 0.63, "grad_norm": 1.1373611688613892, "learning_rate": 3.1077284684079957e-06, "loss": 0.5039, "step": 9835 }, { "epoch": 0.63, "grad_norm": 1.1587294340133667, "learning_rate": 3.106760944414725e-06, "loss": 0.577, "step": 9836 }, { "epoch": 0.63, "grad_norm": 1.129996657371521, "learning_rate": 3.1057935031700493e-06, "loss": 0.508, "step": 9837 }, { "epoch": 0.64, "grad_norm": 1.1533479690551758, "learning_rate": 3.1048261447162543e-06, "loss": 0.5107, "step": 9838 }, { "epoch": 0.64, "grad_norm": 1.2000707387924194, "learning_rate": 3.103858869095617e-06, "loss": 0.4814, "step": 9839 }, { "epoch": 0.64, "grad_norm": 1.172394871711731, "learning_rate": 3.1028916763504165e-06, "loss": 0.497, "step": 9840 }, { "epoch": 0.64, "grad_norm": 1.234827995300293, "learning_rate": 3.101924566522926e-06, "loss": 0.476, "step": 9841 }, { "epoch": 0.64, "grad_norm": 1.187794804573059, "learning_rate": 3.1009575396554157e-06, "loss": 0.5287, "step": 9842 }, { "epoch": 0.64, "grad_norm": 1.236585021018982, "learning_rate": 3.0999905957901533e-06, "loss": 0.5842, "step": 9843 }, { "epoch": 0.64, "grad_norm": 1.2117102146148682, "learning_rate": 3.0990237349693985e-06, "loss": 0.5748, "step": 9844 }, { "epoch": 0.64, "grad_norm": 1.1460832357406616, "learning_rate": 3.0980569572354113e-06, "loss": 0.5188, "step": 9845 }, { "epoch": 0.64, "grad_norm": 1.2162623405456543, "learning_rate": 3.097090262630448e-06, "loss": 0.5147, "step": 9846 }, { "epoch": 0.64, "grad_norm": 1.1140410900115967, "learning_rate": 3.096123651196759e-06, "loss": 0.4953, "step": 9847 }, { "epoch": 0.64, "grad_norm": 1.1651886701583862, "learning_rate": 3.095157122976593e-06, "loss": 0.5555, "step": 9848 }, { "epoch": 0.64, "grad_norm": 1.1412049531936646, "learning_rate": 3.094190678012198e-06, "loss": 0.5197, "step": 9849 }, { "epoch": 0.64, "grad_norm": 1.1696882247924805, "learning_rate": 3.0932243163458075e-06, "loss": 0.5209, "step": 9850 }, { "epoch": 0.64, "grad_norm": 1.1367299556732178, "learning_rate": 3.0922580380196644e-06, "loss": 0.542, "step": 9851 }, { "epoch": 0.64, "grad_norm": 1.1449254751205444, "learning_rate": 3.0912918430759993e-06, "loss": 0.498, "step": 9852 }, { "epoch": 0.64, "grad_norm": 1.3168063163757324, "learning_rate": 3.0903257315570446e-06, "loss": 0.5659, "step": 9853 }, { "epoch": 0.64, "grad_norm": 1.2502256631851196, "learning_rate": 3.089359703505024e-06, "loss": 0.5411, "step": 9854 }, { "epoch": 0.64, "grad_norm": 1.2133244276046753, "learning_rate": 3.088393758962162e-06, "loss": 0.5042, "step": 9855 }, { "epoch": 0.64, "grad_norm": 1.1666914224624634, "learning_rate": 3.087427897970676e-06, "loss": 0.531, "step": 9856 }, { "epoch": 0.64, "grad_norm": 1.2382029294967651, "learning_rate": 3.0864621205727817e-06, "loss": 0.516, "step": 9857 }, { "epoch": 0.64, "grad_norm": 1.078293800354004, "learning_rate": 3.085496426810693e-06, "loss": 0.5086, "step": 9858 }, { "epoch": 0.64, "grad_norm": 1.1465693712234497, "learning_rate": 3.0845308167266143e-06, "loss": 0.5124, "step": 9859 }, { "epoch": 0.64, "grad_norm": 1.2137588262557983, "learning_rate": 3.083565290362752e-06, "loss": 0.5145, "step": 9860 }, { "epoch": 0.64, "grad_norm": 1.1787189245224, "learning_rate": 3.082599847761307e-06, "loss": 0.5162, "step": 9861 }, { "epoch": 0.64, "grad_norm": 1.340829849243164, "learning_rate": 3.0816344889644766e-06, "loss": 0.5423, "step": 9862 }, { "epoch": 0.64, "grad_norm": 1.2899127006530762, "learning_rate": 3.080669214014451e-06, "loss": 0.4895, "step": 9863 }, { "epoch": 0.64, "grad_norm": 1.07399582862854, "learning_rate": 3.079704022953423e-06, "loss": 0.5201, "step": 9864 }, { "epoch": 0.64, "grad_norm": 1.116456389427185, "learning_rate": 3.078738915823577e-06, "loss": 0.4964, "step": 9865 }, { "epoch": 0.64, "grad_norm": 1.1450551748275757, "learning_rate": 3.077773892667097e-06, "loss": 0.5493, "step": 9866 }, { "epoch": 0.64, "grad_norm": 1.1663429737091064, "learning_rate": 3.0768089535261626e-06, "loss": 0.5126, "step": 9867 }, { "epoch": 0.64, "grad_norm": 1.2124207019805908, "learning_rate": 3.075844098442944e-06, "loss": 0.5122, "step": 9868 }, { "epoch": 0.64, "grad_norm": 1.2191557884216309, "learning_rate": 3.0748793274596162e-06, "loss": 0.5732, "step": 9869 }, { "epoch": 0.64, "grad_norm": 1.2527343034744263, "learning_rate": 3.0739146406183464e-06, "loss": 0.5503, "step": 9870 }, { "epoch": 0.64, "grad_norm": 1.2097338438034058, "learning_rate": 3.0729500379612977e-06, "loss": 0.5106, "step": 9871 }, { "epoch": 0.64, "grad_norm": 1.2211642265319824, "learning_rate": 3.0719855195306306e-06, "loss": 0.512, "step": 9872 }, { "epoch": 0.64, "grad_norm": 1.0985203981399536, "learning_rate": 3.071021085368505e-06, "loss": 0.4868, "step": 9873 }, { "epoch": 0.64, "grad_norm": 1.2863434553146362, "learning_rate": 3.0700567355170686e-06, "loss": 0.5219, "step": 9874 }, { "epoch": 0.64, "grad_norm": 1.1168640851974487, "learning_rate": 3.0690924700184725e-06, "loss": 0.5092, "step": 9875 }, { "epoch": 0.64, "grad_norm": 1.281673789024353, "learning_rate": 3.0681282889148634e-06, "loss": 0.5696, "step": 9876 }, { "epoch": 0.64, "grad_norm": 1.1290967464447021, "learning_rate": 3.067164192248382e-06, "loss": 0.5239, "step": 9877 }, { "epoch": 0.64, "grad_norm": 1.2948881387710571, "learning_rate": 3.0662001800611674e-06, "loss": 0.5709, "step": 9878 }, { "epoch": 0.64, "grad_norm": 1.1284323930740356, "learning_rate": 3.065236252395356e-06, "loss": 0.4871, "step": 9879 }, { "epoch": 0.64, "grad_norm": 1.098449945449829, "learning_rate": 3.064272409293073e-06, "loss": 0.4878, "step": 9880 }, { "epoch": 0.64, "grad_norm": 1.0697745084762573, "learning_rate": 3.06330865079645e-06, "loss": 0.5069, "step": 9881 }, { "epoch": 0.64, "grad_norm": 1.123992919921875, "learning_rate": 3.0623449769476088e-06, "loss": 0.5161, "step": 9882 }, { "epoch": 0.64, "grad_norm": 1.2693204879760742, "learning_rate": 3.061381387788669e-06, "loss": 0.507, "step": 9883 }, { "epoch": 0.64, "grad_norm": 1.1948877573013306, "learning_rate": 3.0604178833617493e-06, "loss": 0.5316, "step": 9884 }, { "epoch": 0.64, "grad_norm": 1.1657663583755493, "learning_rate": 3.0594544637089575e-06, "loss": 0.4841, "step": 9885 }, { "epoch": 0.64, "grad_norm": 1.1137521266937256, "learning_rate": 3.058491128872405e-06, "loss": 0.4989, "step": 9886 }, { "epoch": 0.64, "grad_norm": 1.2495790719985962, "learning_rate": 3.0575278788941954e-06, "loss": 0.5589, "step": 9887 }, { "epoch": 0.64, "grad_norm": 1.257556438446045, "learning_rate": 3.0565647138164312e-06, "loss": 0.5234, "step": 9888 }, { "epoch": 0.64, "grad_norm": 1.109571099281311, "learning_rate": 3.055601633681209e-06, "loss": 0.4557, "step": 9889 }, { "epoch": 0.64, "grad_norm": 1.172792911529541, "learning_rate": 3.0546386385306248e-06, "loss": 0.519, "step": 9890 }, { "epoch": 0.64, "grad_norm": 1.2413889169692993, "learning_rate": 3.053675728406764e-06, "loss": 0.5433, "step": 9891 }, { "epoch": 0.64, "grad_norm": 1.330660104751587, "learning_rate": 3.052712903351717e-06, "loss": 0.5529, "step": 9892 }, { "epoch": 0.64, "grad_norm": 1.1156518459320068, "learning_rate": 3.0517501634075638e-06, "loss": 0.5258, "step": 9893 }, { "epoch": 0.64, "grad_norm": 1.3134586811065674, "learning_rate": 3.0507875086163843e-06, "loss": 0.5101, "step": 9894 }, { "epoch": 0.64, "grad_norm": 1.415286898612976, "learning_rate": 3.0498249390202538e-06, "loss": 0.5264, "step": 9895 }, { "epoch": 0.64, "grad_norm": 1.2225786447525024, "learning_rate": 3.0488624546612465e-06, "loss": 0.5271, "step": 9896 }, { "epoch": 0.64, "grad_norm": 1.1790168285369873, "learning_rate": 3.047900055581424e-06, "loss": 0.5261, "step": 9897 }, { "epoch": 0.64, "grad_norm": 1.2640893459320068, "learning_rate": 3.0469377418228552e-06, "loss": 0.5184, "step": 9898 }, { "epoch": 0.64, "grad_norm": 1.1992069482803345, "learning_rate": 3.0459755134275976e-06, "loss": 0.4907, "step": 9899 }, { "epoch": 0.64, "grad_norm": 1.1998227834701538, "learning_rate": 3.0450133704377107e-06, "loss": 0.521, "step": 9900 }, { "epoch": 0.64, "grad_norm": 1.1510727405548096, "learning_rate": 3.0440513128952433e-06, "loss": 0.5325, "step": 9901 }, { "epoch": 0.64, "grad_norm": 1.094551920890808, "learning_rate": 3.043089340842248e-06, "loss": 0.5115, "step": 9902 }, { "epoch": 0.64, "grad_norm": 1.0247280597686768, "learning_rate": 3.042127454320768e-06, "loss": 0.4702, "step": 9903 }, { "epoch": 0.64, "grad_norm": 1.175427794456482, "learning_rate": 3.0411656533728457e-06, "loss": 0.5169, "step": 9904 }, { "epoch": 0.64, "grad_norm": 1.1862871646881104, "learning_rate": 3.0402039380405197e-06, "loss": 0.5229, "step": 9905 }, { "epoch": 0.64, "grad_norm": 1.2056564092636108, "learning_rate": 3.039242308365822e-06, "loss": 0.5594, "step": 9906 }, { "epoch": 0.64, "grad_norm": 1.1289602518081665, "learning_rate": 3.038280764390784e-06, "loss": 0.4978, "step": 9907 }, { "epoch": 0.64, "grad_norm": 1.0871422290802002, "learning_rate": 3.0373193061574343e-06, "loss": 0.5297, "step": 9908 }, { "epoch": 0.64, "grad_norm": 1.1179215908050537, "learning_rate": 3.036357933707793e-06, "loss": 0.5413, "step": 9909 }, { "epoch": 0.64, "grad_norm": 1.0871952772140503, "learning_rate": 3.0353966470838804e-06, "loss": 0.4417, "step": 9910 }, { "epoch": 0.64, "grad_norm": 1.2012115716934204, "learning_rate": 3.0344354463277104e-06, "loss": 0.5314, "step": 9911 }, { "epoch": 0.64, "grad_norm": 1.1285372972488403, "learning_rate": 3.0334743314812964e-06, "loss": 0.5116, "step": 9912 }, { "epoch": 0.64, "grad_norm": 0.9957178831100464, "learning_rate": 3.0325133025866457e-06, "loss": 0.4763, "step": 9913 }, { "epoch": 0.64, "grad_norm": 1.22710382938385, "learning_rate": 3.031552359685764e-06, "loss": 0.5478, "step": 9914 }, { "epoch": 0.64, "grad_norm": 1.1612128019332886, "learning_rate": 3.030591502820648e-06, "loss": 0.5382, "step": 9915 }, { "epoch": 0.64, "grad_norm": 1.494419813156128, "learning_rate": 3.0296307320332963e-06, "loss": 0.5136, "step": 9916 }, { "epoch": 0.64, "grad_norm": 1.2438547611236572, "learning_rate": 3.0286700473657016e-06, "loss": 0.5465, "step": 9917 }, { "epoch": 0.64, "grad_norm": 1.1953598260879517, "learning_rate": 3.0277094488598534e-06, "loss": 0.6151, "step": 9918 }, { "epoch": 0.64, "grad_norm": 1.1353709697723389, "learning_rate": 3.0267489365577367e-06, "loss": 0.4746, "step": 9919 }, { "epoch": 0.64, "grad_norm": 1.1907109022140503, "learning_rate": 3.025788510501335e-06, "loss": 0.5161, "step": 9920 }, { "epoch": 0.64, "grad_norm": 1.2691141366958618, "learning_rate": 3.024828170732621e-06, "loss": 0.5656, "step": 9921 }, { "epoch": 0.64, "grad_norm": 1.1532421112060547, "learning_rate": 3.0238679172935727e-06, "loss": 0.5089, "step": 9922 }, { "epoch": 0.64, "grad_norm": 1.1718807220458984, "learning_rate": 3.022907750226159e-06, "loss": 0.53, "step": 9923 }, { "epoch": 0.64, "grad_norm": 1.252418041229248, "learning_rate": 3.021947669572346e-06, "loss": 0.5408, "step": 9924 }, { "epoch": 0.64, "grad_norm": 1.2211887836456299, "learning_rate": 3.0209876753740974e-06, "loss": 0.5222, "step": 9925 }, { "epoch": 0.64, "grad_norm": 1.3362306356430054, "learning_rate": 3.0200277676733737e-06, "loss": 0.5575, "step": 9926 }, { "epoch": 0.64, "grad_norm": 1.242434024810791, "learning_rate": 3.019067946512126e-06, "loss": 0.5126, "step": 9927 }, { "epoch": 0.64, "grad_norm": 1.1937170028686523, "learning_rate": 3.018108211932307e-06, "loss": 0.5361, "step": 9928 }, { "epoch": 0.64, "grad_norm": 1.0649393796920776, "learning_rate": 3.0171485639758645e-06, "loss": 0.5123, "step": 9929 }, { "epoch": 0.64, "grad_norm": 1.1240805387496948, "learning_rate": 3.016189002684743e-06, "loss": 0.4844, "step": 9930 }, { "epoch": 0.64, "grad_norm": 1.1385812759399414, "learning_rate": 3.015229528100884e-06, "loss": 0.5236, "step": 9931 }, { "epoch": 0.64, "grad_norm": 1.1846530437469482, "learning_rate": 3.0142701402662193e-06, "loss": 0.5379, "step": 9932 }, { "epoch": 0.64, "grad_norm": 1.107153058052063, "learning_rate": 3.013310839222684e-06, "loss": 0.4947, "step": 9933 }, { "epoch": 0.64, "grad_norm": 1.2962816953659058, "learning_rate": 3.0123516250122052e-06, "loss": 0.5484, "step": 9934 }, { "epoch": 0.64, "grad_norm": 1.0657072067260742, "learning_rate": 3.0113924976767093e-06, "loss": 0.4949, "step": 9935 }, { "epoch": 0.64, "grad_norm": 1.0764238834381104, "learning_rate": 3.010433457258116e-06, "loss": 0.5107, "step": 9936 }, { "epoch": 0.64, "grad_norm": 1.1829026937484741, "learning_rate": 3.0094745037983464e-06, "loss": 0.5393, "step": 9937 }, { "epoch": 0.64, "grad_norm": 1.18466055393219, "learning_rate": 3.008515637339308e-06, "loss": 0.4952, "step": 9938 }, { "epoch": 0.64, "grad_norm": 1.2067521810531616, "learning_rate": 3.0075568579229126e-06, "loss": 0.5483, "step": 9939 }, { "epoch": 0.64, "grad_norm": 1.2131273746490479, "learning_rate": 3.0065981655910665e-06, "loss": 0.5344, "step": 9940 }, { "epoch": 0.64, "grad_norm": 1.1740015745162964, "learning_rate": 3.0056395603856715e-06, "loss": 0.5415, "step": 9941 }, { "epoch": 0.64, "grad_norm": 1.207715630531311, "learning_rate": 3.004681042348627e-06, "loss": 0.5877, "step": 9942 }, { "epoch": 0.64, "grad_norm": 1.2236709594726562, "learning_rate": 3.0037226115218266e-06, "loss": 0.5337, "step": 9943 }, { "epoch": 0.64, "grad_norm": 1.2329418659210205, "learning_rate": 3.0027642679471585e-06, "loss": 0.5108, "step": 9944 }, { "epoch": 0.64, "grad_norm": 1.1839927434921265, "learning_rate": 3.0018060116665117e-06, "loss": 0.5113, "step": 9945 }, { "epoch": 0.64, "grad_norm": 1.18363618850708, "learning_rate": 3.0008478427217693e-06, "loss": 0.506, "step": 9946 }, { "epoch": 0.64, "grad_norm": 1.189489722251892, "learning_rate": 2.9998897611548095e-06, "loss": 0.5425, "step": 9947 }, { "epoch": 0.64, "grad_norm": 1.1231069564819336, "learning_rate": 2.998931767007508e-06, "loss": 0.4987, "step": 9948 }, { "epoch": 0.64, "grad_norm": 1.2389986515045166, "learning_rate": 2.997973860321737e-06, "loss": 0.5528, "step": 9949 }, { "epoch": 0.64, "grad_norm": 1.2349424362182617, "learning_rate": 2.997016041139362e-06, "loss": 0.5394, "step": 9950 }, { "epoch": 0.64, "grad_norm": 1.06862211227417, "learning_rate": 2.9960583095022476e-06, "loss": 0.5412, "step": 9951 }, { "epoch": 0.64, "grad_norm": 1.1037368774414062, "learning_rate": 2.9951006654522564e-06, "loss": 0.4976, "step": 9952 }, { "epoch": 0.64, "grad_norm": 1.2386585474014282, "learning_rate": 2.99414310903124e-06, "loss": 0.5229, "step": 9953 }, { "epoch": 0.64, "grad_norm": 1.0647072792053223, "learning_rate": 2.9931856402810544e-06, "loss": 0.5245, "step": 9954 }, { "epoch": 0.64, "grad_norm": 1.1622520685195923, "learning_rate": 2.992228259243547e-06, "loss": 0.4871, "step": 9955 }, { "epoch": 0.64, "grad_norm": 1.1311841011047363, "learning_rate": 2.9912709659605634e-06, "loss": 0.4822, "step": 9956 }, { "epoch": 0.64, "grad_norm": 1.216914415359497, "learning_rate": 2.9903137604739407e-06, "loss": 0.528, "step": 9957 }, { "epoch": 0.64, "grad_norm": 1.2060322761535645, "learning_rate": 2.9893566428255193e-06, "loss": 0.5459, "step": 9958 }, { "epoch": 0.64, "grad_norm": 1.1553181409835815, "learning_rate": 2.9883996130571313e-06, "loss": 0.5579, "step": 9959 }, { "epoch": 0.64, "grad_norm": 1.1630343198776245, "learning_rate": 2.9874426712106066e-06, "loss": 0.4972, "step": 9960 }, { "epoch": 0.64, "grad_norm": 1.1844820976257324, "learning_rate": 2.9864858173277723e-06, "loss": 0.5269, "step": 9961 }, { "epoch": 0.64, "grad_norm": 1.2735406160354614, "learning_rate": 2.9855290514504454e-06, "loss": 0.5041, "step": 9962 }, { "epoch": 0.64, "grad_norm": 1.2325396537780762, "learning_rate": 2.984572373620447e-06, "loss": 0.5094, "step": 9963 }, { "epoch": 0.64, "grad_norm": 1.049989104270935, "learning_rate": 2.98361578387959e-06, "loss": 0.4857, "step": 9964 }, { "epoch": 0.64, "grad_norm": 1.1748521327972412, "learning_rate": 2.9826592822696844e-06, "loss": 0.5124, "step": 9965 }, { "epoch": 0.64, "grad_norm": 1.1255062818527222, "learning_rate": 2.981702868832537e-06, "loss": 0.5127, "step": 9966 }, { "epoch": 0.64, "grad_norm": 1.1225258111953735, "learning_rate": 2.9807465436099515e-06, "loss": 0.4889, "step": 9967 }, { "epoch": 0.64, "grad_norm": 1.237039566040039, "learning_rate": 2.9797903066437235e-06, "loss": 0.507, "step": 9968 }, { "epoch": 0.64, "grad_norm": 1.186218500137329, "learning_rate": 2.9788341579756484e-06, "loss": 0.5582, "step": 9969 }, { "epoch": 0.64, "grad_norm": 1.1412209272384644, "learning_rate": 2.9778780976475185e-06, "loss": 0.5205, "step": 9970 }, { "epoch": 0.64, "grad_norm": 1.133738398551941, "learning_rate": 2.9769221257011187e-06, "loss": 0.4888, "step": 9971 }, { "epoch": 0.64, "grad_norm": 1.1519347429275513, "learning_rate": 2.9759662421782358e-06, "loss": 0.5303, "step": 9972 }, { "epoch": 0.64, "grad_norm": 1.1476762294769287, "learning_rate": 2.9750104471206444e-06, "loss": 0.5343, "step": 9973 }, { "epoch": 0.64, "grad_norm": 1.2619684934616089, "learning_rate": 2.9740547405701215e-06, "loss": 0.5461, "step": 9974 }, { "epoch": 0.64, "grad_norm": 1.205075740814209, "learning_rate": 2.973099122568439e-06, "loss": 0.5449, "step": 9975 }, { "epoch": 0.64, "grad_norm": 1.1848825216293335, "learning_rate": 2.972143593157365e-06, "loss": 0.5539, "step": 9976 }, { "epoch": 0.64, "grad_norm": 1.1782333850860596, "learning_rate": 2.9711881523786617e-06, "loss": 0.5526, "step": 9977 }, { "epoch": 0.64, "grad_norm": 1.095442295074463, "learning_rate": 2.970232800274092e-06, "loss": 0.5228, "step": 9978 }, { "epoch": 0.64, "grad_norm": 1.150090217590332, "learning_rate": 2.969277536885408e-06, "loss": 0.4876, "step": 9979 }, { "epoch": 0.64, "grad_norm": 1.2549515962600708, "learning_rate": 2.968322362254363e-06, "loss": 0.5307, "step": 9980 }, { "epoch": 0.64, "grad_norm": 1.1634633541107178, "learning_rate": 2.9673672764227068e-06, "loss": 0.5642, "step": 9981 }, { "epoch": 0.64, "grad_norm": 1.065733790397644, "learning_rate": 2.966412279432182e-06, "loss": 0.5098, "step": 9982 }, { "epoch": 0.64, "grad_norm": 1.2272428274154663, "learning_rate": 2.96545737132453e-06, "loss": 0.4965, "step": 9983 }, { "epoch": 0.64, "grad_norm": 1.132871389389038, "learning_rate": 2.964502552141489e-06, "loss": 0.5008, "step": 9984 }, { "epoch": 0.64, "grad_norm": 1.1367740631103516, "learning_rate": 2.9635478219247874e-06, "loss": 0.5023, "step": 9985 }, { "epoch": 0.64, "grad_norm": 1.1688190698623657, "learning_rate": 2.9625931807161567e-06, "loss": 0.5573, "step": 9986 }, { "epoch": 0.64, "grad_norm": 1.1381245851516724, "learning_rate": 2.961638628557322e-06, "loss": 0.5337, "step": 9987 }, { "epoch": 0.64, "grad_norm": 1.2307034730911255, "learning_rate": 2.960684165490003e-06, "loss": 0.5381, "step": 9988 }, { "epoch": 0.64, "grad_norm": 1.227540373802185, "learning_rate": 2.9597297915559187e-06, "loss": 0.5068, "step": 9989 }, { "epoch": 0.64, "grad_norm": 1.445624828338623, "learning_rate": 2.9587755067967817e-06, "loss": 0.5353, "step": 9990 }, { "epoch": 0.64, "grad_norm": 1.1295782327651978, "learning_rate": 2.9578213112543e-06, "loss": 0.5286, "step": 9991 }, { "epoch": 0.64, "grad_norm": 1.0883877277374268, "learning_rate": 2.956867204970179e-06, "loss": 0.5039, "step": 9992 }, { "epoch": 0.65, "grad_norm": 1.2173779010772705, "learning_rate": 2.955913187986122e-06, "loss": 0.556, "step": 9993 }, { "epoch": 0.65, "grad_norm": 1.159287452697754, "learning_rate": 2.954959260343826e-06, "loss": 0.5397, "step": 9994 }, { "epoch": 0.65, "grad_norm": 1.2550506591796875, "learning_rate": 2.954005422084984e-06, "loss": 0.4819, "step": 9995 }, { "epoch": 0.65, "grad_norm": 1.1274042129516602, "learning_rate": 2.9530516732512872e-06, "loss": 0.492, "step": 9996 }, { "epoch": 0.65, "grad_norm": 1.2531332969665527, "learning_rate": 2.9520980138844193e-06, "loss": 0.5664, "step": 9997 }, { "epoch": 0.65, "grad_norm": 1.3810876607894897, "learning_rate": 2.951144444026065e-06, "loss": 0.5324, "step": 9998 }, { "epoch": 0.65, "grad_norm": 1.132723093032837, "learning_rate": 2.9501909637178995e-06, "loss": 0.5016, "step": 9999 }, { "epoch": 0.65, "grad_norm": 1.2122952938079834, "learning_rate": 2.9492375730015987e-06, "loss": 0.4984, "step": 10000 }, { "epoch": 0.65, "grad_norm": 1.063369631767273, "learning_rate": 2.948284271918832e-06, "loss": 0.5064, "step": 10001 }, { "epoch": 0.65, "grad_norm": 1.1769181489944458, "learning_rate": 2.947331060511268e-06, "loss": 0.5144, "step": 10002 }, { "epoch": 0.65, "grad_norm": 1.2118873596191406, "learning_rate": 2.946377938820567e-06, "loss": 0.5297, "step": 10003 }, { "epoch": 0.65, "grad_norm": 1.0570322275161743, "learning_rate": 2.9454249068883873e-06, "loss": 0.5141, "step": 10004 }, { "epoch": 0.65, "grad_norm": 1.3023178577423096, "learning_rate": 2.9444719647563834e-06, "loss": 0.5294, "step": 10005 }, { "epoch": 0.65, "grad_norm": 1.101177453994751, "learning_rate": 2.9435191124662067e-06, "loss": 0.4941, "step": 10006 }, { "epoch": 0.65, "grad_norm": 1.2416054010391235, "learning_rate": 2.942566350059504e-06, "loss": 0.4953, "step": 10007 }, { "epoch": 0.65, "grad_norm": 1.1665180921554565, "learning_rate": 2.9416136775779203e-06, "loss": 0.4943, "step": 10008 }, { "epoch": 0.65, "grad_norm": 1.1409345865249634, "learning_rate": 2.9406610950630896e-06, "loss": 0.5321, "step": 10009 }, { "epoch": 0.65, "grad_norm": 1.1231105327606201, "learning_rate": 2.939708602556649e-06, "loss": 0.4943, "step": 10010 }, { "epoch": 0.65, "grad_norm": 1.1860408782958984, "learning_rate": 2.93875620010023e-06, "loss": 0.5213, "step": 10011 }, { "epoch": 0.65, "grad_norm": 1.1714438199996948, "learning_rate": 2.93780388773546e-06, "loss": 0.5231, "step": 10012 }, { "epoch": 0.65, "grad_norm": 1.1124424934387207, "learning_rate": 2.9368516655039627e-06, "loss": 0.5206, "step": 10013 }, { "epoch": 0.65, "grad_norm": 1.0801746845245361, "learning_rate": 2.9358995334473545e-06, "loss": 0.5295, "step": 10014 }, { "epoch": 0.65, "grad_norm": 1.259859323501587, "learning_rate": 2.934947491607252e-06, "loss": 0.5552, "step": 10015 }, { "epoch": 0.65, "grad_norm": 1.0736095905303955, "learning_rate": 2.933995540025267e-06, "loss": 0.5006, "step": 10016 }, { "epoch": 0.65, "grad_norm": 1.1663486957550049, "learning_rate": 2.9330436787430062e-06, "loss": 0.5172, "step": 10017 }, { "epoch": 0.65, "grad_norm": 1.095369577407837, "learning_rate": 2.932091907802074e-06, "loss": 0.4957, "step": 10018 }, { "epoch": 0.65, "grad_norm": 1.0506829023361206, "learning_rate": 2.931140227244071e-06, "loss": 0.5594, "step": 10019 }, { "epoch": 0.65, "grad_norm": 1.1391816139221191, "learning_rate": 2.9301886371105887e-06, "loss": 0.4959, "step": 10020 }, { "epoch": 0.65, "grad_norm": 1.071999430656433, "learning_rate": 2.929237137443221e-06, "loss": 0.5033, "step": 10021 }, { "epoch": 0.65, "grad_norm": 1.1933362483978271, "learning_rate": 2.928285728283555e-06, "loss": 0.5546, "step": 10022 }, { "epoch": 0.65, "grad_norm": 1.2849189043045044, "learning_rate": 2.9273344096731753e-06, "loss": 0.5232, "step": 10023 }, { "epoch": 0.65, "grad_norm": 1.1336814165115356, "learning_rate": 2.9263831816536608e-06, "loss": 0.5247, "step": 10024 }, { "epoch": 0.65, "grad_norm": 1.081855058670044, "learning_rate": 2.9254320442665894e-06, "loss": 0.5405, "step": 10025 }, { "epoch": 0.65, "grad_norm": 1.2362234592437744, "learning_rate": 2.9244809975535294e-06, "loss": 0.5296, "step": 10026 }, { "epoch": 0.65, "grad_norm": 1.2789620161056519, "learning_rate": 2.9235300415560497e-06, "loss": 0.5068, "step": 10027 }, { "epoch": 0.65, "grad_norm": 1.1642346382141113, "learning_rate": 2.922579176315714e-06, "loss": 0.5158, "step": 10028 }, { "epoch": 0.65, "grad_norm": 1.3359702825546265, "learning_rate": 2.921628401874083e-06, "loss": 0.5221, "step": 10029 }, { "epoch": 0.65, "grad_norm": 1.1425764560699463, "learning_rate": 2.920677718272713e-06, "loss": 0.5452, "step": 10030 }, { "epoch": 0.65, "grad_norm": 1.191367506980896, "learning_rate": 2.9197271255531568e-06, "loss": 0.5531, "step": 10031 }, { "epoch": 0.65, "grad_norm": 1.2270863056182861, "learning_rate": 2.918776623756958e-06, "loss": 0.5501, "step": 10032 }, { "epoch": 0.65, "grad_norm": 1.1961809396743774, "learning_rate": 2.917826212925664e-06, "loss": 0.5109, "step": 10033 }, { "epoch": 0.65, "grad_norm": 1.166253924369812, "learning_rate": 2.9168758931008144e-06, "loss": 0.5369, "step": 10034 }, { "epoch": 0.65, "grad_norm": 1.1780866384506226, "learning_rate": 2.915925664323944e-06, "loss": 0.52, "step": 10035 }, { "epoch": 0.65, "grad_norm": 1.121684193611145, "learning_rate": 2.914975526636587e-06, "loss": 0.5046, "step": 10036 }, { "epoch": 0.65, "grad_norm": 1.2038850784301758, "learning_rate": 2.9140254800802713e-06, "loss": 0.5273, "step": 10037 }, { "epoch": 0.65, "grad_norm": 1.1892852783203125, "learning_rate": 2.9130755246965193e-06, "loss": 0.4859, "step": 10038 }, { "epoch": 0.65, "grad_norm": 1.1591812372207642, "learning_rate": 2.9121256605268506e-06, "loss": 0.523, "step": 10039 }, { "epoch": 0.65, "grad_norm": 1.2649651765823364, "learning_rate": 2.9111758876127827e-06, "loss": 0.5274, "step": 10040 }, { "epoch": 0.65, "grad_norm": 1.331113338470459, "learning_rate": 2.9102262059958276e-06, "loss": 0.5447, "step": 10041 }, { "epoch": 0.65, "grad_norm": 1.2208222150802612, "learning_rate": 2.9092766157174935e-06, "loss": 0.5394, "step": 10042 }, { "epoch": 0.65, "grad_norm": 1.2461966276168823, "learning_rate": 2.908327116819286e-06, "loss": 0.5507, "step": 10043 }, { "epoch": 0.65, "grad_norm": 1.2020831108093262, "learning_rate": 2.9073777093427026e-06, "loss": 0.4947, "step": 10044 }, { "epoch": 0.65, "grad_norm": 1.231285572052002, "learning_rate": 2.9064283933292394e-06, "loss": 0.5116, "step": 10045 }, { "epoch": 0.65, "grad_norm": 1.1755354404449463, "learning_rate": 2.9054791688203897e-06, "loss": 0.5763, "step": 10046 }, { "epoch": 0.65, "grad_norm": 1.168724536895752, "learning_rate": 2.904530035857642e-06, "loss": 0.5353, "step": 10047 }, { "epoch": 0.65, "grad_norm": 1.1439353227615356, "learning_rate": 2.9035809944824805e-06, "loss": 0.5453, "step": 10048 }, { "epoch": 0.65, "grad_norm": 1.2128351926803589, "learning_rate": 2.9026320447363865e-06, "loss": 0.4761, "step": 10049 }, { "epoch": 0.65, "grad_norm": 1.1985127925872803, "learning_rate": 2.901683186660832e-06, "loss": 0.518, "step": 10050 }, { "epoch": 0.65, "grad_norm": 1.175121545791626, "learning_rate": 2.9007344202972924e-06, "loss": 0.513, "step": 10051 }, { "epoch": 0.65, "grad_norm": 1.096649408340454, "learning_rate": 2.8997857456872347e-06, "loss": 0.5407, "step": 10052 }, { "epoch": 0.65, "grad_norm": 1.1848523616790771, "learning_rate": 2.8988371628721233e-06, "loss": 0.5244, "step": 10053 }, { "epoch": 0.65, "grad_norm": 1.1969828605651855, "learning_rate": 2.8978886718934198e-06, "loss": 0.513, "step": 10054 }, { "epoch": 0.65, "grad_norm": 1.1563254594802856, "learning_rate": 2.896940272792579e-06, "loss": 0.5758, "step": 10055 }, { "epoch": 0.65, "grad_norm": 1.1784120798110962, "learning_rate": 2.895991965611052e-06, "loss": 0.5387, "step": 10056 }, { "epoch": 0.65, "grad_norm": 1.261176586151123, "learning_rate": 2.8950437503902873e-06, "loss": 0.5378, "step": 10057 }, { "epoch": 0.65, "grad_norm": 1.2436199188232422, "learning_rate": 2.8940956271717303e-06, "loss": 0.5071, "step": 10058 }, { "epoch": 0.65, "grad_norm": 1.1164618730545044, "learning_rate": 2.8931475959968212e-06, "loss": 0.4953, "step": 10059 }, { "epoch": 0.65, "grad_norm": 1.2102564573287964, "learning_rate": 2.892199656906993e-06, "loss": 0.5374, "step": 10060 }, { "epoch": 0.65, "grad_norm": 1.2117840051651, "learning_rate": 2.89125180994368e-06, "loss": 0.5507, "step": 10061 }, { "epoch": 0.65, "grad_norm": 1.347381591796875, "learning_rate": 2.8903040551483096e-06, "loss": 0.531, "step": 10062 }, { "epoch": 0.65, "grad_norm": 1.145080804824829, "learning_rate": 2.889356392562306e-06, "loss": 0.5129, "step": 10063 }, { "epoch": 0.65, "grad_norm": 1.1452906131744385, "learning_rate": 2.88840882222709e-06, "loss": 0.5049, "step": 10064 }, { "epoch": 0.65, "grad_norm": 1.2247129678726196, "learning_rate": 2.8874613441840753e-06, "loss": 0.5753, "step": 10065 }, { "epoch": 0.65, "grad_norm": 1.27224862575531, "learning_rate": 2.886513958474675e-06, "loss": 0.5413, "step": 10066 }, { "epoch": 0.65, "grad_norm": 1.1292786598205566, "learning_rate": 2.885566665140296e-06, "loss": 0.5064, "step": 10067 }, { "epoch": 0.65, "grad_norm": 1.1004201173782349, "learning_rate": 2.884619464222345e-06, "loss": 0.4502, "step": 10068 }, { "epoch": 0.65, "grad_norm": 1.2722193002700806, "learning_rate": 2.883672355762218e-06, "loss": 0.5366, "step": 10069 }, { "epoch": 0.65, "grad_norm": 1.375801682472229, "learning_rate": 2.8827253398013114e-06, "loss": 0.5046, "step": 10070 }, { "epoch": 0.65, "grad_norm": 1.1885055303573608, "learning_rate": 2.881778416381018e-06, "loss": 0.5023, "step": 10071 }, { "epoch": 0.65, "grad_norm": 1.2527196407318115, "learning_rate": 2.880831585542725e-06, "loss": 0.5293, "step": 10072 }, { "epoch": 0.65, "grad_norm": 1.1635829210281372, "learning_rate": 2.8798848473278174e-06, "loss": 0.5268, "step": 10073 }, { "epoch": 0.65, "grad_norm": 1.1570976972579956, "learning_rate": 2.8789382017776716e-06, "loss": 0.4952, "step": 10074 }, { "epoch": 0.65, "grad_norm": 1.1501917839050293, "learning_rate": 2.8779916489336645e-06, "loss": 0.4875, "step": 10075 }, { "epoch": 0.65, "grad_norm": 1.1773905754089355, "learning_rate": 2.8770451888371677e-06, "loss": 0.5086, "step": 10076 }, { "epoch": 0.65, "grad_norm": 1.2288705110549927, "learning_rate": 2.876098821529548e-06, "loss": 0.4998, "step": 10077 }, { "epoch": 0.65, "grad_norm": 1.0991992950439453, "learning_rate": 2.8751525470521712e-06, "loss": 0.4885, "step": 10078 }, { "epoch": 0.65, "grad_norm": 1.259883165359497, "learning_rate": 2.8742063654463923e-06, "loss": 0.5002, "step": 10079 }, { "epoch": 0.65, "grad_norm": 1.133736491203308, "learning_rate": 2.8732602767535688e-06, "loss": 0.5034, "step": 10080 }, { "epoch": 0.65, "grad_norm": 1.1345683336257935, "learning_rate": 2.872314281015052e-06, "loss": 0.4954, "step": 10081 }, { "epoch": 0.65, "grad_norm": 1.3435235023498535, "learning_rate": 2.871368378272188e-06, "loss": 0.5286, "step": 10082 }, { "epoch": 0.65, "grad_norm": 1.1330047845840454, "learning_rate": 2.87042256856632e-06, "loss": 0.5076, "step": 10083 }, { "epoch": 0.65, "grad_norm": 1.2045421600341797, "learning_rate": 2.869476851938789e-06, "loss": 0.5217, "step": 10084 }, { "epoch": 0.65, "grad_norm": 1.1453440189361572, "learning_rate": 2.8685312284309262e-06, "loss": 0.5364, "step": 10085 }, { "epoch": 0.65, "grad_norm": 1.195063591003418, "learning_rate": 2.8675856980840645e-06, "loss": 0.5023, "step": 10086 }, { "epoch": 0.65, "grad_norm": 1.2407350540161133, "learning_rate": 2.8666402609395295e-06, "loss": 0.542, "step": 10087 }, { "epoch": 0.65, "grad_norm": 1.0507951974868774, "learning_rate": 2.865694917038645e-06, "loss": 0.5314, "step": 10088 }, { "epoch": 0.65, "grad_norm": 1.2446492910385132, "learning_rate": 2.8647496664227294e-06, "loss": 0.5501, "step": 10089 }, { "epoch": 0.65, "grad_norm": 1.1930021047592163, "learning_rate": 2.863804509133099e-06, "loss": 0.5132, "step": 10090 }, { "epoch": 0.65, "grad_norm": 1.1488780975341797, "learning_rate": 2.8628594452110593e-06, "loss": 0.506, "step": 10091 }, { "epoch": 0.65, "grad_norm": 1.2372722625732422, "learning_rate": 2.86191447469792e-06, "loss": 0.5007, "step": 10092 }, { "epoch": 0.65, "grad_norm": 1.1930935382843018, "learning_rate": 2.8609695976349816e-06, "loss": 0.5175, "step": 10093 }, { "epoch": 0.65, "grad_norm": 1.2166590690612793, "learning_rate": 2.8600248140635446e-06, "loss": 0.4868, "step": 10094 }, { "epoch": 0.65, "grad_norm": 1.0926462411880493, "learning_rate": 2.8590801240249013e-06, "loss": 0.5181, "step": 10095 }, { "epoch": 0.65, "grad_norm": 1.1707513332366943, "learning_rate": 2.858135527560344e-06, "loss": 0.4834, "step": 10096 }, { "epoch": 0.65, "grad_norm": 1.3146146535873413, "learning_rate": 2.8571910247111544e-06, "loss": 0.5364, "step": 10097 }, { "epoch": 0.65, "grad_norm": 1.4002307653427124, "learning_rate": 2.8562466155186176e-06, "loss": 0.5272, "step": 10098 }, { "epoch": 0.65, "grad_norm": 1.053755283355713, "learning_rate": 2.8553023000240094e-06, "loss": 0.4919, "step": 10099 }, { "epoch": 0.65, "grad_norm": 1.2093251943588257, "learning_rate": 2.8543580782686042e-06, "loss": 0.5192, "step": 10100 }, { "epoch": 0.65, "grad_norm": 1.1357975006103516, "learning_rate": 2.853413950293674e-06, "loss": 0.5483, "step": 10101 }, { "epoch": 0.65, "grad_norm": 1.109190821647644, "learning_rate": 2.85246991614048e-06, "loss": 0.5063, "step": 10102 }, { "epoch": 0.65, "grad_norm": 1.0853711366653442, "learning_rate": 2.8515259758502843e-06, "loss": 0.4637, "step": 10103 }, { "epoch": 0.65, "grad_norm": 1.3012593984603882, "learning_rate": 2.850582129464346e-06, "loss": 0.558, "step": 10104 }, { "epoch": 0.65, "grad_norm": 1.2156487703323364, "learning_rate": 2.849638377023917e-06, "loss": 0.5298, "step": 10105 }, { "epoch": 0.65, "grad_norm": 1.2530640363693237, "learning_rate": 2.8486947185702463e-06, "loss": 0.5414, "step": 10106 }, { "epoch": 0.65, "grad_norm": 1.148038625717163, "learning_rate": 2.8477511541445815e-06, "loss": 0.4712, "step": 10107 }, { "epoch": 0.65, "grad_norm": 1.298565149307251, "learning_rate": 2.8468076837881587e-06, "loss": 0.5097, "step": 10108 }, { "epoch": 0.65, "grad_norm": 1.1809571981430054, "learning_rate": 2.8458643075422167e-06, "loss": 0.5306, "step": 10109 }, { "epoch": 0.65, "grad_norm": 1.1378021240234375, "learning_rate": 2.8449210254479877e-06, "loss": 0.5044, "step": 10110 }, { "epoch": 0.65, "grad_norm": 1.154218316078186, "learning_rate": 2.843977837546701e-06, "loss": 0.5337, "step": 10111 }, { "epoch": 0.65, "grad_norm": 1.283806324005127, "learning_rate": 2.84303474387958e-06, "loss": 0.5464, "step": 10112 }, { "epoch": 0.65, "grad_norm": 1.1571234464645386, "learning_rate": 2.842091744487847e-06, "loss": 0.5325, "step": 10113 }, { "epoch": 0.65, "grad_norm": 1.26659095287323, "learning_rate": 2.841148839412715e-06, "loss": 0.5827, "step": 10114 }, { "epoch": 0.65, "grad_norm": 1.1286925077438354, "learning_rate": 2.840206028695397e-06, "loss": 0.5024, "step": 10115 }, { "epoch": 0.65, "grad_norm": 1.2404431104660034, "learning_rate": 2.839263312377101e-06, "loss": 0.5548, "step": 10116 }, { "epoch": 0.65, "grad_norm": 1.1367824077606201, "learning_rate": 2.8383206904990315e-06, "loss": 0.5187, "step": 10117 }, { "epoch": 0.65, "grad_norm": 1.1196091175079346, "learning_rate": 2.8373781631023866e-06, "loss": 0.5341, "step": 10118 }, { "epoch": 0.65, "grad_norm": 1.0997917652130127, "learning_rate": 2.836435730228365e-06, "loss": 0.4898, "step": 10119 }, { "epoch": 0.65, "grad_norm": 1.221140742301941, "learning_rate": 2.8354933919181537e-06, "loss": 0.5467, "step": 10120 }, { "epoch": 0.65, "grad_norm": 1.1999149322509766, "learning_rate": 2.8345511482129417e-06, "loss": 0.5306, "step": 10121 }, { "epoch": 0.65, "grad_norm": 1.1088324785232544, "learning_rate": 2.8336089991539122e-06, "loss": 0.5366, "step": 10122 }, { "epoch": 0.65, "grad_norm": 1.1591076850891113, "learning_rate": 2.832666944782244e-06, "loss": 0.512, "step": 10123 }, { "epoch": 0.65, "grad_norm": 1.1500420570373535, "learning_rate": 2.8317249851391114e-06, "loss": 0.4805, "step": 10124 }, { "epoch": 0.65, "grad_norm": 1.1505106687545776, "learning_rate": 2.830783120265688e-06, "loss": 0.5341, "step": 10125 }, { "epoch": 0.65, "grad_norm": 1.0922231674194336, "learning_rate": 2.8298413502031353e-06, "loss": 0.4752, "step": 10126 }, { "epoch": 0.65, "grad_norm": 1.208850622177124, "learning_rate": 2.828899674992618e-06, "loss": 0.5649, "step": 10127 }, { "epoch": 0.65, "grad_norm": 1.1876156330108643, "learning_rate": 2.827958094675295e-06, "loss": 0.5288, "step": 10128 }, { "epoch": 0.65, "grad_norm": 1.1208659410476685, "learning_rate": 2.827016609292319e-06, "loss": 0.4904, "step": 10129 }, { "epoch": 0.65, "grad_norm": 1.059951663017273, "learning_rate": 2.826075218884841e-06, "loss": 0.4549, "step": 10130 }, { "epoch": 0.65, "grad_norm": 1.2132508754730225, "learning_rate": 2.8251339234940084e-06, "loss": 0.5122, "step": 10131 }, { "epoch": 0.65, "grad_norm": 1.1399322748184204, "learning_rate": 2.824192723160959e-06, "loss": 0.5239, "step": 10132 }, { "epoch": 0.65, "grad_norm": 1.2125855684280396, "learning_rate": 2.8232516179268314e-06, "loss": 0.5381, "step": 10133 }, { "epoch": 0.65, "grad_norm": 1.2018935680389404, "learning_rate": 2.82231060783276e-06, "loss": 0.5004, "step": 10134 }, { "epoch": 0.65, "grad_norm": 1.1649835109710693, "learning_rate": 2.821369692919873e-06, "loss": 0.524, "step": 10135 }, { "epoch": 0.65, "grad_norm": 1.1848961114883423, "learning_rate": 2.820428873229296e-06, "loss": 0.5496, "step": 10136 }, { "epoch": 0.65, "grad_norm": 1.2693864107131958, "learning_rate": 2.819488148802152e-06, "loss": 0.5045, "step": 10137 }, { "epoch": 0.65, "grad_norm": 1.1226322650909424, "learning_rate": 2.8185475196795532e-06, "loss": 0.5245, "step": 10138 }, { "epoch": 0.65, "grad_norm": 1.1245938539505005, "learning_rate": 2.8176069859026133e-06, "loss": 0.5004, "step": 10139 }, { "epoch": 0.65, "grad_norm": 1.1237895488739014, "learning_rate": 2.816666547512442e-06, "loss": 0.5516, "step": 10140 }, { "epoch": 0.65, "grad_norm": 1.2003037929534912, "learning_rate": 2.8157262045501426e-06, "loss": 0.4885, "step": 10141 }, { "epoch": 0.65, "grad_norm": 1.3194704055786133, "learning_rate": 2.8147859570568176e-06, "loss": 0.5352, "step": 10142 }, { "epoch": 0.65, "grad_norm": 1.1532107591629028, "learning_rate": 2.8138458050735586e-06, "loss": 0.5141, "step": 10143 }, { "epoch": 0.65, "grad_norm": 1.1829849481582642, "learning_rate": 2.8129057486414584e-06, "loss": 0.5407, "step": 10144 }, { "epoch": 0.65, "grad_norm": 1.1781374216079712, "learning_rate": 2.8119657878016053e-06, "loss": 0.5372, "step": 10145 }, { "epoch": 0.65, "grad_norm": 1.0247775316238403, "learning_rate": 2.811025922595083e-06, "loss": 0.4653, "step": 10146 }, { "epoch": 0.65, "grad_norm": 1.2725462913513184, "learning_rate": 2.8100861530629698e-06, "loss": 0.4622, "step": 10147 }, { "epoch": 0.66, "grad_norm": 1.2236748933792114, "learning_rate": 2.8091464792463422e-06, "loss": 0.5255, "step": 10148 }, { "epoch": 0.66, "grad_norm": 1.1489951610565186, "learning_rate": 2.8082069011862676e-06, "loss": 0.4613, "step": 10149 }, { "epoch": 0.66, "grad_norm": 1.0951216220855713, "learning_rate": 2.807267418923815e-06, "loss": 0.4595, "step": 10150 }, { "epoch": 0.66, "grad_norm": 1.1696287393569946, "learning_rate": 2.8063280325000465e-06, "loss": 0.5487, "step": 10151 }, { "epoch": 0.66, "grad_norm": 1.3191304206848145, "learning_rate": 2.8053887419560194e-06, "loss": 0.5379, "step": 10152 }, { "epoch": 0.66, "grad_norm": 1.260022759437561, "learning_rate": 2.8044495473327904e-06, "loss": 0.5346, "step": 10153 }, { "epoch": 0.66, "grad_norm": 1.3030760288238525, "learning_rate": 2.803510448671405e-06, "loss": 0.5407, "step": 10154 }, { "epoch": 0.66, "grad_norm": 1.2426278591156006, "learning_rate": 2.8025714460129105e-06, "loss": 0.5098, "step": 10155 }, { "epoch": 0.66, "grad_norm": 1.1594715118408203, "learning_rate": 2.801632539398349e-06, "loss": 0.5174, "step": 10156 }, { "epoch": 0.66, "grad_norm": 1.1248278617858887, "learning_rate": 2.8006937288687575e-06, "loss": 0.5047, "step": 10157 }, { "epoch": 0.66, "grad_norm": 1.240604043006897, "learning_rate": 2.799755014465171e-06, "loss": 0.5371, "step": 10158 }, { "epoch": 0.66, "grad_norm": 1.1549090147018433, "learning_rate": 2.798816396228613e-06, "loss": 0.5079, "step": 10159 }, { "epoch": 0.66, "grad_norm": 1.1702147722244263, "learning_rate": 2.7978778742001123e-06, "loss": 0.5198, "step": 10160 }, { "epoch": 0.66, "grad_norm": 1.0742751359939575, "learning_rate": 2.796939448420688e-06, "loss": 0.4712, "step": 10161 }, { "epoch": 0.66, "grad_norm": 1.1495124101638794, "learning_rate": 2.796001118931358e-06, "loss": 0.5002, "step": 10162 }, { "epoch": 0.66, "grad_norm": 1.0973402261734009, "learning_rate": 2.795062885773131e-06, "loss": 0.4788, "step": 10163 }, { "epoch": 0.66, "grad_norm": 1.2350746393203735, "learning_rate": 2.794124748987016e-06, "loss": 0.5473, "step": 10164 }, { "epoch": 0.66, "grad_norm": 1.139095664024353, "learning_rate": 2.7931867086140174e-06, "loss": 0.5055, "step": 10165 }, { "epoch": 0.66, "grad_norm": 1.209989070892334, "learning_rate": 2.7922487646951345e-06, "loss": 0.5449, "step": 10166 }, { "epoch": 0.66, "grad_norm": 1.1657934188842773, "learning_rate": 2.791310917271364e-06, "loss": 0.5227, "step": 10167 }, { "epoch": 0.66, "grad_norm": 1.1824209690093994, "learning_rate": 2.790373166383692e-06, "loss": 0.5416, "step": 10168 }, { "epoch": 0.66, "grad_norm": 1.2198816537857056, "learning_rate": 2.7894355120731087e-06, "loss": 0.4813, "step": 10169 }, { "epoch": 0.66, "grad_norm": 1.1035962104797363, "learning_rate": 2.7884979543805953e-06, "loss": 0.5636, "step": 10170 }, { "epoch": 0.66, "grad_norm": 1.1283620595932007, "learning_rate": 2.7875604933471306e-06, "loss": 0.4785, "step": 10171 }, { "epoch": 0.66, "grad_norm": 1.1242793798446655, "learning_rate": 2.7866231290136907e-06, "loss": 0.4867, "step": 10172 }, { "epoch": 0.66, "grad_norm": 1.2116812467575073, "learning_rate": 2.785685861421241e-06, "loss": 0.5636, "step": 10173 }, { "epoch": 0.66, "grad_norm": 1.1549392938613892, "learning_rate": 2.78474869061075e-06, "loss": 0.5693, "step": 10174 }, { "epoch": 0.66, "grad_norm": 1.149938941001892, "learning_rate": 2.7838116166231775e-06, "loss": 0.503, "step": 10175 }, { "epoch": 0.66, "grad_norm": 1.223578691482544, "learning_rate": 2.7828746394994823e-06, "loss": 0.5293, "step": 10176 }, { "epoch": 0.66, "grad_norm": 1.2032955884933472, "learning_rate": 2.7819377592806164e-06, "loss": 0.5554, "step": 10177 }, { "epoch": 0.66, "grad_norm": 1.2487245798110962, "learning_rate": 2.7810009760075296e-06, "loss": 0.5804, "step": 10178 }, { "epoch": 0.66, "grad_norm": 1.3047969341278076, "learning_rate": 2.780064289721163e-06, "loss": 0.5219, "step": 10179 }, { "epoch": 0.66, "grad_norm": 1.1906558275222778, "learning_rate": 2.7791277004624596e-06, "loss": 0.5493, "step": 10180 }, { "epoch": 0.66, "grad_norm": 1.0828683376312256, "learning_rate": 2.7781912082723535e-06, "loss": 0.5556, "step": 10181 }, { "epoch": 0.66, "grad_norm": 1.2216063737869263, "learning_rate": 2.7772548131917775e-06, "loss": 0.5254, "step": 10182 }, { "epoch": 0.66, "grad_norm": 1.1307231187820435, "learning_rate": 2.7763185152616594e-06, "loss": 0.5073, "step": 10183 }, { "epoch": 0.66, "grad_norm": 1.2043777704238892, "learning_rate": 2.7753823145229236e-06, "loss": 0.5001, "step": 10184 }, { "epoch": 0.66, "grad_norm": 1.2527254819869995, "learning_rate": 2.774446211016485e-06, "loss": 0.5666, "step": 10185 }, { "epoch": 0.66, "grad_norm": 1.247689127922058, "learning_rate": 2.7735102047832606e-06, "loss": 0.5209, "step": 10186 }, { "epoch": 0.66, "grad_norm": 1.2127189636230469, "learning_rate": 2.772574295864161e-06, "loss": 0.5067, "step": 10187 }, { "epoch": 0.66, "grad_norm": 1.2330416440963745, "learning_rate": 2.771638484300092e-06, "loss": 0.5869, "step": 10188 }, { "epoch": 0.66, "grad_norm": 1.17604660987854, "learning_rate": 2.7707027701319577e-06, "loss": 0.4771, "step": 10189 }, { "epoch": 0.66, "grad_norm": 1.1875380277633667, "learning_rate": 2.7697671534006516e-06, "loss": 0.5128, "step": 10190 }, { "epoch": 0.66, "grad_norm": 1.1654266119003296, "learning_rate": 2.768831634147069e-06, "loss": 0.4989, "step": 10191 }, { "epoch": 0.66, "grad_norm": 1.0573859214782715, "learning_rate": 2.767896212412099e-06, "loss": 0.5033, "step": 10192 }, { "epoch": 0.66, "grad_norm": 1.2073111534118652, "learning_rate": 2.766960888236627e-06, "loss": 0.5115, "step": 10193 }, { "epoch": 0.66, "grad_norm": 1.2316190004348755, "learning_rate": 2.7660256616615332e-06, "loss": 0.5197, "step": 10194 }, { "epoch": 0.66, "grad_norm": 1.121495246887207, "learning_rate": 2.7650905327276957e-06, "loss": 0.503, "step": 10195 }, { "epoch": 0.66, "grad_norm": 1.2320806980133057, "learning_rate": 2.7641555014759834e-06, "loss": 0.579, "step": 10196 }, { "epoch": 0.66, "grad_norm": 1.23375403881073, "learning_rate": 2.763220567947265e-06, "loss": 0.5694, "step": 10197 }, { "epoch": 0.66, "grad_norm": 1.0712313652038574, "learning_rate": 2.762285732182405e-06, "loss": 0.5298, "step": 10198 }, { "epoch": 0.66, "grad_norm": 1.1276167631149292, "learning_rate": 2.761350994222262e-06, "loss": 0.5202, "step": 10199 }, { "epoch": 0.66, "grad_norm": 1.2021589279174805, "learning_rate": 2.760416354107691e-06, "loss": 0.4956, "step": 10200 }, { "epoch": 0.66, "grad_norm": 1.3472028970718384, "learning_rate": 2.7594818118795465e-06, "loss": 0.5281, "step": 10201 }, { "epoch": 0.66, "grad_norm": 1.1933507919311523, "learning_rate": 2.758547367578668e-06, "loss": 0.5214, "step": 10202 }, { "epoch": 0.66, "grad_norm": 1.2851048707962036, "learning_rate": 2.7576130212459006e-06, "loss": 0.5606, "step": 10203 }, { "epoch": 0.66, "grad_norm": 1.217191219329834, "learning_rate": 2.7566787729220833e-06, "loss": 0.5031, "step": 10204 }, { "epoch": 0.66, "grad_norm": 1.2889562845230103, "learning_rate": 2.7557446226480493e-06, "loss": 0.5525, "step": 10205 }, { "epoch": 0.66, "grad_norm": 1.147607684135437, "learning_rate": 2.754810570464628e-06, "loss": 0.5197, "step": 10206 }, { "epoch": 0.66, "grad_norm": 1.213258981704712, "learning_rate": 2.7538766164126447e-06, "loss": 0.5498, "step": 10207 }, { "epoch": 0.66, "grad_norm": 1.190893530845642, "learning_rate": 2.752942760532918e-06, "loss": 0.5328, "step": 10208 }, { "epoch": 0.66, "grad_norm": 1.1804897785186768, "learning_rate": 2.752009002866266e-06, "loss": 0.5153, "step": 10209 }, { "epoch": 0.66, "grad_norm": 1.1238348484039307, "learning_rate": 2.7510753434535e-06, "loss": 0.5106, "step": 10210 }, { "epoch": 0.66, "grad_norm": 1.0484281778335571, "learning_rate": 2.7501417823354287e-06, "loss": 0.5115, "step": 10211 }, { "epoch": 0.66, "grad_norm": 1.2252014875411987, "learning_rate": 2.749208319552855e-06, "loss": 0.5364, "step": 10212 }, { "epoch": 0.66, "grad_norm": 1.1151214838027954, "learning_rate": 2.7482749551465815e-06, "loss": 0.505, "step": 10213 }, { "epoch": 0.66, "grad_norm": 1.2916613817214966, "learning_rate": 2.7473416891573976e-06, "loss": 0.5582, "step": 10214 }, { "epoch": 0.66, "grad_norm": 1.163909912109375, "learning_rate": 2.7464085216260967e-06, "loss": 0.512, "step": 10215 }, { "epoch": 0.66, "grad_norm": 1.2034844160079956, "learning_rate": 2.7454754525934644e-06, "loss": 0.5509, "step": 10216 }, { "epoch": 0.66, "grad_norm": 1.1549543142318726, "learning_rate": 2.7445424821002843e-06, "loss": 0.5056, "step": 10217 }, { "epoch": 0.66, "grad_norm": 1.1721373796463013, "learning_rate": 2.743609610187332e-06, "loss": 0.4864, "step": 10218 }, { "epoch": 0.66, "grad_norm": 1.2607841491699219, "learning_rate": 2.742676836895385e-06, "loss": 0.5071, "step": 10219 }, { "epoch": 0.66, "grad_norm": 1.1461842060089111, "learning_rate": 2.7417441622652075e-06, "loss": 0.5043, "step": 10220 }, { "epoch": 0.66, "grad_norm": 1.2001937627792358, "learning_rate": 2.7408115863375663e-06, "loss": 0.5081, "step": 10221 }, { "epoch": 0.66, "grad_norm": 1.1275197267532349, "learning_rate": 2.739879109153221e-06, "loss": 0.506, "step": 10222 }, { "epoch": 0.66, "grad_norm": 1.1420665979385376, "learning_rate": 2.7389467307529293e-06, "loss": 0.5264, "step": 10223 }, { "epoch": 0.66, "grad_norm": 1.2424094676971436, "learning_rate": 2.7380144511774427e-06, "loss": 0.5825, "step": 10224 }, { "epoch": 0.66, "grad_norm": 1.0853822231292725, "learning_rate": 2.73708227046751e-06, "loss": 0.5114, "step": 10225 }, { "epoch": 0.66, "grad_norm": 1.1953725814819336, "learning_rate": 2.736150188663871e-06, "loss": 0.5114, "step": 10226 }, { "epoch": 0.66, "grad_norm": 1.148635745048523, "learning_rate": 2.7352182058072664e-06, "loss": 0.5416, "step": 10227 }, { "epoch": 0.66, "grad_norm": 1.3801653385162354, "learning_rate": 2.734286321938431e-06, "loss": 0.5709, "step": 10228 }, { "epoch": 0.66, "grad_norm": 1.1107085943222046, "learning_rate": 2.733354537098094e-06, "loss": 0.4661, "step": 10229 }, { "epoch": 0.66, "grad_norm": 1.184226155281067, "learning_rate": 2.7324228513269847e-06, "loss": 0.5261, "step": 10230 }, { "epoch": 0.66, "grad_norm": 1.118945837020874, "learning_rate": 2.7314912646658205e-06, "loss": 0.5614, "step": 10231 }, { "epoch": 0.66, "grad_norm": 1.1494486331939697, "learning_rate": 2.73055977715532e-06, "loss": 0.4816, "step": 10232 }, { "epoch": 0.66, "grad_norm": 1.3201820850372314, "learning_rate": 2.729628388836196e-06, "loss": 0.5324, "step": 10233 }, { "epoch": 0.66, "grad_norm": 1.1678392887115479, "learning_rate": 2.728697099749158e-06, "loss": 0.5279, "step": 10234 }, { "epoch": 0.66, "grad_norm": 1.0714505910873413, "learning_rate": 2.7277659099349095e-06, "loss": 0.488, "step": 10235 }, { "epoch": 0.66, "grad_norm": 1.2989600896835327, "learning_rate": 2.726834819434153e-06, "loss": 0.5235, "step": 10236 }, { "epoch": 0.66, "grad_norm": 1.1383094787597656, "learning_rate": 2.7259038282875795e-06, "loss": 0.5213, "step": 10237 }, { "epoch": 0.66, "grad_norm": 1.2316402196884155, "learning_rate": 2.7249729365358824e-06, "loss": 0.5235, "step": 10238 }, { "epoch": 0.66, "grad_norm": 1.2303603887557983, "learning_rate": 2.7240421442197484e-06, "loss": 0.5327, "step": 10239 }, { "epoch": 0.66, "grad_norm": 1.2617002725601196, "learning_rate": 2.7231114513798608e-06, "loss": 0.5146, "step": 10240 }, { "epoch": 0.66, "grad_norm": 1.0992543697357178, "learning_rate": 2.7221808580568967e-06, "loss": 0.506, "step": 10241 }, { "epoch": 0.66, "grad_norm": 1.1598153114318848, "learning_rate": 2.7212503642915323e-06, "loss": 0.4841, "step": 10242 }, { "epoch": 0.66, "grad_norm": 1.1878072023391724, "learning_rate": 2.7203199701244336e-06, "loss": 0.5178, "step": 10243 }, { "epoch": 0.66, "grad_norm": 1.2944426536560059, "learning_rate": 2.7193896755962672e-06, "loss": 0.5599, "step": 10244 }, { "epoch": 0.66, "grad_norm": 1.2524527311325073, "learning_rate": 2.7184594807476936e-06, "loss": 0.4941, "step": 10245 }, { "epoch": 0.66, "grad_norm": 1.2964754104614258, "learning_rate": 2.717529385619371e-06, "loss": 0.534, "step": 10246 }, { "epoch": 0.66, "grad_norm": 1.1503854990005493, "learning_rate": 2.7165993902519505e-06, "loss": 0.507, "step": 10247 }, { "epoch": 0.66, "grad_norm": 1.1954017877578735, "learning_rate": 2.7156694946860782e-06, "loss": 0.5056, "step": 10248 }, { "epoch": 0.66, "grad_norm": 1.15092933177948, "learning_rate": 2.7147396989623987e-06, "loss": 0.5052, "step": 10249 }, { "epoch": 0.66, "grad_norm": 1.073237419128418, "learning_rate": 2.7138100031215506e-06, "loss": 0.5193, "step": 10250 }, { "epoch": 0.66, "grad_norm": 1.1618252992630005, "learning_rate": 2.7128804072041714e-06, "loss": 0.5485, "step": 10251 }, { "epoch": 0.66, "grad_norm": 1.0906628370285034, "learning_rate": 2.7119509112508865e-06, "loss": 0.5077, "step": 10252 }, { "epoch": 0.66, "grad_norm": 1.118464469909668, "learning_rate": 2.711021515302323e-06, "loss": 0.5228, "step": 10253 }, { "epoch": 0.66, "grad_norm": 1.1514830589294434, "learning_rate": 2.710092219399105e-06, "loss": 0.5318, "step": 10254 }, { "epoch": 0.66, "grad_norm": 1.1467446088790894, "learning_rate": 2.7091630235818473e-06, "loss": 0.5031, "step": 10255 }, { "epoch": 0.66, "grad_norm": 1.0953782796859741, "learning_rate": 2.708233927891165e-06, "loss": 0.498, "step": 10256 }, { "epoch": 0.66, "grad_norm": 1.142796277999878, "learning_rate": 2.7073049323676635e-06, "loss": 0.4832, "step": 10257 }, { "epoch": 0.66, "grad_norm": 1.0950191020965576, "learning_rate": 2.7063760370519475e-06, "loss": 0.4825, "step": 10258 }, { "epoch": 0.66, "grad_norm": 1.1392674446105957, "learning_rate": 2.7054472419846183e-06, "loss": 0.4596, "step": 10259 }, { "epoch": 0.66, "grad_norm": 1.183953881263733, "learning_rate": 2.7045185472062717e-06, "loss": 0.5211, "step": 10260 }, { "epoch": 0.66, "grad_norm": 1.1702946424484253, "learning_rate": 2.703589952757495e-06, "loss": 0.5332, "step": 10261 }, { "epoch": 0.66, "grad_norm": 1.111974835395813, "learning_rate": 2.7026614586788763e-06, "loss": 0.4942, "step": 10262 }, { "epoch": 0.66, "grad_norm": 1.2975274324417114, "learning_rate": 2.7017330650109986e-06, "loss": 0.5211, "step": 10263 }, { "epoch": 0.66, "grad_norm": 1.2029038667678833, "learning_rate": 2.7008047717944386e-06, "loss": 0.5235, "step": 10264 }, { "epoch": 0.66, "grad_norm": 1.105429768562317, "learning_rate": 2.69987657906977e-06, "loss": 0.4892, "step": 10265 }, { "epoch": 0.66, "grad_norm": 1.1509262323379517, "learning_rate": 2.6989484868775634e-06, "loss": 0.5852, "step": 10266 }, { "epoch": 0.66, "grad_norm": 1.1780683994293213, "learning_rate": 2.69802049525838e-06, "loss": 0.5301, "step": 10267 }, { "epoch": 0.66, "grad_norm": 1.1906607151031494, "learning_rate": 2.6970926042527816e-06, "loss": 0.5226, "step": 10268 }, { "epoch": 0.66, "grad_norm": 1.2061296701431274, "learning_rate": 2.6961648139013242e-06, "loss": 0.5306, "step": 10269 }, { "epoch": 0.66, "grad_norm": 1.141809344291687, "learning_rate": 2.6952371242445584e-06, "loss": 0.5014, "step": 10270 }, { "epoch": 0.66, "grad_norm": 1.084310531616211, "learning_rate": 2.6943095353230335e-06, "loss": 0.513, "step": 10271 }, { "epoch": 0.66, "grad_norm": 1.1860637664794922, "learning_rate": 2.6933820471772888e-06, "loss": 0.5433, "step": 10272 }, { "epoch": 0.66, "grad_norm": 1.2769184112548828, "learning_rate": 2.692454659847863e-06, "loss": 0.5895, "step": 10273 }, { "epoch": 0.66, "grad_norm": 1.233406662940979, "learning_rate": 2.69152737337529e-06, "loss": 0.5658, "step": 10274 }, { "epoch": 0.66, "grad_norm": 1.1092393398284912, "learning_rate": 2.6906001878001e-06, "loss": 0.5438, "step": 10275 }, { "epoch": 0.66, "grad_norm": 1.1860735416412354, "learning_rate": 2.6896731031628177e-06, "loss": 0.5146, "step": 10276 }, { "epoch": 0.66, "grad_norm": 1.3289916515350342, "learning_rate": 2.6887461195039654e-06, "loss": 0.5025, "step": 10277 }, { "epoch": 0.66, "grad_norm": 1.2757951021194458, "learning_rate": 2.6878192368640547e-06, "loss": 0.5435, "step": 10278 }, { "epoch": 0.66, "grad_norm": 1.1830792427062988, "learning_rate": 2.6868924552836e-06, "loss": 0.5088, "step": 10279 }, { "epoch": 0.66, "grad_norm": 1.2084559202194214, "learning_rate": 2.6859657748031083e-06, "loss": 0.5379, "step": 10280 }, { "epoch": 0.66, "grad_norm": 1.1574980020523071, "learning_rate": 2.6850391954630815e-06, "loss": 0.53, "step": 10281 }, { "epoch": 0.66, "grad_norm": 1.2799972295761108, "learning_rate": 2.6841127173040197e-06, "loss": 0.5497, "step": 10282 }, { "epoch": 0.66, "grad_norm": 1.1395119428634644, "learning_rate": 2.6831863403664176e-06, "loss": 0.5231, "step": 10283 }, { "epoch": 0.66, "grad_norm": 1.1479523181915283, "learning_rate": 2.6822600646907614e-06, "loss": 0.5424, "step": 10284 }, { "epoch": 0.66, "grad_norm": 1.105676293373108, "learning_rate": 2.6813338903175374e-06, "loss": 0.4748, "step": 10285 }, { "epoch": 0.66, "grad_norm": 1.1768364906311035, "learning_rate": 2.6804078172872273e-06, "loss": 0.5237, "step": 10286 }, { "epoch": 0.66, "grad_norm": 1.1966493129730225, "learning_rate": 2.679481845640306e-06, "loss": 0.5051, "step": 10287 }, { "epoch": 0.66, "grad_norm": 1.1364307403564453, "learning_rate": 2.6785559754172473e-06, "loss": 0.5378, "step": 10288 }, { "epoch": 0.66, "grad_norm": 1.393783688545227, "learning_rate": 2.677630206658519e-06, "loss": 0.5075, "step": 10289 }, { "epoch": 0.66, "grad_norm": 1.1900508403778076, "learning_rate": 2.6767045394045804e-06, "loss": 0.5138, "step": 10290 }, { "epoch": 0.66, "grad_norm": 1.1351032257080078, "learning_rate": 2.6757789736958928e-06, "loss": 0.5411, "step": 10291 }, { "epoch": 0.66, "grad_norm": 1.161458134651184, "learning_rate": 2.6748535095729088e-06, "loss": 0.5271, "step": 10292 }, { "epoch": 0.66, "grad_norm": 1.2543014287948608, "learning_rate": 2.6739281470760797e-06, "loss": 0.5554, "step": 10293 }, { "epoch": 0.66, "grad_norm": 1.119085669517517, "learning_rate": 2.6730028862458498e-06, "loss": 0.5302, "step": 10294 }, { "epoch": 0.66, "grad_norm": 1.240906834602356, "learning_rate": 2.672077727122662e-06, "loss": 0.4769, "step": 10295 }, { "epoch": 0.66, "grad_norm": 1.1621826887130737, "learning_rate": 2.671152669746948e-06, "loss": 0.4974, "step": 10296 }, { "epoch": 0.66, "grad_norm": 1.20156729221344, "learning_rate": 2.6702277141591433e-06, "loss": 0.5334, "step": 10297 }, { "epoch": 0.66, "grad_norm": 1.2574774026870728, "learning_rate": 2.669302860399674e-06, "loss": 0.5666, "step": 10298 }, { "epoch": 0.66, "grad_norm": 1.1344420909881592, "learning_rate": 2.6683781085089626e-06, "loss": 0.5377, "step": 10299 }, { "epoch": 0.66, "grad_norm": 1.1109645366668701, "learning_rate": 2.667453458527429e-06, "loss": 0.494, "step": 10300 }, { "epoch": 0.66, "grad_norm": 1.1650744676589966, "learning_rate": 2.6665289104954887e-06, "loss": 0.5363, "step": 10301 }, { "epoch": 0.66, "grad_norm": 1.21123206615448, "learning_rate": 2.6656044644535472e-06, "loss": 0.5741, "step": 10302 }, { "epoch": 0.67, "grad_norm": 1.0555291175842285, "learning_rate": 2.6646801204420127e-06, "loss": 0.5179, "step": 10303 }, { "epoch": 0.67, "grad_norm": 1.2528679370880127, "learning_rate": 2.663755878501284e-06, "loss": 0.5324, "step": 10304 }, { "epoch": 0.67, "grad_norm": 1.3048847913742065, "learning_rate": 2.6628317386717584e-06, "loss": 0.4876, "step": 10305 }, { "epoch": 0.67, "grad_norm": 1.2138047218322754, "learning_rate": 2.661907700993827e-06, "loss": 0.5281, "step": 10306 }, { "epoch": 0.67, "grad_norm": 1.1653186082839966, "learning_rate": 2.66098376550788e-06, "loss": 0.4983, "step": 10307 }, { "epoch": 0.67, "grad_norm": 1.117082953453064, "learning_rate": 2.6600599322542963e-06, "loss": 0.5112, "step": 10308 }, { "epoch": 0.67, "grad_norm": 1.155344843864441, "learning_rate": 2.6591362012734557e-06, "loss": 0.5404, "step": 10309 }, { "epoch": 0.67, "grad_norm": 1.207527995109558, "learning_rate": 2.6582125726057316e-06, "loss": 0.5254, "step": 10310 }, { "epoch": 0.67, "grad_norm": 1.1778675317764282, "learning_rate": 2.6572890462914948e-06, "loss": 0.5245, "step": 10311 }, { "epoch": 0.67, "grad_norm": 1.1107224225997925, "learning_rate": 2.656365622371111e-06, "loss": 0.4678, "step": 10312 }, { "epoch": 0.67, "grad_norm": 1.1394520998001099, "learning_rate": 2.6554423008849374e-06, "loss": 0.5005, "step": 10313 }, { "epoch": 0.67, "grad_norm": 1.251438856124878, "learning_rate": 2.654519081873332e-06, "loss": 0.5062, "step": 10314 }, { "epoch": 0.67, "grad_norm": 1.2319645881652832, "learning_rate": 2.653595965376645e-06, "loss": 0.4844, "step": 10315 }, { "epoch": 0.67, "grad_norm": 1.13107430934906, "learning_rate": 2.652672951435226e-06, "loss": 0.5118, "step": 10316 }, { "epoch": 0.67, "grad_norm": 1.1805895566940308, "learning_rate": 2.651750040089416e-06, "loss": 0.5087, "step": 10317 }, { "epoch": 0.67, "grad_norm": 1.159537672996521, "learning_rate": 2.6508272313795545e-06, "loss": 0.4573, "step": 10318 }, { "epoch": 0.67, "grad_norm": 1.3264671564102173, "learning_rate": 2.649904525345972e-06, "loss": 0.5376, "step": 10319 }, { "epoch": 0.67, "grad_norm": 1.220833420753479, "learning_rate": 2.6489819220289993e-06, "loss": 0.5332, "step": 10320 }, { "epoch": 0.67, "grad_norm": 1.2030128240585327, "learning_rate": 2.6480594214689612e-06, "loss": 0.5817, "step": 10321 }, { "epoch": 0.67, "grad_norm": 1.2180452346801758, "learning_rate": 2.647137023706178e-06, "loss": 0.5252, "step": 10322 }, { "epoch": 0.67, "grad_norm": 1.1626012325286865, "learning_rate": 2.646214728780964e-06, "loss": 0.5201, "step": 10323 }, { "epoch": 0.67, "grad_norm": 1.156903862953186, "learning_rate": 2.6452925367336346e-06, "loss": 0.5217, "step": 10324 }, { "epoch": 0.67, "grad_norm": 1.2597461938858032, "learning_rate": 2.6443704476044902e-06, "loss": 0.5482, "step": 10325 }, { "epoch": 0.67, "grad_norm": 1.1048476696014404, "learning_rate": 2.643448461433836e-06, "loss": 0.5074, "step": 10326 }, { "epoch": 0.67, "grad_norm": 1.094860315322876, "learning_rate": 2.64252657826197e-06, "loss": 0.5081, "step": 10327 }, { "epoch": 0.67, "grad_norm": 1.1180564165115356, "learning_rate": 2.641604798129185e-06, "loss": 0.5063, "step": 10328 }, { "epoch": 0.67, "grad_norm": 1.1485847234725952, "learning_rate": 2.64068312107577e-06, "loss": 0.4909, "step": 10329 }, { "epoch": 0.67, "grad_norm": 1.1411800384521484, "learning_rate": 2.63976154714201e-06, "loss": 0.4976, "step": 10330 }, { "epoch": 0.67, "grad_norm": 1.1852012872695923, "learning_rate": 2.638840076368181e-06, "loss": 0.5015, "step": 10331 }, { "epoch": 0.67, "grad_norm": 1.171403408050537, "learning_rate": 2.63791870879456e-06, "loss": 0.4966, "step": 10332 }, { "epoch": 0.67, "grad_norm": 1.2119766473770142, "learning_rate": 2.6369974444614186e-06, "loss": 0.564, "step": 10333 }, { "epoch": 0.67, "grad_norm": 1.1425565481185913, "learning_rate": 2.636076283409023e-06, "loss": 0.499, "step": 10334 }, { "epoch": 0.67, "grad_norm": 1.2159953117370605, "learning_rate": 2.6351552256776334e-06, "loss": 0.5039, "step": 10335 }, { "epoch": 0.67, "grad_norm": 1.146856427192688, "learning_rate": 2.63423427130751e-06, "loss": 0.5354, "step": 10336 }, { "epoch": 0.67, "grad_norm": 1.2567126750946045, "learning_rate": 2.6333134203389e-06, "loss": 0.5357, "step": 10337 }, { "epoch": 0.67, "grad_norm": 1.2644776105880737, "learning_rate": 2.632392672812054e-06, "loss": 0.5171, "step": 10338 }, { "epoch": 0.67, "grad_norm": 1.2708605527877808, "learning_rate": 2.631472028767217e-06, "loss": 0.5118, "step": 10339 }, { "epoch": 0.67, "grad_norm": 1.1347233057022095, "learning_rate": 2.6305514882446283e-06, "loss": 0.4508, "step": 10340 }, { "epoch": 0.67, "grad_norm": 1.224071979522705, "learning_rate": 2.629631051284518e-06, "loss": 0.5136, "step": 10341 }, { "epoch": 0.67, "grad_norm": 1.1841249465942383, "learning_rate": 2.6287107179271196e-06, "loss": 0.5572, "step": 10342 }, { "epoch": 0.67, "grad_norm": 1.1806854009628296, "learning_rate": 2.6277904882126577e-06, "loss": 0.5009, "step": 10343 }, { "epoch": 0.67, "grad_norm": 1.2574281692504883, "learning_rate": 2.6268703621813528e-06, "loss": 0.4798, "step": 10344 }, { "epoch": 0.67, "grad_norm": 1.1955981254577637, "learning_rate": 2.625950339873424e-06, "loss": 0.5729, "step": 10345 }, { "epoch": 0.67, "grad_norm": 1.1434969902038574, "learning_rate": 2.6250304213290782e-06, "loss": 0.541, "step": 10346 }, { "epoch": 0.67, "grad_norm": 1.1761037111282349, "learning_rate": 2.624110606588526e-06, "loss": 0.5041, "step": 10347 }, { "epoch": 0.67, "grad_norm": 1.2805298566818237, "learning_rate": 2.623190895691968e-06, "loss": 0.5206, "step": 10348 }, { "epoch": 0.67, "grad_norm": 1.2258886098861694, "learning_rate": 2.622271288679607e-06, "loss": 0.5873, "step": 10349 }, { "epoch": 0.67, "grad_norm": 1.147230625152588, "learning_rate": 2.621351785591631e-06, "loss": 0.504, "step": 10350 }, { "epoch": 0.67, "grad_norm": 1.2192009687423706, "learning_rate": 2.6204323864682312e-06, "loss": 0.5232, "step": 10351 }, { "epoch": 0.67, "grad_norm": 1.0879803895950317, "learning_rate": 2.6195130913495923e-06, "loss": 0.5516, "step": 10352 }, { "epoch": 0.67, "grad_norm": 1.1401869058609009, "learning_rate": 2.6185939002758953e-06, "loss": 0.4762, "step": 10353 }, { "epoch": 0.67, "grad_norm": 1.149148941040039, "learning_rate": 2.617674813287316e-06, "loss": 0.5001, "step": 10354 }, { "epoch": 0.67, "grad_norm": 1.244644284248352, "learning_rate": 2.6167558304240227e-06, "loss": 0.5708, "step": 10355 }, { "epoch": 0.67, "grad_norm": 1.2984708547592163, "learning_rate": 2.6158369517261824e-06, "loss": 0.5109, "step": 10356 }, { "epoch": 0.67, "grad_norm": 1.1424951553344727, "learning_rate": 2.614918177233958e-06, "loss": 0.5041, "step": 10357 }, { "epoch": 0.67, "grad_norm": 1.1199623346328735, "learning_rate": 2.613999506987507e-06, "loss": 0.5283, "step": 10358 }, { "epoch": 0.67, "grad_norm": 1.2216466665267944, "learning_rate": 2.613080941026983e-06, "loss": 0.4879, "step": 10359 }, { "epoch": 0.67, "grad_norm": 1.2270598411560059, "learning_rate": 2.612162479392531e-06, "loss": 0.5586, "step": 10360 }, { "epoch": 0.67, "grad_norm": 1.1646218299865723, "learning_rate": 2.6112441221242964e-06, "loss": 0.5626, "step": 10361 }, { "epoch": 0.67, "grad_norm": 1.1320645809173584, "learning_rate": 2.610325869262418e-06, "loss": 0.5257, "step": 10362 }, { "epoch": 0.67, "grad_norm": 1.269135594367981, "learning_rate": 2.6094077208470304e-06, "loss": 0.5442, "step": 10363 }, { "epoch": 0.67, "grad_norm": 1.1865308284759521, "learning_rate": 2.6084896769182633e-06, "loss": 0.5605, "step": 10364 }, { "epoch": 0.67, "grad_norm": 1.1297074556350708, "learning_rate": 2.6075717375162447e-06, "loss": 0.5158, "step": 10365 }, { "epoch": 0.67, "grad_norm": 1.2261719703674316, "learning_rate": 2.6066539026810905e-06, "loss": 0.488, "step": 10366 }, { "epoch": 0.67, "grad_norm": 1.2225421667099, "learning_rate": 2.6057361724529193e-06, "loss": 0.4889, "step": 10367 }, { "epoch": 0.67, "grad_norm": 1.1610130071640015, "learning_rate": 2.6048185468718436e-06, "loss": 0.5369, "step": 10368 }, { "epoch": 0.67, "grad_norm": 1.4074236154556274, "learning_rate": 2.6039010259779685e-06, "loss": 0.5344, "step": 10369 }, { "epoch": 0.67, "grad_norm": 1.3255285024642944, "learning_rate": 2.602983609811398e-06, "loss": 0.5135, "step": 10370 }, { "epoch": 0.67, "grad_norm": 1.2603718042373657, "learning_rate": 2.6020662984122314e-06, "loss": 0.5733, "step": 10371 }, { "epoch": 0.67, "grad_norm": 1.2694056034088135, "learning_rate": 2.6011490918205584e-06, "loss": 0.554, "step": 10372 }, { "epoch": 0.67, "grad_norm": 1.212044596672058, "learning_rate": 2.6002319900764688e-06, "loss": 0.5201, "step": 10373 }, { "epoch": 0.67, "grad_norm": 1.1765644550323486, "learning_rate": 2.599314993220048e-06, "loss": 0.5331, "step": 10374 }, { "epoch": 0.67, "grad_norm": 1.1635054349899292, "learning_rate": 2.598398101291375e-06, "loss": 0.5444, "step": 10375 }, { "epoch": 0.67, "grad_norm": 1.221307396888733, "learning_rate": 2.5974813143305244e-06, "loss": 0.4612, "step": 10376 }, { "epoch": 0.67, "grad_norm": 1.1682134866714478, "learning_rate": 2.5965646323775695e-06, "loss": 0.5258, "step": 10377 }, { "epoch": 0.67, "grad_norm": 1.4131033420562744, "learning_rate": 2.5956480554725717e-06, "loss": 0.5294, "step": 10378 }, { "epoch": 0.67, "grad_norm": 1.1233714818954468, "learning_rate": 2.594731583655593e-06, "loss": 0.5015, "step": 10379 }, { "epoch": 0.67, "grad_norm": 1.1961778402328491, "learning_rate": 2.593815216966692e-06, "loss": 0.5322, "step": 10380 }, { "epoch": 0.67, "grad_norm": 1.1450244188308716, "learning_rate": 2.5928989554459195e-06, "loss": 0.5233, "step": 10381 }, { "epoch": 0.67, "grad_norm": 1.3634251356124878, "learning_rate": 2.5919827991333237e-06, "loss": 0.5543, "step": 10382 }, { "epoch": 0.67, "grad_norm": 1.153716802597046, "learning_rate": 2.591066748068949e-06, "loss": 0.4741, "step": 10383 }, { "epoch": 0.67, "grad_norm": 1.2943408489227295, "learning_rate": 2.590150802292829e-06, "loss": 0.5341, "step": 10384 }, { "epoch": 0.67, "grad_norm": 1.1840007305145264, "learning_rate": 2.589234961845e-06, "loss": 0.4697, "step": 10385 }, { "epoch": 0.67, "grad_norm": 1.1340036392211914, "learning_rate": 2.588319226765491e-06, "loss": 0.5038, "step": 10386 }, { "epoch": 0.67, "grad_norm": 1.0490829944610596, "learning_rate": 2.5874035970943267e-06, "loss": 0.4723, "step": 10387 }, { "epoch": 0.67, "grad_norm": 1.1891751289367676, "learning_rate": 2.586488072871527e-06, "loss": 0.5495, "step": 10388 }, { "epoch": 0.67, "grad_norm": 1.1747519969940186, "learning_rate": 2.5855726541371075e-06, "loss": 0.4889, "step": 10389 }, { "epoch": 0.67, "grad_norm": 1.1214045286178589, "learning_rate": 2.584657340931077e-06, "loss": 0.5293, "step": 10390 }, { "epoch": 0.67, "grad_norm": 1.4136948585510254, "learning_rate": 2.583742133293442e-06, "loss": 0.5109, "step": 10391 }, { "epoch": 0.67, "grad_norm": 1.1598366498947144, "learning_rate": 2.5828270312642044e-06, "loss": 0.5049, "step": 10392 }, { "epoch": 0.67, "grad_norm": 1.2048355340957642, "learning_rate": 2.5819120348833605e-06, "loss": 0.5705, "step": 10393 }, { "epoch": 0.67, "grad_norm": 1.241976022720337, "learning_rate": 2.580997144190903e-06, "loss": 0.5603, "step": 10394 }, { "epoch": 0.67, "grad_norm": 1.1978806257247925, "learning_rate": 2.580082359226822e-06, "loss": 0.5114, "step": 10395 }, { "epoch": 0.67, "grad_norm": 1.2299408912658691, "learning_rate": 2.579167680031095e-06, "loss": 0.5072, "step": 10396 }, { "epoch": 0.67, "grad_norm": 1.1851637363433838, "learning_rate": 2.5782531066437026e-06, "loss": 0.5462, "step": 10397 }, { "epoch": 0.67, "grad_norm": 1.1462167501449585, "learning_rate": 2.577338639104619e-06, "loss": 0.497, "step": 10398 }, { "epoch": 0.67, "grad_norm": 1.1472243070602417, "learning_rate": 2.576424277453813e-06, "loss": 0.5071, "step": 10399 }, { "epoch": 0.67, "grad_norm": 1.2308613061904907, "learning_rate": 2.575510021731251e-06, "loss": 0.4862, "step": 10400 }, { "epoch": 0.67, "grad_norm": 1.319200038909912, "learning_rate": 2.574595871976888e-06, "loss": 0.5785, "step": 10401 }, { "epoch": 0.67, "grad_norm": 1.2128654718399048, "learning_rate": 2.573681828230683e-06, "loss": 0.5365, "step": 10402 }, { "epoch": 0.67, "grad_norm": 1.141163945198059, "learning_rate": 2.5727678905325846e-06, "loss": 0.5208, "step": 10403 }, { "epoch": 0.67, "grad_norm": 1.1364365816116333, "learning_rate": 2.5718540589225395e-06, "loss": 0.5165, "step": 10404 }, { "epoch": 0.67, "grad_norm": 1.1217089891433716, "learning_rate": 2.570940333440488e-06, "loss": 0.5082, "step": 10405 }, { "epoch": 0.67, "grad_norm": 1.065331220626831, "learning_rate": 2.5700267141263706e-06, "loss": 0.5034, "step": 10406 }, { "epoch": 0.67, "grad_norm": 1.3761154413223267, "learning_rate": 2.5691132010201137e-06, "loss": 0.5431, "step": 10407 }, { "epoch": 0.67, "grad_norm": 1.2045334577560425, "learning_rate": 2.568199794161647e-06, "loss": 0.5669, "step": 10408 }, { "epoch": 0.67, "grad_norm": 1.1249078512191772, "learning_rate": 2.567286493590893e-06, "loss": 0.4855, "step": 10409 }, { "epoch": 0.67, "grad_norm": 1.1858843564987183, "learning_rate": 2.56637329934777e-06, "loss": 0.5333, "step": 10410 }, { "epoch": 0.67, "grad_norm": 1.210128903388977, "learning_rate": 2.5654602114721917e-06, "loss": 0.4798, "step": 10411 }, { "epoch": 0.67, "grad_norm": 1.2427880764007568, "learning_rate": 2.5645472300040676e-06, "loss": 0.5366, "step": 10412 }, { "epoch": 0.67, "grad_norm": 1.2617229223251343, "learning_rate": 2.5636343549832987e-06, "loss": 0.4907, "step": 10413 }, { "epoch": 0.67, "grad_norm": 1.168552041053772, "learning_rate": 2.5627215864497866e-06, "loss": 0.5433, "step": 10414 }, { "epoch": 0.67, "grad_norm": 1.2070717811584473, "learning_rate": 2.561808924443426e-06, "loss": 0.5297, "step": 10415 }, { "epoch": 0.67, "grad_norm": 1.1022883653640747, "learning_rate": 2.5608963690041058e-06, "loss": 0.459, "step": 10416 }, { "epoch": 0.67, "grad_norm": 1.2623522281646729, "learning_rate": 2.5599839201717124e-06, "loss": 0.5249, "step": 10417 }, { "epoch": 0.67, "grad_norm": 1.201804757118225, "learning_rate": 2.5590715779861292e-06, "loss": 0.5247, "step": 10418 }, { "epoch": 0.67, "grad_norm": 1.2219023704528809, "learning_rate": 2.5581593424872274e-06, "loss": 0.5262, "step": 10419 }, { "epoch": 0.67, "grad_norm": 1.204437494277954, "learning_rate": 2.557247213714881e-06, "loss": 0.5084, "step": 10420 }, { "epoch": 0.67, "grad_norm": 1.2433686256408691, "learning_rate": 2.556335191708956e-06, "loss": 0.5083, "step": 10421 }, { "epoch": 0.67, "grad_norm": 1.1936129331588745, "learning_rate": 2.5554232765093144e-06, "loss": 0.5509, "step": 10422 }, { "epoch": 0.67, "grad_norm": 1.1779929399490356, "learning_rate": 2.554511468155816e-06, "loss": 0.5281, "step": 10423 }, { "epoch": 0.67, "grad_norm": 1.2295818328857422, "learning_rate": 2.5535997666883127e-06, "loss": 0.5427, "step": 10424 }, { "epoch": 0.67, "grad_norm": 1.19077467918396, "learning_rate": 2.5526881721466502e-06, "loss": 0.5606, "step": 10425 }, { "epoch": 0.67, "grad_norm": 1.1455968618392944, "learning_rate": 2.5517766845706728e-06, "loss": 0.5123, "step": 10426 }, { "epoch": 0.67, "grad_norm": 1.1225731372833252, "learning_rate": 2.5508653040002206e-06, "loss": 0.5102, "step": 10427 }, { "epoch": 0.67, "grad_norm": 1.1498984098434448, "learning_rate": 2.549954030475127e-06, "loss": 0.5518, "step": 10428 }, { "epoch": 0.67, "grad_norm": 1.1364630460739136, "learning_rate": 2.549042864035224e-06, "loss": 0.4855, "step": 10429 }, { "epoch": 0.67, "grad_norm": 1.3165806531906128, "learning_rate": 2.548131804720331e-06, "loss": 0.5376, "step": 10430 }, { "epoch": 0.67, "grad_norm": 1.3034268617630005, "learning_rate": 2.5472208525702715e-06, "loss": 0.5312, "step": 10431 }, { "epoch": 0.67, "grad_norm": 1.2339539527893066, "learning_rate": 2.5463100076248605e-06, "loss": 0.5161, "step": 10432 }, { "epoch": 0.67, "grad_norm": 1.2808750867843628, "learning_rate": 2.5453992699239084e-06, "loss": 0.4936, "step": 10433 }, { "epoch": 0.67, "grad_norm": 1.1915498971939087, "learning_rate": 2.5444886395072223e-06, "loss": 0.5625, "step": 10434 }, { "epoch": 0.67, "grad_norm": 1.1938132047653198, "learning_rate": 2.5435781164146014e-06, "loss": 0.5438, "step": 10435 }, { "epoch": 0.67, "grad_norm": 1.2211108207702637, "learning_rate": 2.542667700685843e-06, "loss": 0.5525, "step": 10436 }, { "epoch": 0.67, "grad_norm": 1.2380194664001465, "learning_rate": 2.54175739236074e-06, "loss": 0.5482, "step": 10437 }, { "epoch": 0.67, "grad_norm": 1.2760761976242065, "learning_rate": 2.54084719147908e-06, "loss": 0.5413, "step": 10438 }, { "epoch": 0.67, "grad_norm": 1.1829346418380737, "learning_rate": 2.5399370980806436e-06, "loss": 0.5069, "step": 10439 }, { "epoch": 0.67, "grad_norm": 1.1787152290344238, "learning_rate": 2.5390271122052092e-06, "loss": 0.4871, "step": 10440 }, { "epoch": 0.67, "grad_norm": 1.1663874387741089, "learning_rate": 2.5381172338925496e-06, "loss": 0.5268, "step": 10441 }, { "epoch": 0.67, "grad_norm": 1.0963047742843628, "learning_rate": 2.537207463182435e-06, "loss": 0.5141, "step": 10442 }, { "epoch": 0.67, "grad_norm": 1.1589003801345825, "learning_rate": 2.53629780011463e-06, "loss": 0.553, "step": 10443 }, { "epoch": 0.67, "grad_norm": 1.1152212619781494, "learning_rate": 2.5353882447288892e-06, "loss": 0.5034, "step": 10444 }, { "epoch": 0.67, "grad_norm": 1.111355185508728, "learning_rate": 2.5344787970649696e-06, "loss": 0.4825, "step": 10445 }, { "epoch": 0.67, "grad_norm": 1.1538420915603638, "learning_rate": 2.533569457162621e-06, "loss": 0.5213, "step": 10446 }, { "epoch": 0.67, "grad_norm": 1.1895116567611694, "learning_rate": 2.5326602250615894e-06, "loss": 0.5273, "step": 10447 }, { "epoch": 0.67, "grad_norm": 1.3155165910720825, "learning_rate": 2.5317511008016118e-06, "loss": 0.5431, "step": 10448 }, { "epoch": 0.67, "grad_norm": 1.1965937614440918, "learning_rate": 2.5308420844224257e-06, "loss": 0.544, "step": 10449 }, { "epoch": 0.67, "grad_norm": 1.2788629531860352, "learning_rate": 2.529933175963761e-06, "loss": 0.5112, "step": 10450 }, { "epoch": 0.67, "grad_norm": 1.2466317415237427, "learning_rate": 2.529024375465344e-06, "loss": 0.5053, "step": 10451 }, { "epoch": 0.67, "grad_norm": 1.122989296913147, "learning_rate": 2.5281156829668973e-06, "loss": 0.5657, "step": 10452 }, { "epoch": 0.67, "grad_norm": 1.1689974069595337, "learning_rate": 2.5272070985081387e-06, "loss": 0.5328, "step": 10453 }, { "epoch": 0.67, "grad_norm": 1.2836662530899048, "learning_rate": 2.5262986221287754e-06, "loss": 0.5091, "step": 10454 }, { "epoch": 0.67, "grad_norm": 1.282191276550293, "learning_rate": 2.525390253868517e-06, "loss": 0.5323, "step": 10455 }, { "epoch": 0.67, "grad_norm": 1.2372328042984009, "learning_rate": 2.524481993767066e-06, "loss": 0.5583, "step": 10456 }, { "epoch": 0.67, "grad_norm": 1.1476900577545166, "learning_rate": 2.5235738418641207e-06, "loss": 0.496, "step": 10457 }, { "epoch": 0.68, "grad_norm": 1.254101276397705, "learning_rate": 2.522665798199373e-06, "loss": 0.4998, "step": 10458 }, { "epoch": 0.68, "grad_norm": 1.1646578311920166, "learning_rate": 2.5217578628125138e-06, "loss": 0.5704, "step": 10459 }, { "epoch": 0.68, "grad_norm": 1.0615261793136597, "learning_rate": 2.5208500357432227e-06, "loss": 0.5064, "step": 10460 }, { "epoch": 0.68, "grad_norm": 1.193942904472351, "learning_rate": 2.51994231703118e-06, "loss": 0.4956, "step": 10461 }, { "epoch": 0.68, "grad_norm": 1.0964969396591187, "learning_rate": 2.51903470671606e-06, "loss": 0.503, "step": 10462 }, { "epoch": 0.68, "grad_norm": 1.1558119058609009, "learning_rate": 2.5181272048375314e-06, "loss": 0.5083, "step": 10463 }, { "epoch": 0.68, "grad_norm": 1.1419854164123535, "learning_rate": 2.5172198114352604e-06, "loss": 0.5228, "step": 10464 }, { "epoch": 0.68, "grad_norm": 1.1806228160858154, "learning_rate": 2.5163125265489073e-06, "loss": 0.482, "step": 10465 }, { "epoch": 0.68, "grad_norm": 1.105676531791687, "learning_rate": 2.515405350218123e-06, "loss": 0.4963, "step": 10466 }, { "epoch": 0.68, "grad_norm": 1.2819641828536987, "learning_rate": 2.5144982824825616e-06, "loss": 0.5847, "step": 10467 }, { "epoch": 0.68, "grad_norm": 1.2689392566680908, "learning_rate": 2.513591323381868e-06, "loss": 0.5488, "step": 10468 }, { "epoch": 0.68, "grad_norm": 1.0888607501983643, "learning_rate": 2.512684472955681e-06, "loss": 0.5216, "step": 10469 }, { "epoch": 0.68, "grad_norm": 1.2403100728988647, "learning_rate": 2.5117777312436393e-06, "loss": 0.5504, "step": 10470 }, { "epoch": 0.68, "grad_norm": 1.151198148727417, "learning_rate": 2.5108710982853747e-06, "loss": 0.5462, "step": 10471 }, { "epoch": 0.68, "grad_norm": 1.1569719314575195, "learning_rate": 2.509964574120511e-06, "loss": 0.5176, "step": 10472 }, { "epoch": 0.68, "grad_norm": 1.2325085401535034, "learning_rate": 2.509058158788671e-06, "loss": 0.502, "step": 10473 }, { "epoch": 0.68, "grad_norm": 1.1868181228637695, "learning_rate": 2.5081518523294723e-06, "loss": 0.5304, "step": 10474 }, { "epoch": 0.68, "grad_norm": 1.3762718439102173, "learning_rate": 2.5072456547825273e-06, "loss": 0.5997, "step": 10475 }, { "epoch": 0.68, "grad_norm": 1.0906541347503662, "learning_rate": 2.5063395661874435e-06, "loss": 0.5132, "step": 10476 }, { "epoch": 0.68, "grad_norm": 1.1556570529937744, "learning_rate": 2.505433586583825e-06, "loss": 0.5523, "step": 10477 }, { "epoch": 0.68, "grad_norm": 1.1599810123443604, "learning_rate": 2.5045277160112665e-06, "loss": 0.5267, "step": 10478 }, { "epoch": 0.68, "grad_norm": 1.3024160861968994, "learning_rate": 2.503621954509363e-06, "loss": 0.5571, "step": 10479 }, { "epoch": 0.68, "grad_norm": 1.2119470834732056, "learning_rate": 2.502716302117703e-06, "loss": 0.5323, "step": 10480 }, { "epoch": 0.68, "grad_norm": 1.240614652633667, "learning_rate": 2.5018107588758712e-06, "loss": 0.5178, "step": 10481 }, { "epoch": 0.68, "grad_norm": 1.2110381126403809, "learning_rate": 2.500905324823445e-06, "loss": 0.5398, "step": 10482 }, { "epoch": 0.68, "grad_norm": 1.2598836421966553, "learning_rate": 2.5000000000000015e-06, "loss": 0.557, "step": 10483 }, { "epoch": 0.68, "grad_norm": 1.1495524644851685, "learning_rate": 2.4990947844451057e-06, "loss": 0.5076, "step": 10484 }, { "epoch": 0.68, "grad_norm": 1.200849175453186, "learning_rate": 2.4981896781983244e-06, "loss": 0.5619, "step": 10485 }, { "epoch": 0.68, "grad_norm": 1.0999025106430054, "learning_rate": 2.497284681299218e-06, "loss": 0.4828, "step": 10486 }, { "epoch": 0.68, "grad_norm": 1.2315665483474731, "learning_rate": 2.4963797937873406e-06, "loss": 0.4896, "step": 10487 }, { "epoch": 0.68, "grad_norm": 1.3480863571166992, "learning_rate": 2.495475015702245e-06, "loss": 0.5311, "step": 10488 }, { "epoch": 0.68, "grad_norm": 1.1780471801757812, "learning_rate": 2.4945703470834733e-06, "loss": 0.541, "step": 10489 }, { "epoch": 0.68, "grad_norm": 1.1714701652526855, "learning_rate": 2.4936657879705672e-06, "loss": 0.5188, "step": 10490 }, { "epoch": 0.68, "grad_norm": 1.1966062784194946, "learning_rate": 2.492761338403063e-06, "loss": 0.5226, "step": 10491 }, { "epoch": 0.68, "grad_norm": 1.256734013557434, "learning_rate": 2.491856998420492e-06, "loss": 0.5809, "step": 10492 }, { "epoch": 0.68, "grad_norm": 1.1074858903884888, "learning_rate": 2.4909527680623807e-06, "loss": 0.5034, "step": 10493 }, { "epoch": 0.68, "grad_norm": 1.1295496225357056, "learning_rate": 2.490048647368252e-06, "loss": 0.5319, "step": 10494 }, { "epoch": 0.68, "grad_norm": 1.331191062927246, "learning_rate": 2.4891446363776193e-06, "loss": 0.611, "step": 10495 }, { "epoch": 0.68, "grad_norm": 1.1707102060317993, "learning_rate": 2.488240735129997e-06, "loss": 0.5028, "step": 10496 }, { "epoch": 0.68, "grad_norm": 1.199066400527954, "learning_rate": 2.4873369436648914e-06, "loss": 0.5459, "step": 10497 }, { "epoch": 0.68, "grad_norm": 1.1787198781967163, "learning_rate": 2.486433262021805e-06, "loss": 0.5195, "step": 10498 }, { "epoch": 0.68, "grad_norm": 1.319607138633728, "learning_rate": 2.4855296902402364e-06, "loss": 0.5498, "step": 10499 }, { "epoch": 0.68, "grad_norm": 1.225160002708435, "learning_rate": 2.4846262283596787e-06, "loss": 0.5057, "step": 10500 }, { "epoch": 0.68, "grad_norm": 1.28316330909729, "learning_rate": 2.4837228764196176e-06, "loss": 0.5329, "step": 10501 }, { "epoch": 0.68, "grad_norm": 1.3132874965667725, "learning_rate": 2.4828196344595366e-06, "loss": 0.5708, "step": 10502 }, { "epoch": 0.68, "grad_norm": 1.2560458183288574, "learning_rate": 2.4819165025189155e-06, "loss": 0.5004, "step": 10503 }, { "epoch": 0.68, "grad_norm": 1.2679194211959839, "learning_rate": 2.4810134806372278e-06, "loss": 0.5425, "step": 10504 }, { "epoch": 0.68, "grad_norm": 1.2003377676010132, "learning_rate": 2.4801105688539413e-06, "loss": 0.532, "step": 10505 }, { "epoch": 0.68, "grad_norm": 1.293371558189392, "learning_rate": 2.479207767208522e-06, "loss": 0.5054, "step": 10506 }, { "epoch": 0.68, "grad_norm": 1.2687667608261108, "learning_rate": 2.4783050757404257e-06, "loss": 0.4674, "step": 10507 }, { "epoch": 0.68, "grad_norm": 1.1495281457901, "learning_rate": 2.477402494489109e-06, "loss": 0.5024, "step": 10508 }, { "epoch": 0.68, "grad_norm": 1.2517648935317993, "learning_rate": 2.4765000234940206e-06, "loss": 0.5225, "step": 10509 }, { "epoch": 0.68, "grad_norm": 1.2399694919586182, "learning_rate": 2.475597662794605e-06, "loss": 0.5155, "step": 10510 }, { "epoch": 0.68, "grad_norm": 1.1663081645965576, "learning_rate": 2.474695412430303e-06, "loss": 0.5681, "step": 10511 }, { "epoch": 0.68, "grad_norm": 1.1505759954452515, "learning_rate": 2.4737932724405512e-06, "loss": 0.5336, "step": 10512 }, { "epoch": 0.68, "grad_norm": 1.2591207027435303, "learning_rate": 2.4728912428647756e-06, "loss": 0.5093, "step": 10513 }, { "epoch": 0.68, "grad_norm": 1.2364327907562256, "learning_rate": 2.4719893237424038e-06, "loss": 0.4806, "step": 10514 }, { "epoch": 0.68, "grad_norm": 1.0429881811141968, "learning_rate": 2.471087515112856e-06, "loss": 0.4695, "step": 10515 }, { "epoch": 0.68, "grad_norm": 1.3036736249923706, "learning_rate": 2.4701858170155483e-06, "loss": 0.5699, "step": 10516 }, { "epoch": 0.68, "grad_norm": 1.301438808441162, "learning_rate": 2.469284229489892e-06, "loss": 0.5345, "step": 10517 }, { "epoch": 0.68, "grad_norm": 1.1451324224472046, "learning_rate": 2.4683827525752947e-06, "loss": 0.5297, "step": 10518 }, { "epoch": 0.68, "grad_norm": 1.1508159637451172, "learning_rate": 2.467481386311153e-06, "loss": 0.4786, "step": 10519 }, { "epoch": 0.68, "grad_norm": 1.3547985553741455, "learning_rate": 2.4665801307368665e-06, "loss": 0.5898, "step": 10520 }, { "epoch": 0.68, "grad_norm": 1.3648310899734497, "learning_rate": 2.4656789858918256e-06, "loss": 0.549, "step": 10521 }, { "epoch": 0.68, "grad_norm": 1.2424147129058838, "learning_rate": 2.464777951815418e-06, "loss": 0.5232, "step": 10522 }, { "epoch": 0.68, "grad_norm": 1.1996718645095825, "learning_rate": 2.463877028547027e-06, "loss": 0.5167, "step": 10523 }, { "epoch": 0.68, "grad_norm": 1.2159788608551025, "learning_rate": 2.4629762161260263e-06, "loss": 0.542, "step": 10524 }, { "epoch": 0.68, "grad_norm": 1.2063480615615845, "learning_rate": 2.462075514591789e-06, "loss": 0.5187, "step": 10525 }, { "epoch": 0.68, "grad_norm": 1.335601568222046, "learning_rate": 2.461174923983683e-06, "loss": 0.5899, "step": 10526 }, { "epoch": 0.68, "grad_norm": 1.2720530033111572, "learning_rate": 2.460274444341073e-06, "loss": 0.5068, "step": 10527 }, { "epoch": 0.68, "grad_norm": 1.2110755443572998, "learning_rate": 2.4593740757033124e-06, "loss": 0.5427, "step": 10528 }, { "epoch": 0.68, "grad_norm": 1.1098628044128418, "learning_rate": 2.4584738181097564e-06, "loss": 0.5499, "step": 10529 }, { "epoch": 0.68, "grad_norm": 1.246015191078186, "learning_rate": 2.457573671599752e-06, "loss": 0.5463, "step": 10530 }, { "epoch": 0.68, "grad_norm": 1.136095404624939, "learning_rate": 2.456673636212643e-06, "loss": 0.4955, "step": 10531 }, { "epoch": 0.68, "grad_norm": 1.195878028869629, "learning_rate": 2.45577371198777e-06, "loss": 0.5335, "step": 10532 }, { "epoch": 0.68, "grad_norm": 1.2382160425186157, "learning_rate": 2.4548738989644617e-06, "loss": 0.5612, "step": 10533 }, { "epoch": 0.68, "grad_norm": 1.3118232488632202, "learning_rate": 2.4539741971820486e-06, "loss": 0.5415, "step": 10534 }, { "epoch": 0.68, "grad_norm": 1.2084057331085205, "learning_rate": 2.4530746066798543e-06, "loss": 0.5565, "step": 10535 }, { "epoch": 0.68, "grad_norm": 1.1942362785339355, "learning_rate": 2.4521751274972e-06, "loss": 0.5056, "step": 10536 }, { "epoch": 0.68, "grad_norm": 1.1313837766647339, "learning_rate": 2.4512757596733954e-06, "loss": 0.5409, "step": 10537 }, { "epoch": 0.68, "grad_norm": 1.1754313707351685, "learning_rate": 2.4503765032477515e-06, "loss": 0.4816, "step": 10538 }, { "epoch": 0.68, "grad_norm": 1.0986943244934082, "learning_rate": 2.4494773582595727e-06, "loss": 0.485, "step": 10539 }, { "epoch": 0.68, "grad_norm": 1.184421420097351, "learning_rate": 2.4485783247481574e-06, "loss": 0.5039, "step": 10540 }, { "epoch": 0.68, "grad_norm": 1.3024424314498901, "learning_rate": 2.447679402752804e-06, "loss": 0.5726, "step": 10541 }, { "epoch": 0.68, "grad_norm": 1.1371705532073975, "learning_rate": 2.4467805923127956e-06, "loss": 0.4735, "step": 10542 }, { "epoch": 0.68, "grad_norm": 1.2218217849731445, "learning_rate": 2.445881893467421e-06, "loss": 0.5773, "step": 10543 }, { "epoch": 0.68, "grad_norm": 1.1053102016448975, "learning_rate": 2.444983306255959e-06, "loss": 0.5229, "step": 10544 }, { "epoch": 0.68, "grad_norm": 1.0824947357177734, "learning_rate": 2.4440848307176845e-06, "loss": 0.5167, "step": 10545 }, { "epoch": 0.68, "grad_norm": 1.0602103471755981, "learning_rate": 2.4431864668918677e-06, "loss": 0.4867, "step": 10546 }, { "epoch": 0.68, "grad_norm": 1.113061547279358, "learning_rate": 2.4422882148177757e-06, "loss": 0.4944, "step": 10547 }, { "epoch": 0.68, "grad_norm": 1.125885009765625, "learning_rate": 2.441390074534665e-06, "loss": 0.5496, "step": 10548 }, { "epoch": 0.68, "grad_norm": 1.0838278532028198, "learning_rate": 2.440492046081793e-06, "loss": 0.4968, "step": 10549 }, { "epoch": 0.68, "grad_norm": 1.1853783130645752, "learning_rate": 2.4395941294984095e-06, "loss": 0.486, "step": 10550 }, { "epoch": 0.68, "grad_norm": 1.2321064472198486, "learning_rate": 2.4386963248237606e-06, "loss": 0.541, "step": 10551 }, { "epoch": 0.68, "grad_norm": 1.1988801956176758, "learning_rate": 2.4377986320970876e-06, "loss": 0.4971, "step": 10552 }, { "epoch": 0.68, "grad_norm": 1.2272424697875977, "learning_rate": 2.4369010513576275e-06, "loss": 0.5023, "step": 10553 }, { "epoch": 0.68, "grad_norm": 1.290009617805481, "learning_rate": 2.436003582644608e-06, "loss": 0.5379, "step": 10554 }, { "epoch": 0.68, "grad_norm": 1.4017460346221924, "learning_rate": 2.435106225997256e-06, "loss": 0.534, "step": 10555 }, { "epoch": 0.68, "grad_norm": 1.259947419166565, "learning_rate": 2.4342089814547942e-06, "loss": 0.5231, "step": 10556 }, { "epoch": 0.68, "grad_norm": 1.193896770477295, "learning_rate": 2.433311849056437e-06, "loss": 0.5195, "step": 10557 }, { "epoch": 0.68, "grad_norm": 1.1291661262512207, "learning_rate": 2.432414828841398e-06, "loss": 0.5285, "step": 10558 }, { "epoch": 0.68, "grad_norm": 1.217686653137207, "learning_rate": 2.431517920848883e-06, "loss": 0.5832, "step": 10559 }, { "epoch": 0.68, "grad_norm": 1.2524839639663696, "learning_rate": 2.4306211251180924e-06, "loss": 0.574, "step": 10560 }, { "epoch": 0.68, "grad_norm": 1.1899162530899048, "learning_rate": 2.429724441688222e-06, "loss": 0.5462, "step": 10561 }, { "epoch": 0.68, "grad_norm": 1.380712628364563, "learning_rate": 2.4288278705984652e-06, "loss": 0.529, "step": 10562 }, { "epoch": 0.68, "grad_norm": 1.2583069801330566, "learning_rate": 2.427931411888009e-06, "loss": 0.5973, "step": 10563 }, { "epoch": 0.68, "grad_norm": 1.2947909832000732, "learning_rate": 2.4270350655960345e-06, "loss": 0.5723, "step": 10564 }, { "epoch": 0.68, "grad_norm": 1.2122948169708252, "learning_rate": 2.4261388317617205e-06, "loss": 0.4883, "step": 10565 }, { "epoch": 0.68, "grad_norm": 1.1678440570831299, "learning_rate": 2.425242710424236e-06, "loss": 0.5558, "step": 10566 }, { "epoch": 0.68, "grad_norm": 1.137148141860962, "learning_rate": 2.4243467016227493e-06, "loss": 0.5004, "step": 10567 }, { "epoch": 0.68, "grad_norm": 1.2316559553146362, "learning_rate": 2.4234508053964236e-06, "loss": 0.5111, "step": 10568 }, { "epoch": 0.68, "grad_norm": 1.181719422340393, "learning_rate": 2.4225550217844153e-06, "loss": 0.5194, "step": 10569 }, { "epoch": 0.68, "grad_norm": 1.0488125085830688, "learning_rate": 2.421659350825879e-06, "loss": 0.4924, "step": 10570 }, { "epoch": 0.68, "grad_norm": 1.2395265102386475, "learning_rate": 2.420763792559958e-06, "loss": 0.5339, "step": 10571 }, { "epoch": 0.68, "grad_norm": 1.1283516883850098, "learning_rate": 2.419868347025797e-06, "loss": 0.565, "step": 10572 }, { "epoch": 0.68, "grad_norm": 1.1486051082611084, "learning_rate": 2.4189730142625347e-06, "loss": 0.5079, "step": 10573 }, { "epoch": 0.68, "grad_norm": 1.16555655002594, "learning_rate": 2.418077794309302e-06, "loss": 0.4956, "step": 10574 }, { "epoch": 0.68, "grad_norm": 1.1928153038024902, "learning_rate": 2.417182687205228e-06, "loss": 0.514, "step": 10575 }, { "epoch": 0.68, "grad_norm": 1.0647011995315552, "learning_rate": 2.4162876929894364e-06, "loss": 0.5068, "step": 10576 }, { "epoch": 0.68, "grad_norm": 1.2103667259216309, "learning_rate": 2.4153928117010422e-06, "loss": 0.5133, "step": 10577 }, { "epoch": 0.68, "grad_norm": 1.2637462615966797, "learning_rate": 2.414498043379159e-06, "loss": 0.5598, "step": 10578 }, { "epoch": 0.68, "grad_norm": 1.2519147396087646, "learning_rate": 2.413603388062897e-06, "loss": 0.489, "step": 10579 }, { "epoch": 0.68, "grad_norm": 1.2246856689453125, "learning_rate": 2.4127088457913566e-06, "loss": 0.5278, "step": 10580 }, { "epoch": 0.68, "grad_norm": 1.0975538492202759, "learning_rate": 2.4118144166036377e-06, "loss": 0.519, "step": 10581 }, { "epoch": 0.68, "grad_norm": 1.1524724960327148, "learning_rate": 2.4109201005388355e-06, "loss": 0.5108, "step": 10582 }, { "epoch": 0.68, "grad_norm": 1.114024043083191, "learning_rate": 2.4100258976360337e-06, "loss": 0.5227, "step": 10583 }, { "epoch": 0.68, "grad_norm": 1.096672773361206, "learning_rate": 2.409131807934317e-06, "loss": 0.5238, "step": 10584 }, { "epoch": 0.68, "grad_norm": 1.1570683717727661, "learning_rate": 2.408237831472765e-06, "loss": 0.5252, "step": 10585 }, { "epoch": 0.68, "grad_norm": 1.1617828607559204, "learning_rate": 2.40734396829045e-06, "loss": 0.5241, "step": 10586 }, { "epoch": 0.68, "grad_norm": 1.2174526453018188, "learning_rate": 2.4064502184264415e-06, "loss": 0.5221, "step": 10587 }, { "epoch": 0.68, "grad_norm": 1.1115070581436157, "learning_rate": 2.4055565819198048e-06, "loss": 0.5076, "step": 10588 }, { "epoch": 0.68, "grad_norm": 1.1036570072174072, "learning_rate": 2.4046630588095937e-06, "loss": 0.4583, "step": 10589 }, { "epoch": 0.68, "grad_norm": 1.1314005851745605, "learning_rate": 2.4037696491348642e-06, "loss": 0.5169, "step": 10590 }, { "epoch": 0.68, "grad_norm": 1.4006463289260864, "learning_rate": 2.402876352934665e-06, "loss": 0.5296, "step": 10591 }, { "epoch": 0.68, "grad_norm": 1.2080533504486084, "learning_rate": 2.4019831702480402e-06, "loss": 0.468, "step": 10592 }, { "epoch": 0.68, "grad_norm": 1.329267144203186, "learning_rate": 2.4010901011140273e-06, "loss": 0.5736, "step": 10593 }, { "epoch": 0.68, "grad_norm": 1.1798489093780518, "learning_rate": 2.4001971455716634e-06, "loss": 0.48, "step": 10594 }, { "epoch": 0.68, "grad_norm": 1.0857659578323364, "learning_rate": 2.399304303659972e-06, "loss": 0.5516, "step": 10595 }, { "epoch": 0.68, "grad_norm": 1.1865869760513306, "learning_rate": 2.3984115754179806e-06, "loss": 0.4905, "step": 10596 }, { "epoch": 0.68, "grad_norm": 1.1486284732818604, "learning_rate": 2.3975189608847065e-06, "loss": 0.4633, "step": 10597 }, { "epoch": 0.68, "grad_norm": 1.1706994771957397, "learning_rate": 2.396626460099164e-06, "loss": 0.5252, "step": 10598 }, { "epoch": 0.68, "grad_norm": 1.1650164127349854, "learning_rate": 2.3957340731003624e-06, "loss": 0.5047, "step": 10599 }, { "epoch": 0.68, "grad_norm": 1.1146504878997803, "learning_rate": 2.394841799927307e-06, "loss": 0.4941, "step": 10600 }, { "epoch": 0.68, "grad_norm": 1.1494821310043335, "learning_rate": 2.393949640618993e-06, "loss": 0.4896, "step": 10601 }, { "epoch": 0.68, "grad_norm": 1.2102106809616089, "learning_rate": 2.3930575952144175e-06, "loss": 0.5775, "step": 10602 }, { "epoch": 0.68, "grad_norm": 1.1363999843597412, "learning_rate": 2.3921656637525682e-06, "loss": 0.5021, "step": 10603 }, { "epoch": 0.68, "grad_norm": 1.1752256155014038, "learning_rate": 2.3912738462724287e-06, "loss": 0.5316, "step": 10604 }, { "epoch": 0.68, "grad_norm": 1.1968337297439575, "learning_rate": 2.390382142812979e-06, "loss": 0.5385, "step": 10605 }, { "epoch": 0.68, "grad_norm": 1.2151002883911133, "learning_rate": 2.389490553413196e-06, "loss": 0.556, "step": 10606 }, { "epoch": 0.68, "grad_norm": 1.096068263053894, "learning_rate": 2.3885990781120423e-06, "loss": 0.482, "step": 10607 }, { "epoch": 0.68, "grad_norm": 1.1787856817245483, "learning_rate": 2.387707716948486e-06, "loss": 0.5041, "step": 10608 }, { "epoch": 0.68, "grad_norm": 1.2296206951141357, "learning_rate": 2.3868164699614854e-06, "loss": 0.5237, "step": 10609 }, { "epoch": 0.68, "grad_norm": 1.1405961513519287, "learning_rate": 2.3859253371899948e-06, "loss": 0.5695, "step": 10610 }, { "epoch": 0.68, "grad_norm": 1.230094075202942, "learning_rate": 2.3850343186729637e-06, "loss": 0.5194, "step": 10611 }, { "epoch": 0.68, "grad_norm": 1.1708197593688965, "learning_rate": 2.3841434144493373e-06, "loss": 0.5241, "step": 10612 }, { "epoch": 0.69, "grad_norm": 1.0701208114624023, "learning_rate": 2.3832526245580518e-06, "loss": 0.4925, "step": 10613 }, { "epoch": 0.69, "grad_norm": 1.1878188848495483, "learning_rate": 2.382361949038042e-06, "loss": 0.5196, "step": 10614 }, { "epoch": 0.69, "grad_norm": 1.2748656272888184, "learning_rate": 2.3814713879282385e-06, "loss": 0.544, "step": 10615 }, { "epoch": 0.69, "grad_norm": 1.205163836479187, "learning_rate": 2.3805809412675666e-06, "loss": 0.5751, "step": 10616 }, { "epoch": 0.69, "grad_norm": 1.1798033714294434, "learning_rate": 2.3796906090949418e-06, "loss": 0.5188, "step": 10617 }, { "epoch": 0.69, "grad_norm": 1.1341010332107544, "learning_rate": 2.3788003914492797e-06, "loss": 0.5024, "step": 10618 }, { "epoch": 0.69, "grad_norm": 1.1292364597320557, "learning_rate": 2.37791028836949e-06, "loss": 0.5186, "step": 10619 }, { "epoch": 0.69, "grad_norm": 1.1293596029281616, "learning_rate": 2.3770202998944756e-06, "loss": 0.5244, "step": 10620 }, { "epoch": 0.69, "grad_norm": 1.1324610710144043, "learning_rate": 2.3761304260631396e-06, "loss": 0.4892, "step": 10621 }, { "epoch": 0.69, "grad_norm": 1.2291566133499146, "learning_rate": 2.37524066691437e-06, "loss": 0.4992, "step": 10622 }, { "epoch": 0.69, "grad_norm": 1.0448864698410034, "learning_rate": 2.3743510224870597e-06, "loss": 0.5231, "step": 10623 }, { "epoch": 0.69, "grad_norm": 1.2496259212493896, "learning_rate": 2.3734614928200917e-06, "loss": 0.53, "step": 10624 }, { "epoch": 0.69, "grad_norm": 1.083768606185913, "learning_rate": 2.3725720779523477e-06, "loss": 0.5586, "step": 10625 }, { "epoch": 0.69, "grad_norm": 1.0512675046920776, "learning_rate": 2.3716827779226965e-06, "loss": 0.4909, "step": 10626 }, { "epoch": 0.69, "grad_norm": 1.256972312927246, "learning_rate": 2.3707935927700104e-06, "loss": 0.5884, "step": 10627 }, { "epoch": 0.69, "grad_norm": 1.168462872505188, "learning_rate": 2.369904522533153e-06, "loss": 0.5148, "step": 10628 }, { "epoch": 0.69, "grad_norm": 1.066301703453064, "learning_rate": 2.369015567250983e-06, "loss": 0.5048, "step": 10629 }, { "epoch": 0.69, "grad_norm": 1.1981455087661743, "learning_rate": 2.3681267269623565e-06, "loss": 0.5361, "step": 10630 }, { "epoch": 0.69, "grad_norm": 1.0953984260559082, "learning_rate": 2.367238001706118e-06, "loss": 0.5105, "step": 10631 }, { "epoch": 0.69, "grad_norm": 1.0972199440002441, "learning_rate": 2.3663493915211144e-06, "loss": 0.5573, "step": 10632 }, { "epoch": 0.69, "grad_norm": 1.160687804222107, "learning_rate": 2.365460896446184e-06, "loss": 0.4715, "step": 10633 }, { "epoch": 0.69, "grad_norm": 1.1372586488723755, "learning_rate": 2.3645725165201604e-06, "loss": 0.5061, "step": 10634 }, { "epoch": 0.69, "grad_norm": 1.1866387128829956, "learning_rate": 2.3636842517818746e-06, "loss": 0.5258, "step": 10635 }, { "epoch": 0.69, "grad_norm": 1.2521430253982544, "learning_rate": 2.362796102270147e-06, "loss": 0.5145, "step": 10636 }, { "epoch": 0.69, "grad_norm": 1.2148849964141846, "learning_rate": 2.3619080680237972e-06, "loss": 0.5098, "step": 10637 }, { "epoch": 0.69, "grad_norm": 1.2395614385604858, "learning_rate": 2.3610201490816392e-06, "loss": 0.5316, "step": 10638 }, { "epoch": 0.69, "grad_norm": 1.3518306016921997, "learning_rate": 2.3601323454824822e-06, "loss": 0.5739, "step": 10639 }, { "epoch": 0.69, "grad_norm": 1.2534772157669067, "learning_rate": 2.359244657265129e-06, "loss": 0.5267, "step": 10640 }, { "epoch": 0.69, "grad_norm": 1.2399994134902954, "learning_rate": 2.3583570844683808e-06, "loss": 0.5231, "step": 10641 }, { "epoch": 0.69, "grad_norm": 1.152815580368042, "learning_rate": 2.3574696271310266e-06, "loss": 0.5111, "step": 10642 }, { "epoch": 0.69, "grad_norm": 1.1899760961532593, "learning_rate": 2.3565822852918575e-06, "loss": 0.5335, "step": 10643 }, { "epoch": 0.69, "grad_norm": 1.1076442003250122, "learning_rate": 2.3556950589896566e-06, "loss": 0.5275, "step": 10644 }, { "epoch": 0.69, "grad_norm": 1.1105551719665527, "learning_rate": 2.3548079482632023e-06, "loss": 0.5093, "step": 10645 }, { "epoch": 0.69, "grad_norm": 1.1433500051498413, "learning_rate": 2.353920953151268e-06, "loss": 0.5041, "step": 10646 }, { "epoch": 0.69, "grad_norm": 1.2539080381393433, "learning_rate": 2.353034073692624e-06, "loss": 0.5225, "step": 10647 }, { "epoch": 0.69, "grad_norm": 1.2294403314590454, "learning_rate": 2.352147309926029e-06, "loss": 0.5015, "step": 10648 }, { "epoch": 0.69, "grad_norm": 1.2652807235717773, "learning_rate": 2.3512606618902432e-06, "loss": 0.5566, "step": 10649 }, { "epoch": 0.69, "grad_norm": 1.320387601852417, "learning_rate": 2.3503741296240207e-06, "loss": 0.4898, "step": 10650 }, { "epoch": 0.69, "grad_norm": 1.1441121101379395, "learning_rate": 2.3494877131661086e-06, "loss": 0.5355, "step": 10651 }, { "epoch": 0.69, "grad_norm": 1.1649963855743408, "learning_rate": 2.3486014125552503e-06, "loss": 0.4765, "step": 10652 }, { "epoch": 0.69, "grad_norm": 1.2904808521270752, "learning_rate": 2.3477152278301853e-06, "loss": 0.4946, "step": 10653 }, { "epoch": 0.69, "grad_norm": 1.2095723152160645, "learning_rate": 2.346829159029643e-06, "loss": 0.5348, "step": 10654 }, { "epoch": 0.69, "grad_norm": 1.1925972700119019, "learning_rate": 2.345943206192353e-06, "loss": 0.5484, "step": 10655 }, { "epoch": 0.69, "grad_norm": 1.323876976966858, "learning_rate": 2.345057369357037e-06, "loss": 0.5584, "step": 10656 }, { "epoch": 0.69, "grad_norm": 1.3052425384521484, "learning_rate": 2.344171648562414e-06, "loss": 0.4548, "step": 10657 }, { "epoch": 0.69, "grad_norm": 1.355508804321289, "learning_rate": 2.343286043847198e-06, "loss": 0.5187, "step": 10658 }, { "epoch": 0.69, "grad_norm": 1.1856797933578491, "learning_rate": 2.3424005552500935e-06, "loss": 0.5229, "step": 10659 }, { "epoch": 0.69, "grad_norm": 1.2367470264434814, "learning_rate": 2.341515182809803e-06, "loss": 0.525, "step": 10660 }, { "epoch": 0.69, "grad_norm": 1.1526470184326172, "learning_rate": 2.3406299265650256e-06, "loss": 0.5229, "step": 10661 }, { "epoch": 0.69, "grad_norm": 1.3060754537582397, "learning_rate": 2.339744786554453e-06, "loss": 0.483, "step": 10662 }, { "epoch": 0.69, "grad_norm": 1.159348487854004, "learning_rate": 2.3388597628167715e-06, "loss": 0.5095, "step": 10663 }, { "epoch": 0.69, "grad_norm": 1.3879809379577637, "learning_rate": 2.3379748553906663e-06, "loss": 0.5359, "step": 10664 }, { "epoch": 0.69, "grad_norm": 1.068507194519043, "learning_rate": 2.33709006431481e-06, "loss": 0.464, "step": 10665 }, { "epoch": 0.69, "grad_norm": 1.1605414152145386, "learning_rate": 2.3362053896278764e-06, "loss": 0.4908, "step": 10666 }, { "epoch": 0.69, "grad_norm": 1.1003296375274658, "learning_rate": 2.3353208313685328e-06, "loss": 0.4691, "step": 10667 }, { "epoch": 0.69, "grad_norm": 1.2193546295166016, "learning_rate": 2.33443638957544e-06, "loss": 0.5559, "step": 10668 }, { "epoch": 0.69, "grad_norm": 1.2739449739456177, "learning_rate": 2.333552064287256e-06, "loss": 0.5535, "step": 10669 }, { "epoch": 0.69, "grad_norm": 1.162510871887207, "learning_rate": 2.332667855542634e-06, "loss": 0.4956, "step": 10670 }, { "epoch": 0.69, "grad_norm": 1.0697182416915894, "learning_rate": 2.331783763380216e-06, "loss": 0.5207, "step": 10671 }, { "epoch": 0.69, "grad_norm": 1.237607717514038, "learning_rate": 2.3308997878386454e-06, "loss": 0.5782, "step": 10672 }, { "epoch": 0.69, "grad_norm": 1.1774389743804932, "learning_rate": 2.330015928956559e-06, "loss": 0.5726, "step": 10673 }, { "epoch": 0.69, "grad_norm": 1.2061489820480347, "learning_rate": 2.3291321867725876e-06, "loss": 0.5238, "step": 10674 }, { "epoch": 0.69, "grad_norm": 1.2483710050582886, "learning_rate": 2.328248561325357e-06, "loss": 0.5203, "step": 10675 }, { "epoch": 0.69, "grad_norm": 1.1724629402160645, "learning_rate": 2.327365052653491e-06, "loss": 0.5464, "step": 10676 }, { "epoch": 0.69, "grad_norm": 1.188645362854004, "learning_rate": 2.3264816607956007e-06, "loss": 0.5102, "step": 10677 }, { "epoch": 0.69, "grad_norm": 1.2097505331039429, "learning_rate": 2.3255983857902997e-06, "loss": 0.5815, "step": 10678 }, { "epoch": 0.69, "grad_norm": 1.2260459661483765, "learning_rate": 2.324715227676193e-06, "loss": 0.5482, "step": 10679 }, { "epoch": 0.69, "grad_norm": 1.2198542356491089, "learning_rate": 2.323832186491881e-06, "loss": 0.5198, "step": 10680 }, { "epoch": 0.69, "grad_norm": 1.1609976291656494, "learning_rate": 2.3229492622759604e-06, "loss": 0.542, "step": 10681 }, { "epoch": 0.69, "grad_norm": 1.270534634590149, "learning_rate": 2.322066455067022e-06, "loss": 0.5247, "step": 10682 }, { "epoch": 0.69, "grad_norm": 1.2420425415039062, "learning_rate": 2.3211837649036477e-06, "loss": 0.4793, "step": 10683 }, { "epoch": 0.69, "grad_norm": 1.3265832662582397, "learning_rate": 2.32030119182442e-06, "loss": 0.5596, "step": 10684 }, { "epoch": 0.69, "grad_norm": 1.1791326999664307, "learning_rate": 2.3194187358679132e-06, "loss": 0.5119, "step": 10685 }, { "epoch": 0.69, "grad_norm": 1.147430181503296, "learning_rate": 2.3185363970726976e-06, "loss": 0.5407, "step": 10686 }, { "epoch": 0.69, "grad_norm": 1.1834421157836914, "learning_rate": 2.3176541754773375e-06, "loss": 0.499, "step": 10687 }, { "epoch": 0.69, "grad_norm": 1.2477511167526245, "learning_rate": 2.316772071120395e-06, "loss": 0.5101, "step": 10688 }, { "epoch": 0.69, "grad_norm": 1.1639827489852905, "learning_rate": 2.31589008404042e-06, "loss": 0.5271, "step": 10689 }, { "epoch": 0.69, "grad_norm": 1.214680790901184, "learning_rate": 2.3150082142759657e-06, "loss": 0.4691, "step": 10690 }, { "epoch": 0.69, "grad_norm": 1.2380625009536743, "learning_rate": 2.314126461865574e-06, "loss": 0.4912, "step": 10691 }, { "epoch": 0.69, "grad_norm": 1.0713645219802856, "learning_rate": 2.3132448268477846e-06, "loss": 0.502, "step": 10692 }, { "epoch": 0.69, "grad_norm": 1.193961262702942, "learning_rate": 2.3123633092611325e-06, "loss": 0.5352, "step": 10693 }, { "epoch": 0.69, "grad_norm": 1.2561227083206177, "learning_rate": 2.311481909144148e-06, "loss": 0.5299, "step": 10694 }, { "epoch": 0.69, "grad_norm": 1.295727252960205, "learning_rate": 2.310600626535351e-06, "loss": 0.6079, "step": 10695 }, { "epoch": 0.69, "grad_norm": 1.1697602272033691, "learning_rate": 2.309719461473261e-06, "loss": 0.5058, "step": 10696 }, { "epoch": 0.69, "grad_norm": 1.2492848634719849, "learning_rate": 2.3088384139963934e-06, "loss": 0.5224, "step": 10697 }, { "epoch": 0.69, "grad_norm": 1.2276166677474976, "learning_rate": 2.3079574841432546e-06, "loss": 0.4942, "step": 10698 }, { "epoch": 0.69, "grad_norm": 1.3607909679412842, "learning_rate": 2.3070766719523506e-06, "loss": 0.5522, "step": 10699 }, { "epoch": 0.69, "grad_norm": 1.2874752283096313, "learning_rate": 2.306195977462176e-06, "loss": 0.5582, "step": 10700 }, { "epoch": 0.69, "grad_norm": 1.150557041168213, "learning_rate": 2.305315400711225e-06, "loss": 0.5184, "step": 10701 }, { "epoch": 0.69, "grad_norm": 1.0996077060699463, "learning_rate": 2.304434941737985e-06, "loss": 0.5322, "step": 10702 }, { "epoch": 0.69, "grad_norm": 1.248664379119873, "learning_rate": 2.3035546005809396e-06, "loss": 0.5564, "step": 10703 }, { "epoch": 0.69, "grad_norm": 1.1271556615829468, "learning_rate": 2.3026743772785655e-06, "loss": 0.4786, "step": 10704 }, { "epoch": 0.69, "grad_norm": 1.181383490562439, "learning_rate": 2.301794271869337e-06, "loss": 0.5142, "step": 10705 }, { "epoch": 0.69, "grad_norm": 1.1951748132705688, "learning_rate": 2.3009142843917176e-06, "loss": 0.5261, "step": 10706 }, { "epoch": 0.69, "grad_norm": 1.1376121044158936, "learning_rate": 2.300034414884171e-06, "loss": 0.503, "step": 10707 }, { "epoch": 0.69, "grad_norm": 1.1445871591567993, "learning_rate": 2.2991546633851543e-06, "loss": 0.5485, "step": 10708 }, { "epoch": 0.69, "grad_norm": 1.140345811843872, "learning_rate": 2.2982750299331186e-06, "loss": 0.4986, "step": 10709 }, { "epoch": 0.69, "grad_norm": 1.1806209087371826, "learning_rate": 2.297395514566513e-06, "loss": 0.5107, "step": 10710 }, { "epoch": 0.69, "grad_norm": 1.2019926309585571, "learning_rate": 2.2965161173237744e-06, "loss": 0.5571, "step": 10711 }, { "epoch": 0.69, "grad_norm": 1.1874881982803345, "learning_rate": 2.295636838243342e-06, "loss": 0.5192, "step": 10712 }, { "epoch": 0.69, "grad_norm": 1.2526905536651611, "learning_rate": 2.2947576773636454e-06, "loss": 0.5116, "step": 10713 }, { "epoch": 0.69, "grad_norm": 1.1081644296646118, "learning_rate": 2.293878634723113e-06, "loss": 0.4836, "step": 10714 }, { "epoch": 0.69, "grad_norm": 1.2531381845474243, "learning_rate": 2.292999710360162e-06, "loss": 0.578, "step": 10715 }, { "epoch": 0.69, "grad_norm": 1.2369775772094727, "learning_rate": 2.292120904313209e-06, "loss": 0.5519, "step": 10716 }, { "epoch": 0.69, "grad_norm": 1.2188338041305542, "learning_rate": 2.2912422166206655e-06, "loss": 0.5443, "step": 10717 }, { "epoch": 0.69, "grad_norm": 1.201509714126587, "learning_rate": 2.2903636473209358e-06, "loss": 0.5261, "step": 10718 }, { "epoch": 0.69, "grad_norm": 1.1141637563705444, "learning_rate": 2.289485196452422e-06, "loss": 0.4958, "step": 10719 }, { "epoch": 0.69, "grad_norm": 1.2286649942398071, "learning_rate": 2.2886068640535146e-06, "loss": 0.5818, "step": 10720 }, { "epoch": 0.69, "grad_norm": 1.1776583194732666, "learning_rate": 2.2877286501626067e-06, "loss": 0.5097, "step": 10721 }, { "epoch": 0.69, "grad_norm": 1.1801837682724, "learning_rate": 2.286850554818081e-06, "loss": 0.5047, "step": 10722 }, { "epoch": 0.69, "grad_norm": 1.2405956983566284, "learning_rate": 2.2859725780583196e-06, "loss": 0.4735, "step": 10723 }, { "epoch": 0.69, "grad_norm": 1.1938587427139282, "learning_rate": 2.285094719921693e-06, "loss": 0.5288, "step": 10724 }, { "epoch": 0.69, "grad_norm": 1.1667735576629639, "learning_rate": 2.2842169804465713e-06, "loss": 0.4972, "step": 10725 }, { "epoch": 0.69, "grad_norm": 1.20871102809906, "learning_rate": 2.2833393596713185e-06, "loss": 0.5408, "step": 10726 }, { "epoch": 0.69, "grad_norm": 1.3180036544799805, "learning_rate": 2.282461857634293e-06, "loss": 0.5466, "step": 10727 }, { "epoch": 0.69, "grad_norm": 1.210966944694519, "learning_rate": 2.281584474373849e-06, "loss": 0.5353, "step": 10728 }, { "epoch": 0.69, "grad_norm": 1.1268061399459839, "learning_rate": 2.2807072099283356e-06, "loss": 0.5154, "step": 10729 }, { "epoch": 0.69, "grad_norm": 1.2482140064239502, "learning_rate": 2.279830064336092e-06, "loss": 0.5315, "step": 10730 }, { "epoch": 0.69, "grad_norm": 1.080420970916748, "learning_rate": 2.2789530376354584e-06, "loss": 0.5011, "step": 10731 }, { "epoch": 0.69, "grad_norm": 1.2455484867095947, "learning_rate": 2.2780761298647672e-06, "loss": 0.6051, "step": 10732 }, { "epoch": 0.69, "grad_norm": 1.1462507247924805, "learning_rate": 2.277199341062346e-06, "loss": 0.4608, "step": 10733 }, { "epoch": 0.69, "grad_norm": 1.1478351354599, "learning_rate": 2.2763226712665167e-06, "loss": 0.52, "step": 10734 }, { "epoch": 0.69, "grad_norm": 1.0555286407470703, "learning_rate": 2.2754461205155977e-06, "loss": 0.4574, "step": 10735 }, { "epoch": 0.69, "grad_norm": 1.144971489906311, "learning_rate": 2.2745696888478973e-06, "loss": 0.5108, "step": 10736 }, { "epoch": 0.69, "grad_norm": 1.3399235010147095, "learning_rate": 2.273693376301724e-06, "loss": 0.5836, "step": 10737 }, { "epoch": 0.69, "grad_norm": 1.158179759979248, "learning_rate": 2.27281718291538e-06, "loss": 0.507, "step": 10738 }, { "epoch": 0.69, "grad_norm": 1.0890815258026123, "learning_rate": 2.2719411087271603e-06, "loss": 0.4809, "step": 10739 }, { "epoch": 0.69, "grad_norm": 1.2450324296951294, "learning_rate": 2.271065153775358e-06, "loss": 0.5335, "step": 10740 }, { "epoch": 0.69, "grad_norm": 1.2188811302185059, "learning_rate": 2.2701893180982553e-06, "loss": 0.5354, "step": 10741 }, { "epoch": 0.69, "grad_norm": 1.2760512828826904, "learning_rate": 2.269313601734135e-06, "loss": 0.5055, "step": 10742 }, { "epoch": 0.69, "grad_norm": 1.178704023361206, "learning_rate": 2.2684380047212714e-06, "loss": 0.4886, "step": 10743 }, { "epoch": 0.69, "grad_norm": 1.340662956237793, "learning_rate": 2.267562527097935e-06, "loss": 0.5406, "step": 10744 }, { "epoch": 0.69, "grad_norm": 1.1664842367172241, "learning_rate": 2.2666871689023907e-06, "loss": 0.5364, "step": 10745 }, { "epoch": 0.69, "grad_norm": 1.145734429359436, "learning_rate": 2.2658119301729004e-06, "loss": 0.5649, "step": 10746 }, { "epoch": 0.69, "grad_norm": 1.2227123975753784, "learning_rate": 2.2649368109477143e-06, "loss": 0.5293, "step": 10747 }, { "epoch": 0.69, "grad_norm": 1.1488897800445557, "learning_rate": 2.2640618112650837e-06, "loss": 0.5074, "step": 10748 }, { "epoch": 0.69, "grad_norm": 1.2469178438186646, "learning_rate": 2.263186931163253e-06, "loss": 0.5484, "step": 10749 }, { "epoch": 0.69, "grad_norm": 1.186704158782959, "learning_rate": 2.2623121706804595e-06, "loss": 0.5068, "step": 10750 }, { "epoch": 0.69, "grad_norm": 1.120220422744751, "learning_rate": 2.261437529854938e-06, "loss": 0.4941, "step": 10751 }, { "epoch": 0.69, "grad_norm": 1.2396316528320312, "learning_rate": 2.260563008724919e-06, "loss": 0.4905, "step": 10752 }, { "epoch": 0.69, "grad_norm": 1.1917095184326172, "learning_rate": 2.2596886073286204e-06, "loss": 0.494, "step": 10753 }, { "epoch": 0.69, "grad_norm": 1.255893349647522, "learning_rate": 2.2588143257042626e-06, "loss": 0.4777, "step": 10754 }, { "epoch": 0.69, "grad_norm": 1.154140830039978, "learning_rate": 2.257940163890059e-06, "loss": 0.525, "step": 10755 }, { "epoch": 0.69, "grad_norm": 1.2301033735275269, "learning_rate": 2.2570661219242156e-06, "loss": 0.5592, "step": 10756 }, { "epoch": 0.69, "grad_norm": 1.2898515462875366, "learning_rate": 2.2561921998449356e-06, "loss": 0.5428, "step": 10757 }, { "epoch": 0.69, "grad_norm": 1.3112367391586304, "learning_rate": 2.255318397690417e-06, "loss": 0.5198, "step": 10758 }, { "epoch": 0.69, "grad_norm": 1.1303420066833496, "learning_rate": 2.254444715498848e-06, "loss": 0.5479, "step": 10759 }, { "epoch": 0.69, "grad_norm": 1.2181333303451538, "learning_rate": 2.253571153308417e-06, "loss": 0.5526, "step": 10760 }, { "epoch": 0.69, "grad_norm": 1.2645785808563232, "learning_rate": 2.252697711157305e-06, "loss": 0.555, "step": 10761 }, { "epoch": 0.69, "grad_norm": 1.2183935642242432, "learning_rate": 2.2518243890836877e-06, "loss": 0.4956, "step": 10762 }, { "epoch": 0.69, "grad_norm": 1.234954833984375, "learning_rate": 2.2509511871257367e-06, "loss": 0.5567, "step": 10763 }, { "epoch": 0.69, "grad_norm": 1.223196268081665, "learning_rate": 2.2500781053216175e-06, "loss": 0.5132, "step": 10764 }, { "epoch": 0.69, "grad_norm": 1.2025901079177856, "learning_rate": 2.249205143709488e-06, "loss": 0.5164, "step": 10765 }, { "epoch": 0.69, "grad_norm": 1.1668628454208374, "learning_rate": 2.248332302327505e-06, "loss": 0.5397, "step": 10766 }, { "epoch": 0.69, "grad_norm": 1.2400779724121094, "learning_rate": 2.2474595812138173e-06, "loss": 0.515, "step": 10767 }, { "epoch": 0.7, "grad_norm": 1.2131397724151611, "learning_rate": 2.2465869804065695e-06, "loss": 0.5053, "step": 10768 }, { "epoch": 0.7, "grad_norm": 1.0715352296829224, "learning_rate": 2.2457144999439006e-06, "loss": 0.465, "step": 10769 }, { "epoch": 0.7, "grad_norm": 1.1680231094360352, "learning_rate": 2.244842139863947e-06, "loss": 0.5203, "step": 10770 }, { "epoch": 0.7, "grad_norm": 1.2064714431762695, "learning_rate": 2.243969900204833e-06, "loss": 0.4773, "step": 10771 }, { "epoch": 0.7, "grad_norm": 1.1854974031448364, "learning_rate": 2.2430977810046846e-06, "loss": 0.5031, "step": 10772 }, { "epoch": 0.7, "grad_norm": 1.1674118041992188, "learning_rate": 2.2422257823016187e-06, "loss": 0.6064, "step": 10773 }, { "epoch": 0.7, "grad_norm": 1.123302936553955, "learning_rate": 2.241353904133749e-06, "loss": 0.5321, "step": 10774 }, { "epoch": 0.7, "grad_norm": 1.1419557332992554, "learning_rate": 2.2404821465391824e-06, "loss": 0.4827, "step": 10775 }, { "epoch": 0.7, "grad_norm": 1.2419288158416748, "learning_rate": 2.239610509556024e-06, "loss": 0.5045, "step": 10776 }, { "epoch": 0.7, "grad_norm": 1.2302967309951782, "learning_rate": 2.2387389932223658e-06, "loss": 0.5168, "step": 10777 }, { "epoch": 0.7, "grad_norm": 1.2520350217819214, "learning_rate": 2.2378675975763025e-06, "loss": 0.5523, "step": 10778 }, { "epoch": 0.7, "grad_norm": 1.277877688407898, "learning_rate": 2.23699632265592e-06, "loss": 0.5128, "step": 10779 }, { "epoch": 0.7, "grad_norm": 1.14431631565094, "learning_rate": 2.2361251684992996e-06, "loss": 0.4586, "step": 10780 }, { "epoch": 0.7, "grad_norm": 1.1638941764831543, "learning_rate": 2.235254135144517e-06, "loss": 0.5362, "step": 10781 }, { "epoch": 0.7, "grad_norm": 1.1798783540725708, "learning_rate": 2.2343832226296454e-06, "loss": 0.5001, "step": 10782 }, { "epoch": 0.7, "grad_norm": 1.2223654985427856, "learning_rate": 2.2335124309927453e-06, "loss": 0.5065, "step": 10783 }, { "epoch": 0.7, "grad_norm": 1.1981568336486816, "learning_rate": 2.2326417602718793e-06, "loss": 0.5593, "step": 10784 }, { "epoch": 0.7, "grad_norm": 1.1966493129730225, "learning_rate": 2.231771210505102e-06, "loss": 0.5152, "step": 10785 }, { "epoch": 0.7, "grad_norm": 1.1115374565124512, "learning_rate": 2.2309007817304633e-06, "loss": 0.5222, "step": 10786 }, { "epoch": 0.7, "grad_norm": 1.1755281686782837, "learning_rate": 2.230030473986009e-06, "loss": 0.5496, "step": 10787 }, { "epoch": 0.7, "grad_norm": 1.1125187873840332, "learning_rate": 2.229160287309774e-06, "loss": 0.5013, "step": 10788 }, { "epoch": 0.7, "grad_norm": 1.1574866771697998, "learning_rate": 2.228290221739794e-06, "loss": 0.5044, "step": 10789 }, { "epoch": 0.7, "grad_norm": 1.0892990827560425, "learning_rate": 2.227420277314097e-06, "loss": 0.4764, "step": 10790 }, { "epoch": 0.7, "grad_norm": 1.2621824741363525, "learning_rate": 2.2265504540707065e-06, "loss": 0.5091, "step": 10791 }, { "epoch": 0.7, "grad_norm": 1.1839593648910522, "learning_rate": 2.2256807520476403e-06, "loss": 0.562, "step": 10792 }, { "epoch": 0.7, "grad_norm": 1.1497761011123657, "learning_rate": 2.2248111712829122e-06, "loss": 0.5371, "step": 10793 }, { "epoch": 0.7, "grad_norm": 1.157676100730896, "learning_rate": 2.223941711814526e-06, "loss": 0.5237, "step": 10794 }, { "epoch": 0.7, "grad_norm": 1.0226045846939087, "learning_rate": 2.2230723736804855e-06, "loss": 0.452, "step": 10795 }, { "epoch": 0.7, "grad_norm": 1.1538022756576538, "learning_rate": 2.2222031569187865e-06, "loss": 0.5234, "step": 10796 }, { "epoch": 0.7, "grad_norm": 1.1651966571807861, "learning_rate": 2.2213340615674206e-06, "loss": 0.5237, "step": 10797 }, { "epoch": 0.7, "grad_norm": 1.148923397064209, "learning_rate": 2.2204650876643748e-06, "loss": 0.5098, "step": 10798 }, { "epoch": 0.7, "grad_norm": 1.162447214126587, "learning_rate": 2.2195962352476296e-06, "loss": 0.5118, "step": 10799 }, { "epoch": 0.7, "grad_norm": 1.1383047103881836, "learning_rate": 2.218727504355158e-06, "loss": 0.5249, "step": 10800 }, { "epoch": 0.7, "grad_norm": 1.2545369863510132, "learning_rate": 2.2178588950249308e-06, "loss": 0.5116, "step": 10801 }, { "epoch": 0.7, "grad_norm": 1.1341915130615234, "learning_rate": 2.2169904072949137e-06, "loss": 0.5319, "step": 10802 }, { "epoch": 0.7, "grad_norm": 1.1624908447265625, "learning_rate": 2.2161220412030677e-06, "loss": 0.4802, "step": 10803 }, { "epoch": 0.7, "grad_norm": 1.1766610145568848, "learning_rate": 2.215253796787343e-06, "loss": 0.5384, "step": 10804 }, { "epoch": 0.7, "grad_norm": 1.148324966430664, "learning_rate": 2.2143856740856895e-06, "loss": 0.5454, "step": 10805 }, { "epoch": 0.7, "grad_norm": 1.2326093912124634, "learning_rate": 2.213517673136052e-06, "loss": 0.5662, "step": 10806 }, { "epoch": 0.7, "grad_norm": 1.2558194398880005, "learning_rate": 2.2126497939763667e-06, "loss": 0.5587, "step": 10807 }, { "epoch": 0.7, "grad_norm": 1.1557248830795288, "learning_rate": 2.2117820366445703e-06, "loss": 0.5059, "step": 10808 }, { "epoch": 0.7, "grad_norm": 1.146693468093872, "learning_rate": 2.210914401178585e-06, "loss": 0.5215, "step": 10809 }, { "epoch": 0.7, "grad_norm": 1.2433505058288574, "learning_rate": 2.2100468876163354e-06, "loss": 0.5257, "step": 10810 }, { "epoch": 0.7, "grad_norm": 1.0847952365875244, "learning_rate": 2.2091794959957387e-06, "loss": 0.5211, "step": 10811 }, { "epoch": 0.7, "grad_norm": 1.1833890676498413, "learning_rate": 2.2083122263547054e-06, "loss": 0.4532, "step": 10812 }, { "epoch": 0.7, "grad_norm": 1.2576842308044434, "learning_rate": 2.2074450787311437e-06, "loss": 0.5405, "step": 10813 }, { "epoch": 0.7, "grad_norm": 1.2112133502960205, "learning_rate": 2.2065780531629506e-06, "loss": 0.5344, "step": 10814 }, { "epoch": 0.7, "grad_norm": 1.3035345077514648, "learning_rate": 2.205711149688024e-06, "loss": 0.5714, "step": 10815 }, { "epoch": 0.7, "grad_norm": 1.1543524265289307, "learning_rate": 2.2048443683442537e-06, "loss": 0.5364, "step": 10816 }, { "epoch": 0.7, "grad_norm": 1.1479429006576538, "learning_rate": 2.2039777091695264e-06, "loss": 0.5003, "step": 10817 }, { "epoch": 0.7, "grad_norm": 1.0916614532470703, "learning_rate": 2.203111172201718e-06, "loss": 0.5375, "step": 10818 }, { "epoch": 0.7, "grad_norm": 1.1028521060943604, "learning_rate": 2.2022447574787035e-06, "loss": 0.4986, "step": 10819 }, { "epoch": 0.7, "grad_norm": 1.272499680519104, "learning_rate": 2.201378465038353e-06, "loss": 0.5099, "step": 10820 }, { "epoch": 0.7, "grad_norm": 1.246043086051941, "learning_rate": 2.200512294918529e-06, "loss": 0.4734, "step": 10821 }, { "epoch": 0.7, "grad_norm": 1.2004808187484741, "learning_rate": 2.19964624715709e-06, "loss": 0.5181, "step": 10822 }, { "epoch": 0.7, "grad_norm": 1.2388713359832764, "learning_rate": 2.1987803217918904e-06, "loss": 0.5126, "step": 10823 }, { "epoch": 0.7, "grad_norm": 1.2235673666000366, "learning_rate": 2.197914518860773e-06, "loss": 0.5516, "step": 10824 }, { "epoch": 0.7, "grad_norm": 1.1794427633285522, "learning_rate": 2.1970488384015836e-06, "loss": 0.5014, "step": 10825 }, { "epoch": 0.7, "grad_norm": 1.168477177619934, "learning_rate": 2.196183280452157e-06, "loss": 0.5135, "step": 10826 }, { "epoch": 0.7, "grad_norm": 1.2267181873321533, "learning_rate": 2.195317845050326e-06, "loss": 0.4564, "step": 10827 }, { "epoch": 0.7, "grad_norm": 1.197616457939148, "learning_rate": 2.194452532233917e-06, "loss": 0.5567, "step": 10828 }, { "epoch": 0.7, "grad_norm": 1.1183944940567017, "learning_rate": 2.193587342040748e-06, "loss": 0.4663, "step": 10829 }, { "epoch": 0.7, "grad_norm": 1.1646612882614136, "learning_rate": 2.192722274508635e-06, "loss": 0.5294, "step": 10830 }, { "epoch": 0.7, "grad_norm": 1.1119698286056519, "learning_rate": 2.191857329675389e-06, "loss": 0.4819, "step": 10831 }, { "epoch": 0.7, "grad_norm": 1.2679415941238403, "learning_rate": 2.190992507578814e-06, "loss": 0.5238, "step": 10832 }, { "epoch": 0.7, "grad_norm": 1.106297254562378, "learning_rate": 2.1901278082567095e-06, "loss": 0.5603, "step": 10833 }, { "epoch": 0.7, "grad_norm": 1.2622870206832886, "learning_rate": 2.1892632317468705e-06, "loss": 0.5287, "step": 10834 }, { "epoch": 0.7, "grad_norm": 1.238633394241333, "learning_rate": 2.1883987780870814e-06, "loss": 0.5069, "step": 10835 }, { "epoch": 0.7, "grad_norm": 1.236641526222229, "learning_rate": 2.1875344473151284e-06, "loss": 0.5559, "step": 10836 }, { "epoch": 0.7, "grad_norm": 1.1547529697418213, "learning_rate": 2.186670239468788e-06, "loss": 0.5149, "step": 10837 }, { "epoch": 0.7, "grad_norm": 1.1686525344848633, "learning_rate": 2.1858061545858334e-06, "loss": 0.537, "step": 10838 }, { "epoch": 0.7, "grad_norm": 1.181273102760315, "learning_rate": 2.184942192704031e-06, "loss": 0.5334, "step": 10839 }, { "epoch": 0.7, "grad_norm": 1.2135398387908936, "learning_rate": 2.184078353861144e-06, "loss": 0.4905, "step": 10840 }, { "epoch": 0.7, "grad_norm": 1.0796842575073242, "learning_rate": 2.1832146380949254e-06, "loss": 0.5174, "step": 10841 }, { "epoch": 0.7, "grad_norm": 1.1530791521072388, "learning_rate": 2.1823510454431272e-06, "loss": 0.5248, "step": 10842 }, { "epoch": 0.7, "grad_norm": 1.2677134275436401, "learning_rate": 2.1814875759434957e-06, "loss": 0.526, "step": 10843 }, { "epoch": 0.7, "grad_norm": 1.1042859554290771, "learning_rate": 2.18062422963377e-06, "loss": 0.5047, "step": 10844 }, { "epoch": 0.7, "grad_norm": 1.2543590068817139, "learning_rate": 2.179761006551685e-06, "loss": 0.53, "step": 10845 }, { "epoch": 0.7, "grad_norm": 1.2143975496292114, "learning_rate": 2.178897906734972e-06, "loss": 0.5111, "step": 10846 }, { "epoch": 0.7, "grad_norm": 1.2601186037063599, "learning_rate": 2.178034930221351e-06, "loss": 0.5422, "step": 10847 }, { "epoch": 0.7, "grad_norm": 1.1816189289093018, "learning_rate": 2.1771720770485425e-06, "loss": 0.5146, "step": 10848 }, { "epoch": 0.7, "grad_norm": 1.21775484085083, "learning_rate": 2.17630934725426e-06, "loss": 0.5264, "step": 10849 }, { "epoch": 0.7, "grad_norm": 1.0878044366836548, "learning_rate": 2.1754467408762104e-06, "loss": 0.5157, "step": 10850 }, { "epoch": 0.7, "grad_norm": 1.1944856643676758, "learning_rate": 2.1745842579520967e-06, "loss": 0.5242, "step": 10851 }, { "epoch": 0.7, "grad_norm": 1.1513690948486328, "learning_rate": 2.1737218985196167e-06, "loss": 0.51, "step": 10852 }, { "epoch": 0.7, "grad_norm": 1.1612039804458618, "learning_rate": 2.1728596626164587e-06, "loss": 0.497, "step": 10853 }, { "epoch": 0.7, "grad_norm": 1.281943440437317, "learning_rate": 2.1719975502803115e-06, "loss": 0.5231, "step": 10854 }, { "epoch": 0.7, "grad_norm": 1.1800073385238647, "learning_rate": 2.1711355615488545e-06, "loss": 0.5051, "step": 10855 }, { "epoch": 0.7, "grad_norm": 1.14492666721344, "learning_rate": 2.1702736964597632e-06, "loss": 0.509, "step": 10856 }, { "epoch": 0.7, "grad_norm": 1.4400489330291748, "learning_rate": 2.1694119550507083e-06, "loss": 0.5903, "step": 10857 }, { "epoch": 0.7, "grad_norm": 1.314214825630188, "learning_rate": 2.1685503373593555e-06, "loss": 0.4977, "step": 10858 }, { "epoch": 0.7, "grad_norm": 1.1468205451965332, "learning_rate": 2.1676888434233605e-06, "loss": 0.4962, "step": 10859 }, { "epoch": 0.7, "grad_norm": 1.1963729858398438, "learning_rate": 2.1668274732803783e-06, "loss": 0.5443, "step": 10860 }, { "epoch": 0.7, "grad_norm": 1.2867375612258911, "learning_rate": 2.165966226968058e-06, "loss": 0.5258, "step": 10861 }, { "epoch": 0.7, "grad_norm": 1.217745304107666, "learning_rate": 2.1651051045240425e-06, "loss": 0.5231, "step": 10862 }, { "epoch": 0.7, "grad_norm": 1.1764473915100098, "learning_rate": 2.1642441059859677e-06, "loss": 0.4682, "step": 10863 }, { "epoch": 0.7, "grad_norm": 1.1431846618652344, "learning_rate": 2.1633832313914687e-06, "loss": 0.5071, "step": 10864 }, { "epoch": 0.7, "grad_norm": 1.1784805059432983, "learning_rate": 2.1625224807781686e-06, "loss": 0.5118, "step": 10865 }, { "epoch": 0.7, "grad_norm": 1.209079384803772, "learning_rate": 2.1616618541836904e-06, "loss": 0.521, "step": 10866 }, { "epoch": 0.7, "grad_norm": 1.170608401298523, "learning_rate": 2.160801351645649e-06, "loss": 0.531, "step": 10867 }, { "epoch": 0.7, "grad_norm": 1.1766635179519653, "learning_rate": 2.159940973201656e-06, "loss": 0.5239, "step": 10868 }, { "epoch": 0.7, "grad_norm": 1.1502035856246948, "learning_rate": 2.1590807188893163e-06, "loss": 0.5257, "step": 10869 }, { "epoch": 0.7, "grad_norm": 1.133543848991394, "learning_rate": 2.1582205887462276e-06, "loss": 0.5462, "step": 10870 }, { "epoch": 0.7, "grad_norm": 1.251165747642517, "learning_rate": 2.157360582809985e-06, "loss": 0.5202, "step": 10871 }, { "epoch": 0.7, "grad_norm": 1.2074559926986694, "learning_rate": 2.156500701118177e-06, "loss": 0.51, "step": 10872 }, { "epoch": 0.7, "grad_norm": 1.3068190813064575, "learning_rate": 2.155640943708387e-06, "loss": 0.5448, "step": 10873 }, { "epoch": 0.7, "grad_norm": 1.2181658744812012, "learning_rate": 2.154781310618193e-06, "loss": 0.5802, "step": 10874 }, { "epoch": 0.7, "grad_norm": 1.135831594467163, "learning_rate": 2.1539218018851694e-06, "loss": 0.4857, "step": 10875 }, { "epoch": 0.7, "grad_norm": 1.1660418510437012, "learning_rate": 2.1530624175468785e-06, "loss": 0.5022, "step": 10876 }, { "epoch": 0.7, "grad_norm": 1.1179265975952148, "learning_rate": 2.152203157640884e-06, "loss": 0.5622, "step": 10877 }, { "epoch": 0.7, "grad_norm": 1.2003360986709595, "learning_rate": 2.151344022204742e-06, "loss": 0.5293, "step": 10878 }, { "epoch": 0.7, "grad_norm": 1.2941381931304932, "learning_rate": 2.150485011276004e-06, "loss": 0.5222, "step": 10879 }, { "epoch": 0.7, "grad_norm": 1.2016847133636475, "learning_rate": 2.1496261248922133e-06, "loss": 0.5962, "step": 10880 }, { "epoch": 0.7, "grad_norm": 1.2529568672180176, "learning_rate": 2.1487673630909133e-06, "loss": 0.5295, "step": 10881 }, { "epoch": 0.7, "grad_norm": 1.1888481378555298, "learning_rate": 2.1479087259096333e-06, "loss": 0.5517, "step": 10882 }, { "epoch": 0.7, "grad_norm": 1.1492968797683716, "learning_rate": 2.1470502133859043e-06, "loss": 0.4932, "step": 10883 }, { "epoch": 0.7, "grad_norm": 1.3061243295669556, "learning_rate": 2.14619182555725e-06, "loss": 0.4978, "step": 10884 }, { "epoch": 0.7, "grad_norm": 1.1489300727844238, "learning_rate": 2.1453335624611883e-06, "loss": 0.4965, "step": 10885 }, { "epoch": 0.7, "grad_norm": 1.159691333770752, "learning_rate": 2.144475424135231e-06, "loss": 0.5566, "step": 10886 }, { "epoch": 0.7, "grad_norm": 1.1742277145385742, "learning_rate": 2.1436174106168877e-06, "loss": 0.534, "step": 10887 }, { "epoch": 0.7, "grad_norm": 1.1570003032684326, "learning_rate": 2.1427595219436558e-06, "loss": 0.5334, "step": 10888 }, { "epoch": 0.7, "grad_norm": 1.1896235942840576, "learning_rate": 2.1419017581530334e-06, "loss": 0.4931, "step": 10889 }, { "epoch": 0.7, "grad_norm": 1.1426841020584106, "learning_rate": 2.141044119282511e-06, "loss": 0.5561, "step": 10890 }, { "epoch": 0.7, "grad_norm": 1.1059958934783936, "learning_rate": 2.1401866053695743e-06, "loss": 0.5487, "step": 10891 }, { "epoch": 0.7, "grad_norm": 1.1653251647949219, "learning_rate": 2.139329216451704e-06, "loss": 0.4754, "step": 10892 }, { "epoch": 0.7, "grad_norm": 1.1544883251190186, "learning_rate": 2.138471952566372e-06, "loss": 0.5846, "step": 10893 }, { "epoch": 0.7, "grad_norm": 1.1857866048812866, "learning_rate": 2.137614813751048e-06, "loss": 0.5166, "step": 10894 }, { "epoch": 0.7, "grad_norm": 1.1439056396484375, "learning_rate": 2.1367578000431943e-06, "loss": 0.5259, "step": 10895 }, { "epoch": 0.7, "grad_norm": 1.2961103916168213, "learning_rate": 2.135900911480271e-06, "loss": 0.5507, "step": 10896 }, { "epoch": 0.7, "grad_norm": 1.2360233068466187, "learning_rate": 2.135044148099731e-06, "loss": 0.5388, "step": 10897 }, { "epoch": 0.7, "grad_norm": 1.0862430334091187, "learning_rate": 2.1341875099390175e-06, "loss": 0.5141, "step": 10898 }, { "epoch": 0.7, "grad_norm": 1.241252064704895, "learning_rate": 2.133330997035574e-06, "loss": 0.5041, "step": 10899 }, { "epoch": 0.7, "grad_norm": 1.2664767503738403, "learning_rate": 2.132474609426837e-06, "loss": 0.5211, "step": 10900 }, { "epoch": 0.7, "grad_norm": 1.2936593294143677, "learning_rate": 2.131618347150236e-06, "loss": 0.5477, "step": 10901 }, { "epoch": 0.7, "grad_norm": 1.2074223756790161, "learning_rate": 2.1307622102431983e-06, "loss": 0.4912, "step": 10902 }, { "epoch": 0.7, "grad_norm": 1.2465784549713135, "learning_rate": 2.129906198743141e-06, "loss": 0.5455, "step": 10903 }, { "epoch": 0.7, "grad_norm": 1.1572496891021729, "learning_rate": 2.129050312687477e-06, "loss": 0.509, "step": 10904 }, { "epoch": 0.7, "grad_norm": 1.1585040092468262, "learning_rate": 2.1281945521136184e-06, "loss": 0.4953, "step": 10905 }, { "epoch": 0.7, "grad_norm": 1.1711180210113525, "learning_rate": 2.1273389170589674e-06, "loss": 0.5083, "step": 10906 }, { "epoch": 0.7, "grad_norm": 1.2858448028564453, "learning_rate": 2.126483407560919e-06, "loss": 0.5411, "step": 10907 }, { "epoch": 0.7, "grad_norm": 1.2626785039901733, "learning_rate": 2.125628023656867e-06, "loss": 0.5365, "step": 10908 }, { "epoch": 0.7, "grad_norm": 1.225522756576538, "learning_rate": 2.1247727653841986e-06, "loss": 0.5184, "step": 10909 }, { "epoch": 0.7, "grad_norm": 1.1448044776916504, "learning_rate": 2.123917632780294e-06, "loss": 0.5235, "step": 10910 }, { "epoch": 0.7, "grad_norm": 1.1483371257781982, "learning_rate": 2.1230626258825316e-06, "loss": 0.4394, "step": 10911 }, { "epoch": 0.7, "grad_norm": 1.1480122804641724, "learning_rate": 2.1222077447282767e-06, "loss": 0.5009, "step": 10912 }, { "epoch": 0.7, "grad_norm": 1.176880121231079, "learning_rate": 2.1213529893548972e-06, "loss": 0.541, "step": 10913 }, { "epoch": 0.7, "grad_norm": 1.1239573955535889, "learning_rate": 2.1204983597997515e-06, "loss": 0.4594, "step": 10914 }, { "epoch": 0.7, "grad_norm": 1.1938402652740479, "learning_rate": 2.1196438561001924e-06, "loss": 0.5413, "step": 10915 }, { "epoch": 0.7, "grad_norm": 1.1672698259353638, "learning_rate": 2.1187894782935716e-06, "loss": 0.4972, "step": 10916 }, { "epoch": 0.7, "grad_norm": 1.1508220434188843, "learning_rate": 2.117935226417227e-06, "loss": 0.4957, "step": 10917 }, { "epoch": 0.7, "grad_norm": 1.2809566259384155, "learning_rate": 2.117081100508498e-06, "loss": 0.5241, "step": 10918 }, { "epoch": 0.7, "grad_norm": 1.1892884969711304, "learning_rate": 2.1162271006047157e-06, "loss": 0.4791, "step": 10919 }, { "epoch": 0.7, "grad_norm": 1.2159110307693481, "learning_rate": 2.115373226743207e-06, "loss": 0.5349, "step": 10920 }, { "epoch": 0.7, "grad_norm": 1.2089263200759888, "learning_rate": 2.1145194789612917e-06, "loss": 0.5269, "step": 10921 }, { "epoch": 0.7, "grad_norm": 1.228657603263855, "learning_rate": 2.113665857296288e-06, "loss": 0.5379, "step": 10922 }, { "epoch": 0.71, "grad_norm": 1.2372928857803345, "learning_rate": 2.1128123617854996e-06, "loss": 0.5277, "step": 10923 }, { "epoch": 0.71, "grad_norm": 1.19700288772583, "learning_rate": 2.1119589924662346e-06, "loss": 0.5336, "step": 10924 }, { "epoch": 0.71, "grad_norm": 1.1007219552993774, "learning_rate": 2.111105749375791e-06, "loss": 0.5142, "step": 10925 }, { "epoch": 0.71, "grad_norm": 1.230251431465149, "learning_rate": 2.1102526325514617e-06, "loss": 0.547, "step": 10926 }, { "epoch": 0.71, "grad_norm": 1.1218841075897217, "learning_rate": 2.1093996420305337e-06, "loss": 0.5335, "step": 10927 }, { "epoch": 0.71, "grad_norm": 1.1333955526351929, "learning_rate": 2.1085467778502915e-06, "loss": 0.515, "step": 10928 }, { "epoch": 0.71, "grad_norm": 1.1063426733016968, "learning_rate": 2.107694040048008e-06, "loss": 0.5182, "step": 10929 }, { "epoch": 0.71, "grad_norm": 1.2738878726959229, "learning_rate": 2.1068414286609553e-06, "loss": 0.5361, "step": 10930 }, { "epoch": 0.71, "grad_norm": 1.2022713422775269, "learning_rate": 2.1059889437264e-06, "loss": 0.525, "step": 10931 }, { "epoch": 0.71, "grad_norm": 1.2396583557128906, "learning_rate": 2.1051365852816004e-06, "loss": 0.4982, "step": 10932 }, { "epoch": 0.71, "grad_norm": 1.2496657371520996, "learning_rate": 2.1042843533638123e-06, "loss": 0.5093, "step": 10933 }, { "epoch": 0.71, "grad_norm": 1.1759883165359497, "learning_rate": 2.103432248010286e-06, "loss": 0.4961, "step": 10934 }, { "epoch": 0.71, "grad_norm": 1.1343529224395752, "learning_rate": 2.1025802692582607e-06, "loss": 0.5216, "step": 10935 }, { "epoch": 0.71, "grad_norm": 1.1687930822372437, "learning_rate": 2.101728417144977e-06, "loss": 0.5019, "step": 10936 }, { "epoch": 0.71, "grad_norm": 1.1434701681137085, "learning_rate": 2.1008766917076663e-06, "loss": 0.5224, "step": 10937 }, { "epoch": 0.71, "grad_norm": 1.0434842109680176, "learning_rate": 2.1000250929835554e-06, "loss": 0.5134, "step": 10938 }, { "epoch": 0.71, "grad_norm": 1.3745777606964111, "learning_rate": 2.0991736210098655e-06, "loss": 0.5281, "step": 10939 }, { "epoch": 0.71, "grad_norm": 1.1505494117736816, "learning_rate": 2.098322275823815e-06, "loss": 0.5138, "step": 10940 }, { "epoch": 0.71, "grad_norm": 1.2229636907577515, "learning_rate": 2.0974710574626085e-06, "loss": 0.492, "step": 10941 }, { "epoch": 0.71, "grad_norm": 1.2031042575836182, "learning_rate": 2.096619965963454e-06, "loss": 0.5184, "step": 10942 }, { "epoch": 0.71, "grad_norm": 1.2436527013778687, "learning_rate": 2.09576900136355e-06, "loss": 0.5331, "step": 10943 }, { "epoch": 0.71, "grad_norm": 1.1415568590164185, "learning_rate": 2.0949181637000896e-06, "loss": 0.4953, "step": 10944 }, { "epoch": 0.71, "grad_norm": 1.1630218029022217, "learning_rate": 2.0940674530102618e-06, "loss": 0.5035, "step": 10945 }, { "epoch": 0.71, "grad_norm": 1.0661331415176392, "learning_rate": 2.0932168693312494e-06, "loss": 0.4729, "step": 10946 }, { "epoch": 0.71, "grad_norm": 1.2413548231124878, "learning_rate": 2.0923664127002263e-06, "loss": 0.5328, "step": 10947 }, { "epoch": 0.71, "grad_norm": 1.1722779273986816, "learning_rate": 2.0915160831543656e-06, "loss": 0.5099, "step": 10948 }, { "epoch": 0.71, "grad_norm": 1.1263946294784546, "learning_rate": 2.090665880730833e-06, "loss": 0.5497, "step": 10949 }, { "epoch": 0.71, "grad_norm": 1.1242125034332275, "learning_rate": 2.0898158054667884e-06, "loss": 0.5264, "step": 10950 }, { "epoch": 0.71, "grad_norm": 1.2162784337997437, "learning_rate": 2.0889658573993866e-06, "loss": 0.5203, "step": 10951 }, { "epoch": 0.71, "grad_norm": 1.1904443502426147, "learning_rate": 2.0881160365657787e-06, "loss": 0.5406, "step": 10952 }, { "epoch": 0.71, "grad_norm": 1.152585744857788, "learning_rate": 2.087266343003104e-06, "loss": 0.4917, "step": 10953 }, { "epoch": 0.71, "grad_norm": 1.2544835805892944, "learning_rate": 2.0864167767485026e-06, "loss": 0.5356, "step": 10954 }, { "epoch": 0.71, "grad_norm": 1.211654543876648, "learning_rate": 2.085567337839107e-06, "loss": 0.5481, "step": 10955 }, { "epoch": 0.71, "grad_norm": 1.1094915866851807, "learning_rate": 2.0847180263120432e-06, "loss": 0.4596, "step": 10956 }, { "epoch": 0.71, "grad_norm": 1.1920171976089478, "learning_rate": 2.0838688422044347e-06, "loss": 0.5381, "step": 10957 }, { "epoch": 0.71, "grad_norm": 1.2113484144210815, "learning_rate": 2.083019785553394e-06, "loss": 0.5393, "step": 10958 }, { "epoch": 0.71, "grad_norm": 1.1329344511032104, "learning_rate": 2.0821708563960323e-06, "loss": 0.451, "step": 10959 }, { "epoch": 0.71, "grad_norm": 1.2831807136535645, "learning_rate": 2.0813220547694545e-06, "loss": 0.5589, "step": 10960 }, { "epoch": 0.71, "grad_norm": 1.2042254209518433, "learning_rate": 2.080473380710759e-06, "loss": 0.4957, "step": 10961 }, { "epoch": 0.71, "grad_norm": 1.2739726305007935, "learning_rate": 2.0796248342570402e-06, "loss": 0.5846, "step": 10962 }, { "epoch": 0.71, "grad_norm": 1.0394171476364136, "learning_rate": 2.0787764154453864e-06, "loss": 0.5315, "step": 10963 }, { "epoch": 0.71, "grad_norm": 1.272746205329895, "learning_rate": 2.0779281243128775e-06, "loss": 0.5425, "step": 10964 }, { "epoch": 0.71, "grad_norm": 1.2502162456512451, "learning_rate": 2.077079960896591e-06, "loss": 0.5201, "step": 10965 }, { "epoch": 0.71, "grad_norm": 1.2268046140670776, "learning_rate": 2.076231925233598e-06, "loss": 0.5124, "step": 10966 }, { "epoch": 0.71, "grad_norm": 1.1913813352584839, "learning_rate": 2.075384017360964e-06, "loss": 0.5038, "step": 10967 }, { "epoch": 0.71, "grad_norm": 1.132325291633606, "learning_rate": 2.0745362373157496e-06, "loss": 0.5453, "step": 10968 }, { "epoch": 0.71, "grad_norm": 1.1152671575546265, "learning_rate": 2.0736885851350096e-06, "loss": 0.4799, "step": 10969 }, { "epoch": 0.71, "grad_norm": 1.2182691097259521, "learning_rate": 2.07284106085579e-06, "loss": 0.5571, "step": 10970 }, { "epoch": 0.71, "grad_norm": 1.1383436918258667, "learning_rate": 2.0719936645151355e-06, "loss": 0.5159, "step": 10971 }, { "epoch": 0.71, "grad_norm": 1.2349929809570312, "learning_rate": 2.0711463961500838e-06, "loss": 0.5589, "step": 10972 }, { "epoch": 0.71, "grad_norm": 1.1483157873153687, "learning_rate": 2.070299255797666e-06, "loss": 0.4902, "step": 10973 }, { "epoch": 0.71, "grad_norm": 1.1804920434951782, "learning_rate": 2.0694522434949094e-06, "loss": 0.5659, "step": 10974 }, { "epoch": 0.71, "grad_norm": 1.3400694131851196, "learning_rate": 2.0686053592788365e-06, "loss": 0.57, "step": 10975 }, { "epoch": 0.71, "grad_norm": 1.1550425291061401, "learning_rate": 2.0677586031864576e-06, "loss": 0.489, "step": 10976 }, { "epoch": 0.71, "grad_norm": 1.2146352529525757, "learning_rate": 2.066911975254785e-06, "loss": 0.5411, "step": 10977 }, { "epoch": 0.71, "grad_norm": 1.2046092748641968, "learning_rate": 2.066065475520823e-06, "loss": 0.5287, "step": 10978 }, { "epoch": 0.71, "grad_norm": 1.1530790328979492, "learning_rate": 2.0652191040215693e-06, "loss": 0.4965, "step": 10979 }, { "epoch": 0.71, "grad_norm": 1.3009995222091675, "learning_rate": 2.0643728607940166e-06, "loss": 0.5479, "step": 10980 }, { "epoch": 0.71, "grad_norm": 1.2056808471679688, "learning_rate": 2.063526745875154e-06, "loss": 0.5311, "step": 10981 }, { "epoch": 0.71, "grad_norm": 1.173947811126709, "learning_rate": 2.062680759301959e-06, "loss": 0.4828, "step": 10982 }, { "epoch": 0.71, "grad_norm": 1.22642183303833, "learning_rate": 2.0618349011114104e-06, "loss": 0.5523, "step": 10983 }, { "epoch": 0.71, "grad_norm": 1.2595455646514893, "learning_rate": 2.060989171340477e-06, "loss": 0.5357, "step": 10984 }, { "epoch": 0.71, "grad_norm": 1.2140384912490845, "learning_rate": 2.060143570026124e-06, "loss": 0.5608, "step": 10985 }, { "epoch": 0.71, "grad_norm": 1.1176152229309082, "learning_rate": 2.059298097205313e-06, "loss": 0.4588, "step": 10986 }, { "epoch": 0.71, "grad_norm": 1.1942483186721802, "learning_rate": 2.0584527529149934e-06, "loss": 0.5394, "step": 10987 }, { "epoch": 0.71, "grad_norm": 1.2425072193145752, "learning_rate": 2.057607537192114e-06, "loss": 0.5274, "step": 10988 }, { "epoch": 0.71, "grad_norm": 1.2124979496002197, "learning_rate": 2.0567624500736185e-06, "loss": 0.5483, "step": 10989 }, { "epoch": 0.71, "grad_norm": 1.1577330827713013, "learning_rate": 2.0559174915964418e-06, "loss": 0.5031, "step": 10990 }, { "epoch": 0.71, "grad_norm": 1.1227809190750122, "learning_rate": 2.0550726617975182e-06, "loss": 0.4898, "step": 10991 }, { "epoch": 0.71, "grad_norm": 1.22527015209198, "learning_rate": 2.0542279607137684e-06, "loss": 0.4929, "step": 10992 }, { "epoch": 0.71, "grad_norm": 1.2533596754074097, "learning_rate": 2.0533833883821143e-06, "loss": 0.5093, "step": 10993 }, { "epoch": 0.71, "grad_norm": 1.1502468585968018, "learning_rate": 2.0525389448394694e-06, "loss": 0.4717, "step": 10994 }, { "epoch": 0.71, "grad_norm": 1.1745405197143555, "learning_rate": 2.0516946301227452e-06, "loss": 0.5255, "step": 10995 }, { "epoch": 0.71, "grad_norm": 1.1489413976669312, "learning_rate": 2.0508504442688397e-06, "loss": 0.5024, "step": 10996 }, { "epoch": 0.71, "grad_norm": 1.2153180837631226, "learning_rate": 2.0500063873146527e-06, "loss": 0.5467, "step": 10997 }, { "epoch": 0.71, "grad_norm": 1.2806814908981323, "learning_rate": 2.0491624592970753e-06, "loss": 0.5261, "step": 10998 }, { "epoch": 0.71, "grad_norm": 1.1411585807800293, "learning_rate": 2.0483186602529938e-06, "loss": 0.486, "step": 10999 }, { "epoch": 0.71, "grad_norm": 1.1816222667694092, "learning_rate": 2.0474749902192896e-06, "loss": 0.5386, "step": 11000 }, { "epoch": 0.71, "grad_norm": 1.2139897346496582, "learning_rate": 2.046631449232834e-06, "loss": 0.5203, "step": 11001 }, { "epoch": 0.71, "grad_norm": 1.1438015699386597, "learning_rate": 2.0457880373304988e-06, "loss": 0.5033, "step": 11002 }, { "epoch": 0.71, "grad_norm": 1.2015044689178467, "learning_rate": 2.044944754549146e-06, "loss": 0.4914, "step": 11003 }, { "epoch": 0.71, "grad_norm": 1.0419646501541138, "learning_rate": 2.044101600925636e-06, "loss": 0.4538, "step": 11004 }, { "epoch": 0.71, "grad_norm": 1.0976723432540894, "learning_rate": 2.0432585764968162e-06, "loss": 0.5113, "step": 11005 }, { "epoch": 0.71, "grad_norm": 1.058115005493164, "learning_rate": 2.042415681299536e-06, "loss": 0.4958, "step": 11006 }, { "epoch": 0.71, "grad_norm": 1.1173226833343506, "learning_rate": 2.0415729153706355e-06, "loss": 0.4836, "step": 11007 }, { "epoch": 0.71, "grad_norm": 1.225178599357605, "learning_rate": 2.0407302787469502e-06, "loss": 0.5207, "step": 11008 }, { "epoch": 0.71, "grad_norm": 1.1174618005752563, "learning_rate": 2.0398877714653094e-06, "loss": 0.5291, "step": 11009 }, { "epoch": 0.71, "grad_norm": 1.2507660388946533, "learning_rate": 2.039045393562539e-06, "loss": 0.5264, "step": 11010 }, { "epoch": 0.71, "grad_norm": 1.2900736331939697, "learning_rate": 2.0382031450754525e-06, "loss": 0.5096, "step": 11011 }, { "epoch": 0.71, "grad_norm": 1.11432945728302, "learning_rate": 2.0373610260408656e-06, "loss": 0.4953, "step": 11012 }, { "epoch": 0.71, "grad_norm": 1.1885151863098145, "learning_rate": 2.0365190364955845e-06, "loss": 0.4882, "step": 11013 }, { "epoch": 0.71, "grad_norm": 1.0927493572235107, "learning_rate": 2.03567717647641e-06, "loss": 0.514, "step": 11014 }, { "epoch": 0.71, "grad_norm": 1.1448920965194702, "learning_rate": 2.0348354460201387e-06, "loss": 0.5349, "step": 11015 }, { "epoch": 0.71, "grad_norm": 1.2067970037460327, "learning_rate": 2.033993845163561e-06, "loss": 0.5354, "step": 11016 }, { "epoch": 0.71, "grad_norm": 1.1603467464447021, "learning_rate": 2.033152373943458e-06, "loss": 0.5054, "step": 11017 }, { "epoch": 0.71, "grad_norm": 1.233099102973938, "learning_rate": 2.032311032396611e-06, "loss": 0.5142, "step": 11018 }, { "epoch": 0.71, "grad_norm": 1.1770883798599243, "learning_rate": 2.0314698205597915e-06, "loss": 0.5262, "step": 11019 }, { "epoch": 0.71, "grad_norm": 1.1530286073684692, "learning_rate": 2.030628738469768e-06, "loss": 0.5483, "step": 11020 }, { "epoch": 0.71, "grad_norm": 1.165831208229065, "learning_rate": 2.0297877861633003e-06, "loss": 0.5151, "step": 11021 }, { "epoch": 0.71, "grad_norm": 1.15817129611969, "learning_rate": 2.028946963677147e-06, "loss": 0.4561, "step": 11022 }, { "epoch": 0.71, "grad_norm": 1.207376480102539, "learning_rate": 2.0281062710480552e-06, "loss": 0.5474, "step": 11023 }, { "epoch": 0.71, "grad_norm": 1.2013784646987915, "learning_rate": 2.0272657083127705e-06, "loss": 0.5407, "step": 11024 }, { "epoch": 0.71, "grad_norm": 1.1538957357406616, "learning_rate": 2.0264252755080322e-06, "loss": 0.5118, "step": 11025 }, { "epoch": 0.71, "grad_norm": 1.2734509706497192, "learning_rate": 2.025584972670573e-06, "loss": 0.5094, "step": 11026 }, { "epoch": 0.71, "grad_norm": 1.1547635793685913, "learning_rate": 2.02474479983712e-06, "loss": 0.5296, "step": 11027 }, { "epoch": 0.71, "grad_norm": 1.3374853134155273, "learning_rate": 2.023904757044398e-06, "loss": 0.5735, "step": 11028 }, { "epoch": 0.71, "grad_norm": 1.1156275272369385, "learning_rate": 2.023064844329119e-06, "loss": 0.504, "step": 11029 }, { "epoch": 0.71, "grad_norm": 1.3124126195907593, "learning_rate": 2.0222250617279944e-06, "loss": 0.5576, "step": 11030 }, { "epoch": 0.71, "grad_norm": 1.2276924848556519, "learning_rate": 2.0213854092777304e-06, "loss": 0.5301, "step": 11031 }, { "epoch": 0.71, "grad_norm": 1.1846524477005005, "learning_rate": 2.0205458870150248e-06, "loss": 0.5456, "step": 11032 }, { "epoch": 0.71, "grad_norm": 1.2135207653045654, "learning_rate": 2.019706494976571e-06, "loss": 0.5215, "step": 11033 }, { "epoch": 0.71, "grad_norm": 1.2034220695495605, "learning_rate": 2.0188672331990594e-06, "loss": 0.5551, "step": 11034 }, { "epoch": 0.71, "grad_norm": 1.104327917098999, "learning_rate": 2.018028101719167e-06, "loss": 0.5242, "step": 11035 }, { "epoch": 0.71, "grad_norm": 1.2486478090286255, "learning_rate": 2.017189100573574e-06, "loss": 0.5093, "step": 11036 }, { "epoch": 0.71, "grad_norm": 1.1476960182189941, "learning_rate": 2.0163502297989486e-06, "loss": 0.504, "step": 11037 }, { "epoch": 0.71, "grad_norm": 1.2401280403137207, "learning_rate": 2.0155114894319572e-06, "loss": 0.4662, "step": 11038 }, { "epoch": 0.71, "grad_norm": 1.3088401556015015, "learning_rate": 2.0146728795092586e-06, "loss": 0.5171, "step": 11039 }, { "epoch": 0.71, "grad_norm": 1.1352276802062988, "learning_rate": 2.0138344000675076e-06, "loss": 0.5191, "step": 11040 }, { "epoch": 0.71, "grad_norm": 1.1585423946380615, "learning_rate": 2.0129960511433487e-06, "loss": 0.4793, "step": 11041 }, { "epoch": 0.71, "grad_norm": 1.1978154182434082, "learning_rate": 2.0121578327734265e-06, "loss": 0.5464, "step": 11042 }, { "epoch": 0.71, "grad_norm": 1.1181037425994873, "learning_rate": 2.011319744994376e-06, "loss": 0.5526, "step": 11043 }, { "epoch": 0.71, "grad_norm": 1.164182424545288, "learning_rate": 2.010481787842829e-06, "loss": 0.5454, "step": 11044 }, { "epoch": 0.71, "grad_norm": 1.0868151187896729, "learning_rate": 2.0096439613554115e-06, "loss": 0.4925, "step": 11045 }, { "epoch": 0.71, "grad_norm": 1.3813055753707886, "learning_rate": 2.0088062655687397e-06, "loss": 0.5306, "step": 11046 }, { "epoch": 0.71, "grad_norm": 1.0967265367507935, "learning_rate": 2.007968700519428e-06, "loss": 0.4824, "step": 11047 }, { "epoch": 0.71, "grad_norm": 1.202020525932312, "learning_rate": 2.0071312662440856e-06, "loss": 0.5643, "step": 11048 }, { "epoch": 0.71, "grad_norm": 1.198833703994751, "learning_rate": 2.0062939627793136e-06, "loss": 0.5778, "step": 11049 }, { "epoch": 0.71, "grad_norm": 1.1588895320892334, "learning_rate": 2.005456790161709e-06, "loss": 0.522, "step": 11050 }, { "epoch": 0.71, "grad_norm": 1.1687685251235962, "learning_rate": 2.0046197484278633e-06, "loss": 0.5183, "step": 11051 }, { "epoch": 0.71, "grad_norm": 1.2235785722732544, "learning_rate": 2.0037828376143586e-06, "loss": 0.589, "step": 11052 }, { "epoch": 0.71, "grad_norm": 1.1728606224060059, "learning_rate": 2.002946057757776e-06, "loss": 0.5295, "step": 11053 }, { "epoch": 0.71, "grad_norm": 1.1998884677886963, "learning_rate": 2.002109408894688e-06, "loss": 0.5537, "step": 11054 }, { "epoch": 0.71, "grad_norm": 1.2309032678604126, "learning_rate": 2.0012728910616634e-06, "loss": 0.5894, "step": 11055 }, { "epoch": 0.71, "grad_norm": 1.0635708570480347, "learning_rate": 2.0004365042952645e-06, "loss": 0.4996, "step": 11056 }, { "epoch": 0.71, "grad_norm": 1.1448440551757812, "learning_rate": 1.9996002486320483e-06, "loss": 0.5227, "step": 11057 }, { "epoch": 0.71, "grad_norm": 1.2357063293457031, "learning_rate": 1.9987641241085622e-06, "loss": 0.5283, "step": 11058 }, { "epoch": 0.71, "grad_norm": 1.2155507802963257, "learning_rate": 1.9979281307613527e-06, "loss": 0.5639, "step": 11059 }, { "epoch": 0.71, "grad_norm": 1.1481010913848877, "learning_rate": 1.99709226862696e-06, "loss": 0.5174, "step": 11060 }, { "epoch": 0.71, "grad_norm": 1.1131728887557983, "learning_rate": 1.996256537741916e-06, "loss": 0.4769, "step": 11061 }, { "epoch": 0.71, "grad_norm": 1.1238988637924194, "learning_rate": 1.995420938142749e-06, "loss": 0.5025, "step": 11062 }, { "epoch": 0.71, "grad_norm": 1.167853832244873, "learning_rate": 1.9945854698659832e-06, "loss": 0.4957, "step": 11063 }, { "epoch": 0.71, "grad_norm": 1.2350841760635376, "learning_rate": 1.99375013294813e-06, "loss": 0.569, "step": 11064 }, { "epoch": 0.71, "grad_norm": 1.2819474935531616, "learning_rate": 1.9929149274257027e-06, "loss": 0.4995, "step": 11065 }, { "epoch": 0.71, "grad_norm": 1.2006618976593018, "learning_rate": 1.9920798533352053e-06, "loss": 0.5199, "step": 11066 }, { "epoch": 0.71, "grad_norm": 1.1142550706863403, "learning_rate": 1.991244910713137e-06, "loss": 0.5395, "step": 11067 }, { "epoch": 0.71, "grad_norm": 1.1227684020996094, "learning_rate": 1.9904100995959914e-06, "loss": 0.5794, "step": 11068 }, { "epoch": 0.71, "grad_norm": 1.149542212486267, "learning_rate": 1.9895754200202565e-06, "loss": 0.5255, "step": 11069 }, { "epoch": 0.71, "grad_norm": 1.2013235092163086, "learning_rate": 1.988740872022412e-06, "loss": 0.5151, "step": 11070 }, { "epoch": 0.71, "grad_norm": 1.1357641220092773, "learning_rate": 1.987906455638934e-06, "loss": 0.5193, "step": 11071 }, { "epoch": 0.71, "grad_norm": 1.1498252153396606, "learning_rate": 1.987072170906294e-06, "loss": 0.5156, "step": 11072 }, { "epoch": 0.71, "grad_norm": 1.2315514087677002, "learning_rate": 1.9862380178609564e-06, "loss": 0.5301, "step": 11073 }, { "epoch": 0.71, "grad_norm": 1.2341629266738892, "learning_rate": 1.985403996539379e-06, "loss": 0.6031, "step": 11074 }, { "epoch": 0.71, "grad_norm": 1.1291269063949585, "learning_rate": 1.9845701069780166e-06, "loss": 0.534, "step": 11075 }, { "epoch": 0.71, "grad_norm": 1.1960976123809814, "learning_rate": 1.983736349213313e-06, "loss": 0.474, "step": 11076 }, { "epoch": 0.72, "grad_norm": 1.2108039855957031, "learning_rate": 1.982902723281712e-06, "loss": 0.556, "step": 11077 }, { "epoch": 0.72, "grad_norm": 1.1010340452194214, "learning_rate": 1.9820692292196493e-06, "loss": 0.5295, "step": 11078 }, { "epoch": 0.72, "grad_norm": 1.1954740285873413, "learning_rate": 1.9812358670635533e-06, "loss": 0.4779, "step": 11079 }, { "epoch": 0.72, "grad_norm": 1.172312617301941, "learning_rate": 1.9804026368498514e-06, "loss": 0.5643, "step": 11080 }, { "epoch": 0.72, "grad_norm": 1.0654834508895874, "learning_rate": 1.9795695386149573e-06, "loss": 0.4941, "step": 11081 }, { "epoch": 0.72, "grad_norm": 1.2100834846496582, "learning_rate": 1.978736572395286e-06, "loss": 0.5183, "step": 11082 }, { "epoch": 0.72, "grad_norm": 1.318042278289795, "learning_rate": 1.977903738227244e-06, "loss": 0.5782, "step": 11083 }, { "epoch": 0.72, "grad_norm": 1.1127444505691528, "learning_rate": 1.9770710361472343e-06, "loss": 0.5119, "step": 11084 }, { "epoch": 0.72, "grad_norm": 1.1835476160049438, "learning_rate": 1.976238466191649e-06, "loss": 0.5132, "step": 11085 }, { "epoch": 0.72, "grad_norm": 1.148400902748108, "learning_rate": 1.975406028396879e-06, "loss": 0.4751, "step": 11086 }, { "epoch": 0.72, "grad_norm": 1.2359836101531982, "learning_rate": 1.974573722799307e-06, "loss": 0.5114, "step": 11087 }, { "epoch": 0.72, "grad_norm": 1.122793197631836, "learning_rate": 1.9737415494353128e-06, "loss": 0.5279, "step": 11088 }, { "epoch": 0.72, "grad_norm": 1.2114402055740356, "learning_rate": 1.972909508341269e-06, "loss": 0.5838, "step": 11089 }, { "epoch": 0.72, "grad_norm": 1.1387890577316284, "learning_rate": 1.972077599553539e-06, "loss": 0.5074, "step": 11090 }, { "epoch": 0.72, "grad_norm": 1.1870217323303223, "learning_rate": 1.971245823108485e-06, "loss": 0.5349, "step": 11091 }, { "epoch": 0.72, "grad_norm": 1.1514025926589966, "learning_rate": 1.970414179042462e-06, "loss": 0.4762, "step": 11092 }, { "epoch": 0.72, "grad_norm": 1.252679467201233, "learning_rate": 1.96958266739182e-06, "loss": 0.521, "step": 11093 }, { "epoch": 0.72, "grad_norm": 1.0765278339385986, "learning_rate": 1.9687512881928995e-06, "loss": 0.5135, "step": 11094 }, { "epoch": 0.72, "grad_norm": 1.2319992780685425, "learning_rate": 1.9679200414820392e-06, "loss": 0.5438, "step": 11095 }, { "epoch": 0.72, "grad_norm": 1.0572688579559326, "learning_rate": 1.9670889272955717e-06, "loss": 0.5105, "step": 11096 }, { "epoch": 0.72, "grad_norm": 1.1850006580352783, "learning_rate": 1.9662579456698217e-06, "loss": 0.5104, "step": 11097 }, { "epoch": 0.72, "grad_norm": 1.107637882232666, "learning_rate": 1.9654270966411115e-06, "loss": 0.5023, "step": 11098 }, { "epoch": 0.72, "grad_norm": 1.2358818054199219, "learning_rate": 1.964596380245752e-06, "loss": 0.5626, "step": 11099 }, { "epoch": 0.72, "grad_norm": 1.2866177558898926, "learning_rate": 1.963765796520053e-06, "loss": 0.5425, "step": 11100 }, { "epoch": 0.72, "grad_norm": 1.1964001655578613, "learning_rate": 1.9629353455003175e-06, "loss": 0.56, "step": 11101 }, { "epoch": 0.72, "grad_norm": 1.2255994081497192, "learning_rate": 1.9621050272228425e-06, "loss": 0.5123, "step": 11102 }, { "epoch": 0.72, "grad_norm": 1.1625604629516602, "learning_rate": 1.961274841723919e-06, "loss": 0.5309, "step": 11103 }, { "epoch": 0.72, "grad_norm": 1.2465362548828125, "learning_rate": 1.960444789039834e-06, "loss": 0.5263, "step": 11104 }, { "epoch": 0.72, "grad_norm": 1.1116845607757568, "learning_rate": 1.9596148692068627e-06, "loss": 0.4917, "step": 11105 }, { "epoch": 0.72, "grad_norm": 1.197014331817627, "learning_rate": 1.958785082261282e-06, "loss": 0.508, "step": 11106 }, { "epoch": 0.72, "grad_norm": 1.2776246070861816, "learning_rate": 1.957955428239358e-06, "loss": 0.5273, "step": 11107 }, { "epoch": 0.72, "grad_norm": 1.1194803714752197, "learning_rate": 1.9571259071773542e-06, "loss": 0.4693, "step": 11108 }, { "epoch": 0.72, "grad_norm": 1.153292179107666, "learning_rate": 1.9562965191115263e-06, "loss": 0.5178, "step": 11109 }, { "epoch": 0.72, "grad_norm": 1.1847569942474365, "learning_rate": 1.9554672640781265e-06, "loss": 0.5392, "step": 11110 }, { "epoch": 0.72, "grad_norm": 1.1261603832244873, "learning_rate": 1.9546381421133952e-06, "loss": 0.4648, "step": 11111 }, { "epoch": 0.72, "grad_norm": 1.1425937414169312, "learning_rate": 1.953809153253575e-06, "loss": 0.5175, "step": 11112 }, { "epoch": 0.72, "grad_norm": 1.2052353620529175, "learning_rate": 1.952980297534896e-06, "loss": 0.556, "step": 11113 }, { "epoch": 0.72, "grad_norm": 1.1139724254608154, "learning_rate": 1.952151574993587e-06, "loss": 0.5439, "step": 11114 }, { "epoch": 0.72, "grad_norm": 1.1460152864456177, "learning_rate": 1.9513229856658692e-06, "loss": 0.5471, "step": 11115 }, { "epoch": 0.72, "grad_norm": 1.1581193208694458, "learning_rate": 1.95049452958796e-06, "loss": 0.513, "step": 11116 }, { "epoch": 0.72, "grad_norm": 1.1304556131362915, "learning_rate": 1.9496662067960655e-06, "loss": 0.4931, "step": 11117 }, { "epoch": 0.72, "grad_norm": 1.2454166412353516, "learning_rate": 1.948838017326391e-06, "loss": 0.5437, "step": 11118 }, { "epoch": 0.72, "grad_norm": 1.203216314315796, "learning_rate": 1.9480099612151344e-06, "loss": 0.4795, "step": 11119 }, { "epoch": 0.72, "grad_norm": 1.358306646347046, "learning_rate": 1.9471820384984887e-06, "loss": 0.5405, "step": 11120 }, { "epoch": 0.72, "grad_norm": 1.1673425436019897, "learning_rate": 1.9463542492126392e-06, "loss": 0.5193, "step": 11121 }, { "epoch": 0.72, "grad_norm": 1.190351963043213, "learning_rate": 1.9455265933937687e-06, "loss": 0.5176, "step": 11122 }, { "epoch": 0.72, "grad_norm": 1.160899043083191, "learning_rate": 1.9446990710780485e-06, "loss": 0.5167, "step": 11123 }, { "epoch": 0.72, "grad_norm": 1.1242200136184692, "learning_rate": 1.943871682301649e-06, "loss": 0.5662, "step": 11124 }, { "epoch": 0.72, "grad_norm": 1.2985631227493286, "learning_rate": 1.9430444271007336e-06, "loss": 0.6143, "step": 11125 }, { "epoch": 0.72, "grad_norm": 1.0956913232803345, "learning_rate": 1.942217305511459e-06, "loss": 0.4666, "step": 11126 }, { "epoch": 0.72, "grad_norm": 1.1091556549072266, "learning_rate": 1.9413903175699787e-06, "loss": 0.5023, "step": 11127 }, { "epoch": 0.72, "grad_norm": 1.3408472537994385, "learning_rate": 1.9405634633124347e-06, "loss": 0.5447, "step": 11128 }, { "epoch": 0.72, "grad_norm": 1.3867262601852417, "learning_rate": 1.939736742774968e-06, "loss": 0.5598, "step": 11129 }, { "epoch": 0.72, "grad_norm": 1.2257075309753418, "learning_rate": 1.938910155993713e-06, "loss": 0.5193, "step": 11130 }, { "epoch": 0.72, "grad_norm": 1.2554773092269897, "learning_rate": 1.938083703004797e-06, "loss": 0.5578, "step": 11131 }, { "epoch": 0.72, "grad_norm": 1.2114932537078857, "learning_rate": 1.937257383844343e-06, "loss": 0.5418, "step": 11132 }, { "epoch": 0.72, "grad_norm": 1.1873869895935059, "learning_rate": 1.936431198548468e-06, "loss": 0.5069, "step": 11133 }, { "epoch": 0.72, "grad_norm": 1.215657353401184, "learning_rate": 1.93560514715328e-06, "loss": 0.5365, "step": 11134 }, { "epoch": 0.72, "grad_norm": 1.1187082529067993, "learning_rate": 1.9347792296948843e-06, "loss": 0.5073, "step": 11135 }, { "epoch": 0.72, "grad_norm": 1.1314243078231812, "learning_rate": 1.93395344620938e-06, "loss": 0.4915, "step": 11136 }, { "epoch": 0.72, "grad_norm": 1.439869999885559, "learning_rate": 1.9331277967328603e-06, "loss": 0.5126, "step": 11137 }, { "epoch": 0.72, "grad_norm": 1.2595329284667969, "learning_rate": 1.932302281301412e-06, "loss": 0.583, "step": 11138 }, { "epoch": 0.72, "grad_norm": 1.276685357093811, "learning_rate": 1.931476899951118e-06, "loss": 0.5232, "step": 11139 }, { "epoch": 0.72, "grad_norm": 1.1896977424621582, "learning_rate": 1.9306516527180497e-06, "loss": 0.4798, "step": 11140 }, { "epoch": 0.72, "grad_norm": 1.2270066738128662, "learning_rate": 1.929826539638279e-06, "loss": 0.5537, "step": 11141 }, { "epoch": 0.72, "grad_norm": 1.1144675016403198, "learning_rate": 1.929001560747869e-06, "loss": 0.4833, "step": 11142 }, { "epoch": 0.72, "grad_norm": 1.1744294166564941, "learning_rate": 1.9281767160828774e-06, "loss": 0.5303, "step": 11143 }, { "epoch": 0.72, "grad_norm": 1.1457992792129517, "learning_rate": 1.9273520056793567e-06, "loss": 0.5115, "step": 11144 }, { "epoch": 0.72, "grad_norm": 1.212559461593628, "learning_rate": 1.926527429573353e-06, "loss": 0.5034, "step": 11145 }, { "epoch": 0.72, "grad_norm": 1.173916220664978, "learning_rate": 1.9257029878009048e-06, "loss": 0.5564, "step": 11146 }, { "epoch": 0.72, "grad_norm": 1.1776673793792725, "learning_rate": 1.9248786803980468e-06, "loss": 0.5488, "step": 11147 }, { "epoch": 0.72, "grad_norm": 1.1414098739624023, "learning_rate": 1.9240545074008077e-06, "loss": 0.4794, "step": 11148 }, { "epoch": 0.72, "grad_norm": 1.128842830657959, "learning_rate": 1.92323046884521e-06, "loss": 0.4831, "step": 11149 }, { "epoch": 0.72, "grad_norm": 1.2404917478561401, "learning_rate": 1.922406564767271e-06, "loss": 0.5011, "step": 11150 }, { "epoch": 0.72, "grad_norm": 1.1762399673461914, "learning_rate": 1.921582795203002e-06, "loss": 0.5397, "step": 11151 }, { "epoch": 0.72, "grad_norm": 1.2033969163894653, "learning_rate": 1.920759160188405e-06, "loss": 0.5492, "step": 11152 }, { "epoch": 0.72, "grad_norm": 1.1662517786026, "learning_rate": 1.9199356597594806e-06, "loss": 0.5405, "step": 11153 }, { "epoch": 0.72, "grad_norm": 1.2436069250106812, "learning_rate": 1.9191122939522217e-06, "loss": 0.5284, "step": 11154 }, { "epoch": 0.72, "grad_norm": 1.2270176410675049, "learning_rate": 1.9182890628026156e-06, "loss": 0.5642, "step": 11155 }, { "epoch": 0.72, "grad_norm": 1.16153883934021, "learning_rate": 1.917465966346644e-06, "loss": 0.5039, "step": 11156 }, { "epoch": 0.72, "grad_norm": 1.2064933776855469, "learning_rate": 1.916643004620283e-06, "loss": 0.4842, "step": 11157 }, { "epoch": 0.72, "grad_norm": 1.1125978231430054, "learning_rate": 1.9158201776594996e-06, "loss": 0.4626, "step": 11158 }, { "epoch": 0.72, "grad_norm": 1.1517518758773804, "learning_rate": 1.914997485500259e-06, "loss": 0.5045, "step": 11159 }, { "epoch": 0.72, "grad_norm": 1.3463020324707031, "learning_rate": 1.9141749281785193e-06, "loss": 0.5819, "step": 11160 }, { "epoch": 0.72, "grad_norm": 1.1577012538909912, "learning_rate": 1.9133525057302315e-06, "loss": 0.4709, "step": 11161 }, { "epoch": 0.72, "grad_norm": 1.086620569229126, "learning_rate": 1.9125302181913422e-06, "loss": 0.5543, "step": 11162 }, { "epoch": 0.72, "grad_norm": 1.209309697151184, "learning_rate": 1.9117080655977933e-06, "loss": 0.532, "step": 11163 }, { "epoch": 0.72, "grad_norm": 1.1494183540344238, "learning_rate": 1.9108860479855145e-06, "loss": 0.4919, "step": 11164 }, { "epoch": 0.72, "grad_norm": 1.2304211854934692, "learning_rate": 1.9100641653904374e-06, "loss": 0.5322, "step": 11165 }, { "epoch": 0.72, "grad_norm": 1.2852345705032349, "learning_rate": 1.9092424178484825e-06, "loss": 0.5276, "step": 11166 }, { "epoch": 0.72, "grad_norm": 1.0482994318008423, "learning_rate": 1.9084208053955676e-06, "loss": 0.5304, "step": 11167 }, { "epoch": 0.72, "grad_norm": 1.1424877643585205, "learning_rate": 1.9075993280676054e-06, "loss": 0.4781, "step": 11168 }, { "epoch": 0.72, "grad_norm": 1.2126827239990234, "learning_rate": 1.9067779859004953e-06, "loss": 0.5401, "step": 11169 }, { "epoch": 0.72, "grad_norm": 1.27863609790802, "learning_rate": 1.9059567789301398e-06, "loss": 0.5524, "step": 11170 }, { "epoch": 0.72, "grad_norm": 1.1486433744430542, "learning_rate": 1.9051357071924298e-06, "loss": 0.5046, "step": 11171 }, { "epoch": 0.72, "grad_norm": 1.2937250137329102, "learning_rate": 1.904314770723254e-06, "loss": 0.5717, "step": 11172 }, { "epoch": 0.72, "grad_norm": 1.0246193408966064, "learning_rate": 1.9034939695584936e-06, "loss": 0.5019, "step": 11173 }, { "epoch": 0.72, "grad_norm": 1.088505744934082, "learning_rate": 1.9026733037340216e-06, "loss": 0.4934, "step": 11174 }, { "epoch": 0.72, "grad_norm": 1.1161624193191528, "learning_rate": 1.9018527732857079e-06, "loss": 0.4768, "step": 11175 }, { "epoch": 0.72, "grad_norm": 1.2179079055786133, "learning_rate": 1.901032378249416e-06, "loss": 0.4973, "step": 11176 }, { "epoch": 0.72, "grad_norm": 1.2483941316604614, "learning_rate": 1.900212118661004e-06, "loss": 0.5257, "step": 11177 }, { "epoch": 0.72, "grad_norm": 1.1309367418289185, "learning_rate": 1.8993919945563238e-06, "loss": 0.5159, "step": 11178 }, { "epoch": 0.72, "grad_norm": 1.158149242401123, "learning_rate": 1.8985720059712187e-06, "loss": 0.5052, "step": 11179 }, { "epoch": 0.72, "grad_norm": 1.224539875984192, "learning_rate": 1.8977521529415287e-06, "loss": 0.5658, "step": 11180 }, { "epoch": 0.72, "grad_norm": 1.1563612222671509, "learning_rate": 1.8969324355030889e-06, "loss": 0.5345, "step": 11181 }, { "epoch": 0.72, "grad_norm": 1.1042399406433105, "learning_rate": 1.8961128536917278e-06, "loss": 0.4944, "step": 11182 }, { "epoch": 0.72, "grad_norm": 1.326820969581604, "learning_rate": 1.8952934075432645e-06, "loss": 0.5527, "step": 11183 }, { "epoch": 0.72, "grad_norm": 1.175475001335144, "learning_rate": 1.8944740970935154e-06, "loss": 0.4882, "step": 11184 }, { "epoch": 0.72, "grad_norm": 1.2526313066482544, "learning_rate": 1.8936549223782912e-06, "loss": 0.5355, "step": 11185 }, { "epoch": 0.72, "grad_norm": 1.315591812133789, "learning_rate": 1.892835883433396e-06, "loss": 0.481, "step": 11186 }, { "epoch": 0.72, "grad_norm": 1.253286361694336, "learning_rate": 1.8920169802946299e-06, "loss": 0.504, "step": 11187 }, { "epoch": 0.72, "grad_norm": 1.2663425207138062, "learning_rate": 1.8911982129977801e-06, "loss": 0.537, "step": 11188 }, { "epoch": 0.72, "grad_norm": 1.2959251403808594, "learning_rate": 1.8903795815786362e-06, "loss": 0.5418, "step": 11189 }, { "epoch": 0.72, "grad_norm": 1.1453135013580322, "learning_rate": 1.8895610860729784e-06, "loss": 0.5228, "step": 11190 }, { "epoch": 0.72, "grad_norm": 1.1243208646774292, "learning_rate": 1.8887427265165798e-06, "loss": 0.5083, "step": 11191 }, { "epoch": 0.72, "grad_norm": 1.1629217863082886, "learning_rate": 1.8879245029452114e-06, "loss": 0.5182, "step": 11192 }, { "epoch": 0.72, "grad_norm": 1.2140849828720093, "learning_rate": 1.8871064153946322e-06, "loss": 0.5178, "step": 11193 }, { "epoch": 0.72, "grad_norm": 1.1670396327972412, "learning_rate": 1.8862884639005995e-06, "loss": 0.5236, "step": 11194 }, { "epoch": 0.72, "grad_norm": 1.1942534446716309, "learning_rate": 1.8854706484988656e-06, "loss": 0.519, "step": 11195 }, { "epoch": 0.72, "grad_norm": 1.0772361755371094, "learning_rate": 1.8846529692251737e-06, "loss": 0.5053, "step": 11196 }, { "epoch": 0.72, "grad_norm": 1.2062923908233643, "learning_rate": 1.8838354261152626e-06, "loss": 0.5333, "step": 11197 }, { "epoch": 0.72, "grad_norm": 1.1685937643051147, "learning_rate": 1.8830180192048664e-06, "loss": 0.5201, "step": 11198 }, { "epoch": 0.72, "grad_norm": 1.1612898111343384, "learning_rate": 1.88220074852971e-06, "loss": 0.4932, "step": 11199 }, { "epoch": 0.72, "grad_norm": 1.0697768926620483, "learning_rate": 1.8813836141255144e-06, "loss": 0.518, "step": 11200 }, { "epoch": 0.72, "grad_norm": 1.3386666774749756, "learning_rate": 1.8805666160279945e-06, "loss": 0.5295, "step": 11201 }, { "epoch": 0.72, "grad_norm": 1.1796164512634277, "learning_rate": 1.8797497542728598e-06, "loss": 0.5806, "step": 11202 }, { "epoch": 0.72, "grad_norm": 1.2330178022384644, "learning_rate": 1.8789330288958134e-06, "loss": 0.5684, "step": 11203 }, { "epoch": 0.72, "grad_norm": 1.2531923055648804, "learning_rate": 1.878116439932553e-06, "loss": 0.5521, "step": 11204 }, { "epoch": 0.72, "grad_norm": 1.2110682725906372, "learning_rate": 1.877299987418767e-06, "loss": 0.505, "step": 11205 }, { "epoch": 0.72, "grad_norm": 1.072243332862854, "learning_rate": 1.8764836713901418e-06, "loss": 0.5331, "step": 11206 }, { "epoch": 0.72, "grad_norm": 1.0861519575119019, "learning_rate": 1.875667491882357e-06, "loss": 0.5165, "step": 11207 }, { "epoch": 0.72, "grad_norm": 1.2967116832733154, "learning_rate": 1.8748514489310848e-06, "loss": 0.5276, "step": 11208 }, { "epoch": 0.72, "grad_norm": 1.2094964981079102, "learning_rate": 1.8740355425719924e-06, "loss": 0.4886, "step": 11209 }, { "epoch": 0.72, "grad_norm": 1.2783738374710083, "learning_rate": 1.8732197728407436e-06, "loss": 0.4832, "step": 11210 }, { "epoch": 0.72, "grad_norm": 1.1483545303344727, "learning_rate": 1.8724041397729897e-06, "loss": 0.4816, "step": 11211 }, { "epoch": 0.72, "grad_norm": 1.2709290981292725, "learning_rate": 1.8715886434043811e-06, "loss": 0.488, "step": 11212 }, { "epoch": 0.72, "grad_norm": 1.201551079750061, "learning_rate": 1.8707732837705623e-06, "loss": 0.5165, "step": 11213 }, { "epoch": 0.72, "grad_norm": 1.2958528995513916, "learning_rate": 1.869958060907169e-06, "loss": 0.5566, "step": 11214 }, { "epoch": 0.72, "grad_norm": 1.066649317741394, "learning_rate": 1.8691429748498352e-06, "loss": 0.5215, "step": 11215 }, { "epoch": 0.72, "grad_norm": 1.0900696516036987, "learning_rate": 1.8683280256341823e-06, "loss": 0.4787, "step": 11216 }, { "epoch": 0.72, "grad_norm": 1.2173469066619873, "learning_rate": 1.8675132132958318e-06, "loss": 0.4804, "step": 11217 }, { "epoch": 0.72, "grad_norm": 1.1603050231933594, "learning_rate": 1.8666985378703966e-06, "loss": 0.5053, "step": 11218 }, { "epoch": 0.72, "grad_norm": 1.2323611974716187, "learning_rate": 1.8658839993934846e-06, "loss": 0.5542, "step": 11219 }, { "epoch": 0.72, "grad_norm": 1.1452990770339966, "learning_rate": 1.8650695979006966e-06, "loss": 0.5266, "step": 11220 }, { "epoch": 0.72, "grad_norm": 1.204560399055481, "learning_rate": 1.8642553334276303e-06, "loss": 0.533, "step": 11221 }, { "epoch": 0.72, "grad_norm": 1.1549381017684937, "learning_rate": 1.8634412060098711e-06, "loss": 0.5043, "step": 11222 }, { "epoch": 0.72, "grad_norm": 1.235442876815796, "learning_rate": 1.8626272156830045e-06, "loss": 0.5349, "step": 11223 }, { "epoch": 0.72, "grad_norm": 1.384665846824646, "learning_rate": 1.8618133624826073e-06, "loss": 0.5492, "step": 11224 }, { "epoch": 0.72, "grad_norm": 1.1662143468856812, "learning_rate": 1.860999646444252e-06, "loss": 0.471, "step": 11225 }, { "epoch": 0.72, "grad_norm": 1.1032761335372925, "learning_rate": 1.8601860676035033e-06, "loss": 0.4475, "step": 11226 }, { "epoch": 0.72, "grad_norm": 1.1604833602905273, "learning_rate": 1.8593726259959221e-06, "loss": 0.5241, "step": 11227 }, { "epoch": 0.72, "grad_norm": 1.2942005395889282, "learning_rate": 1.8585593216570591e-06, "loss": 0.5197, "step": 11228 }, { "epoch": 0.72, "grad_norm": 1.5077788829803467, "learning_rate": 1.8577461546224629e-06, "loss": 0.5567, "step": 11229 }, { "epoch": 0.72, "grad_norm": 1.2448527812957764, "learning_rate": 1.8569331249276756e-06, "loss": 0.4849, "step": 11230 }, { "epoch": 0.72, "grad_norm": 1.0923120975494385, "learning_rate": 1.8561202326082317e-06, "loss": 0.4638, "step": 11231 }, { "epoch": 0.73, "grad_norm": 1.0905803442001343, "learning_rate": 1.8553074776996617e-06, "loss": 0.4965, "step": 11232 }, { "epoch": 0.73, "grad_norm": 1.245733380317688, "learning_rate": 1.8544948602374896e-06, "loss": 0.5571, "step": 11233 }, { "epoch": 0.73, "grad_norm": 1.0654505491256714, "learning_rate": 1.8536823802572301e-06, "loss": 0.5065, "step": 11234 }, { "epoch": 0.73, "grad_norm": 1.143241286277771, "learning_rate": 1.852870037794396e-06, "loss": 0.5143, "step": 11235 }, { "epoch": 0.73, "grad_norm": 1.1370887756347656, "learning_rate": 1.852057832884493e-06, "loss": 0.5341, "step": 11236 }, { "epoch": 0.73, "grad_norm": 1.1743981838226318, "learning_rate": 1.85124576556302e-06, "loss": 0.5011, "step": 11237 }, { "epoch": 0.73, "grad_norm": 1.1555896997451782, "learning_rate": 1.850433835865471e-06, "loss": 0.5279, "step": 11238 }, { "epoch": 0.73, "grad_norm": 1.2380138635635376, "learning_rate": 1.8496220438273348e-06, "loss": 0.5025, "step": 11239 }, { "epoch": 0.73, "grad_norm": 1.1249439716339111, "learning_rate": 1.8488103894840898e-06, "loss": 0.4855, "step": 11240 }, { "epoch": 0.73, "grad_norm": 1.1455096006393433, "learning_rate": 1.8479988728712118e-06, "loss": 0.4966, "step": 11241 }, { "epoch": 0.73, "grad_norm": 1.2379428148269653, "learning_rate": 1.8471874940241707e-06, "loss": 0.5426, "step": 11242 }, { "epoch": 0.73, "grad_norm": 1.215570330619812, "learning_rate": 1.84637625297843e-06, "loss": 0.5006, "step": 11243 }, { "epoch": 0.73, "grad_norm": 1.0911847352981567, "learning_rate": 1.8455651497694471e-06, "loss": 0.4975, "step": 11244 }, { "epoch": 0.73, "grad_norm": 1.1234922409057617, "learning_rate": 1.8447541844326743e-06, "loss": 0.4889, "step": 11245 }, { "epoch": 0.73, "grad_norm": 1.167667269706726, "learning_rate": 1.8439433570035542e-06, "loss": 0.5497, "step": 11246 }, { "epoch": 0.73, "grad_norm": 1.2588670253753662, "learning_rate": 1.843132667517527e-06, "loss": 0.5392, "step": 11247 }, { "epoch": 0.73, "grad_norm": 1.2237859964370728, "learning_rate": 1.8423221160100258e-06, "loss": 0.5444, "step": 11248 }, { "epoch": 0.73, "grad_norm": 1.2692779302597046, "learning_rate": 1.8415117025164785e-06, "loss": 0.5124, "step": 11249 }, { "epoch": 0.73, "grad_norm": 1.0752055644989014, "learning_rate": 1.840701427072305e-06, "loss": 0.5128, "step": 11250 }, { "epoch": 0.73, "grad_norm": 1.0940816402435303, "learning_rate": 1.8398912897129235e-06, "loss": 0.4512, "step": 11251 }, { "epoch": 0.73, "grad_norm": 1.1111416816711426, "learning_rate": 1.8390812904737381e-06, "loss": 0.5032, "step": 11252 }, { "epoch": 0.73, "grad_norm": 1.050673484802246, "learning_rate": 1.8382714293901549e-06, "loss": 0.4642, "step": 11253 }, { "epoch": 0.73, "grad_norm": 1.1253176927566528, "learning_rate": 1.8374617064975698e-06, "loss": 0.5066, "step": 11254 }, { "epoch": 0.73, "grad_norm": 1.1021959781646729, "learning_rate": 1.8366521218313743e-06, "loss": 0.5004, "step": 11255 }, { "epoch": 0.73, "grad_norm": 1.1014885902404785, "learning_rate": 1.8358426754269543e-06, "loss": 0.4889, "step": 11256 }, { "epoch": 0.73, "grad_norm": 1.2916264533996582, "learning_rate": 1.8350333673196857e-06, "loss": 0.5076, "step": 11257 }, { "epoch": 0.73, "grad_norm": 1.1588846445083618, "learning_rate": 1.8342241975449433e-06, "loss": 0.4891, "step": 11258 }, { "epoch": 0.73, "grad_norm": 1.182580590248108, "learning_rate": 1.8334151661380927e-06, "loss": 0.534, "step": 11259 }, { "epoch": 0.73, "grad_norm": 1.2135188579559326, "learning_rate": 1.8326062731344957e-06, "loss": 0.4781, "step": 11260 }, { "epoch": 0.73, "grad_norm": 1.1216490268707275, "learning_rate": 1.831797518569507e-06, "loss": 0.5098, "step": 11261 }, { "epoch": 0.73, "grad_norm": 1.195313811302185, "learning_rate": 1.8309889024784755e-06, "loss": 0.5338, "step": 11262 }, { "epoch": 0.73, "grad_norm": 1.062075138092041, "learning_rate": 1.830180424896742e-06, "loss": 0.4689, "step": 11263 }, { "epoch": 0.73, "grad_norm": 1.2012944221496582, "learning_rate": 1.829372085859643e-06, "loss": 0.5308, "step": 11264 }, { "epoch": 0.73, "grad_norm": 1.284162163734436, "learning_rate": 1.8285638854025106e-06, "loss": 0.5552, "step": 11265 }, { "epoch": 0.73, "grad_norm": 1.1648794412612915, "learning_rate": 1.8277558235606679e-06, "loss": 0.5476, "step": 11266 }, { "epoch": 0.73, "grad_norm": 1.1946502923965454, "learning_rate": 1.8269479003694357e-06, "loss": 0.5164, "step": 11267 }, { "epoch": 0.73, "grad_norm": 1.2455244064331055, "learning_rate": 1.8261401158641224e-06, "loss": 0.5438, "step": 11268 }, { "epoch": 0.73, "grad_norm": 1.2723462581634521, "learning_rate": 1.8253324700800357e-06, "loss": 0.5672, "step": 11269 }, { "epoch": 0.73, "grad_norm": 1.242195725440979, "learning_rate": 1.8245249630524763e-06, "loss": 0.4977, "step": 11270 }, { "epoch": 0.73, "grad_norm": 1.192008376121521, "learning_rate": 1.8237175948167395e-06, "loss": 0.5066, "step": 11271 }, { "epoch": 0.73, "grad_norm": 1.2443443536758423, "learning_rate": 1.8229103654081097e-06, "loss": 0.517, "step": 11272 }, { "epoch": 0.73, "grad_norm": 1.1678773164749146, "learning_rate": 1.8221032748618716e-06, "loss": 0.4856, "step": 11273 }, { "epoch": 0.73, "grad_norm": 1.1309504508972168, "learning_rate": 1.8212963232132996e-06, "loss": 0.5184, "step": 11274 }, { "epoch": 0.73, "grad_norm": 1.2570682764053345, "learning_rate": 1.8204895104976645e-06, "loss": 0.5014, "step": 11275 }, { "epoch": 0.73, "grad_norm": 1.1700202226638794, "learning_rate": 1.8196828367502312e-06, "loss": 0.5109, "step": 11276 }, { "epoch": 0.73, "grad_norm": 1.1730973720550537, "learning_rate": 1.818876302006254e-06, "loss": 0.4998, "step": 11277 }, { "epoch": 0.73, "grad_norm": 1.281926155090332, "learning_rate": 1.8180699063009865e-06, "loss": 0.4751, "step": 11278 }, { "epoch": 0.73, "grad_norm": 1.221891164779663, "learning_rate": 1.8172636496696739e-06, "loss": 0.548, "step": 11279 }, { "epoch": 0.73, "grad_norm": 1.1127299070358276, "learning_rate": 1.816457532147557e-06, "loss": 0.475, "step": 11280 }, { "epoch": 0.73, "grad_norm": 1.234969973564148, "learning_rate": 1.815651553769866e-06, "loss": 0.5204, "step": 11281 }, { "epoch": 0.73, "grad_norm": 1.1269022226333618, "learning_rate": 1.81484571457183e-06, "loss": 0.5019, "step": 11282 }, { "epoch": 0.73, "grad_norm": 1.1187567710876465, "learning_rate": 1.81404001458867e-06, "loss": 0.5153, "step": 11283 }, { "epoch": 0.73, "grad_norm": 1.33547842502594, "learning_rate": 1.8132344538556013e-06, "loss": 0.5304, "step": 11284 }, { "epoch": 0.73, "grad_norm": 1.22954261302948, "learning_rate": 1.812429032407832e-06, "loss": 0.5523, "step": 11285 }, { "epoch": 0.73, "grad_norm": 1.2471673488616943, "learning_rate": 1.8116237502805677e-06, "loss": 0.5312, "step": 11286 }, { "epoch": 0.73, "grad_norm": 1.1526877880096436, "learning_rate": 1.8108186075090017e-06, "loss": 0.4919, "step": 11287 }, { "epoch": 0.73, "grad_norm": 1.1076233386993408, "learning_rate": 1.8100136041283262e-06, "loss": 0.5483, "step": 11288 }, { "epoch": 0.73, "grad_norm": 1.2665228843688965, "learning_rate": 1.8092087401737257e-06, "loss": 0.5126, "step": 11289 }, { "epoch": 0.73, "grad_norm": 1.1766163110733032, "learning_rate": 1.8084040156803783e-06, "loss": 0.5039, "step": 11290 }, { "epoch": 0.73, "grad_norm": 1.2189066410064697, "learning_rate": 1.8075994306834577e-06, "loss": 0.5553, "step": 11291 }, { "epoch": 0.73, "grad_norm": 1.1119301319122314, "learning_rate": 1.806794985218131e-06, "loss": 0.4764, "step": 11292 }, { "epoch": 0.73, "grad_norm": 1.2660185098648071, "learning_rate": 1.8059906793195547e-06, "loss": 0.5334, "step": 11293 }, { "epoch": 0.73, "grad_norm": 1.182334303855896, "learning_rate": 1.8051865130228858e-06, "loss": 0.4958, "step": 11294 }, { "epoch": 0.73, "grad_norm": 1.1629712581634521, "learning_rate": 1.8043824863632715e-06, "loss": 0.49, "step": 11295 }, { "epoch": 0.73, "grad_norm": 1.2598352432250977, "learning_rate": 1.8035785993758537e-06, "loss": 0.5649, "step": 11296 }, { "epoch": 0.73, "grad_norm": 1.1671124696731567, "learning_rate": 1.8027748520957706e-06, "loss": 0.5709, "step": 11297 }, { "epoch": 0.73, "grad_norm": 1.1375865936279297, "learning_rate": 1.8019712445581472e-06, "loss": 0.5284, "step": 11298 }, { "epoch": 0.73, "grad_norm": 1.1871299743652344, "learning_rate": 1.8011677767981107e-06, "loss": 0.5184, "step": 11299 }, { "epoch": 0.73, "grad_norm": 1.2261265516281128, "learning_rate": 1.8003644488507772e-06, "loss": 0.5138, "step": 11300 }, { "epoch": 0.73, "grad_norm": 1.141527533531189, "learning_rate": 1.799561260751258e-06, "loss": 0.52, "step": 11301 }, { "epoch": 0.73, "grad_norm": 1.3061267137527466, "learning_rate": 1.7987582125346591e-06, "loss": 0.5391, "step": 11302 }, { "epoch": 0.73, "grad_norm": 1.2418605089187622, "learning_rate": 1.7979553042360815e-06, "loss": 0.5504, "step": 11303 }, { "epoch": 0.73, "grad_norm": 1.372352957725525, "learning_rate": 1.7971525358906139e-06, "loss": 0.5612, "step": 11304 }, { "epoch": 0.73, "grad_norm": 1.3001868724822998, "learning_rate": 1.7963499075333457e-06, "loss": 0.4875, "step": 11305 }, { "epoch": 0.73, "grad_norm": 1.2188711166381836, "learning_rate": 1.7955474191993576e-06, "loss": 0.5416, "step": 11306 }, { "epoch": 0.73, "grad_norm": 1.217103123664856, "learning_rate": 1.7947450709237245e-06, "loss": 0.4771, "step": 11307 }, { "epoch": 0.73, "grad_norm": 1.149217963218689, "learning_rate": 1.7939428627415146e-06, "loss": 0.5483, "step": 11308 }, { "epoch": 0.73, "grad_norm": 1.2326955795288086, "learning_rate": 1.7931407946877916e-06, "loss": 0.5325, "step": 11309 }, { "epoch": 0.73, "grad_norm": 1.2306917905807495, "learning_rate": 1.7923388667976094e-06, "loss": 0.5134, "step": 11310 }, { "epoch": 0.73, "grad_norm": 1.1763728857040405, "learning_rate": 1.7915370791060194e-06, "loss": 0.531, "step": 11311 }, { "epoch": 0.73, "grad_norm": 1.1879855394363403, "learning_rate": 1.7907354316480657e-06, "loss": 0.5341, "step": 11312 }, { "epoch": 0.73, "grad_norm": 1.2542610168457031, "learning_rate": 1.789933924458786e-06, "loss": 0.52, "step": 11313 }, { "epoch": 0.73, "grad_norm": 1.1502327919006348, "learning_rate": 1.7891325575732126e-06, "loss": 0.5276, "step": 11314 }, { "epoch": 0.73, "grad_norm": 1.1974308490753174, "learning_rate": 1.7883313310263727e-06, "loss": 0.4923, "step": 11315 }, { "epoch": 0.73, "grad_norm": 1.180938959121704, "learning_rate": 1.7875302448532828e-06, "loss": 0.5272, "step": 11316 }, { "epoch": 0.73, "grad_norm": 1.1341545581817627, "learning_rate": 1.7867292990889567e-06, "loss": 0.4738, "step": 11317 }, { "epoch": 0.73, "grad_norm": 1.1679683923721313, "learning_rate": 1.7859284937684036e-06, "loss": 0.4632, "step": 11318 }, { "epoch": 0.73, "grad_norm": 1.1747913360595703, "learning_rate": 1.7851278289266227e-06, "loss": 0.4846, "step": 11319 }, { "epoch": 0.73, "grad_norm": 1.351081371307373, "learning_rate": 1.7843273045986104e-06, "loss": 0.5404, "step": 11320 }, { "epoch": 0.73, "grad_norm": 1.138426423072815, "learning_rate": 1.7835269208193568e-06, "loss": 0.5304, "step": 11321 }, { "epoch": 0.73, "grad_norm": 1.2293095588684082, "learning_rate": 1.782726677623841e-06, "loss": 0.5627, "step": 11322 }, { "epoch": 0.73, "grad_norm": 1.174782156944275, "learning_rate": 1.7819265750470417e-06, "loss": 0.5721, "step": 11323 }, { "epoch": 0.73, "grad_norm": 1.216890573501587, "learning_rate": 1.7811266131239286e-06, "loss": 0.5331, "step": 11324 }, { "epoch": 0.73, "grad_norm": 1.1425622701644897, "learning_rate": 1.7803267918894668e-06, "loss": 0.5573, "step": 11325 }, { "epoch": 0.73, "grad_norm": 1.1561899185180664, "learning_rate": 1.7795271113786138e-06, "loss": 0.4973, "step": 11326 }, { "epoch": 0.73, "grad_norm": 1.2355194091796875, "learning_rate": 1.778727571626324e-06, "loss": 0.5484, "step": 11327 }, { "epoch": 0.73, "grad_norm": 1.2565181255340576, "learning_rate": 1.7779281726675391e-06, "loss": 0.4745, "step": 11328 }, { "epoch": 0.73, "grad_norm": 1.271705985069275, "learning_rate": 1.7771289145372005e-06, "loss": 0.5399, "step": 11329 }, { "epoch": 0.73, "grad_norm": 1.2559579610824585, "learning_rate": 1.776329797270242e-06, "loss": 0.5306, "step": 11330 }, { "epoch": 0.73, "grad_norm": 1.1326322555541992, "learning_rate": 1.7755308209015914e-06, "loss": 0.4984, "step": 11331 }, { "epoch": 0.73, "grad_norm": 1.1659139394760132, "learning_rate": 1.7747319854661688e-06, "loss": 0.4889, "step": 11332 }, { "epoch": 0.73, "grad_norm": 1.2619587182998657, "learning_rate": 1.7739332909988915e-06, "loss": 0.5238, "step": 11333 }, { "epoch": 0.73, "grad_norm": 1.1423650979995728, "learning_rate": 1.7731347375346653e-06, "loss": 0.4927, "step": 11334 }, { "epoch": 0.73, "grad_norm": 1.1742557287216187, "learning_rate": 1.772336325108394e-06, "loss": 0.4901, "step": 11335 }, { "epoch": 0.73, "grad_norm": 1.247071385383606, "learning_rate": 1.7715380537549748e-06, "loss": 0.5264, "step": 11336 }, { "epoch": 0.73, "grad_norm": 1.2242027521133423, "learning_rate": 1.7707399235092975e-06, "loss": 0.4782, "step": 11337 }, { "epoch": 0.73, "grad_norm": 1.238186001777649, "learning_rate": 1.7699419344062468e-06, "loss": 0.4955, "step": 11338 }, { "epoch": 0.73, "grad_norm": 1.170073390007019, "learning_rate": 1.769144086480702e-06, "loss": 0.523, "step": 11339 }, { "epoch": 0.73, "grad_norm": 1.1881592273712158, "learning_rate": 1.7683463797675316e-06, "loss": 0.5243, "step": 11340 }, { "epoch": 0.73, "grad_norm": 1.254133701324463, "learning_rate": 1.7675488143016035e-06, "loss": 0.538, "step": 11341 }, { "epoch": 0.73, "grad_norm": 1.1539380550384521, "learning_rate": 1.7667513901177764e-06, "loss": 0.5236, "step": 11342 }, { "epoch": 0.73, "grad_norm": 1.1952643394470215, "learning_rate": 1.7659541072509045e-06, "loss": 0.5349, "step": 11343 }, { "epoch": 0.73, "grad_norm": 1.0873743295669556, "learning_rate": 1.7651569657358365e-06, "loss": 0.5068, "step": 11344 }, { "epoch": 0.73, "grad_norm": 1.2518113851547241, "learning_rate": 1.7643599656074096e-06, "loss": 0.5846, "step": 11345 }, { "epoch": 0.73, "grad_norm": 1.0702929496765137, "learning_rate": 1.7635631069004605e-06, "loss": 0.4741, "step": 11346 }, { "epoch": 0.73, "grad_norm": 1.112889289855957, "learning_rate": 1.7627663896498177e-06, "loss": 0.5016, "step": 11347 }, { "epoch": 0.73, "grad_norm": 1.099312663078308, "learning_rate": 1.7619698138903042e-06, "loss": 0.4852, "step": 11348 }, { "epoch": 0.73, "grad_norm": 1.1645952463150024, "learning_rate": 1.7611733796567354e-06, "loss": 0.5115, "step": 11349 }, { "epoch": 0.73, "grad_norm": 1.2639411687850952, "learning_rate": 1.7603770869839243e-06, "loss": 0.5266, "step": 11350 }, { "epoch": 0.73, "grad_norm": 1.22994863986969, "learning_rate": 1.75958093590667e-06, "loss": 0.5641, "step": 11351 }, { "epoch": 0.73, "grad_norm": 1.1898947954177856, "learning_rate": 1.7587849264597728e-06, "loss": 0.5267, "step": 11352 }, { "epoch": 0.73, "grad_norm": 1.2518885135650635, "learning_rate": 1.757989058678024e-06, "loss": 0.5137, "step": 11353 }, { "epoch": 0.73, "grad_norm": 1.2123403549194336, "learning_rate": 1.7571933325962094e-06, "loss": 0.5309, "step": 11354 }, { "epoch": 0.73, "grad_norm": 1.2328228950500488, "learning_rate": 1.7563977482491074e-06, "loss": 0.4974, "step": 11355 }, { "epoch": 0.73, "grad_norm": 1.2016657590866089, "learning_rate": 1.7556023056714927e-06, "loss": 0.5206, "step": 11356 }, { "epoch": 0.73, "grad_norm": 1.2845319509506226, "learning_rate": 1.754807004898129e-06, "loss": 0.5385, "step": 11357 }, { "epoch": 0.73, "grad_norm": 1.1465197801589966, "learning_rate": 1.7540118459637784e-06, "loss": 0.503, "step": 11358 }, { "epoch": 0.73, "grad_norm": 1.1109650135040283, "learning_rate": 1.7532168289031953e-06, "loss": 0.502, "step": 11359 }, { "epoch": 0.73, "grad_norm": 1.2487410306930542, "learning_rate": 1.7524219537511294e-06, "loss": 0.5409, "step": 11360 }, { "epoch": 0.73, "grad_norm": 1.2779006958007812, "learning_rate": 1.7516272205423196e-06, "loss": 0.5538, "step": 11361 }, { "epoch": 0.73, "grad_norm": 1.0913103818893433, "learning_rate": 1.750832629311503e-06, "loss": 0.5531, "step": 11362 }, { "epoch": 0.73, "grad_norm": 1.4023499488830566, "learning_rate": 1.750038180093409e-06, "loss": 0.5192, "step": 11363 }, { "epoch": 0.73, "grad_norm": 1.252956509590149, "learning_rate": 1.7492438729227611e-06, "loss": 0.5616, "step": 11364 }, { "epoch": 0.73, "grad_norm": 1.1492323875427246, "learning_rate": 1.7484497078342788e-06, "loss": 0.5218, "step": 11365 }, { "epoch": 0.73, "grad_norm": 1.2524950504302979, "learning_rate": 1.747655684862668e-06, "loss": 0.5168, "step": 11366 }, { "epoch": 0.73, "grad_norm": 1.2217388153076172, "learning_rate": 1.7468618040426366e-06, "loss": 0.5544, "step": 11367 }, { "epoch": 0.73, "grad_norm": 1.1928576231002808, "learning_rate": 1.7460680654088825e-06, "loss": 0.4791, "step": 11368 }, { "epoch": 0.73, "grad_norm": 1.229003667831421, "learning_rate": 1.7452744689961e-06, "loss": 0.505, "step": 11369 }, { "epoch": 0.73, "grad_norm": 1.0877676010131836, "learning_rate": 1.7444810148389706e-06, "loss": 0.4903, "step": 11370 }, { "epoch": 0.73, "grad_norm": 1.4063071012496948, "learning_rate": 1.7436877029721776e-06, "loss": 0.5427, "step": 11371 }, { "epoch": 0.73, "grad_norm": 1.2903668880462646, "learning_rate": 1.7428945334303932e-06, "loss": 0.5472, "step": 11372 }, { "epoch": 0.73, "grad_norm": 1.2565548419952393, "learning_rate": 1.7421015062482854e-06, "loss": 0.5099, "step": 11373 }, { "epoch": 0.73, "grad_norm": 1.24567711353302, "learning_rate": 1.7413086214605168e-06, "loss": 0.5408, "step": 11374 }, { "epoch": 0.73, "grad_norm": 1.165353775024414, "learning_rate": 1.7405158791017397e-06, "loss": 0.5319, "step": 11375 }, { "epoch": 0.73, "grad_norm": 1.3155068159103394, "learning_rate": 1.7397232792066033e-06, "loss": 0.5235, "step": 11376 }, { "epoch": 0.73, "grad_norm": 1.2494138479232788, "learning_rate": 1.738930821809751e-06, "loss": 0.5027, "step": 11377 }, { "epoch": 0.73, "grad_norm": 1.156894564628601, "learning_rate": 1.738138506945819e-06, "loss": 0.5396, "step": 11378 }, { "epoch": 0.73, "grad_norm": 1.2078814506530762, "learning_rate": 1.737346334649437e-06, "loss": 0.5277, "step": 11379 }, { "epoch": 0.73, "grad_norm": 1.1929703950881958, "learning_rate": 1.7365543049552303e-06, "loss": 0.568, "step": 11380 }, { "epoch": 0.73, "grad_norm": 1.2124412059783936, "learning_rate": 1.735762417897814e-06, "loss": 0.4898, "step": 11381 }, { "epoch": 0.73, "grad_norm": 1.1626975536346436, "learning_rate": 1.7349706735118e-06, "loss": 0.4802, "step": 11382 }, { "epoch": 0.73, "grad_norm": 1.2377197742462158, "learning_rate": 1.734179071831794e-06, "loss": 0.5338, "step": 11383 }, { "epoch": 0.73, "grad_norm": 1.1582576036453247, "learning_rate": 1.7333876128923955e-06, "loss": 0.5303, "step": 11384 }, { "epoch": 0.73, "grad_norm": 1.140830159187317, "learning_rate": 1.7325962967281979e-06, "loss": 0.5088, "step": 11385 }, { "epoch": 0.73, "grad_norm": 1.1307902336120605, "learning_rate": 1.7318051233737843e-06, "loss": 0.4769, "step": 11386 }, { "epoch": 0.74, "grad_norm": 1.3291741609573364, "learning_rate": 1.7310140928637366e-06, "loss": 0.5449, "step": 11387 }, { "epoch": 0.74, "grad_norm": 1.1840354204177856, "learning_rate": 1.7302232052326289e-06, "loss": 0.4886, "step": 11388 }, { "epoch": 0.74, "grad_norm": 1.13436758518219, "learning_rate": 1.7294324605150286e-06, "loss": 0.4774, "step": 11389 }, { "epoch": 0.74, "grad_norm": 1.2330478429794312, "learning_rate": 1.728641858745498e-06, "loss": 0.5339, "step": 11390 }, { "epoch": 0.74, "grad_norm": 1.2000550031661987, "learning_rate": 1.7278513999585923e-06, "loss": 0.545, "step": 11391 }, { "epoch": 0.74, "grad_norm": 1.234697937965393, "learning_rate": 1.7270610841888585e-06, "loss": 0.5158, "step": 11392 }, { "epoch": 0.74, "grad_norm": 1.1586626768112183, "learning_rate": 1.7262709114708403e-06, "loss": 0.496, "step": 11393 }, { "epoch": 0.74, "grad_norm": 1.123124122619629, "learning_rate": 1.7254808818390738e-06, "loss": 0.5381, "step": 11394 }, { "epoch": 0.74, "grad_norm": 1.2906373739242554, "learning_rate": 1.7246909953280904e-06, "loss": 0.5556, "step": 11395 }, { "epoch": 0.74, "grad_norm": 1.1873425245285034, "learning_rate": 1.7239012519724124e-06, "loss": 0.4999, "step": 11396 }, { "epoch": 0.74, "grad_norm": 1.1942275762557983, "learning_rate": 1.7231116518065605e-06, "loss": 0.561, "step": 11397 }, { "epoch": 0.74, "grad_norm": 1.1706655025482178, "learning_rate": 1.722322194865042e-06, "loss": 0.5387, "step": 11398 }, { "epoch": 0.74, "grad_norm": 1.2674933671951294, "learning_rate": 1.7215328811823633e-06, "loss": 0.4579, "step": 11399 }, { "epoch": 0.74, "grad_norm": 1.0947818756103516, "learning_rate": 1.7207437107930247e-06, "loss": 0.5411, "step": 11400 }, { "epoch": 0.74, "grad_norm": 1.3182566165924072, "learning_rate": 1.7199546837315172e-06, "loss": 0.5347, "step": 11401 }, { "epoch": 0.74, "grad_norm": 1.2314971685409546, "learning_rate": 1.7191658000323285e-06, "loss": 0.5374, "step": 11402 }, { "epoch": 0.74, "grad_norm": 1.1523463726043701, "learning_rate": 1.7183770597299394e-06, "loss": 0.5513, "step": 11403 }, { "epoch": 0.74, "grad_norm": 1.2509082555770874, "learning_rate": 1.7175884628588202e-06, "loss": 0.5548, "step": 11404 }, { "epoch": 0.74, "grad_norm": 1.141445517539978, "learning_rate": 1.716800009453441e-06, "loss": 0.5076, "step": 11405 }, { "epoch": 0.74, "grad_norm": 1.1030018329620361, "learning_rate": 1.7160116995482623e-06, "loss": 0.5464, "step": 11406 }, { "epoch": 0.74, "grad_norm": 1.0988909006118774, "learning_rate": 1.71522353317774e-06, "loss": 0.492, "step": 11407 }, { "epoch": 0.74, "grad_norm": 1.2210842370986938, "learning_rate": 1.7144355103763216e-06, "loss": 0.588, "step": 11408 }, { "epoch": 0.74, "grad_norm": 1.2530882358551025, "learning_rate": 1.7136476311784521e-06, "loss": 0.5375, "step": 11409 }, { "epoch": 0.74, "grad_norm": 1.284630537033081, "learning_rate": 1.7128598956185643e-06, "loss": 0.5262, "step": 11410 }, { "epoch": 0.74, "grad_norm": 1.1544694900512695, "learning_rate": 1.7120723037310893e-06, "loss": 0.475, "step": 11411 }, { "epoch": 0.74, "grad_norm": 1.1594303846359253, "learning_rate": 1.7112848555504508e-06, "loss": 0.5456, "step": 11412 }, { "epoch": 0.74, "grad_norm": 1.168526530265808, "learning_rate": 1.7104975511110666e-06, "loss": 0.4997, "step": 11413 }, { "epoch": 0.74, "grad_norm": 1.180791974067688, "learning_rate": 1.7097103904473472e-06, "loss": 0.5502, "step": 11414 }, { "epoch": 0.74, "grad_norm": 1.2415729761123657, "learning_rate": 1.7089233735936988e-06, "loss": 0.5207, "step": 11415 }, { "epoch": 0.74, "grad_norm": 1.1723504066467285, "learning_rate": 1.7081365005845174e-06, "loss": 0.5052, "step": 11416 }, { "epoch": 0.74, "grad_norm": 1.1793195009231567, "learning_rate": 1.707349771454197e-06, "loss": 0.5389, "step": 11417 }, { "epoch": 0.74, "grad_norm": 1.1599552631378174, "learning_rate": 1.7065631862371224e-06, "loss": 0.4947, "step": 11418 }, { "epoch": 0.74, "grad_norm": 1.1996958255767822, "learning_rate": 1.7057767449676737e-06, "loss": 0.4954, "step": 11419 }, { "epoch": 0.74, "grad_norm": 1.1019747257232666, "learning_rate": 1.7049904476802242e-06, "loss": 0.4752, "step": 11420 }, { "epoch": 0.74, "grad_norm": 1.256060242652893, "learning_rate": 1.7042042944091426e-06, "loss": 0.5176, "step": 11421 }, { "epoch": 0.74, "grad_norm": 1.3169373273849487, "learning_rate": 1.7034182851887865e-06, "loss": 0.536, "step": 11422 }, { "epoch": 0.74, "grad_norm": 1.23801589012146, "learning_rate": 1.7026324200535122e-06, "loss": 0.5404, "step": 11423 }, { "epoch": 0.74, "grad_norm": 1.2634522914886475, "learning_rate": 1.701846699037667e-06, "loss": 0.5303, "step": 11424 }, { "epoch": 0.74, "grad_norm": 1.1552752256393433, "learning_rate": 1.7010611221755934e-06, "loss": 0.5183, "step": 11425 }, { "epoch": 0.74, "grad_norm": 1.09823477268219, "learning_rate": 1.7002756895016286e-06, "loss": 0.5132, "step": 11426 }, { "epoch": 0.74, "grad_norm": 1.116080403327942, "learning_rate": 1.6994904010500984e-06, "loss": 0.5369, "step": 11427 }, { "epoch": 0.74, "grad_norm": 1.1514630317687988, "learning_rate": 1.698705256855327e-06, "loss": 0.5085, "step": 11428 }, { "epoch": 0.74, "grad_norm": 1.1664339303970337, "learning_rate": 1.6979202569516319e-06, "loss": 0.5081, "step": 11429 }, { "epoch": 0.74, "grad_norm": 1.2463723421096802, "learning_rate": 1.6971354013733222e-06, "loss": 0.4888, "step": 11430 }, { "epoch": 0.74, "grad_norm": 1.2606642246246338, "learning_rate": 1.6963506901547028e-06, "loss": 0.5028, "step": 11431 }, { "epoch": 0.74, "grad_norm": 1.2108830213546753, "learning_rate": 1.695566123330073e-06, "loss": 0.5693, "step": 11432 }, { "epoch": 0.74, "grad_norm": 1.1448777914047241, "learning_rate": 1.6947817009337208e-06, "loss": 0.5559, "step": 11433 }, { "epoch": 0.74, "grad_norm": 1.1835485696792603, "learning_rate": 1.693997422999933e-06, "loss": 0.5595, "step": 11434 }, { "epoch": 0.74, "grad_norm": 1.114194393157959, "learning_rate": 1.6932132895629876e-06, "loss": 0.4789, "step": 11435 }, { "epoch": 0.74, "grad_norm": 1.2425131797790527, "learning_rate": 1.6924293006571585e-06, "loss": 0.5163, "step": 11436 }, { "epoch": 0.74, "grad_norm": 1.169586420059204, "learning_rate": 1.6916454563167107e-06, "loss": 0.4946, "step": 11437 }, { "epoch": 0.74, "grad_norm": 1.209097981452942, "learning_rate": 1.6908617565759061e-06, "loss": 0.5475, "step": 11438 }, { "epoch": 0.74, "grad_norm": 1.1812546253204346, "learning_rate": 1.6900782014689942e-06, "loss": 0.483, "step": 11439 }, { "epoch": 0.74, "grad_norm": 1.180187463760376, "learning_rate": 1.6892947910302248e-06, "loss": 0.5232, "step": 11440 }, { "epoch": 0.74, "grad_norm": 1.0959057807922363, "learning_rate": 1.6885115252938383e-06, "loss": 0.474, "step": 11441 }, { "epoch": 0.74, "grad_norm": 1.1417840719223022, "learning_rate": 1.6877284042940696e-06, "loss": 0.4998, "step": 11442 }, { "epoch": 0.74, "grad_norm": 1.2176015377044678, "learning_rate": 1.6869454280651465e-06, "loss": 0.4757, "step": 11443 }, { "epoch": 0.74, "grad_norm": 1.1536104679107666, "learning_rate": 1.6861625966412926e-06, "loss": 0.5339, "step": 11444 }, { "epoch": 0.74, "grad_norm": 1.229071021080017, "learning_rate": 1.6853799100567198e-06, "loss": 0.5042, "step": 11445 }, { "epoch": 0.74, "grad_norm": 1.1993908882141113, "learning_rate": 1.6845973683456391e-06, "loss": 0.5161, "step": 11446 }, { "epoch": 0.74, "grad_norm": 1.1893585920333862, "learning_rate": 1.683814971542254e-06, "loss": 0.516, "step": 11447 }, { "epoch": 0.74, "grad_norm": 1.1342647075653076, "learning_rate": 1.6830327196807606e-06, "loss": 0.4914, "step": 11448 }, { "epoch": 0.74, "grad_norm": 1.1738849878311157, "learning_rate": 1.6822506127953508e-06, "loss": 0.5046, "step": 11449 }, { "epoch": 0.74, "grad_norm": 1.1749461889266968, "learning_rate": 1.6814686509202048e-06, "loss": 0.4925, "step": 11450 }, { "epoch": 0.74, "grad_norm": 1.2491956949234009, "learning_rate": 1.6806868340895027e-06, "loss": 0.4916, "step": 11451 }, { "epoch": 0.74, "grad_norm": 1.220725417137146, "learning_rate": 1.6799051623374152e-06, "loss": 0.5036, "step": 11452 }, { "epoch": 0.74, "grad_norm": 1.393132209777832, "learning_rate": 1.6791236356981066e-06, "loss": 0.5474, "step": 11453 }, { "epoch": 0.74, "grad_norm": 1.1742507219314575, "learning_rate": 1.6783422542057376e-06, "loss": 0.494, "step": 11454 }, { "epoch": 0.74, "grad_norm": 1.372879981994629, "learning_rate": 1.6775610178944575e-06, "loss": 0.5069, "step": 11455 }, { "epoch": 0.74, "grad_norm": 1.2387337684631348, "learning_rate": 1.6767799267984124e-06, "loss": 0.5162, "step": 11456 }, { "epoch": 0.74, "grad_norm": 1.1731669902801514, "learning_rate": 1.6759989809517436e-06, "loss": 0.533, "step": 11457 }, { "epoch": 0.74, "grad_norm": 1.2145236730575562, "learning_rate": 1.6752181803885848e-06, "loss": 0.5235, "step": 11458 }, { "epoch": 0.74, "grad_norm": 1.205731749534607, "learning_rate": 1.6744375251430589e-06, "loss": 0.5005, "step": 11459 }, { "epoch": 0.74, "grad_norm": 1.1881147623062134, "learning_rate": 1.6736570152492892e-06, "loss": 0.5164, "step": 11460 }, { "epoch": 0.74, "grad_norm": 1.288623571395874, "learning_rate": 1.6728766507413896e-06, "loss": 0.5123, "step": 11461 }, { "epoch": 0.74, "grad_norm": 1.142701506614685, "learning_rate": 1.6720964316534672e-06, "loss": 0.5319, "step": 11462 }, { "epoch": 0.74, "grad_norm": 1.0745983123779297, "learning_rate": 1.6713163580196252e-06, "loss": 0.5507, "step": 11463 }, { "epoch": 0.74, "grad_norm": 1.178621768951416, "learning_rate": 1.6705364298739557e-06, "loss": 0.5452, "step": 11464 }, { "epoch": 0.74, "grad_norm": 1.1172876358032227, "learning_rate": 1.6697566472505484e-06, "loss": 0.4741, "step": 11465 }, { "epoch": 0.74, "grad_norm": 1.221750020980835, "learning_rate": 1.6689770101834862e-06, "loss": 0.4948, "step": 11466 }, { "epoch": 0.74, "grad_norm": 1.3197439908981323, "learning_rate": 1.6681975187068444e-06, "loss": 0.5558, "step": 11467 }, { "epoch": 0.74, "grad_norm": 1.0678631067276, "learning_rate": 1.6674181728546952e-06, "loss": 0.4602, "step": 11468 }, { "epoch": 0.74, "grad_norm": 1.3095130920410156, "learning_rate": 1.6666389726610971e-06, "loss": 0.5045, "step": 11469 }, { "epoch": 0.74, "grad_norm": 1.1622225046157837, "learning_rate": 1.6658599181601099e-06, "loss": 0.4291, "step": 11470 }, { "epoch": 0.74, "grad_norm": 1.2067291736602783, "learning_rate": 1.6650810093857833e-06, "loss": 0.4572, "step": 11471 }, { "epoch": 0.74, "grad_norm": 1.1672298908233643, "learning_rate": 1.6643022463721615e-06, "loss": 0.4825, "step": 11472 }, { "epoch": 0.74, "grad_norm": 1.1791954040527344, "learning_rate": 1.6635236291532836e-06, "loss": 0.569, "step": 11473 }, { "epoch": 0.74, "grad_norm": 1.2182488441467285, "learning_rate": 1.6627451577631782e-06, "loss": 0.5652, "step": 11474 }, { "epoch": 0.74, "grad_norm": 1.2409021854400635, "learning_rate": 1.6619668322358723e-06, "loss": 0.5248, "step": 11475 }, { "epoch": 0.74, "grad_norm": 1.1509860754013062, "learning_rate": 1.6611886526053833e-06, "loss": 0.5321, "step": 11476 }, { "epoch": 0.74, "grad_norm": 1.3409067392349243, "learning_rate": 1.660410618905724e-06, "loss": 0.5352, "step": 11477 }, { "epoch": 0.74, "grad_norm": 1.2664971351623535, "learning_rate": 1.6596327311709003e-06, "loss": 0.4934, "step": 11478 }, { "epoch": 0.74, "grad_norm": 1.1963188648223877, "learning_rate": 1.6588549894349137e-06, "loss": 0.5487, "step": 11479 }, { "epoch": 0.74, "grad_norm": 1.2444933652877808, "learning_rate": 1.6580773937317536e-06, "loss": 0.5466, "step": 11480 }, { "epoch": 0.74, "grad_norm": 1.201413631439209, "learning_rate": 1.6572999440954079e-06, "loss": 0.4952, "step": 11481 }, { "epoch": 0.74, "grad_norm": 1.2755686044692993, "learning_rate": 1.6565226405598578e-06, "loss": 0.5209, "step": 11482 }, { "epoch": 0.74, "grad_norm": 1.0854783058166504, "learning_rate": 1.6557454831590764e-06, "loss": 0.5001, "step": 11483 }, { "epoch": 0.74, "grad_norm": 1.162850022315979, "learning_rate": 1.654968471927032e-06, "loss": 0.5424, "step": 11484 }, { "epoch": 0.74, "grad_norm": 1.1502032279968262, "learning_rate": 1.654191606897687e-06, "loss": 0.5534, "step": 11485 }, { "epoch": 0.74, "grad_norm": 1.0960888862609863, "learning_rate": 1.6534148881049928e-06, "loss": 0.5112, "step": 11486 }, { "epoch": 0.74, "grad_norm": 1.1396915912628174, "learning_rate": 1.6526383155828995e-06, "loss": 0.5372, "step": 11487 }, { "epoch": 0.74, "grad_norm": 1.1786644458770752, "learning_rate": 1.6518618893653494e-06, "loss": 0.4971, "step": 11488 }, { "epoch": 0.74, "grad_norm": 1.191896915435791, "learning_rate": 1.6510856094862771e-06, "loss": 0.5724, "step": 11489 }, { "epoch": 0.74, "grad_norm": 1.196642518043518, "learning_rate": 1.650309475979613e-06, "loss": 0.5449, "step": 11490 }, { "epoch": 0.74, "grad_norm": 1.1519216299057007, "learning_rate": 1.6495334888792814e-06, "loss": 0.4871, "step": 11491 }, { "epoch": 0.74, "grad_norm": 1.1229584217071533, "learning_rate": 1.6487576482191942e-06, "loss": 0.5254, "step": 11492 }, { "epoch": 0.74, "grad_norm": 1.240673303604126, "learning_rate": 1.6479819540332642e-06, "loss": 0.5219, "step": 11493 }, { "epoch": 0.74, "grad_norm": 1.0904337167739868, "learning_rate": 1.647206406355395e-06, "loss": 0.4533, "step": 11494 }, { "epoch": 0.74, "grad_norm": 1.1346814632415771, "learning_rate": 1.6464310052194831e-06, "loss": 0.473, "step": 11495 }, { "epoch": 0.74, "grad_norm": 1.096703290939331, "learning_rate": 1.64565575065942e-06, "loss": 0.5048, "step": 11496 }, { "epoch": 0.74, "grad_norm": 1.2381402254104614, "learning_rate": 1.6448806427090907e-06, "loss": 0.4871, "step": 11497 }, { "epoch": 0.74, "grad_norm": 1.3165488243103027, "learning_rate": 1.6441056814023714e-06, "loss": 0.5425, "step": 11498 }, { "epoch": 0.74, "grad_norm": 1.280000925064087, "learning_rate": 1.643330866773134e-06, "loss": 0.5053, "step": 11499 }, { "epoch": 0.74, "grad_norm": 1.113370656967163, "learning_rate": 1.6425561988552442e-06, "loss": 0.4946, "step": 11500 }, { "epoch": 0.74, "grad_norm": 1.1382668018341064, "learning_rate": 1.6417816776825601e-06, "loss": 0.5529, "step": 11501 }, { "epoch": 0.74, "grad_norm": 1.2958276271820068, "learning_rate": 1.6410073032889352e-06, "loss": 0.5448, "step": 11502 }, { "epoch": 0.74, "grad_norm": 1.1537398099899292, "learning_rate": 1.6402330757082163e-06, "loss": 0.5125, "step": 11503 }, { "epoch": 0.74, "grad_norm": 1.215323805809021, "learning_rate": 1.6394589949742396e-06, "loss": 0.5137, "step": 11504 }, { "epoch": 0.74, "grad_norm": 1.1273839473724365, "learning_rate": 1.6386850611208398e-06, "loss": 0.4611, "step": 11505 }, { "epoch": 0.74, "grad_norm": 1.173946499824524, "learning_rate": 1.6379112741818436e-06, "loss": 0.5218, "step": 11506 }, { "epoch": 0.74, "grad_norm": 1.1189926862716675, "learning_rate": 1.6371376341910717e-06, "loss": 0.4895, "step": 11507 }, { "epoch": 0.74, "grad_norm": 1.2458953857421875, "learning_rate": 1.6363641411823371e-06, "loss": 0.5407, "step": 11508 }, { "epoch": 0.74, "grad_norm": 1.1985758543014526, "learning_rate": 1.6355907951894495e-06, "loss": 0.5302, "step": 11509 }, { "epoch": 0.74, "grad_norm": 1.2044849395751953, "learning_rate": 1.6348175962462059e-06, "loss": 0.5496, "step": 11510 }, { "epoch": 0.74, "grad_norm": 1.2612385749816895, "learning_rate": 1.6340445443864035e-06, "loss": 0.5047, "step": 11511 }, { "epoch": 0.74, "grad_norm": 1.0899449586868286, "learning_rate": 1.6332716396438291e-06, "loss": 0.5534, "step": 11512 }, { "epoch": 0.74, "grad_norm": 1.1610395908355713, "learning_rate": 1.6324988820522658e-06, "loss": 0.5439, "step": 11513 }, { "epoch": 0.74, "grad_norm": 1.1549959182739258, "learning_rate": 1.6317262716454896e-06, "loss": 0.5223, "step": 11514 }, { "epoch": 0.74, "grad_norm": 1.1526423692703247, "learning_rate": 1.6309538084572657e-06, "loss": 0.5155, "step": 11515 }, { "epoch": 0.74, "grad_norm": 1.114152193069458, "learning_rate": 1.6301814925213588e-06, "loss": 0.4971, "step": 11516 }, { "epoch": 0.74, "grad_norm": 1.149244785308838, "learning_rate": 1.6294093238715248e-06, "loss": 0.538, "step": 11517 }, { "epoch": 0.74, "grad_norm": 1.2155531644821167, "learning_rate": 1.6286373025415126e-06, "loss": 0.5301, "step": 11518 }, { "epoch": 0.74, "grad_norm": 1.1516351699829102, "learning_rate": 1.6278654285650657e-06, "loss": 0.5343, "step": 11519 }, { "epoch": 0.74, "grad_norm": 1.220798134803772, "learning_rate": 1.6270937019759232e-06, "loss": 0.5699, "step": 11520 }, { "epoch": 0.74, "grad_norm": 1.095894694328308, "learning_rate": 1.6263221228078102e-06, "loss": 0.5228, "step": 11521 }, { "epoch": 0.74, "grad_norm": 1.251181721687317, "learning_rate": 1.625550691094453e-06, "loss": 0.5006, "step": 11522 }, { "epoch": 0.74, "grad_norm": 1.2241750955581665, "learning_rate": 1.6247794068695695e-06, "loss": 0.5148, "step": 11523 }, { "epoch": 0.74, "grad_norm": 1.163530945777893, "learning_rate": 1.6240082701668697e-06, "loss": 0.5472, "step": 11524 }, { "epoch": 0.74, "grad_norm": 1.256258487701416, "learning_rate": 1.6232372810200582e-06, "loss": 0.5651, "step": 11525 }, { "epoch": 0.74, "grad_norm": 1.2117550373077393, "learning_rate": 1.6224664394628343e-06, "loss": 0.5257, "step": 11526 }, { "epoch": 0.74, "grad_norm": 1.1970738172531128, "learning_rate": 1.6216957455288867e-06, "loss": 0.5322, "step": 11527 }, { "epoch": 0.74, "grad_norm": 1.2032763957977295, "learning_rate": 1.6209251992519021e-06, "loss": 0.4922, "step": 11528 }, { "epoch": 0.74, "grad_norm": 1.2205661535263062, "learning_rate": 1.6201548006655592e-06, "loss": 0.5094, "step": 11529 }, { "epoch": 0.74, "grad_norm": 1.1739455461502075, "learning_rate": 1.6193845498035294e-06, "loss": 0.5343, "step": 11530 }, { "epoch": 0.74, "grad_norm": 1.2154271602630615, "learning_rate": 1.6186144466994791e-06, "loss": 0.5267, "step": 11531 }, { "epoch": 0.74, "grad_norm": 1.2005914449691772, "learning_rate": 1.6178444913870684e-06, "loss": 0.5109, "step": 11532 }, { "epoch": 0.74, "grad_norm": 1.1094424724578857, "learning_rate": 1.6170746838999478e-06, "loss": 0.4547, "step": 11533 }, { "epoch": 0.74, "grad_norm": 1.1851140260696411, "learning_rate": 1.6163050242717643e-06, "loss": 0.5318, "step": 11534 }, { "epoch": 0.74, "grad_norm": 1.2423300743103027, "learning_rate": 1.6155355125361582e-06, "loss": 0.5073, "step": 11535 }, { "epoch": 0.74, "grad_norm": 1.2136952877044678, "learning_rate": 1.614766148726763e-06, "loss": 0.4557, "step": 11536 }, { "epoch": 0.74, "grad_norm": 1.2240136861801147, "learning_rate": 1.6139969328772053e-06, "loss": 0.5201, "step": 11537 }, { "epoch": 0.74, "grad_norm": 1.1437897682189941, "learning_rate": 1.6132278650211075e-06, "loss": 0.5393, "step": 11538 }, { "epoch": 0.74, "grad_norm": 1.1608543395996094, "learning_rate": 1.6124589451920796e-06, "loss": 0.5297, "step": 11539 }, { "epoch": 0.74, "grad_norm": 1.2279497385025024, "learning_rate": 1.6116901734237316e-06, "loss": 0.4731, "step": 11540 }, { "epoch": 0.74, "grad_norm": 1.2313896417617798, "learning_rate": 1.6109215497496644e-06, "loss": 0.5117, "step": 11541 }, { "epoch": 0.75, "grad_norm": 1.188614010810852, "learning_rate": 1.610153074203472e-06, "loss": 0.5487, "step": 11542 }, { "epoch": 0.75, "grad_norm": 1.2868452072143555, "learning_rate": 1.6093847468187445e-06, "loss": 0.5602, "step": 11543 }, { "epoch": 0.75, "grad_norm": 1.2156825065612793, "learning_rate": 1.6086165676290605e-06, "loss": 0.5358, "step": 11544 }, { "epoch": 0.75, "grad_norm": 1.1951611042022705, "learning_rate": 1.6078485366679958e-06, "loss": 0.505, "step": 11545 }, { "epoch": 0.75, "grad_norm": 1.2019875049591064, "learning_rate": 1.6070806539691203e-06, "loss": 0.5048, "step": 11546 }, { "epoch": 0.75, "grad_norm": 1.226463794708252, "learning_rate": 1.6063129195659965e-06, "loss": 0.5033, "step": 11547 }, { "epoch": 0.75, "grad_norm": 1.258355736732483, "learning_rate": 1.6055453334921784e-06, "loss": 0.5273, "step": 11548 }, { "epoch": 0.75, "grad_norm": 1.2376635074615479, "learning_rate": 1.6047778957812154e-06, "loss": 0.4905, "step": 11549 }, { "epoch": 0.75, "grad_norm": 1.1717249155044556, "learning_rate": 1.6040106064666512e-06, "loss": 0.5207, "step": 11550 }, { "epoch": 0.75, "grad_norm": 1.063504695892334, "learning_rate": 1.603243465582021e-06, "loss": 0.4946, "step": 11551 }, { "epoch": 0.75, "grad_norm": 1.2119736671447754, "learning_rate": 1.6024764731608573e-06, "loss": 0.4969, "step": 11552 }, { "epoch": 0.75, "grad_norm": 1.156589150428772, "learning_rate": 1.6017096292366792e-06, "loss": 0.5123, "step": 11553 }, { "epoch": 0.75, "grad_norm": 1.2454582452774048, "learning_rate": 1.6009429338430055e-06, "loss": 0.5638, "step": 11554 }, { "epoch": 0.75, "grad_norm": 1.1123387813568115, "learning_rate": 1.6001763870133469e-06, "loss": 0.5525, "step": 11555 }, { "epoch": 0.75, "grad_norm": 1.2169387340545654, "learning_rate": 1.5994099887812065e-06, "loss": 0.5788, "step": 11556 }, { "epoch": 0.75, "grad_norm": 1.124944806098938, "learning_rate": 1.5986437391800836e-06, "loss": 0.4927, "step": 11557 }, { "epoch": 0.75, "grad_norm": 1.1726857423782349, "learning_rate": 1.5978776382434658e-06, "loss": 0.4929, "step": 11558 }, { "epoch": 0.75, "grad_norm": 1.3007780313491821, "learning_rate": 1.5971116860048386e-06, "loss": 0.5089, "step": 11559 }, { "epoch": 0.75, "grad_norm": 1.13925302028656, "learning_rate": 1.5963458824976796e-06, "loss": 0.5007, "step": 11560 }, { "epoch": 0.75, "grad_norm": 1.1566557884216309, "learning_rate": 1.5955802277554627e-06, "loss": 0.4944, "step": 11561 }, { "epoch": 0.75, "grad_norm": 1.1395238637924194, "learning_rate": 1.5948147218116489e-06, "loss": 0.5161, "step": 11562 }, { "epoch": 0.75, "grad_norm": 1.1816364526748657, "learning_rate": 1.594049364699698e-06, "loss": 0.5203, "step": 11563 }, { "epoch": 0.75, "grad_norm": 1.3518967628479004, "learning_rate": 1.5932841564530616e-06, "loss": 0.4536, "step": 11564 }, { "epoch": 0.75, "grad_norm": 1.2906415462493896, "learning_rate": 1.5925190971051857e-06, "loss": 0.5229, "step": 11565 }, { "epoch": 0.75, "grad_norm": 1.0999939441680908, "learning_rate": 1.5917541866895087e-06, "loss": 0.4886, "step": 11566 }, { "epoch": 0.75, "grad_norm": 1.1383925676345825, "learning_rate": 1.5909894252394642e-06, "loss": 0.5238, "step": 11567 }, { "epoch": 0.75, "grad_norm": 1.2582117319107056, "learning_rate": 1.590224812788475e-06, "loss": 0.5233, "step": 11568 }, { "epoch": 0.75, "grad_norm": 1.0905942916870117, "learning_rate": 1.5894603493699618e-06, "loss": 0.5519, "step": 11569 }, { "epoch": 0.75, "grad_norm": 1.2305467128753662, "learning_rate": 1.5886960350173375e-06, "loss": 0.5213, "step": 11570 }, { "epoch": 0.75, "grad_norm": 1.209648847579956, "learning_rate": 1.5879318697640084e-06, "loss": 0.4985, "step": 11571 }, { "epoch": 0.75, "grad_norm": 1.2961373329162598, "learning_rate": 1.5871678536433738e-06, "loss": 0.5078, "step": 11572 }, { "epoch": 0.75, "grad_norm": 1.0986796617507935, "learning_rate": 1.5864039866888286e-06, "loss": 0.4653, "step": 11573 }, { "epoch": 0.75, "grad_norm": 1.2284914255142212, "learning_rate": 1.5856402689337563e-06, "loss": 0.5474, "step": 11574 }, { "epoch": 0.75, "grad_norm": 1.1523016691207886, "learning_rate": 1.584876700411539e-06, "loss": 0.5028, "step": 11575 }, { "epoch": 0.75, "grad_norm": 1.13498854637146, "learning_rate": 1.5841132811555498e-06, "loss": 0.4958, "step": 11576 }, { "epoch": 0.75, "grad_norm": 1.2590835094451904, "learning_rate": 1.5833500111991563e-06, "loss": 0.4748, "step": 11577 }, { "epoch": 0.75, "grad_norm": 1.1413660049438477, "learning_rate": 1.5825868905757185e-06, "loss": 0.5205, "step": 11578 }, { "epoch": 0.75, "grad_norm": 1.1966818571090698, "learning_rate": 1.5818239193185918e-06, "loss": 0.5299, "step": 11579 }, { "epoch": 0.75, "grad_norm": 1.1646360158920288, "learning_rate": 1.5810610974611218e-06, "loss": 0.5157, "step": 11580 }, { "epoch": 0.75, "grad_norm": 1.3229469060897827, "learning_rate": 1.5802984250366499e-06, "loss": 0.4675, "step": 11581 }, { "epoch": 0.75, "grad_norm": 1.2087057828903198, "learning_rate": 1.5795359020785105e-06, "loss": 0.5316, "step": 11582 }, { "epoch": 0.75, "grad_norm": 1.2215077877044678, "learning_rate": 1.5787735286200323e-06, "loss": 0.543, "step": 11583 }, { "epoch": 0.75, "grad_norm": 1.1452735662460327, "learning_rate": 1.5780113046945366e-06, "loss": 0.5204, "step": 11584 }, { "epoch": 0.75, "grad_norm": 1.217673897743225, "learning_rate": 1.5772492303353393e-06, "loss": 0.512, "step": 11585 }, { "epoch": 0.75, "grad_norm": 1.2506802082061768, "learning_rate": 1.576487305575745e-06, "loss": 0.5461, "step": 11586 }, { "epoch": 0.75, "grad_norm": 1.1626254320144653, "learning_rate": 1.5757255304490588e-06, "loss": 0.5054, "step": 11587 }, { "epoch": 0.75, "grad_norm": 1.168448805809021, "learning_rate": 1.5749639049885746e-06, "loss": 0.5358, "step": 11588 }, { "epoch": 0.75, "grad_norm": 1.3106945753097534, "learning_rate": 1.574202429227581e-06, "loss": 0.5691, "step": 11589 }, { "epoch": 0.75, "grad_norm": 1.1798901557922363, "learning_rate": 1.5734411031993612e-06, "loss": 0.5386, "step": 11590 }, { "epoch": 0.75, "grad_norm": 1.2368006706237793, "learning_rate": 1.5726799269371912e-06, "loss": 0.5121, "step": 11591 }, { "epoch": 0.75, "grad_norm": 1.2560313940048218, "learning_rate": 1.5719189004743373e-06, "loss": 0.5314, "step": 11592 }, { "epoch": 0.75, "grad_norm": 1.1271378993988037, "learning_rate": 1.5711580238440643e-06, "loss": 0.4814, "step": 11593 }, { "epoch": 0.75, "grad_norm": 1.2830201387405396, "learning_rate": 1.570397297079627e-06, "loss": 0.478, "step": 11594 }, { "epoch": 0.75, "grad_norm": 1.1097346544265747, "learning_rate": 1.569636720214276e-06, "loss": 0.5128, "step": 11595 }, { "epoch": 0.75, "grad_norm": 1.2003202438354492, "learning_rate": 1.5688762932812528e-06, "loss": 0.5381, "step": 11596 }, { "epoch": 0.75, "grad_norm": 1.1269254684448242, "learning_rate": 1.5681160163137964e-06, "loss": 0.5037, "step": 11597 }, { "epoch": 0.75, "grad_norm": 1.1972811222076416, "learning_rate": 1.5673558893451329e-06, "loss": 0.5048, "step": 11598 }, { "epoch": 0.75, "grad_norm": 1.1660151481628418, "learning_rate": 1.5665959124084867e-06, "loss": 0.4948, "step": 11599 }, { "epoch": 0.75, "grad_norm": 1.2991868257522583, "learning_rate": 1.5658360855370757e-06, "loss": 0.5422, "step": 11600 }, { "epoch": 0.75, "grad_norm": 1.2600021362304688, "learning_rate": 1.5650764087641085e-06, "loss": 0.5291, "step": 11601 }, { "epoch": 0.75, "grad_norm": 1.225648283958435, "learning_rate": 1.564316882122791e-06, "loss": 0.5114, "step": 11602 }, { "epoch": 0.75, "grad_norm": 1.1297026872634888, "learning_rate": 1.5635575056463171e-06, "loss": 0.5051, "step": 11603 }, { "epoch": 0.75, "grad_norm": 1.2588481903076172, "learning_rate": 1.5627982793678782e-06, "loss": 0.5667, "step": 11604 }, { "epoch": 0.75, "grad_norm": 1.2358213663101196, "learning_rate": 1.5620392033206583e-06, "loss": 0.5065, "step": 11605 }, { "epoch": 0.75, "grad_norm": 1.1933343410491943, "learning_rate": 1.561280277537835e-06, "loss": 0.4903, "step": 11606 }, { "epoch": 0.75, "grad_norm": 1.174987554550171, "learning_rate": 1.5605215020525783e-06, "loss": 0.4634, "step": 11607 }, { "epoch": 0.75, "grad_norm": 1.189400315284729, "learning_rate": 1.559762876898055e-06, "loss": 0.5419, "step": 11608 }, { "epoch": 0.75, "grad_norm": 1.1513264179229736, "learning_rate": 1.5590044021074185e-06, "loss": 0.5096, "step": 11609 }, { "epoch": 0.75, "grad_norm": 1.172520399093628, "learning_rate": 1.5582460777138215e-06, "loss": 0.5367, "step": 11610 }, { "epoch": 0.75, "grad_norm": 1.1766911745071411, "learning_rate": 1.5574879037504093e-06, "loss": 0.4879, "step": 11611 }, { "epoch": 0.75, "grad_norm": 1.0977723598480225, "learning_rate": 1.5567298802503184e-06, "loss": 0.5327, "step": 11612 }, { "epoch": 0.75, "grad_norm": 1.1126872301101685, "learning_rate": 1.5559720072466806e-06, "loss": 0.5141, "step": 11613 }, { "epoch": 0.75, "grad_norm": 1.181013584136963, "learning_rate": 1.5552142847726227e-06, "loss": 0.5394, "step": 11614 }, { "epoch": 0.75, "grad_norm": 1.093039631843567, "learning_rate": 1.5544567128612586e-06, "loss": 0.4787, "step": 11615 }, { "epoch": 0.75, "grad_norm": 1.1438485383987427, "learning_rate": 1.5536992915457028e-06, "loss": 0.5062, "step": 11616 }, { "epoch": 0.75, "grad_norm": 1.1702617406845093, "learning_rate": 1.5529420208590584e-06, "loss": 0.5478, "step": 11617 }, { "epoch": 0.75, "grad_norm": 1.266739010810852, "learning_rate": 1.5521849008344253e-06, "loss": 0.524, "step": 11618 }, { "epoch": 0.75, "grad_norm": 1.0406285524368286, "learning_rate": 1.5514279315048946e-06, "loss": 0.5004, "step": 11619 }, { "epoch": 0.75, "grad_norm": 1.2636293172836304, "learning_rate": 1.5506711129035534e-06, "loss": 0.5783, "step": 11620 }, { "epoch": 0.75, "grad_norm": 1.3005503416061401, "learning_rate": 1.5499144450634768e-06, "loss": 0.4933, "step": 11621 }, { "epoch": 0.75, "grad_norm": 1.2587714195251465, "learning_rate": 1.5491579280177383e-06, "loss": 0.5333, "step": 11622 }, { "epoch": 0.75, "grad_norm": 1.2209430932998657, "learning_rate": 1.5484015617994036e-06, "loss": 0.5359, "step": 11623 }, { "epoch": 0.75, "grad_norm": 1.0941540002822876, "learning_rate": 1.5476453464415314e-06, "loss": 0.4967, "step": 11624 }, { "epoch": 0.75, "grad_norm": 1.2430263757705688, "learning_rate": 1.5468892819771736e-06, "loss": 0.5531, "step": 11625 }, { "epoch": 0.75, "grad_norm": 1.198628306388855, "learning_rate": 1.546133368439378e-06, "loss": 0.5243, "step": 11626 }, { "epoch": 0.75, "grad_norm": 1.1689265966415405, "learning_rate": 1.5453776058611803e-06, "loss": 0.5146, "step": 11627 }, { "epoch": 0.75, "grad_norm": 1.228805661201477, "learning_rate": 1.544621994275614e-06, "loss": 0.4708, "step": 11628 }, { "epoch": 0.75, "grad_norm": 1.1624518632888794, "learning_rate": 1.5438665337157056e-06, "loss": 0.4944, "step": 11629 }, { "epoch": 0.75, "grad_norm": 1.3256303071975708, "learning_rate": 1.5431112242144742e-06, "loss": 0.544, "step": 11630 }, { "epoch": 0.75, "grad_norm": 1.1569340229034424, "learning_rate": 1.5423560658049318e-06, "loss": 0.5402, "step": 11631 }, { "epoch": 0.75, "grad_norm": 1.1466619968414307, "learning_rate": 1.5416010585200876e-06, "loss": 0.5286, "step": 11632 }, { "epoch": 0.75, "grad_norm": 1.228804349899292, "learning_rate": 1.5408462023929354e-06, "loss": 0.5211, "step": 11633 }, { "epoch": 0.75, "grad_norm": 1.2771960496902466, "learning_rate": 1.5400914974564718e-06, "loss": 0.5383, "step": 11634 }, { "epoch": 0.75, "grad_norm": 1.2332102060317993, "learning_rate": 1.5393369437436817e-06, "loss": 0.5149, "step": 11635 }, { "epoch": 0.75, "grad_norm": 1.2797446250915527, "learning_rate": 1.5385825412875455e-06, "loss": 0.4817, "step": 11636 }, { "epoch": 0.75, "grad_norm": 1.2411839962005615, "learning_rate": 1.5378282901210372e-06, "loss": 0.5228, "step": 11637 }, { "epoch": 0.75, "grad_norm": 1.1796666383743286, "learning_rate": 1.5370741902771208e-06, "loss": 0.5541, "step": 11638 }, { "epoch": 0.75, "grad_norm": 1.1568318605422974, "learning_rate": 1.5363202417887563e-06, "loss": 0.5018, "step": 11639 }, { "epoch": 0.75, "grad_norm": 1.2845959663391113, "learning_rate": 1.535566444688898e-06, "loss": 0.4705, "step": 11640 }, { "epoch": 0.75, "grad_norm": 1.1314681768417358, "learning_rate": 1.534812799010494e-06, "loss": 0.5248, "step": 11641 }, { "epoch": 0.75, "grad_norm": 1.133621335029602, "learning_rate": 1.5340593047864799e-06, "loss": 0.4707, "step": 11642 }, { "epoch": 0.75, "grad_norm": 1.2367560863494873, "learning_rate": 1.533305962049792e-06, "loss": 0.5546, "step": 11643 }, { "epoch": 0.75, "grad_norm": 1.1420739889144897, "learning_rate": 1.532552770833356e-06, "loss": 0.4925, "step": 11644 }, { "epoch": 0.75, "grad_norm": 1.1610304117202759, "learning_rate": 1.5317997311700921e-06, "loss": 0.5343, "step": 11645 }, { "epoch": 0.75, "grad_norm": 1.1026208400726318, "learning_rate": 1.5310468430929153e-06, "loss": 0.48, "step": 11646 }, { "epoch": 0.75, "grad_norm": 1.2216469049453735, "learning_rate": 1.5302941066347298e-06, "loss": 0.4971, "step": 11647 }, { "epoch": 0.75, "grad_norm": 1.1449178457260132, "learning_rate": 1.529541521828437e-06, "loss": 0.4983, "step": 11648 }, { "epoch": 0.75, "grad_norm": 1.1776535511016846, "learning_rate": 1.5287890887069302e-06, "loss": 0.487, "step": 11649 }, { "epoch": 0.75, "grad_norm": 1.2745866775512695, "learning_rate": 1.5280368073030983e-06, "loss": 0.5886, "step": 11650 }, { "epoch": 0.75, "grad_norm": 1.1441032886505127, "learning_rate": 1.5272846776498178e-06, "loss": 0.489, "step": 11651 }, { "epoch": 0.75, "grad_norm": 1.2062625885009766, "learning_rate": 1.5265326997799645e-06, "loss": 0.4694, "step": 11652 }, { "epoch": 0.75, "grad_norm": 1.1715134382247925, "learning_rate": 1.5257808737264051e-06, "loss": 0.4823, "step": 11653 }, { "epoch": 0.75, "grad_norm": 1.159462332725525, "learning_rate": 1.525029199522e-06, "loss": 0.5317, "step": 11654 }, { "epoch": 0.75, "grad_norm": 1.1998904943466187, "learning_rate": 1.5242776771996054e-06, "loss": 0.5803, "step": 11655 }, { "epoch": 0.75, "grad_norm": 1.2154593467712402, "learning_rate": 1.5235263067920635e-06, "loss": 0.5261, "step": 11656 }, { "epoch": 0.75, "grad_norm": 1.2353402376174927, "learning_rate": 1.5227750883322179e-06, "loss": 0.5629, "step": 11657 }, { "epoch": 0.75, "grad_norm": 1.0873581171035767, "learning_rate": 1.5220240218529014e-06, "loss": 0.4524, "step": 11658 }, { "epoch": 0.75, "grad_norm": 1.200012445449829, "learning_rate": 1.521273107386942e-06, "loss": 0.4837, "step": 11659 }, { "epoch": 0.75, "grad_norm": 1.1392320394515991, "learning_rate": 1.5205223449671596e-06, "loss": 0.5382, "step": 11660 }, { "epoch": 0.75, "grad_norm": 1.1708930730819702, "learning_rate": 1.5197717346263703e-06, "loss": 0.5551, "step": 11661 }, { "epoch": 0.75, "grad_norm": 1.1562764644622803, "learning_rate": 1.5190212763973771e-06, "loss": 0.4985, "step": 11662 }, { "epoch": 0.75, "grad_norm": 1.1869421005249023, "learning_rate": 1.5182709703129838e-06, "loss": 0.5356, "step": 11663 }, { "epoch": 0.75, "grad_norm": 1.2102347612380981, "learning_rate": 1.5175208164059829e-06, "loss": 0.5302, "step": 11664 }, { "epoch": 0.75, "grad_norm": 1.143551230430603, "learning_rate": 1.516770814709162e-06, "loss": 0.5539, "step": 11665 }, { "epoch": 0.75, "grad_norm": 1.1885368824005127, "learning_rate": 1.516020965255302e-06, "loss": 0.5312, "step": 11666 }, { "epoch": 0.75, "grad_norm": 1.290737509727478, "learning_rate": 1.5152712680771786e-06, "loss": 0.5816, "step": 11667 }, { "epoch": 0.75, "grad_norm": 1.1477265357971191, "learning_rate": 1.5145217232075555e-06, "loss": 0.5532, "step": 11668 }, { "epoch": 0.75, "grad_norm": 1.179892897605896, "learning_rate": 1.5137723306791957e-06, "loss": 0.5128, "step": 11669 }, { "epoch": 0.75, "grad_norm": 1.1006578207015991, "learning_rate": 1.5130230905248522e-06, "loss": 0.5093, "step": 11670 }, { "epoch": 0.75, "grad_norm": 1.221873164176941, "learning_rate": 1.512274002777273e-06, "loss": 0.5634, "step": 11671 }, { "epoch": 0.75, "grad_norm": 1.2342033386230469, "learning_rate": 1.511525067469199e-06, "loss": 0.5159, "step": 11672 }, { "epoch": 0.75, "grad_norm": 1.317613959312439, "learning_rate": 1.5107762846333657e-06, "loss": 0.5155, "step": 11673 }, { "epoch": 0.75, "grad_norm": 1.2068735361099243, "learning_rate": 1.5100276543024967e-06, "loss": 0.5519, "step": 11674 }, { "epoch": 0.75, "grad_norm": 1.18550443649292, "learning_rate": 1.5092791765093145e-06, "loss": 0.4782, "step": 11675 }, { "epoch": 0.75, "grad_norm": 1.2119287252426147, "learning_rate": 1.5085308512865333e-06, "loss": 0.4757, "step": 11676 }, { "epoch": 0.75, "grad_norm": 1.2200446128845215, "learning_rate": 1.5077826786668608e-06, "loss": 0.5534, "step": 11677 }, { "epoch": 0.75, "grad_norm": 1.145687222480774, "learning_rate": 1.5070346586829977e-06, "loss": 0.5123, "step": 11678 }, { "epoch": 0.75, "grad_norm": 1.2546261548995972, "learning_rate": 1.5062867913676383e-06, "loss": 0.5011, "step": 11679 }, { "epoch": 0.75, "grad_norm": 1.2098712921142578, "learning_rate": 1.5055390767534683e-06, "loss": 0.5231, "step": 11680 }, { "epoch": 0.75, "grad_norm": 1.228467345237732, "learning_rate": 1.5047915148731695e-06, "loss": 0.5334, "step": 11681 }, { "epoch": 0.75, "grad_norm": 1.2398324012756348, "learning_rate": 1.5040441057594158e-06, "loss": 0.5452, "step": 11682 }, { "epoch": 0.75, "grad_norm": 1.088010549545288, "learning_rate": 1.5032968494448746e-06, "loss": 0.5341, "step": 11683 }, { "epoch": 0.75, "grad_norm": 1.0964233875274658, "learning_rate": 1.502549745962208e-06, "loss": 0.4883, "step": 11684 }, { "epoch": 0.75, "grad_norm": 1.321697473526001, "learning_rate": 1.5018027953440667e-06, "loss": 0.4948, "step": 11685 }, { "epoch": 0.75, "grad_norm": 1.3065457344055176, "learning_rate": 1.5010559976231004e-06, "loss": 0.5572, "step": 11686 }, { "epoch": 0.75, "grad_norm": 1.369077205657959, "learning_rate": 1.5003093528319485e-06, "loss": 0.5739, "step": 11687 }, { "epoch": 0.75, "grad_norm": 1.2144898176193237, "learning_rate": 1.4995628610032453e-06, "loss": 0.549, "step": 11688 }, { "epoch": 0.75, "grad_norm": 1.1526261568069458, "learning_rate": 1.4988165221696183e-06, "loss": 0.5063, "step": 11689 }, { "epoch": 0.75, "grad_norm": 1.2328579425811768, "learning_rate": 1.49807033636369e-06, "loss": 0.533, "step": 11690 }, { "epoch": 0.75, "grad_norm": 1.1424622535705566, "learning_rate": 1.4973243036180702e-06, "loss": 0.543, "step": 11691 }, { "epoch": 0.75, "grad_norm": 1.2449840307235718, "learning_rate": 1.496578423965368e-06, "loss": 0.538, "step": 11692 }, { "epoch": 0.75, "grad_norm": 1.1609119176864624, "learning_rate": 1.4958326974381842e-06, "loss": 0.4941, "step": 11693 }, { "epoch": 0.75, "grad_norm": 1.1332581043243408, "learning_rate": 1.4950871240691124e-06, "loss": 0.4837, "step": 11694 }, { "epoch": 0.75, "grad_norm": 1.1795215606689453, "learning_rate": 1.4943417038907392e-06, "loss": 0.4932, "step": 11695 }, { "epoch": 0.75, "grad_norm": 1.1534814834594727, "learning_rate": 1.493596436935647e-06, "loss": 0.5008, "step": 11696 }, { "epoch": 0.76, "grad_norm": 1.1971821784973145, "learning_rate": 1.4928513232364067e-06, "loss": 0.5119, "step": 11697 }, { "epoch": 0.76, "grad_norm": 1.0986087322235107, "learning_rate": 1.4921063628255866e-06, "loss": 0.5085, "step": 11698 }, { "epoch": 0.76, "grad_norm": 1.1290898323059082, "learning_rate": 1.4913615557357464e-06, "loss": 0.519, "step": 11699 }, { "epoch": 0.76, "grad_norm": 1.2816680669784546, "learning_rate": 1.4906169019994404e-06, "loss": 0.5478, "step": 11700 }, { "epoch": 0.76, "grad_norm": 1.2865384817123413, "learning_rate": 1.4898724016492155e-06, "loss": 0.5332, "step": 11701 }, { "epoch": 0.76, "grad_norm": 1.1714812517166138, "learning_rate": 1.4891280547176129e-06, "loss": 0.5395, "step": 11702 }, { "epoch": 0.76, "grad_norm": 1.0962942838668823, "learning_rate": 1.488383861237163e-06, "loss": 0.4708, "step": 11703 }, { "epoch": 0.76, "grad_norm": 1.170555830001831, "learning_rate": 1.4876398212403952e-06, "loss": 0.5088, "step": 11704 }, { "epoch": 0.76, "grad_norm": 1.117993950843811, "learning_rate": 1.4868959347598283e-06, "loss": 0.5638, "step": 11705 }, { "epoch": 0.76, "grad_norm": 1.214414119720459, "learning_rate": 1.4861522018279766e-06, "loss": 0.5009, "step": 11706 }, { "epoch": 0.76, "grad_norm": 1.0860309600830078, "learning_rate": 1.4854086224773462e-06, "loss": 0.5058, "step": 11707 }, { "epoch": 0.76, "grad_norm": 1.1287407875061035, "learning_rate": 1.4846651967404384e-06, "loss": 0.5176, "step": 11708 }, { "epoch": 0.76, "grad_norm": 1.2603610754013062, "learning_rate": 1.4839219246497437e-06, "loss": 0.5063, "step": 11709 }, { "epoch": 0.76, "grad_norm": 1.4032843112945557, "learning_rate": 1.4831788062377501e-06, "loss": 0.5295, "step": 11710 }, { "epoch": 0.76, "grad_norm": 1.1606719493865967, "learning_rate": 1.4824358415369372e-06, "loss": 0.5326, "step": 11711 }, { "epoch": 0.76, "grad_norm": 1.1516084671020508, "learning_rate": 1.4816930305797782e-06, "loss": 0.4651, "step": 11712 }, { "epoch": 0.76, "grad_norm": 1.2010828256607056, "learning_rate": 1.48095037339874e-06, "loss": 0.5035, "step": 11713 }, { "epoch": 0.76, "grad_norm": 1.1652356386184692, "learning_rate": 1.4802078700262828e-06, "loss": 0.4858, "step": 11714 }, { "epoch": 0.76, "grad_norm": 1.377055287361145, "learning_rate": 1.4794655204948572e-06, "loss": 0.5176, "step": 11715 }, { "epoch": 0.76, "grad_norm": 1.2622298002243042, "learning_rate": 1.4787233248369103e-06, "loss": 0.5146, "step": 11716 }, { "epoch": 0.76, "grad_norm": 1.2091368436813354, "learning_rate": 1.4779812830848823e-06, "loss": 0.5187, "step": 11717 }, { "epoch": 0.76, "grad_norm": 1.1377474069595337, "learning_rate": 1.477239395271205e-06, "loss": 0.5085, "step": 11718 }, { "epoch": 0.76, "grad_norm": 1.1563968658447266, "learning_rate": 1.476497661428305e-06, "loss": 0.4902, "step": 11719 }, { "epoch": 0.76, "grad_norm": 1.1927944421768188, "learning_rate": 1.475756081588603e-06, "loss": 0.5114, "step": 11720 }, { "epoch": 0.76, "grad_norm": 1.1032248735427856, "learning_rate": 1.4750146557845085e-06, "loss": 0.5083, "step": 11721 }, { "epoch": 0.76, "grad_norm": 1.2222723960876465, "learning_rate": 1.4742733840484291e-06, "loss": 0.5133, "step": 11722 }, { "epoch": 0.76, "grad_norm": 1.2711048126220703, "learning_rate": 1.4735322664127633e-06, "loss": 0.5283, "step": 11723 }, { "epoch": 0.76, "grad_norm": 1.25006902217865, "learning_rate": 1.4727913029099034e-06, "loss": 0.5598, "step": 11724 }, { "epoch": 0.76, "grad_norm": 1.1614102125167847, "learning_rate": 1.4720504935722374e-06, "loss": 0.5356, "step": 11725 }, { "epoch": 0.76, "grad_norm": 1.1971099376678467, "learning_rate": 1.4713098384321407e-06, "loss": 0.5103, "step": 11726 }, { "epoch": 0.76, "grad_norm": 1.090288519859314, "learning_rate": 1.470569337521986e-06, "loss": 0.4824, "step": 11727 }, { "epoch": 0.76, "grad_norm": 1.1919430494308472, "learning_rate": 1.4698289908741403e-06, "loss": 0.5754, "step": 11728 }, { "epoch": 0.76, "grad_norm": 1.1840593814849854, "learning_rate": 1.4690887985209611e-06, "loss": 0.5282, "step": 11729 }, { "epoch": 0.76, "grad_norm": 1.2224175930023193, "learning_rate": 1.4683487604948022e-06, "loss": 0.5402, "step": 11730 }, { "epoch": 0.76, "grad_norm": 1.2565630674362183, "learning_rate": 1.4676088768280056e-06, "loss": 0.5282, "step": 11731 }, { "epoch": 0.76, "grad_norm": 1.130118727684021, "learning_rate": 1.4668691475529112e-06, "loss": 0.4594, "step": 11732 }, { "epoch": 0.76, "grad_norm": 1.2039040327072144, "learning_rate": 1.466129572701851e-06, "loss": 0.5373, "step": 11733 }, { "epoch": 0.76, "grad_norm": 1.2314478158950806, "learning_rate": 1.4653901523071494e-06, "loss": 0.5041, "step": 11734 }, { "epoch": 0.76, "grad_norm": 1.2157565355300903, "learning_rate": 1.4646508864011267e-06, "loss": 0.5212, "step": 11735 }, { "epoch": 0.76, "grad_norm": 1.2095240354537964, "learning_rate": 1.4639117750160908e-06, "loss": 0.5496, "step": 11736 }, { "epoch": 0.76, "grad_norm": 1.139059066772461, "learning_rate": 1.463172818184348e-06, "loss": 0.5002, "step": 11737 }, { "epoch": 0.76, "grad_norm": 1.1665582656860352, "learning_rate": 1.4624340159381961e-06, "loss": 0.5014, "step": 11738 }, { "epoch": 0.76, "grad_norm": 1.1893881559371948, "learning_rate": 1.4616953683099283e-06, "loss": 0.5166, "step": 11739 }, { "epoch": 0.76, "grad_norm": 1.2075743675231934, "learning_rate": 1.4609568753318248e-06, "loss": 0.5455, "step": 11740 }, { "epoch": 0.76, "grad_norm": 1.2258706092834473, "learning_rate": 1.4602185370361654e-06, "loss": 0.5189, "step": 11741 }, { "epoch": 0.76, "grad_norm": 1.2030266523361206, "learning_rate": 1.4594803534552215e-06, "loss": 0.5374, "step": 11742 }, { "epoch": 0.76, "grad_norm": 1.1401441097259521, "learning_rate": 1.4587423246212569e-06, "loss": 0.513, "step": 11743 }, { "epoch": 0.76, "grad_norm": 1.1302251815795898, "learning_rate": 1.4580044505665296e-06, "loss": 0.4704, "step": 11744 }, { "epoch": 0.76, "grad_norm": 1.219006061553955, "learning_rate": 1.457266731323288e-06, "loss": 0.5396, "step": 11745 }, { "epoch": 0.76, "grad_norm": 1.169005274772644, "learning_rate": 1.456529166923777e-06, "loss": 0.5456, "step": 11746 }, { "epoch": 0.76, "grad_norm": 1.2635380029678345, "learning_rate": 1.455791757400234e-06, "loss": 0.5347, "step": 11747 }, { "epoch": 0.76, "grad_norm": 1.1386297941207886, "learning_rate": 1.4550545027848895e-06, "loss": 0.491, "step": 11748 }, { "epoch": 0.76, "grad_norm": 1.1812151670455933, "learning_rate": 1.4543174031099677e-06, "loss": 0.5042, "step": 11749 }, { "epoch": 0.76, "grad_norm": 1.3072788715362549, "learning_rate": 1.4535804584076825e-06, "loss": 0.5267, "step": 11750 }, { "epoch": 0.76, "grad_norm": 1.248367190361023, "learning_rate": 1.452843668710246e-06, "loss": 0.4858, "step": 11751 }, { "epoch": 0.76, "grad_norm": 1.1931098699569702, "learning_rate": 1.4521070340498605e-06, "loss": 0.4702, "step": 11752 }, { "epoch": 0.76, "grad_norm": 1.2424342632293701, "learning_rate": 1.451370554458723e-06, "loss": 0.5612, "step": 11753 }, { "epoch": 0.76, "grad_norm": 1.17421293258667, "learning_rate": 1.4506342299690234e-06, "loss": 0.5004, "step": 11754 }, { "epoch": 0.76, "grad_norm": 1.2072842121124268, "learning_rate": 1.4498980606129453e-06, "loss": 0.5439, "step": 11755 }, { "epoch": 0.76, "grad_norm": 1.1933190822601318, "learning_rate": 1.4491620464226625e-06, "loss": 0.5127, "step": 11756 }, { "epoch": 0.76, "grad_norm": 1.2891206741333008, "learning_rate": 1.4484261874303446e-06, "loss": 0.4943, "step": 11757 }, { "epoch": 0.76, "grad_norm": 1.190520167350769, "learning_rate": 1.4476904836681555e-06, "loss": 0.5021, "step": 11758 }, { "epoch": 0.76, "grad_norm": 1.1647205352783203, "learning_rate": 1.44695493516825e-06, "loss": 0.5172, "step": 11759 }, { "epoch": 0.76, "grad_norm": 1.26753568649292, "learning_rate": 1.4462195419627773e-06, "loss": 0.5752, "step": 11760 }, { "epoch": 0.76, "grad_norm": 1.2369004487991333, "learning_rate": 1.4454843040838812e-06, "loss": 0.4929, "step": 11761 }, { "epoch": 0.76, "grad_norm": 1.2064579725265503, "learning_rate": 1.4447492215636937e-06, "loss": 0.4869, "step": 11762 }, { "epoch": 0.76, "grad_norm": 1.2840003967285156, "learning_rate": 1.444014294434345e-06, "loss": 0.5446, "step": 11763 }, { "epoch": 0.76, "grad_norm": 1.2300233840942383, "learning_rate": 1.4432795227279573e-06, "loss": 0.5509, "step": 11764 }, { "epoch": 0.76, "grad_norm": 1.2508283853530884, "learning_rate": 1.442544906476645e-06, "loss": 0.4993, "step": 11765 }, { "epoch": 0.76, "grad_norm": 1.2068792581558228, "learning_rate": 1.4418104457125165e-06, "loss": 0.5229, "step": 11766 }, { "epoch": 0.76, "grad_norm": 1.116418480873108, "learning_rate": 1.4410761404676743e-06, "loss": 0.51, "step": 11767 }, { "epoch": 0.76, "grad_norm": 1.1030062437057495, "learning_rate": 1.440341990774211e-06, "loss": 0.5128, "step": 11768 }, { "epoch": 0.76, "grad_norm": 1.2607592344284058, "learning_rate": 1.4396079966642146e-06, "loss": 0.5082, "step": 11769 }, { "epoch": 0.76, "grad_norm": 1.2086291313171387, "learning_rate": 1.4388741581697674e-06, "loss": 0.4992, "step": 11770 }, { "epoch": 0.76, "grad_norm": 1.1879266500473022, "learning_rate": 1.4381404753229421e-06, "loss": 0.464, "step": 11771 }, { "epoch": 0.76, "grad_norm": 1.2876427173614502, "learning_rate": 1.4374069481558096e-06, "loss": 0.5414, "step": 11772 }, { "epoch": 0.76, "grad_norm": 1.3928418159484863, "learning_rate": 1.4366735767004253e-06, "loss": 0.5324, "step": 11773 }, { "epoch": 0.76, "grad_norm": 1.1065810918807983, "learning_rate": 1.4359403609888455e-06, "loss": 0.4948, "step": 11774 }, { "epoch": 0.76, "grad_norm": 1.1202738285064697, "learning_rate": 1.4352073010531176e-06, "loss": 0.5114, "step": 11775 }, { "epoch": 0.76, "grad_norm": 1.208545207977295, "learning_rate": 1.434474396925281e-06, "loss": 0.5562, "step": 11776 }, { "epoch": 0.76, "grad_norm": 1.0779211521148682, "learning_rate": 1.433741648637369e-06, "loss": 0.4876, "step": 11777 }, { "epoch": 0.76, "grad_norm": 1.1158183813095093, "learning_rate": 1.4330090562214105e-06, "loss": 0.5164, "step": 11778 }, { "epoch": 0.76, "grad_norm": 1.2107758522033691, "learning_rate": 1.4322766197094217e-06, "loss": 0.5068, "step": 11779 }, { "epoch": 0.76, "grad_norm": 1.1600279808044434, "learning_rate": 1.4315443391334166e-06, "loss": 0.534, "step": 11780 }, { "epoch": 0.76, "grad_norm": 1.2837845087051392, "learning_rate": 1.4308122145254022e-06, "loss": 0.5351, "step": 11781 }, { "epoch": 0.76, "grad_norm": 1.2118408679962158, "learning_rate": 1.4300802459173768e-06, "loss": 0.521, "step": 11782 }, { "epoch": 0.76, "grad_norm": 1.2615011930465698, "learning_rate": 1.4293484333413338e-06, "loss": 0.5627, "step": 11783 }, { "epoch": 0.76, "grad_norm": 1.1953206062316895, "learning_rate": 1.428616776829259e-06, "loss": 0.5455, "step": 11784 }, { "epoch": 0.76, "grad_norm": 1.124371886253357, "learning_rate": 1.42788527641313e-06, "loss": 0.5347, "step": 11785 }, { "epoch": 0.76, "grad_norm": 1.1877260208129883, "learning_rate": 1.427153932124919e-06, "loss": 0.5324, "step": 11786 }, { "epoch": 0.76, "grad_norm": 1.184618353843689, "learning_rate": 1.4264227439965917e-06, "loss": 0.4808, "step": 11787 }, { "epoch": 0.76, "grad_norm": 1.2519786357879639, "learning_rate": 1.4256917120601067e-06, "loss": 0.5523, "step": 11788 }, { "epoch": 0.76, "grad_norm": 1.2331600189208984, "learning_rate": 1.4249608363474143e-06, "loss": 0.5381, "step": 11789 }, { "epoch": 0.76, "grad_norm": 1.110904335975647, "learning_rate": 1.424230116890462e-06, "loss": 0.4918, "step": 11790 }, { "epoch": 0.76, "grad_norm": 1.1338822841644287, "learning_rate": 1.4234995537211844e-06, "loss": 0.53, "step": 11791 }, { "epoch": 0.76, "grad_norm": 1.219692587852478, "learning_rate": 1.4227691468715133e-06, "loss": 0.5805, "step": 11792 }, { "epoch": 0.76, "grad_norm": 1.10301673412323, "learning_rate": 1.4220388963733734e-06, "loss": 0.4607, "step": 11793 }, { "epoch": 0.76, "grad_norm": 1.188125491142273, "learning_rate": 1.4213088022586824e-06, "loss": 0.5117, "step": 11794 }, { "epoch": 0.76, "grad_norm": 1.2174510955810547, "learning_rate": 1.4205788645593505e-06, "loss": 0.4766, "step": 11795 }, { "epoch": 0.76, "grad_norm": 1.208105206489563, "learning_rate": 1.4198490833072826e-06, "loss": 0.5033, "step": 11796 }, { "epoch": 0.76, "grad_norm": 1.1149932146072388, "learning_rate": 1.4191194585343731e-06, "loss": 0.5001, "step": 11797 }, { "epoch": 0.76, "grad_norm": 1.0754508972167969, "learning_rate": 1.4183899902725135e-06, "loss": 0.487, "step": 11798 }, { "epoch": 0.76, "grad_norm": 1.126879096031189, "learning_rate": 1.4176606785535863e-06, "loss": 0.5212, "step": 11799 }, { "epoch": 0.76, "grad_norm": 1.191695213317871, "learning_rate": 1.4169315234094683e-06, "loss": 0.5079, "step": 11800 }, { "epoch": 0.76, "grad_norm": 1.2651350498199463, "learning_rate": 1.416202524872029e-06, "loss": 0.5506, "step": 11801 }, { "epoch": 0.76, "grad_norm": 1.1683282852172852, "learning_rate": 1.415473682973133e-06, "loss": 0.538, "step": 11802 }, { "epoch": 0.76, "grad_norm": 1.1235939264297485, "learning_rate": 1.414744997744632e-06, "loss": 0.4994, "step": 11803 }, { "epoch": 0.76, "grad_norm": 1.2505656480789185, "learning_rate": 1.4140164692183772e-06, "loss": 0.4984, "step": 11804 }, { "epoch": 0.76, "grad_norm": 1.1887264251708984, "learning_rate": 1.4132880974262109e-06, "loss": 0.5059, "step": 11805 }, { "epoch": 0.76, "grad_norm": 1.2529419660568237, "learning_rate": 1.4125598823999675e-06, "loss": 0.5687, "step": 11806 }, { "epoch": 0.76, "grad_norm": 1.1991322040557861, "learning_rate": 1.4118318241714767e-06, "loss": 0.5251, "step": 11807 }, { "epoch": 0.76, "grad_norm": 1.1972763538360596, "learning_rate": 1.4111039227725603e-06, "loss": 0.5488, "step": 11808 }, { "epoch": 0.76, "grad_norm": 1.1643311977386475, "learning_rate": 1.4103761782350305e-06, "loss": 0.5088, "step": 11809 }, { "epoch": 0.76, "grad_norm": 1.142770767211914, "learning_rate": 1.4096485905906965e-06, "loss": 0.4992, "step": 11810 }, { "epoch": 0.76, "grad_norm": 1.2061958312988281, "learning_rate": 1.4089211598713593e-06, "loss": 0.5611, "step": 11811 }, { "epoch": 0.76, "grad_norm": 1.2425671815872192, "learning_rate": 1.4081938861088134e-06, "loss": 0.5487, "step": 11812 }, { "epoch": 0.76, "grad_norm": 1.297186017036438, "learning_rate": 1.407466769334847e-06, "loss": 0.5308, "step": 11813 }, { "epoch": 0.76, "grad_norm": 1.297104001045227, "learning_rate": 1.406739809581238e-06, "loss": 0.5384, "step": 11814 }, { "epoch": 0.76, "grad_norm": 1.0911211967468262, "learning_rate": 1.4060130068797605e-06, "loss": 0.465, "step": 11815 }, { "epoch": 0.76, "grad_norm": 1.1914665699005127, "learning_rate": 1.4052863612621826e-06, "loss": 0.5394, "step": 11816 }, { "epoch": 0.76, "grad_norm": 1.2354129552841187, "learning_rate": 1.4045598727602633e-06, "loss": 0.5016, "step": 11817 }, { "epoch": 0.76, "grad_norm": 1.2017574310302734, "learning_rate": 1.4038335414057553e-06, "loss": 0.4686, "step": 11818 }, { "epoch": 0.76, "grad_norm": 1.0594898462295532, "learning_rate": 1.4031073672304068e-06, "loss": 0.485, "step": 11819 }, { "epoch": 0.76, "grad_norm": 1.1998382806777954, "learning_rate": 1.4023813502659533e-06, "loss": 0.5438, "step": 11820 }, { "epoch": 0.76, "grad_norm": 1.2338001728057861, "learning_rate": 1.4016554905441293e-06, "loss": 0.5346, "step": 11821 }, { "epoch": 0.76, "grad_norm": 1.1747218370437622, "learning_rate": 1.4009297880966599e-06, "loss": 0.495, "step": 11822 }, { "epoch": 0.76, "grad_norm": 1.1373077630996704, "learning_rate": 1.4002042429552637e-06, "loss": 0.5177, "step": 11823 }, { "epoch": 0.76, "grad_norm": 1.1194369792938232, "learning_rate": 1.3994788551516542e-06, "loss": 0.5421, "step": 11824 }, { "epoch": 0.76, "grad_norm": 1.2339638471603394, "learning_rate": 1.3987536247175326e-06, "loss": 0.5512, "step": 11825 }, { "epoch": 0.76, "grad_norm": 1.2339344024658203, "learning_rate": 1.3980285516845986e-06, "loss": 0.5711, "step": 11826 }, { "epoch": 0.76, "grad_norm": 1.2455259561538696, "learning_rate": 1.3973036360845438e-06, "loss": 0.5616, "step": 11827 }, { "epoch": 0.76, "grad_norm": 1.1560866832733154, "learning_rate": 1.3965788779490537e-06, "loss": 0.4581, "step": 11828 }, { "epoch": 0.76, "grad_norm": 1.2218468189239502, "learning_rate": 1.3958542773098016e-06, "loss": 0.4962, "step": 11829 }, { "epoch": 0.76, "grad_norm": 1.1147984266281128, "learning_rate": 1.3951298341984605e-06, "loss": 0.5346, "step": 11830 }, { "epoch": 0.76, "grad_norm": 1.2070077657699585, "learning_rate": 1.394405548646694e-06, "loss": 0.49, "step": 11831 }, { "epoch": 0.76, "grad_norm": 1.1864527463912964, "learning_rate": 1.3936814206861581e-06, "loss": 0.5895, "step": 11832 }, { "epoch": 0.76, "grad_norm": 1.2474524974822998, "learning_rate": 1.3929574503485044e-06, "loss": 0.5299, "step": 11833 }, { "epoch": 0.76, "grad_norm": 1.0751981735229492, "learning_rate": 1.3922336376653727e-06, "loss": 0.4692, "step": 11834 }, { "epoch": 0.76, "grad_norm": 1.270793080329895, "learning_rate": 1.3915099826684008e-06, "loss": 0.546, "step": 11835 }, { "epoch": 0.76, "grad_norm": 1.2478007078170776, "learning_rate": 1.3907864853892172e-06, "loss": 0.5439, "step": 11836 }, { "epoch": 0.76, "grad_norm": 1.0924904346466064, "learning_rate": 1.3900631458594465e-06, "loss": 0.5076, "step": 11837 }, { "epoch": 0.76, "grad_norm": 1.186142086982727, "learning_rate": 1.3893399641106997e-06, "loss": 0.4702, "step": 11838 }, { "epoch": 0.76, "grad_norm": 1.21247136592865, "learning_rate": 1.388616940174588e-06, "loss": 0.5262, "step": 11839 }, { "epoch": 0.76, "grad_norm": 1.202311396598816, "learning_rate": 1.3878940740827123e-06, "loss": 0.5382, "step": 11840 }, { "epoch": 0.76, "grad_norm": 1.19049072265625, "learning_rate": 1.387171365866667e-06, "loss": 0.5298, "step": 11841 }, { "epoch": 0.76, "grad_norm": 1.1323981285095215, "learning_rate": 1.3864488155580403e-06, "loss": 0.4615, "step": 11842 }, { "epoch": 0.76, "grad_norm": 1.2065249681472778, "learning_rate": 1.3857264231884144e-06, "loss": 0.5627, "step": 11843 }, { "epoch": 0.76, "grad_norm": 1.1835254430770874, "learning_rate": 1.3850041887893596e-06, "loss": 0.507, "step": 11844 }, { "epoch": 0.76, "grad_norm": 1.1803741455078125, "learning_rate": 1.384282112392445e-06, "loss": 0.5938, "step": 11845 }, { "epoch": 0.76, "grad_norm": 1.2734689712524414, "learning_rate": 1.3835601940292309e-06, "loss": 0.5371, "step": 11846 }, { "epoch": 0.76, "grad_norm": 1.1240248680114746, "learning_rate": 1.3828384337312701e-06, "loss": 0.5634, "step": 11847 }, { "epoch": 0.76, "grad_norm": 1.1329272985458374, "learning_rate": 1.3821168315301086e-06, "loss": 0.5142, "step": 11848 }, { "epoch": 0.76, "grad_norm": 1.1923860311508179, "learning_rate": 1.3813953874572882e-06, "loss": 0.5367, "step": 11849 }, { "epoch": 0.76, "grad_norm": 1.0844008922576904, "learning_rate": 1.3806741015443376e-06, "loss": 0.4879, "step": 11850 }, { "epoch": 0.76, "grad_norm": 1.2335748672485352, "learning_rate": 1.3799529738227836e-06, "loss": 0.5178, "step": 11851 }, { "epoch": 0.77, "grad_norm": 1.171873688697815, "learning_rate": 1.3792320043241459e-06, "loss": 0.534, "step": 11852 }, { "epoch": 0.77, "grad_norm": 1.1749931573867798, "learning_rate": 1.378511193079935e-06, "loss": 0.5174, "step": 11853 }, { "epoch": 0.77, "grad_norm": 1.1642042398452759, "learning_rate": 1.3777905401216584e-06, "loss": 0.5262, "step": 11854 }, { "epoch": 0.77, "grad_norm": 1.2237433195114136, "learning_rate": 1.37707004548081e-06, "loss": 0.5184, "step": 11855 }, { "epoch": 0.77, "grad_norm": 1.2096415758132935, "learning_rate": 1.3763497091888827e-06, "loss": 0.5559, "step": 11856 }, { "epoch": 0.77, "grad_norm": 1.1568093299865723, "learning_rate": 1.3756295312773604e-06, "loss": 0.5319, "step": 11857 }, { "epoch": 0.77, "grad_norm": 1.2068380117416382, "learning_rate": 1.37490951177772e-06, "loss": 0.5435, "step": 11858 }, { "epoch": 0.77, "grad_norm": 1.1604087352752686, "learning_rate": 1.3741896507214324e-06, "loss": 0.4794, "step": 11859 }, { "epoch": 0.77, "grad_norm": 1.1683967113494873, "learning_rate": 1.3734699481399621e-06, "loss": 0.5058, "step": 11860 }, { "epoch": 0.77, "grad_norm": 1.2720873355865479, "learning_rate": 1.3727504040647621e-06, "loss": 0.573, "step": 11861 }, { "epoch": 0.77, "grad_norm": 1.1831626892089844, "learning_rate": 1.3720310185272833e-06, "loss": 0.5456, "step": 11862 }, { "epoch": 0.77, "grad_norm": 1.1405285596847534, "learning_rate": 1.3713117915589685e-06, "loss": 0.4931, "step": 11863 }, { "epoch": 0.77, "grad_norm": 1.190377116203308, "learning_rate": 1.3705927231912535e-06, "loss": 0.5215, "step": 11864 }, { "epoch": 0.77, "grad_norm": 1.2027692794799805, "learning_rate": 1.369873813455566e-06, "loss": 0.524, "step": 11865 }, { "epoch": 0.77, "grad_norm": 1.1458959579467773, "learning_rate": 1.36915506238333e-06, "loss": 0.4931, "step": 11866 }, { "epoch": 0.77, "grad_norm": 1.205475091934204, "learning_rate": 1.3684364700059572e-06, "loss": 0.5214, "step": 11867 }, { "epoch": 0.77, "grad_norm": 1.067671537399292, "learning_rate": 1.3677180363548564e-06, "loss": 0.4575, "step": 11868 }, { "epoch": 0.77, "grad_norm": 1.2475552558898926, "learning_rate": 1.3669997614614293e-06, "loss": 0.5819, "step": 11869 }, { "epoch": 0.77, "grad_norm": 1.1167640686035156, "learning_rate": 1.3662816453570692e-06, "loss": 0.5186, "step": 11870 }, { "epoch": 0.77, "grad_norm": 1.2168452739715576, "learning_rate": 1.3655636880731632e-06, "loss": 0.5961, "step": 11871 }, { "epoch": 0.77, "grad_norm": 1.1825857162475586, "learning_rate": 1.3648458896410927e-06, "loss": 0.5209, "step": 11872 }, { "epoch": 0.77, "grad_norm": 1.1964285373687744, "learning_rate": 1.364128250092228e-06, "loss": 0.5341, "step": 11873 }, { "epoch": 0.77, "grad_norm": 1.1205097436904907, "learning_rate": 1.3634107694579373e-06, "loss": 0.5281, "step": 11874 }, { "epoch": 0.77, "grad_norm": 1.195879340171814, "learning_rate": 1.3626934477695791e-06, "loss": 0.5, "step": 11875 }, { "epoch": 0.77, "grad_norm": 1.2374457120895386, "learning_rate": 1.3619762850585061e-06, "loss": 0.5303, "step": 11876 }, { "epoch": 0.77, "grad_norm": 1.2193101644515991, "learning_rate": 1.3612592813560632e-06, "loss": 0.5192, "step": 11877 }, { "epoch": 0.77, "grad_norm": 1.1988513469696045, "learning_rate": 1.3605424366935905e-06, "loss": 0.486, "step": 11878 }, { "epoch": 0.77, "grad_norm": 1.1516177654266357, "learning_rate": 1.3598257511024165e-06, "loss": 0.5315, "step": 11879 }, { "epoch": 0.77, "grad_norm": 1.1303828954696655, "learning_rate": 1.359109224613867e-06, "loss": 0.5094, "step": 11880 }, { "epoch": 0.77, "grad_norm": 1.2442959547042847, "learning_rate": 1.3583928572592603e-06, "loss": 0.5383, "step": 11881 }, { "epoch": 0.77, "grad_norm": 1.2967137098312378, "learning_rate": 1.3576766490699057e-06, "loss": 0.5443, "step": 11882 }, { "epoch": 0.77, "grad_norm": 1.1157587766647339, "learning_rate": 1.3569606000771074e-06, "loss": 0.4615, "step": 11883 }, { "epoch": 0.77, "grad_norm": 1.1731407642364502, "learning_rate": 1.356244710312164e-06, "loss": 0.5162, "step": 11884 }, { "epoch": 0.77, "grad_norm": 1.1890466213226318, "learning_rate": 1.3555289798063614e-06, "loss": 0.4995, "step": 11885 }, { "epoch": 0.77, "grad_norm": 1.1879658699035645, "learning_rate": 1.3548134085909847e-06, "loss": 0.5365, "step": 11886 }, { "epoch": 0.77, "grad_norm": 1.16111159324646, "learning_rate": 1.3540979966973089e-06, "loss": 0.487, "step": 11887 }, { "epoch": 0.77, "grad_norm": 1.1622705459594727, "learning_rate": 1.3533827441566034e-06, "loss": 0.5078, "step": 11888 }, { "epoch": 0.77, "grad_norm": 1.1365692615509033, "learning_rate": 1.3526676510001297e-06, "loss": 0.4772, "step": 11889 }, { "epoch": 0.77, "grad_norm": 1.111362099647522, "learning_rate": 1.351952717259144e-06, "loss": 0.5047, "step": 11890 }, { "epoch": 0.77, "grad_norm": 1.289001226425171, "learning_rate": 1.3512379429648915e-06, "loss": 0.5167, "step": 11891 }, { "epoch": 0.77, "grad_norm": 1.1330008506774902, "learning_rate": 1.3505233281486146e-06, "loss": 0.5042, "step": 11892 }, { "epoch": 0.77, "grad_norm": 1.026189923286438, "learning_rate": 1.3498088728415471e-06, "loss": 0.5075, "step": 11893 }, { "epoch": 0.77, "grad_norm": 1.1687690019607544, "learning_rate": 1.3490945770749165e-06, "loss": 0.4933, "step": 11894 }, { "epoch": 0.77, "grad_norm": 1.2513998746871948, "learning_rate": 1.3483804408799422e-06, "loss": 0.4766, "step": 11895 }, { "epoch": 0.77, "grad_norm": 1.281636118888855, "learning_rate": 1.347666464287839e-06, "loss": 0.5313, "step": 11896 }, { "epoch": 0.77, "grad_norm": 1.2995197772979736, "learning_rate": 1.3469526473298101e-06, "loss": 0.5266, "step": 11897 }, { "epoch": 0.77, "grad_norm": 1.2519009113311768, "learning_rate": 1.346238990037056e-06, "loss": 0.5239, "step": 11898 }, { "epoch": 0.77, "grad_norm": 1.1635918617248535, "learning_rate": 1.345525492440769e-06, "loss": 0.565, "step": 11899 }, { "epoch": 0.77, "grad_norm": 1.1776256561279297, "learning_rate": 1.3448121545721338e-06, "loss": 0.5508, "step": 11900 }, { "epoch": 0.77, "grad_norm": 1.1684355735778809, "learning_rate": 1.3440989764623301e-06, "loss": 0.4664, "step": 11901 }, { "epoch": 0.77, "grad_norm": 1.130954384803772, "learning_rate": 1.3433859581425269e-06, "loss": 0.5202, "step": 11902 }, { "epoch": 0.77, "grad_norm": 1.3072775602340698, "learning_rate": 1.342673099643889e-06, "loss": 0.542, "step": 11903 }, { "epoch": 0.77, "grad_norm": 1.0674312114715576, "learning_rate": 1.341960400997574e-06, "loss": 0.461, "step": 11904 }, { "epoch": 0.77, "grad_norm": 1.0970771312713623, "learning_rate": 1.3412478622347318e-06, "loss": 0.5039, "step": 11905 }, { "epoch": 0.77, "grad_norm": 1.1781724691390991, "learning_rate": 1.3405354833865058e-06, "loss": 0.5079, "step": 11906 }, { "epoch": 0.77, "grad_norm": 1.2436301708221436, "learning_rate": 1.3398232644840336e-06, "loss": 0.5646, "step": 11907 }, { "epoch": 0.77, "grad_norm": 1.1543914079666138, "learning_rate": 1.3391112055584422e-06, "loss": 0.508, "step": 11908 }, { "epoch": 0.77, "grad_norm": 1.1641061305999756, "learning_rate": 1.3383993066408545e-06, "loss": 0.4884, "step": 11909 }, { "epoch": 0.77, "grad_norm": 1.1257277727127075, "learning_rate": 1.3376875677623858e-06, "loss": 0.5104, "step": 11910 }, { "epoch": 0.77, "grad_norm": 1.0991697311401367, "learning_rate": 1.3369759889541451e-06, "loss": 0.5452, "step": 11911 }, { "epoch": 0.77, "grad_norm": 1.177520751953125, "learning_rate": 1.3362645702472326e-06, "loss": 0.5187, "step": 11912 }, { "epoch": 0.77, "grad_norm": 1.1269896030426025, "learning_rate": 1.3355533116727454e-06, "loss": 0.5574, "step": 11913 }, { "epoch": 0.77, "grad_norm": 1.2707329988479614, "learning_rate": 1.3348422132617667e-06, "loss": 0.5247, "step": 11914 }, { "epoch": 0.77, "grad_norm": 1.229905128479004, "learning_rate": 1.3341312750453782e-06, "loss": 0.5394, "step": 11915 }, { "epoch": 0.77, "grad_norm": 1.2038058042526245, "learning_rate": 1.3334204970546543e-06, "loss": 0.561, "step": 11916 }, { "epoch": 0.77, "grad_norm": 1.2182741165161133, "learning_rate": 1.3327098793206616e-06, "loss": 0.5275, "step": 11917 }, { "epoch": 0.77, "grad_norm": 1.290492296218872, "learning_rate": 1.331999421874457e-06, "loss": 0.5036, "step": 11918 }, { "epoch": 0.77, "grad_norm": 1.1450541019439697, "learning_rate": 1.3312891247470943e-06, "loss": 0.4894, "step": 11919 }, { "epoch": 0.77, "grad_norm": 1.1636488437652588, "learning_rate": 1.3305789879696184e-06, "loss": 0.509, "step": 11920 }, { "epoch": 0.77, "grad_norm": 1.23149573802948, "learning_rate": 1.3298690115730679e-06, "loss": 0.5337, "step": 11921 }, { "epoch": 0.77, "grad_norm": 1.2229859828948975, "learning_rate": 1.3291591955884747e-06, "loss": 0.5602, "step": 11922 }, { "epoch": 0.77, "grad_norm": 1.1302440166473389, "learning_rate": 1.328449540046861e-06, "loss": 0.4989, "step": 11923 }, { "epoch": 0.77, "grad_norm": 1.2146915197372437, "learning_rate": 1.3277400449792455e-06, "loss": 0.5128, "step": 11924 }, { "epoch": 0.77, "grad_norm": 1.1208969354629517, "learning_rate": 1.3270307104166375e-06, "loss": 0.5055, "step": 11925 }, { "epoch": 0.77, "grad_norm": 1.1777251958847046, "learning_rate": 1.3263215363900429e-06, "loss": 0.5342, "step": 11926 }, { "epoch": 0.77, "grad_norm": 1.1152184009552002, "learning_rate": 1.325612522930454e-06, "loss": 0.5115, "step": 11927 }, { "epoch": 0.77, "grad_norm": 1.0849496126174927, "learning_rate": 1.324903670068861e-06, "loss": 0.4706, "step": 11928 }, { "epoch": 0.77, "grad_norm": 1.1691869497299194, "learning_rate": 1.324194977836248e-06, "loss": 0.5122, "step": 11929 }, { "epoch": 0.77, "grad_norm": 1.2112740278244019, "learning_rate": 1.3234864462635876e-06, "loss": 0.5667, "step": 11930 }, { "epoch": 0.77, "grad_norm": 1.1879609823226929, "learning_rate": 1.3227780753818514e-06, "loss": 0.5265, "step": 11931 }, { "epoch": 0.77, "grad_norm": 1.2425923347473145, "learning_rate": 1.3220698652219966e-06, "loss": 0.5338, "step": 11932 }, { "epoch": 0.77, "grad_norm": 1.276361107826233, "learning_rate": 1.3213618158149783e-06, "loss": 0.5243, "step": 11933 }, { "epoch": 0.77, "grad_norm": 1.1841051578521729, "learning_rate": 1.320653927191744e-06, "loss": 0.5207, "step": 11934 }, { "epoch": 0.77, "grad_norm": 1.2046527862548828, "learning_rate": 1.3199461993832342e-06, "loss": 0.5006, "step": 11935 }, { "epoch": 0.77, "grad_norm": 1.1888424158096313, "learning_rate": 1.319238632420381e-06, "loss": 0.505, "step": 11936 }, { "epoch": 0.77, "grad_norm": 1.2289751768112183, "learning_rate": 1.3185312263341127e-06, "loss": 0.5047, "step": 11937 }, { "epoch": 0.77, "grad_norm": 1.20099675655365, "learning_rate": 1.3178239811553445e-06, "loss": 0.4828, "step": 11938 }, { "epoch": 0.77, "grad_norm": 1.24049973487854, "learning_rate": 1.3171168969149895e-06, "loss": 0.5512, "step": 11939 }, { "epoch": 0.77, "grad_norm": 1.1269878149032593, "learning_rate": 1.3164099736439535e-06, "loss": 0.5456, "step": 11940 }, { "epoch": 0.77, "grad_norm": 1.109281063079834, "learning_rate": 1.3157032113731333e-06, "loss": 0.4685, "step": 11941 }, { "epoch": 0.77, "grad_norm": 1.205859899520874, "learning_rate": 1.3149966101334216e-06, "loss": 0.5699, "step": 11942 }, { "epoch": 0.77, "grad_norm": 1.1784952878952026, "learning_rate": 1.3142901699556997e-06, "loss": 0.4536, "step": 11943 }, { "epoch": 0.77, "grad_norm": 1.15065336227417, "learning_rate": 1.3135838908708448e-06, "loss": 0.5218, "step": 11944 }, { "epoch": 0.77, "grad_norm": 1.3162108659744263, "learning_rate": 1.312877772909727e-06, "loss": 0.5186, "step": 11945 }, { "epoch": 0.77, "grad_norm": 1.272961139678955, "learning_rate": 1.3121718161032088e-06, "loss": 0.5349, "step": 11946 }, { "epoch": 0.77, "grad_norm": 1.194139838218689, "learning_rate": 1.311466020482146e-06, "loss": 0.4954, "step": 11947 }, { "epoch": 0.77, "grad_norm": 1.1246510744094849, "learning_rate": 1.3107603860773882e-06, "loss": 0.5094, "step": 11948 }, { "epoch": 0.77, "grad_norm": 1.1644587516784668, "learning_rate": 1.3100549129197743e-06, "loss": 0.5076, "step": 11949 }, { "epoch": 0.77, "grad_norm": 1.1909031867980957, "learning_rate": 1.3093496010401396e-06, "loss": 0.5332, "step": 11950 }, { "epoch": 0.77, "grad_norm": 1.1864479780197144, "learning_rate": 1.3086444504693119e-06, "loss": 0.4638, "step": 11951 }, { "epoch": 0.77, "grad_norm": 1.2998448610305786, "learning_rate": 1.3079394612381119e-06, "loss": 0.5312, "step": 11952 }, { "epoch": 0.77, "grad_norm": 1.177371859550476, "learning_rate": 1.3072346333773516e-06, "loss": 0.5196, "step": 11953 }, { "epoch": 0.77, "grad_norm": 1.1689420938491821, "learning_rate": 1.3065299669178404e-06, "loss": 0.5348, "step": 11954 }, { "epoch": 0.77, "grad_norm": 1.143101453781128, "learning_rate": 1.3058254618903733e-06, "loss": 0.5587, "step": 11955 }, { "epoch": 0.77, "grad_norm": 1.2342846393585205, "learning_rate": 1.305121118325744e-06, "loss": 0.5174, "step": 11956 }, { "epoch": 0.77, "grad_norm": 1.2685511112213135, "learning_rate": 1.3044169362547377e-06, "loss": 0.5275, "step": 11957 }, { "epoch": 0.77, "grad_norm": 1.1704199314117432, "learning_rate": 1.3037129157081323e-06, "loss": 0.4972, "step": 11958 }, { "epoch": 0.77, "grad_norm": 1.1601113080978394, "learning_rate": 1.3030090567166986e-06, "loss": 0.5289, "step": 11959 }, { "epoch": 0.77, "grad_norm": 1.2096737623214722, "learning_rate": 1.3023053593112027e-06, "loss": 0.5522, "step": 11960 }, { "epoch": 0.77, "grad_norm": 1.1553518772125244, "learning_rate": 1.3016018235223975e-06, "loss": 0.5113, "step": 11961 }, { "epoch": 0.77, "grad_norm": 1.1586564779281616, "learning_rate": 1.3008984493810351e-06, "loss": 0.5542, "step": 11962 }, { "epoch": 0.77, "grad_norm": 1.262728214263916, "learning_rate": 1.3001952369178571e-06, "loss": 0.5492, "step": 11963 }, { "epoch": 0.77, "grad_norm": 1.1494942903518677, "learning_rate": 1.2994921861636e-06, "loss": 0.53, "step": 11964 }, { "epoch": 0.77, "grad_norm": 1.2047028541564941, "learning_rate": 1.2987892971489918e-06, "loss": 0.5472, "step": 11965 }, { "epoch": 0.77, "grad_norm": 1.2351596355438232, "learning_rate": 1.2980865699047557e-06, "loss": 0.5492, "step": 11966 }, { "epoch": 0.77, "grad_norm": 1.2855210304260254, "learning_rate": 1.2973840044616032e-06, "loss": 0.5393, "step": 11967 }, { "epoch": 0.77, "grad_norm": 1.2022746801376343, "learning_rate": 1.2966816008502432e-06, "loss": 0.5231, "step": 11968 }, { "epoch": 0.77, "grad_norm": 1.1627057790756226, "learning_rate": 1.2959793591013754e-06, "loss": 0.5489, "step": 11969 }, { "epoch": 0.77, "grad_norm": 1.239031195640564, "learning_rate": 1.295277279245693e-06, "loss": 0.5267, "step": 11970 }, { "epoch": 0.77, "grad_norm": 1.1545417308807373, "learning_rate": 1.2945753613138828e-06, "loss": 0.5071, "step": 11971 }, { "epoch": 0.77, "grad_norm": 1.3325409889221191, "learning_rate": 1.2938736053366252e-06, "loss": 0.5431, "step": 11972 }, { "epoch": 0.77, "grad_norm": 1.1135345697402954, "learning_rate": 1.2931720113445884e-06, "loss": 0.4844, "step": 11973 }, { "epoch": 0.77, "grad_norm": 1.3802945613861084, "learning_rate": 1.2924705793684394e-06, "loss": 0.5534, "step": 11974 }, { "epoch": 0.77, "grad_norm": 1.1814676523208618, "learning_rate": 1.2917693094388361e-06, "loss": 0.513, "step": 11975 }, { "epoch": 0.77, "grad_norm": 1.2476989030838013, "learning_rate": 1.2910682015864284e-06, "loss": 0.5326, "step": 11976 }, { "epoch": 0.77, "grad_norm": 1.2024662494659424, "learning_rate": 1.2903672558418606e-06, "loss": 0.4954, "step": 11977 }, { "epoch": 0.77, "grad_norm": 1.1553947925567627, "learning_rate": 1.289666472235771e-06, "loss": 0.5463, "step": 11978 }, { "epoch": 0.77, "grad_norm": 1.1719648838043213, "learning_rate": 1.2889658507987852e-06, "loss": 0.525, "step": 11979 }, { "epoch": 0.77, "grad_norm": 1.149017333984375, "learning_rate": 1.2882653915615273e-06, "loss": 0.5373, "step": 11980 }, { "epoch": 0.77, "grad_norm": 1.1680302619934082, "learning_rate": 1.287565094554613e-06, "loss": 0.524, "step": 11981 }, { "epoch": 0.77, "grad_norm": 1.1936609745025635, "learning_rate": 1.2868649598086502e-06, "loss": 0.4939, "step": 11982 }, { "epoch": 0.77, "grad_norm": 1.186750054359436, "learning_rate": 1.2861649873542415e-06, "loss": 0.4817, "step": 11983 }, { "epoch": 0.77, "grad_norm": 1.1827023029327393, "learning_rate": 1.285465177221978e-06, "loss": 0.4936, "step": 11984 }, { "epoch": 0.77, "grad_norm": 1.235571026802063, "learning_rate": 1.2847655294424476e-06, "loss": 0.5356, "step": 11985 }, { "epoch": 0.77, "grad_norm": 1.3561276197433472, "learning_rate": 1.284066044046231e-06, "loss": 0.5487, "step": 11986 }, { "epoch": 0.77, "grad_norm": 1.2475216388702393, "learning_rate": 1.2833667210639001e-06, "loss": 0.5336, "step": 11987 }, { "epoch": 0.77, "grad_norm": 1.3393410444259644, "learning_rate": 1.2826675605260209e-06, "loss": 0.5648, "step": 11988 }, { "epoch": 0.77, "grad_norm": 1.2745391130447388, "learning_rate": 1.2819685624631528e-06, "loss": 0.5206, "step": 11989 }, { "epoch": 0.77, "grad_norm": 1.2448561191558838, "learning_rate": 1.2812697269058454e-06, "loss": 0.5368, "step": 11990 }, { "epoch": 0.77, "grad_norm": 1.160786747932434, "learning_rate": 1.2805710538846433e-06, "loss": 0.54, "step": 11991 }, { "epoch": 0.77, "grad_norm": 1.2170542478561401, "learning_rate": 1.2798725434300846e-06, "loss": 0.5328, "step": 11992 }, { "epoch": 0.77, "grad_norm": 1.0654157400131226, "learning_rate": 1.2791741955726983e-06, "loss": 0.4829, "step": 11993 }, { "epoch": 0.77, "grad_norm": 1.191421389579773, "learning_rate": 1.2784760103430089e-06, "loss": 0.5167, "step": 11994 }, { "epoch": 0.77, "grad_norm": 1.1858515739440918, "learning_rate": 1.2777779877715324e-06, "loss": 0.5505, "step": 11995 }, { "epoch": 0.77, "grad_norm": 1.2435224056243896, "learning_rate": 1.2770801278887752e-06, "loss": 0.5345, "step": 11996 }, { "epoch": 0.77, "grad_norm": 1.209402322769165, "learning_rate": 1.2763824307252397e-06, "loss": 0.5294, "step": 11997 }, { "epoch": 0.77, "grad_norm": 1.1540557146072388, "learning_rate": 1.275684896311422e-06, "loss": 0.5546, "step": 11998 }, { "epoch": 0.77, "grad_norm": 1.2701667547225952, "learning_rate": 1.2749875246778082e-06, "loss": 0.4912, "step": 11999 }, { "epoch": 0.77, "grad_norm": 1.1055161952972412, "learning_rate": 1.2742903158548792e-06, "loss": 0.4992, "step": 12000 }, { "epoch": 0.77, "grad_norm": 1.231231451034546, "learning_rate": 1.2735932698731095e-06, "loss": 0.5143, "step": 12001 }, { "epoch": 0.77, "grad_norm": 1.2576789855957031, "learning_rate": 1.2728963867629623e-06, "loss": 0.5108, "step": 12002 }, { "epoch": 0.77, "grad_norm": 1.216569423675537, "learning_rate": 1.272199666554898e-06, "loss": 0.55, "step": 12003 }, { "epoch": 0.77, "grad_norm": 1.0940594673156738, "learning_rate": 1.2715031092793684e-06, "loss": 0.5199, "step": 12004 }, { "epoch": 0.77, "grad_norm": 1.3838788270950317, "learning_rate": 1.270806714966818e-06, "loss": 0.594, "step": 12005 }, { "epoch": 0.77, "grad_norm": 1.2583163976669312, "learning_rate": 1.2701104836476862e-06, "loss": 0.5426, "step": 12006 }, { "epoch": 0.78, "grad_norm": 1.2442724704742432, "learning_rate": 1.2694144153524008e-06, "loss": 0.5562, "step": 12007 }, { "epoch": 0.78, "grad_norm": 1.1881530284881592, "learning_rate": 1.2687185101113857e-06, "loss": 0.4976, "step": 12008 }, { "epoch": 0.78, "grad_norm": 1.3692315816879272, "learning_rate": 1.2680227679550577e-06, "loss": 0.4971, "step": 12009 }, { "epoch": 0.78, "grad_norm": 1.2972126007080078, "learning_rate": 1.2673271889138267e-06, "loss": 0.5493, "step": 12010 }, { "epoch": 0.78, "grad_norm": 1.1516474485397339, "learning_rate": 1.2666317730180944e-06, "loss": 0.5139, "step": 12011 }, { "epoch": 0.78, "grad_norm": 1.1680371761322021, "learning_rate": 1.2659365202982542e-06, "loss": 0.4864, "step": 12012 }, { "epoch": 0.78, "grad_norm": 1.1979987621307373, "learning_rate": 1.2652414307846944e-06, "loss": 0.4735, "step": 12013 }, { "epoch": 0.78, "grad_norm": 1.1584595441818237, "learning_rate": 1.2645465045077959e-06, "loss": 0.5373, "step": 12014 }, { "epoch": 0.78, "grad_norm": 1.143261194229126, "learning_rate": 1.2638517414979334e-06, "loss": 0.5068, "step": 12015 }, { "epoch": 0.78, "grad_norm": 1.2337373495101929, "learning_rate": 1.263157141785471e-06, "loss": 0.4699, "step": 12016 }, { "epoch": 0.78, "grad_norm": 1.1866614818572998, "learning_rate": 1.2624627054007682e-06, "loss": 0.4961, "step": 12017 }, { "epoch": 0.78, "grad_norm": 1.1523702144622803, "learning_rate": 1.2617684323741774e-06, "loss": 0.493, "step": 12018 }, { "epoch": 0.78, "grad_norm": 1.158778429031372, "learning_rate": 1.261074322736044e-06, "loss": 0.5061, "step": 12019 }, { "epoch": 0.78, "grad_norm": 1.3525530099868774, "learning_rate": 1.2603803765167073e-06, "loss": 0.5399, "step": 12020 }, { "epoch": 0.78, "grad_norm": 1.149389386177063, "learning_rate": 1.2596865937464937e-06, "loss": 0.5436, "step": 12021 }, { "epoch": 0.78, "grad_norm": 1.4609144926071167, "learning_rate": 1.2589929744557294e-06, "loss": 0.4575, "step": 12022 }, { "epoch": 0.78, "grad_norm": 1.0883641242980957, "learning_rate": 1.2582995186747304e-06, "loss": 0.527, "step": 12023 }, { "epoch": 0.78, "grad_norm": 1.1574411392211914, "learning_rate": 1.2576062264338051e-06, "loss": 0.5236, "step": 12024 }, { "epoch": 0.78, "grad_norm": 1.2400217056274414, "learning_rate": 1.2569130977632582e-06, "loss": 0.525, "step": 12025 }, { "epoch": 0.78, "grad_norm": 1.3173350095748901, "learning_rate": 1.2562201326933809e-06, "loss": 0.579, "step": 12026 }, { "epoch": 0.78, "grad_norm": 1.1294797658920288, "learning_rate": 1.2555273312544625e-06, "loss": 0.509, "step": 12027 }, { "epoch": 0.78, "grad_norm": 1.3469088077545166, "learning_rate": 1.2548346934767836e-06, "loss": 0.537, "step": 12028 }, { "epoch": 0.78, "grad_norm": 1.195713996887207, "learning_rate": 1.2541422193906172e-06, "loss": 0.499, "step": 12029 }, { "epoch": 0.78, "grad_norm": 1.1473889350891113, "learning_rate": 1.253449909026232e-06, "loss": 0.5355, "step": 12030 }, { "epoch": 0.78, "grad_norm": 1.19156014919281, "learning_rate": 1.2527577624138832e-06, "loss": 0.5078, "step": 12031 }, { "epoch": 0.78, "grad_norm": 1.1286993026733398, "learning_rate": 1.2520657795838248e-06, "loss": 0.4914, "step": 12032 }, { "epoch": 0.78, "grad_norm": 1.2049225568771362, "learning_rate": 1.2513739605663012e-06, "loss": 0.5203, "step": 12033 }, { "epoch": 0.78, "grad_norm": 1.131740927696228, "learning_rate": 1.2506823053915501e-06, "loss": 0.4619, "step": 12034 }, { "epoch": 0.78, "grad_norm": 1.178587555885315, "learning_rate": 1.2499908140898027e-06, "loss": 0.5162, "step": 12035 }, { "epoch": 0.78, "grad_norm": 1.2207536697387695, "learning_rate": 1.2492994866912827e-06, "loss": 0.5059, "step": 12036 }, { "epoch": 0.78, "grad_norm": 1.1849151849746704, "learning_rate": 1.2486083232262035e-06, "loss": 0.5406, "step": 12037 }, { "epoch": 0.78, "grad_norm": 1.2171415090560913, "learning_rate": 1.247917323724776e-06, "loss": 0.5273, "step": 12038 }, { "epoch": 0.78, "grad_norm": 1.1531161069869995, "learning_rate": 1.247226488217202e-06, "loss": 0.511, "step": 12039 }, { "epoch": 0.78, "grad_norm": 1.2600089311599731, "learning_rate": 1.2465358167336755e-06, "loss": 0.5704, "step": 12040 }, { "epoch": 0.78, "grad_norm": 1.1857119798660278, "learning_rate": 1.2458453093043848e-06, "loss": 0.5329, "step": 12041 }, { "epoch": 0.78, "grad_norm": 1.1299464702606201, "learning_rate": 1.2451549659595108e-06, "loss": 0.5207, "step": 12042 }, { "epoch": 0.78, "grad_norm": 1.2359520196914673, "learning_rate": 1.2444647867292243e-06, "loss": 0.5841, "step": 12043 }, { "epoch": 0.78, "grad_norm": 1.1254791021347046, "learning_rate": 1.2437747716436927e-06, "loss": 0.5063, "step": 12044 }, { "epoch": 0.78, "grad_norm": 1.2186570167541504, "learning_rate": 1.2430849207330748e-06, "loss": 0.5318, "step": 12045 }, { "epoch": 0.78, "grad_norm": 1.2958751916885376, "learning_rate": 1.2423952340275214e-06, "loss": 0.5116, "step": 12046 }, { "epoch": 0.78, "grad_norm": 1.2469234466552734, "learning_rate": 1.2417057115571785e-06, "loss": 0.5162, "step": 12047 }, { "epoch": 0.78, "grad_norm": 1.1629793643951416, "learning_rate": 1.241016353352183e-06, "loss": 0.5043, "step": 12048 }, { "epoch": 0.78, "grad_norm": 1.22207510471344, "learning_rate": 1.2403271594426636e-06, "loss": 0.5139, "step": 12049 }, { "epoch": 0.78, "grad_norm": 1.1362097263336182, "learning_rate": 1.2396381298587435e-06, "loss": 0.473, "step": 12050 }, { "epoch": 0.78, "grad_norm": 1.2379591464996338, "learning_rate": 1.238949264630539e-06, "loss": 0.5414, "step": 12051 }, { "epoch": 0.78, "grad_norm": 1.1927381753921509, "learning_rate": 1.2382605637881585e-06, "loss": 0.5381, "step": 12052 }, { "epoch": 0.78, "grad_norm": 1.1150075197219849, "learning_rate": 1.2375720273617037e-06, "loss": 0.5024, "step": 12053 }, { "epoch": 0.78, "grad_norm": 1.1356000900268555, "learning_rate": 1.236883655381269e-06, "loss": 0.5388, "step": 12054 }, { "epoch": 0.78, "grad_norm": 1.1623892784118652, "learning_rate": 1.23619544787694e-06, "loss": 0.4654, "step": 12055 }, { "epoch": 0.78, "grad_norm": 1.318964958190918, "learning_rate": 1.2355074048787974e-06, "loss": 0.5207, "step": 12056 }, { "epoch": 0.78, "grad_norm": 1.2802362442016602, "learning_rate": 1.2348195264169133e-06, "loss": 0.5414, "step": 12057 }, { "epoch": 0.78, "grad_norm": 1.1867194175720215, "learning_rate": 1.234131812521353e-06, "loss": 0.5089, "step": 12058 }, { "epoch": 0.78, "grad_norm": 1.1437242031097412, "learning_rate": 1.2334442632221755e-06, "loss": 0.4845, "step": 12059 }, { "epoch": 0.78, "grad_norm": 1.21151864528656, "learning_rate": 1.2327568785494331e-06, "loss": 0.5472, "step": 12060 }, { "epoch": 0.78, "grad_norm": 1.2702127695083618, "learning_rate": 1.232069658533166e-06, "loss": 0.5446, "step": 12061 }, { "epoch": 0.78, "grad_norm": 1.3110930919647217, "learning_rate": 1.2313826032034131e-06, "loss": 0.5554, "step": 12062 }, { "epoch": 0.78, "grad_norm": 1.1139732599258423, "learning_rate": 1.230695712590203e-06, "loss": 0.4908, "step": 12063 }, { "epoch": 0.78, "grad_norm": 1.2589679956436157, "learning_rate": 1.2300089867235587e-06, "loss": 0.5193, "step": 12064 }, { "epoch": 0.78, "grad_norm": 1.2024177312850952, "learning_rate": 1.229322425633495e-06, "loss": 0.5205, "step": 12065 }, { "epoch": 0.78, "grad_norm": 1.0935111045837402, "learning_rate": 1.228636029350021e-06, "loss": 0.498, "step": 12066 }, { "epoch": 0.78, "grad_norm": 1.1684595346450806, "learning_rate": 1.227949797903134e-06, "loss": 0.5236, "step": 12067 }, { "epoch": 0.78, "grad_norm": 1.2216053009033203, "learning_rate": 1.2272637313228302e-06, "loss": 0.528, "step": 12068 }, { "epoch": 0.78, "grad_norm": 1.1727852821350098, "learning_rate": 1.226577829639094e-06, "loss": 0.4285, "step": 12069 }, { "epoch": 0.78, "grad_norm": 1.2069628238677979, "learning_rate": 1.2258920928819056e-06, "loss": 0.5264, "step": 12070 }, { "epoch": 0.78, "grad_norm": 1.2269399166107178, "learning_rate": 1.2252065210812387e-06, "loss": 0.539, "step": 12071 }, { "epoch": 0.78, "grad_norm": 1.3081145286560059, "learning_rate": 1.2245211142670532e-06, "loss": 0.5069, "step": 12072 }, { "epoch": 0.78, "grad_norm": 1.2248257398605347, "learning_rate": 1.2238358724693095e-06, "loss": 0.5135, "step": 12073 }, { "epoch": 0.78, "grad_norm": 1.1373260021209717, "learning_rate": 1.2231507957179567e-06, "loss": 0.4653, "step": 12074 }, { "epoch": 0.78, "grad_norm": 1.3075448274612427, "learning_rate": 1.2224658840429387e-06, "loss": 0.504, "step": 12075 }, { "epoch": 0.78, "grad_norm": 1.1633334159851074, "learning_rate": 1.2217811374741906e-06, "loss": 0.5045, "step": 12076 }, { "epoch": 0.78, "grad_norm": 1.1567401885986328, "learning_rate": 1.221096556041642e-06, "loss": 0.4882, "step": 12077 }, { "epoch": 0.78, "grad_norm": 1.2919797897338867, "learning_rate": 1.2204121397752123e-06, "loss": 0.5712, "step": 12078 }, { "epoch": 0.78, "grad_norm": 1.124253749847412, "learning_rate": 1.2197278887048164e-06, "loss": 0.5601, "step": 12079 }, { "epoch": 0.78, "grad_norm": 1.227935791015625, "learning_rate": 1.2190438028603612e-06, "loss": 0.5295, "step": 12080 }, { "epoch": 0.78, "grad_norm": 1.185563087463379, "learning_rate": 1.2183598822717468e-06, "loss": 0.5108, "step": 12081 }, { "epoch": 0.78, "grad_norm": 1.1466652154922485, "learning_rate": 1.2176761269688647e-06, "loss": 0.5043, "step": 12082 }, { "epoch": 0.78, "grad_norm": 1.187247395515442, "learning_rate": 1.2169925369816021e-06, "loss": 0.5146, "step": 12083 }, { "epoch": 0.78, "grad_norm": 1.1531779766082764, "learning_rate": 1.216309112339834e-06, "loss": 0.5228, "step": 12084 }, { "epoch": 0.78, "grad_norm": 1.1651039123535156, "learning_rate": 1.2156258530734328e-06, "loss": 0.4899, "step": 12085 }, { "epoch": 0.78, "grad_norm": 1.0656815767288208, "learning_rate": 1.2149427592122614e-06, "loss": 0.4838, "step": 12086 }, { "epoch": 0.78, "grad_norm": 1.1075448989868164, "learning_rate": 1.2142598307861763e-06, "loss": 0.4882, "step": 12087 }, { "epoch": 0.78, "grad_norm": 1.1790963411331177, "learning_rate": 1.2135770678250269e-06, "loss": 0.4627, "step": 12088 }, { "epoch": 0.78, "grad_norm": 1.2095303535461426, "learning_rate": 1.2128944703586565e-06, "loss": 0.4941, "step": 12089 }, { "epoch": 0.78, "grad_norm": 1.2154308557510376, "learning_rate": 1.2122120384168961e-06, "loss": 0.5576, "step": 12090 }, { "epoch": 0.78, "grad_norm": 1.1513322591781616, "learning_rate": 1.211529772029575e-06, "loss": 0.5274, "step": 12091 }, { "epoch": 0.78, "grad_norm": 1.2654087543487549, "learning_rate": 1.2108476712265133e-06, "loss": 0.5016, "step": 12092 }, { "epoch": 0.78, "grad_norm": 1.2730265855789185, "learning_rate": 1.2101657360375234e-06, "loss": 0.5221, "step": 12093 }, { "epoch": 0.78, "grad_norm": 1.1469143629074097, "learning_rate": 1.2094839664924113e-06, "loss": 0.4662, "step": 12094 }, { "epoch": 0.78, "grad_norm": 1.1601510047912598, "learning_rate": 1.208802362620977e-06, "loss": 0.5231, "step": 12095 }, { "epoch": 0.78, "grad_norm": 1.1052677631378174, "learning_rate": 1.2081209244530085e-06, "loss": 0.4943, "step": 12096 }, { "epoch": 0.78, "grad_norm": 1.2310073375701904, "learning_rate": 1.2074396520182912e-06, "loss": 0.5063, "step": 12097 }, { "epoch": 0.78, "grad_norm": 1.2481964826583862, "learning_rate": 1.2067585453466018e-06, "loss": 0.5177, "step": 12098 }, { "epoch": 0.78, "grad_norm": 1.1704059839248657, "learning_rate": 1.2060776044677096e-06, "loss": 0.5306, "step": 12099 }, { "epoch": 0.78, "grad_norm": 1.1889817714691162, "learning_rate": 1.2053968294113789e-06, "loss": 0.5109, "step": 12100 }, { "epoch": 0.78, "grad_norm": 1.151665210723877, "learning_rate": 1.2047162202073603e-06, "loss": 0.5108, "step": 12101 }, { "epoch": 0.78, "grad_norm": 1.0556895732879639, "learning_rate": 1.2040357768854044e-06, "loss": 0.48, "step": 12102 }, { "epoch": 0.78, "grad_norm": 1.2284564971923828, "learning_rate": 1.2033554994752505e-06, "loss": 0.5619, "step": 12103 }, { "epoch": 0.78, "grad_norm": 1.2749706506729126, "learning_rate": 1.2026753880066338e-06, "loss": 0.4823, "step": 12104 }, { "epoch": 0.78, "grad_norm": 1.2281581163406372, "learning_rate": 1.2019954425092772e-06, "loss": 0.5375, "step": 12105 }, { "epoch": 0.78, "grad_norm": 1.0396451950073242, "learning_rate": 1.2013156630129014e-06, "loss": 0.5163, "step": 12106 }, { "epoch": 0.78, "grad_norm": 1.239700436592102, "learning_rate": 1.2006360495472163e-06, "loss": 0.5507, "step": 12107 }, { "epoch": 0.78, "grad_norm": 1.18292236328125, "learning_rate": 1.1999566021419278e-06, "loss": 0.4973, "step": 12108 }, { "epoch": 0.78, "grad_norm": 1.199827790260315, "learning_rate": 1.1992773208267327e-06, "loss": 0.4923, "step": 12109 }, { "epoch": 0.78, "grad_norm": 1.1299859285354614, "learning_rate": 1.1985982056313189e-06, "loss": 0.5213, "step": 12110 }, { "epoch": 0.78, "grad_norm": 1.0813422203063965, "learning_rate": 1.1979192565853698e-06, "loss": 0.4742, "step": 12111 }, { "epoch": 0.78, "grad_norm": 1.1666113138198853, "learning_rate": 1.1972404737185606e-06, "loss": 0.5374, "step": 12112 }, { "epoch": 0.78, "grad_norm": 1.192782998085022, "learning_rate": 1.1965618570605602e-06, "loss": 0.4871, "step": 12113 }, { "epoch": 0.78, "grad_norm": 1.1498490571975708, "learning_rate": 1.1958834066410263e-06, "loss": 0.4881, "step": 12114 }, { "epoch": 0.78, "grad_norm": 1.3053984642028809, "learning_rate": 1.1952051224896144e-06, "loss": 0.476, "step": 12115 }, { "epoch": 0.78, "grad_norm": 1.124802827835083, "learning_rate": 1.1945270046359702e-06, "loss": 0.5119, "step": 12116 }, { "epoch": 0.78, "grad_norm": 1.3558155298233032, "learning_rate": 1.1938490531097323e-06, "loss": 0.5116, "step": 12117 }, { "epoch": 0.78, "grad_norm": 1.2492148876190186, "learning_rate": 1.1931712679405332e-06, "loss": 0.5243, "step": 12118 }, { "epoch": 0.78, "grad_norm": 1.1431233882904053, "learning_rate": 1.1924936491579953e-06, "loss": 0.5328, "step": 12119 }, { "epoch": 0.78, "grad_norm": 1.1670910120010376, "learning_rate": 1.1918161967917364e-06, "loss": 0.5038, "step": 12120 }, { "epoch": 0.78, "grad_norm": 1.1146998405456543, "learning_rate": 1.1911389108713667e-06, "loss": 0.4477, "step": 12121 }, { "epoch": 0.78, "grad_norm": 1.296169638633728, "learning_rate": 1.1904617914264877e-06, "loss": 0.5284, "step": 12122 }, { "epoch": 0.78, "grad_norm": 1.2394485473632812, "learning_rate": 1.189784838486695e-06, "loss": 0.596, "step": 12123 }, { "epoch": 0.78, "grad_norm": 1.2446699142456055, "learning_rate": 1.1891080520815783e-06, "loss": 0.4554, "step": 12124 }, { "epoch": 0.78, "grad_norm": 1.1631736755371094, "learning_rate": 1.1884314322407147e-06, "loss": 0.5292, "step": 12125 }, { "epoch": 0.78, "grad_norm": 1.1727570295333862, "learning_rate": 1.1877549789936794e-06, "loss": 0.528, "step": 12126 }, { "epoch": 0.78, "grad_norm": 1.128079891204834, "learning_rate": 1.1870786923700378e-06, "loss": 0.5581, "step": 12127 }, { "epoch": 0.78, "grad_norm": 1.2828201055526733, "learning_rate": 1.18640257239935e-06, "loss": 0.5361, "step": 12128 }, { "epoch": 0.78, "grad_norm": 1.2246907949447632, "learning_rate": 1.1857266191111655e-06, "loss": 0.4975, "step": 12129 }, { "epoch": 0.78, "grad_norm": 1.3344511985778809, "learning_rate": 1.1850508325350318e-06, "loss": 0.5202, "step": 12130 }, { "epoch": 0.78, "grad_norm": 1.2916885614395142, "learning_rate": 1.1843752127004815e-06, "loss": 0.5107, "step": 12131 }, { "epoch": 0.78, "grad_norm": 1.191829800605774, "learning_rate": 1.1836997596370464e-06, "loss": 0.5253, "step": 12132 }, { "epoch": 0.78, "grad_norm": 1.220005989074707, "learning_rate": 1.1830244733742491e-06, "loss": 0.5467, "step": 12133 }, { "epoch": 0.78, "grad_norm": 1.2459838390350342, "learning_rate": 1.1823493539416036e-06, "loss": 0.5597, "step": 12134 }, { "epoch": 0.78, "grad_norm": 1.076669454574585, "learning_rate": 1.1816744013686182e-06, "loss": 0.4643, "step": 12135 }, { "epoch": 0.78, "grad_norm": 1.1837158203125, "learning_rate": 1.1809996156847948e-06, "loss": 0.5218, "step": 12136 }, { "epoch": 0.78, "grad_norm": 1.2700368165969849, "learning_rate": 1.1803249969196234e-06, "loss": 0.5697, "step": 12137 }, { "epoch": 0.78, "grad_norm": 1.1684527397155762, "learning_rate": 1.179650545102592e-06, "loss": 0.5148, "step": 12138 }, { "epoch": 0.78, "grad_norm": 1.258347511291504, "learning_rate": 1.1789762602631781e-06, "loss": 0.5155, "step": 12139 }, { "epoch": 0.78, "grad_norm": 1.0420432090759277, "learning_rate": 1.1783021424308538e-06, "loss": 0.4643, "step": 12140 }, { "epoch": 0.78, "grad_norm": 1.107442855834961, "learning_rate": 1.177628191635083e-06, "loss": 0.488, "step": 12141 }, { "epoch": 0.78, "grad_norm": 1.0735962390899658, "learning_rate": 1.176954407905323e-06, "loss": 0.5237, "step": 12142 }, { "epoch": 0.78, "grad_norm": 1.2058600187301636, "learning_rate": 1.176280791271021e-06, "loss": 0.4957, "step": 12143 }, { "epoch": 0.78, "grad_norm": 1.2827706336975098, "learning_rate": 1.1756073417616203e-06, "loss": 0.5518, "step": 12144 }, { "epoch": 0.78, "grad_norm": 1.0674024820327759, "learning_rate": 1.1749340594065557e-06, "loss": 0.5081, "step": 12145 }, { "epoch": 0.78, "grad_norm": 1.180775761604309, "learning_rate": 1.1742609442352542e-06, "loss": 0.5054, "step": 12146 }, { "epoch": 0.78, "grad_norm": 1.1788743734359741, "learning_rate": 1.1735879962771368e-06, "loss": 0.5069, "step": 12147 }, { "epoch": 0.78, "grad_norm": 1.1511240005493164, "learning_rate": 1.172915215561617e-06, "loss": 0.4843, "step": 12148 }, { "epoch": 0.78, "grad_norm": 1.126879334449768, "learning_rate": 1.1722426021180977e-06, "loss": 0.5071, "step": 12149 }, { "epoch": 0.78, "grad_norm": 1.1251252889633179, "learning_rate": 1.1715701559759784e-06, "loss": 0.4665, "step": 12150 }, { "epoch": 0.78, "grad_norm": 1.1580089330673218, "learning_rate": 1.1708978771646506e-06, "loss": 0.4897, "step": 12151 }, { "epoch": 0.78, "grad_norm": 1.1763533353805542, "learning_rate": 1.170225765713497e-06, "loss": 0.5049, "step": 12152 }, { "epoch": 0.78, "grad_norm": 1.2495225667953491, "learning_rate": 1.1695538216518953e-06, "loss": 0.4958, "step": 12153 }, { "epoch": 0.78, "grad_norm": 1.205925464630127, "learning_rate": 1.1688820450092125e-06, "loss": 0.5671, "step": 12154 }, { "epoch": 0.78, "grad_norm": 1.048897385597229, "learning_rate": 1.1682104358148111e-06, "loss": 0.4502, "step": 12155 }, { "epoch": 0.78, "grad_norm": 1.1687606573104858, "learning_rate": 1.1675389940980447e-06, "loss": 0.5402, "step": 12156 }, { "epoch": 0.78, "grad_norm": 1.2010518312454224, "learning_rate": 1.1668677198882615e-06, "loss": 0.4881, "step": 12157 }, { "epoch": 0.78, "grad_norm": 1.2319492101669312, "learning_rate": 1.1661966132148013e-06, "loss": 0.4793, "step": 12158 }, { "epoch": 0.78, "grad_norm": 1.0997495651245117, "learning_rate": 1.165525674106996e-06, "loss": 0.5054, "step": 12159 }, { "epoch": 0.78, "grad_norm": 1.119642972946167, "learning_rate": 1.1648549025941696e-06, "loss": 0.4736, "step": 12160 }, { "epoch": 0.78, "grad_norm": 1.1536246538162231, "learning_rate": 1.1641842987056407e-06, "loss": 0.5228, "step": 12161 }, { "epoch": 0.79, "grad_norm": 1.2040159702301025, "learning_rate": 1.1635138624707193e-06, "loss": 0.5057, "step": 12162 }, { "epoch": 0.79, "grad_norm": 1.1398322582244873, "learning_rate": 1.1628435939187088e-06, "loss": 0.505, "step": 12163 }, { "epoch": 0.79, "grad_norm": 1.242343544960022, "learning_rate": 1.1621734930789047e-06, "loss": 0.4997, "step": 12164 }, { "epoch": 0.79, "grad_norm": 1.3000015020370483, "learning_rate": 1.1615035599805975e-06, "loss": 0.4956, "step": 12165 }, { "epoch": 0.79, "grad_norm": 1.1649268865585327, "learning_rate": 1.160833794653064e-06, "loss": 0.5144, "step": 12166 }, { "epoch": 0.79, "grad_norm": 1.3450673818588257, "learning_rate": 1.1601641971255806e-06, "loss": 0.4882, "step": 12167 }, { "epoch": 0.79, "grad_norm": 1.110526204109192, "learning_rate": 1.159494767427413e-06, "loss": 0.5035, "step": 12168 }, { "epoch": 0.79, "grad_norm": 1.185383677482605, "learning_rate": 1.158825505587821e-06, "loss": 0.5215, "step": 12169 }, { "epoch": 0.79, "grad_norm": 1.112664818763733, "learning_rate": 1.1581564116360556e-06, "loss": 0.5275, "step": 12170 }, { "epoch": 0.79, "grad_norm": 1.1219205856323242, "learning_rate": 1.1574874856013624e-06, "loss": 0.4581, "step": 12171 }, { "epoch": 0.79, "grad_norm": 1.1593557596206665, "learning_rate": 1.156818727512976e-06, "loss": 0.4726, "step": 12172 }, { "epoch": 0.79, "grad_norm": 1.2171074151992798, "learning_rate": 1.1561501374001277e-06, "loss": 0.507, "step": 12173 }, { "epoch": 0.79, "grad_norm": 1.1863594055175781, "learning_rate": 1.1554817152920394e-06, "loss": 0.5304, "step": 12174 }, { "epoch": 0.79, "grad_norm": 1.2493867874145508, "learning_rate": 1.1548134612179267e-06, "loss": 0.5152, "step": 12175 }, { "epoch": 0.79, "grad_norm": 1.2588043212890625, "learning_rate": 1.154145375206997e-06, "loss": 0.5204, "step": 12176 }, { "epoch": 0.79, "grad_norm": 1.2123390436172485, "learning_rate": 1.1534774572884517e-06, "loss": 0.5336, "step": 12177 }, { "epoch": 0.79, "grad_norm": 1.1560920476913452, "learning_rate": 1.1528097074914807e-06, "loss": 0.481, "step": 12178 }, { "epoch": 0.79, "grad_norm": 1.1752175092697144, "learning_rate": 1.152142125845272e-06, "loss": 0.5227, "step": 12179 }, { "epoch": 0.79, "grad_norm": 1.245282530784607, "learning_rate": 1.1514747123790033e-06, "loss": 0.5437, "step": 12180 }, { "epoch": 0.79, "grad_norm": 1.163149356842041, "learning_rate": 1.1508074671218456e-06, "loss": 0.5067, "step": 12181 }, { "epoch": 0.79, "grad_norm": 1.2765886783599854, "learning_rate": 1.1501403901029623e-06, "loss": 0.5392, "step": 12182 }, { "epoch": 0.79, "grad_norm": 1.2619414329528809, "learning_rate": 1.1494734813515113e-06, "loss": 0.4957, "step": 12183 }, { "epoch": 0.79, "grad_norm": 1.1697314977645874, "learning_rate": 1.1488067408966386e-06, "loss": 0.5015, "step": 12184 }, { "epoch": 0.79, "grad_norm": 1.1389132738113403, "learning_rate": 1.1481401687674871e-06, "loss": 0.4864, "step": 12185 }, { "epoch": 0.79, "grad_norm": 1.0785566568374634, "learning_rate": 1.1474737649931912e-06, "loss": 0.4657, "step": 12186 }, { "epoch": 0.79, "grad_norm": 1.247786045074463, "learning_rate": 1.1468075296028768e-06, "loss": 0.537, "step": 12187 }, { "epoch": 0.79, "grad_norm": 1.0923845767974854, "learning_rate": 1.146141462625664e-06, "loss": 0.4314, "step": 12188 }, { "epoch": 0.79, "grad_norm": 1.1118217706680298, "learning_rate": 1.1454755640906662e-06, "loss": 0.5177, "step": 12189 }, { "epoch": 0.79, "grad_norm": 1.1538735628128052, "learning_rate": 1.1448098340269853e-06, "loss": 0.492, "step": 12190 }, { "epoch": 0.79, "grad_norm": 1.1053173542022705, "learning_rate": 1.1441442724637203e-06, "loss": 0.4937, "step": 12191 }, { "epoch": 0.79, "grad_norm": 1.116723656654358, "learning_rate": 1.1434788794299606e-06, "loss": 0.496, "step": 12192 }, { "epoch": 0.79, "grad_norm": 1.250994086265564, "learning_rate": 1.1428136549547909e-06, "loss": 0.4913, "step": 12193 }, { "epoch": 0.79, "grad_norm": 1.0517046451568604, "learning_rate": 1.1421485990672826e-06, "loss": 0.4607, "step": 12194 }, { "epoch": 0.79, "grad_norm": 1.1341431140899658, "learning_rate": 1.141483711796506e-06, "loss": 0.517, "step": 12195 }, { "epoch": 0.79, "grad_norm": 1.1588284969329834, "learning_rate": 1.1408189931715213e-06, "loss": 0.5091, "step": 12196 }, { "epoch": 0.79, "grad_norm": 1.19283926486969, "learning_rate": 1.1401544432213817e-06, "loss": 0.5133, "step": 12197 }, { "epoch": 0.79, "grad_norm": 1.2045984268188477, "learning_rate": 1.1394900619751343e-06, "loss": 0.5513, "step": 12198 }, { "epoch": 0.79, "grad_norm": 1.2422878742218018, "learning_rate": 1.1388258494618143e-06, "loss": 0.5341, "step": 12199 }, { "epoch": 0.79, "grad_norm": 1.2276387214660645, "learning_rate": 1.1381618057104548e-06, "loss": 0.5484, "step": 12200 }, { "epoch": 0.79, "grad_norm": 1.1906236410140991, "learning_rate": 1.1374979307500788e-06, "loss": 0.5119, "step": 12201 }, { "epoch": 0.79, "grad_norm": 1.124925971031189, "learning_rate": 1.1368342246097042e-06, "loss": 0.4966, "step": 12202 }, { "epoch": 0.79, "grad_norm": 1.236069679260254, "learning_rate": 1.1361706873183375e-06, "loss": 0.5481, "step": 12203 }, { "epoch": 0.79, "grad_norm": 1.1436285972595215, "learning_rate": 1.1355073189049808e-06, "loss": 0.5237, "step": 12204 }, { "epoch": 0.79, "grad_norm": 1.0436939001083374, "learning_rate": 1.1348441193986294e-06, "loss": 0.4696, "step": 12205 }, { "epoch": 0.79, "grad_norm": 1.2061195373535156, "learning_rate": 1.134181088828269e-06, "loss": 0.5209, "step": 12206 }, { "epoch": 0.79, "grad_norm": 1.0912761688232422, "learning_rate": 1.1335182272228802e-06, "loss": 0.47, "step": 12207 }, { "epoch": 0.79, "grad_norm": 1.2789361476898193, "learning_rate": 1.132855534611433e-06, "loss": 0.5027, "step": 12208 }, { "epoch": 0.79, "grad_norm": 1.2055065631866455, "learning_rate": 1.1321930110228929e-06, "loss": 0.506, "step": 12209 }, { "epoch": 0.79, "grad_norm": 1.1563111543655396, "learning_rate": 1.1315306564862172e-06, "loss": 0.5306, "step": 12210 }, { "epoch": 0.79, "grad_norm": 1.0815991163253784, "learning_rate": 1.1308684710303559e-06, "loss": 0.4759, "step": 12211 }, { "epoch": 0.79, "grad_norm": 1.1698552370071411, "learning_rate": 1.1302064546842528e-06, "loss": 0.5125, "step": 12212 }, { "epoch": 0.79, "grad_norm": 1.1890521049499512, "learning_rate": 1.1295446074768396e-06, "loss": 0.4616, "step": 12213 }, { "epoch": 0.79, "grad_norm": 1.2541251182556152, "learning_rate": 1.1288829294370456e-06, "loss": 0.482, "step": 12214 }, { "epoch": 0.79, "grad_norm": 1.3066325187683105, "learning_rate": 1.1282214205937913e-06, "loss": 0.5048, "step": 12215 }, { "epoch": 0.79, "grad_norm": 1.2239291667938232, "learning_rate": 1.1275600809759895e-06, "loss": 0.5114, "step": 12216 }, { "epoch": 0.79, "grad_norm": 1.1793841123580933, "learning_rate": 1.1268989106125455e-06, "loss": 0.515, "step": 12217 }, { "epoch": 0.79, "grad_norm": 1.3457168340682983, "learning_rate": 1.1262379095323588e-06, "loss": 0.5096, "step": 12218 }, { "epoch": 0.79, "grad_norm": 1.170249342918396, "learning_rate": 1.125577077764317e-06, "loss": 0.528, "step": 12219 }, { "epoch": 0.79, "grad_norm": 1.181739330291748, "learning_rate": 1.1249164153373054e-06, "loss": 0.4519, "step": 12220 }, { "epoch": 0.79, "grad_norm": 1.1807727813720703, "learning_rate": 1.124255922280199e-06, "loss": 0.5204, "step": 12221 }, { "epoch": 0.79, "grad_norm": 1.185152292251587, "learning_rate": 1.123595598621867e-06, "loss": 0.5333, "step": 12222 }, { "epoch": 0.79, "grad_norm": 1.1430108547210693, "learning_rate": 1.1229354443911699e-06, "loss": 0.519, "step": 12223 }, { "epoch": 0.79, "grad_norm": 1.1580170392990112, "learning_rate": 1.1222754596169632e-06, "loss": 0.5015, "step": 12224 }, { "epoch": 0.79, "grad_norm": 1.196638822555542, "learning_rate": 1.1216156443280901e-06, "loss": 0.5578, "step": 12225 }, { "epoch": 0.79, "grad_norm": 1.3353339433670044, "learning_rate": 1.1209559985533908e-06, "loss": 0.5487, "step": 12226 }, { "epoch": 0.79, "grad_norm": 1.2045220136642456, "learning_rate": 1.120296522321696e-06, "loss": 0.5258, "step": 12227 }, { "epoch": 0.79, "grad_norm": 1.2293343544006348, "learning_rate": 1.1196372156618314e-06, "loss": 0.532, "step": 12228 }, { "epoch": 0.79, "grad_norm": 1.2402169704437256, "learning_rate": 1.1189780786026122e-06, "loss": 0.5123, "step": 12229 }, { "epoch": 0.79, "grad_norm": 1.1853837966918945, "learning_rate": 1.1183191111728497e-06, "loss": 0.5254, "step": 12230 }, { "epoch": 0.79, "grad_norm": 1.1330171823501587, "learning_rate": 1.1176603134013425e-06, "loss": 0.5022, "step": 12231 }, { "epoch": 0.79, "grad_norm": 1.1870380640029907, "learning_rate": 1.1170016853168864e-06, "loss": 0.5069, "step": 12232 }, { "epoch": 0.79, "grad_norm": 1.118704915046692, "learning_rate": 1.1163432269482683e-06, "loss": 0.5647, "step": 12233 }, { "epoch": 0.79, "grad_norm": 1.1798481941223145, "learning_rate": 1.1156849383242675e-06, "loss": 0.5302, "step": 12234 }, { "epoch": 0.79, "grad_norm": 1.192665457725525, "learning_rate": 1.115026819473657e-06, "loss": 0.5127, "step": 12235 }, { "epoch": 0.79, "grad_norm": 1.2075694799423218, "learning_rate": 1.1143688704252015e-06, "loss": 0.4862, "step": 12236 }, { "epoch": 0.79, "grad_norm": 1.2775055170059204, "learning_rate": 1.1137110912076566e-06, "loss": 0.5151, "step": 12237 }, { "epoch": 0.79, "grad_norm": 1.2146772146224976, "learning_rate": 1.1130534818497734e-06, "loss": 0.5141, "step": 12238 }, { "epoch": 0.79, "grad_norm": 1.2288085222244263, "learning_rate": 1.1123960423802938e-06, "loss": 0.4482, "step": 12239 }, { "epoch": 0.79, "grad_norm": 1.1516740322113037, "learning_rate": 1.1117387728279528e-06, "loss": 0.5138, "step": 12240 }, { "epoch": 0.79, "grad_norm": 1.0835427045822144, "learning_rate": 1.1110816732214802e-06, "loss": 0.4617, "step": 12241 }, { "epoch": 0.79, "grad_norm": 1.1580911874771118, "learning_rate": 1.1104247435895922e-06, "loss": 0.4868, "step": 12242 }, { "epoch": 0.79, "grad_norm": 1.2249449491500854, "learning_rate": 1.1097679839610037e-06, "loss": 0.5348, "step": 12243 }, { "epoch": 0.79, "grad_norm": 1.1459262371063232, "learning_rate": 1.1091113943644193e-06, "loss": 0.5358, "step": 12244 }, { "epoch": 0.79, "grad_norm": 1.1214982271194458, "learning_rate": 1.1084549748285378e-06, "loss": 0.5211, "step": 12245 }, { "epoch": 0.79, "grad_norm": 1.2486474514007568, "learning_rate": 1.1077987253820489e-06, "loss": 0.4877, "step": 12246 }, { "epoch": 0.79, "grad_norm": 1.1353693008422852, "learning_rate": 1.1071426460536367e-06, "loss": 0.4961, "step": 12247 }, { "epoch": 0.79, "grad_norm": 1.308457612991333, "learning_rate": 1.1064867368719745e-06, "loss": 0.5564, "step": 12248 }, { "epoch": 0.79, "grad_norm": 1.1821197271347046, "learning_rate": 1.1058309978657316e-06, "loss": 0.531, "step": 12249 }, { "epoch": 0.79, "grad_norm": 1.1453591585159302, "learning_rate": 1.1051754290635691e-06, "loss": 0.5459, "step": 12250 }, { "epoch": 0.79, "grad_norm": 1.2634581327438354, "learning_rate": 1.1045200304941395e-06, "loss": 0.5365, "step": 12251 }, { "epoch": 0.79, "grad_norm": 1.1508845090866089, "learning_rate": 1.1038648021860886e-06, "loss": 0.5299, "step": 12252 }, { "epoch": 0.79, "grad_norm": 1.2660822868347168, "learning_rate": 1.1032097441680567e-06, "loss": 0.4927, "step": 12253 }, { "epoch": 0.79, "grad_norm": 1.1578863859176636, "learning_rate": 1.1025548564686718e-06, "loss": 0.4902, "step": 12254 }, { "epoch": 0.79, "grad_norm": 1.165189266204834, "learning_rate": 1.1019001391165585e-06, "loss": 0.495, "step": 12255 }, { "epoch": 0.79, "grad_norm": 1.2880899906158447, "learning_rate": 1.1012455921403325e-06, "loss": 0.5322, "step": 12256 }, { "epoch": 0.79, "grad_norm": 1.2604553699493408, "learning_rate": 1.1005912155686034e-06, "loss": 0.5511, "step": 12257 }, { "epoch": 0.79, "grad_norm": 1.2193013429641724, "learning_rate": 1.099937009429971e-06, "loss": 0.5224, "step": 12258 }, { "epoch": 0.79, "grad_norm": 1.2240363359451294, "learning_rate": 1.099282973753032e-06, "loss": 0.5249, "step": 12259 }, { "epoch": 0.79, "grad_norm": 1.152980923652649, "learning_rate": 1.098629108566368e-06, "loss": 0.4841, "step": 12260 }, { "epoch": 0.79, "grad_norm": 1.1335632801055908, "learning_rate": 1.0979754138985598e-06, "loss": 0.4743, "step": 12261 }, { "epoch": 0.79, "grad_norm": 1.2128881216049194, "learning_rate": 1.0973218897781795e-06, "loss": 0.5064, "step": 12262 }, { "epoch": 0.79, "grad_norm": 1.2269062995910645, "learning_rate": 1.0966685362337902e-06, "loss": 0.5597, "step": 12263 }, { "epoch": 0.79, "grad_norm": 1.1320489645004272, "learning_rate": 1.0960153532939482e-06, "loss": 0.476, "step": 12264 }, { "epoch": 0.79, "grad_norm": 1.2014281749725342, "learning_rate": 1.0953623409872043e-06, "loss": 0.5452, "step": 12265 }, { "epoch": 0.79, "grad_norm": 1.155135154724121, "learning_rate": 1.094709499342097e-06, "loss": 0.5398, "step": 12266 }, { "epoch": 0.79, "grad_norm": 1.2242487668991089, "learning_rate": 1.0940568283871617e-06, "loss": 0.5229, "step": 12267 }, { "epoch": 0.79, "grad_norm": 1.2729405164718628, "learning_rate": 1.093404328150925e-06, "loss": 0.4798, "step": 12268 }, { "epoch": 0.79, "grad_norm": 1.218387246131897, "learning_rate": 1.0927519986619062e-06, "loss": 0.5669, "step": 12269 }, { "epoch": 0.79, "grad_norm": 1.1534411907196045, "learning_rate": 1.092099839948616e-06, "loss": 0.4943, "step": 12270 }, { "epoch": 0.79, "grad_norm": 1.164977788925171, "learning_rate": 1.091447852039561e-06, "loss": 0.5198, "step": 12271 }, { "epoch": 0.79, "grad_norm": 1.123197317123413, "learning_rate": 1.090796034963235e-06, "loss": 0.5252, "step": 12272 }, { "epoch": 0.79, "grad_norm": 1.1592118740081787, "learning_rate": 1.0901443887481283e-06, "loss": 0.5092, "step": 12273 }, { "epoch": 0.79, "grad_norm": 1.1600391864776611, "learning_rate": 1.0894929134227227e-06, "loss": 0.4562, "step": 12274 }, { "epoch": 0.79, "grad_norm": 1.1803733110427856, "learning_rate": 1.088841609015493e-06, "loss": 0.501, "step": 12275 }, { "epoch": 0.79, "grad_norm": 1.142816185951233, "learning_rate": 1.0881904755549056e-06, "loss": 0.539, "step": 12276 }, { "epoch": 0.79, "grad_norm": 1.0958449840545654, "learning_rate": 1.0875395130694205e-06, "loss": 0.5067, "step": 12277 }, { "epoch": 0.79, "grad_norm": 1.1790858507156372, "learning_rate": 1.0868887215874885e-06, "loss": 0.5298, "step": 12278 }, { "epoch": 0.79, "grad_norm": 1.248893141746521, "learning_rate": 1.0862381011375538e-06, "loss": 0.5172, "step": 12279 }, { "epoch": 0.79, "grad_norm": 1.2533607482910156, "learning_rate": 1.0855876517480546e-06, "loss": 0.5399, "step": 12280 }, { "epoch": 0.79, "grad_norm": 1.1249656677246094, "learning_rate": 1.08493737344742e-06, "loss": 0.5363, "step": 12281 }, { "epoch": 0.79, "grad_norm": 1.152716040611267, "learning_rate": 1.0842872662640724e-06, "loss": 0.5381, "step": 12282 }, { "epoch": 0.79, "grad_norm": 1.1661332845687866, "learning_rate": 1.0836373302264252e-06, "loss": 0.5369, "step": 12283 }, { "epoch": 0.79, "grad_norm": 1.2334054708480835, "learning_rate": 1.0829875653628852e-06, "loss": 0.5757, "step": 12284 }, { "epoch": 0.79, "grad_norm": 1.207593560218811, "learning_rate": 1.0823379717018534e-06, "loss": 0.5008, "step": 12285 }, { "epoch": 0.79, "grad_norm": 1.2017793655395508, "learning_rate": 1.0816885492717206e-06, "loss": 0.5191, "step": 12286 }, { "epoch": 0.79, "grad_norm": 1.1105197668075562, "learning_rate": 1.0810392981008737e-06, "loss": 0.4823, "step": 12287 }, { "epoch": 0.79, "grad_norm": 1.1630055904388428, "learning_rate": 1.0803902182176861e-06, "loss": 0.4961, "step": 12288 }, { "epoch": 0.79, "grad_norm": 1.2630215883255005, "learning_rate": 1.07974130965053e-06, "loss": 0.5253, "step": 12289 }, { "epoch": 0.79, "grad_norm": 1.1888798475265503, "learning_rate": 1.0790925724277662e-06, "loss": 0.5156, "step": 12290 }, { "epoch": 0.79, "grad_norm": 1.222453236579895, "learning_rate": 1.0784440065777507e-06, "loss": 0.4984, "step": 12291 }, { "epoch": 0.79, "grad_norm": 1.2326058149337769, "learning_rate": 1.0777956121288308e-06, "loss": 0.5531, "step": 12292 }, { "epoch": 0.79, "grad_norm": 1.2900415658950806, "learning_rate": 1.0771473891093442e-06, "loss": 0.4855, "step": 12293 }, { "epoch": 0.79, "grad_norm": 1.1109516620635986, "learning_rate": 1.076499337547624e-06, "loss": 0.5165, "step": 12294 }, { "epoch": 0.79, "grad_norm": 1.1760152578353882, "learning_rate": 1.0758514574719946e-06, "loss": 0.5152, "step": 12295 }, { "epoch": 0.79, "grad_norm": 1.2021089792251587, "learning_rate": 1.0752037489107753e-06, "loss": 0.521, "step": 12296 }, { "epoch": 0.79, "grad_norm": 1.2252254486083984, "learning_rate": 1.0745562118922725e-06, "loss": 0.5612, "step": 12297 }, { "epoch": 0.79, "grad_norm": 1.2707457542419434, "learning_rate": 1.07390884644479e-06, "loss": 0.5817, "step": 12298 }, { "epoch": 0.79, "grad_norm": 1.2982579469680786, "learning_rate": 1.0732616525966227e-06, "loss": 0.4914, "step": 12299 }, { "epoch": 0.79, "grad_norm": 1.1900527477264404, "learning_rate": 1.072614630376057e-06, "loss": 0.5155, "step": 12300 }, { "epoch": 0.79, "grad_norm": 1.0723912715911865, "learning_rate": 1.0719677798113747e-06, "loss": 0.4926, "step": 12301 }, { "epoch": 0.79, "grad_norm": 1.1811646223068237, "learning_rate": 1.0713211009308444e-06, "loss": 0.528, "step": 12302 }, { "epoch": 0.79, "grad_norm": 1.2538102865219116, "learning_rate": 1.0706745937627333e-06, "loss": 0.5066, "step": 12303 }, { "epoch": 0.79, "grad_norm": 1.2333792448043823, "learning_rate": 1.0700282583352977e-06, "loss": 0.5116, "step": 12304 }, { "epoch": 0.79, "grad_norm": 1.1309318542480469, "learning_rate": 1.0693820946767875e-06, "loss": 0.5347, "step": 12305 }, { "epoch": 0.79, "grad_norm": 1.1915203332901, "learning_rate": 1.0687361028154457e-06, "loss": 0.5296, "step": 12306 }, { "epoch": 0.79, "grad_norm": 1.1553854942321777, "learning_rate": 1.0680902827795053e-06, "loss": 0.4802, "step": 12307 }, { "epoch": 0.79, "grad_norm": 1.2889983654022217, "learning_rate": 1.0674446345971939e-06, "loss": 0.4971, "step": 12308 }, { "epoch": 0.79, "grad_norm": 1.1527162790298462, "learning_rate": 1.0667991582967313e-06, "loss": 0.5234, "step": 12309 }, { "epoch": 0.79, "grad_norm": 1.234336495399475, "learning_rate": 1.0661538539063298e-06, "loss": 0.5487, "step": 12310 }, { "epoch": 0.79, "grad_norm": 1.1626380681991577, "learning_rate": 1.0655087214541937e-06, "loss": 0.5078, "step": 12311 }, { "epoch": 0.79, "grad_norm": 1.209570050239563, "learning_rate": 1.064863760968522e-06, "loss": 0.5077, "step": 12312 }, { "epoch": 0.79, "grad_norm": 1.1084805727005005, "learning_rate": 1.064218972477501e-06, "loss": 0.4585, "step": 12313 }, { "epoch": 0.79, "grad_norm": 1.2469834089279175, "learning_rate": 1.0635743560093142e-06, "loss": 0.5283, "step": 12314 }, { "epoch": 0.79, "grad_norm": 1.1201618909835815, "learning_rate": 1.062929911592136e-06, "loss": 0.4932, "step": 12315 }, { "epoch": 0.79, "grad_norm": 1.1572402715682983, "learning_rate": 1.0622856392541337e-06, "loss": 0.4361, "step": 12316 }, { "epoch": 0.8, "grad_norm": 1.265375018119812, "learning_rate": 1.061641539023467e-06, "loss": 0.5243, "step": 12317 }, { "epoch": 0.8, "grad_norm": 1.2631657123565674, "learning_rate": 1.0609976109282887e-06, "loss": 0.5608, "step": 12318 }, { "epoch": 0.8, "grad_norm": 1.2399582862854004, "learning_rate": 1.0603538549967407e-06, "loss": 0.4974, "step": 12319 }, { "epoch": 0.8, "grad_norm": 1.2148377895355225, "learning_rate": 1.0597102712569613e-06, "loss": 0.4884, "step": 12320 }, { "epoch": 0.8, "grad_norm": 1.1261426210403442, "learning_rate": 1.0590668597370795e-06, "loss": 0.4935, "step": 12321 }, { "epoch": 0.8, "grad_norm": 1.1408730745315552, "learning_rate": 1.0584236204652183e-06, "loss": 0.5063, "step": 12322 }, { "epoch": 0.8, "grad_norm": 1.1473283767700195, "learning_rate": 1.0577805534694908e-06, "loss": 0.5187, "step": 12323 }, { "epoch": 0.8, "grad_norm": 1.2678961753845215, "learning_rate": 1.0571376587780053e-06, "loss": 0.5157, "step": 12324 }, { "epoch": 0.8, "grad_norm": 1.1226847171783447, "learning_rate": 1.0564949364188587e-06, "loss": 0.5018, "step": 12325 }, { "epoch": 0.8, "grad_norm": 1.0776233673095703, "learning_rate": 1.0558523864201442e-06, "loss": 0.4991, "step": 12326 }, { "epoch": 0.8, "grad_norm": 1.166049838066101, "learning_rate": 1.055210008809946e-06, "loss": 0.5272, "step": 12327 }, { "epoch": 0.8, "grad_norm": 1.2146615982055664, "learning_rate": 1.054567803616341e-06, "loss": 0.5248, "step": 12328 }, { "epoch": 0.8, "grad_norm": 1.176337480545044, "learning_rate": 1.0539257708673982e-06, "loss": 0.5047, "step": 12329 }, { "epoch": 0.8, "grad_norm": 1.2650574445724487, "learning_rate": 1.0532839105911786e-06, "loss": 0.5606, "step": 12330 }, { "epoch": 0.8, "grad_norm": 1.1787029504776, "learning_rate": 1.0526422228157363e-06, "loss": 0.4791, "step": 12331 }, { "epoch": 0.8, "grad_norm": 1.205115556716919, "learning_rate": 1.052000707569118e-06, "loss": 0.5187, "step": 12332 }, { "epoch": 0.8, "grad_norm": 1.129937767982483, "learning_rate": 1.051359364879363e-06, "loss": 0.5402, "step": 12333 }, { "epoch": 0.8, "grad_norm": 1.2859303951263428, "learning_rate": 1.050718194774502e-06, "loss": 0.5138, "step": 12334 }, { "epoch": 0.8, "grad_norm": 1.2050777673721313, "learning_rate": 1.0500771972825614e-06, "loss": 0.5547, "step": 12335 }, { "epoch": 0.8, "grad_norm": 1.2017494440078735, "learning_rate": 1.0494363724315543e-06, "loss": 0.5238, "step": 12336 }, { "epoch": 0.8, "grad_norm": 1.2244352102279663, "learning_rate": 1.0487957202494903e-06, "loss": 0.5409, "step": 12337 }, { "epoch": 0.8, "grad_norm": 1.3700402975082397, "learning_rate": 1.0481552407643715e-06, "loss": 0.5741, "step": 12338 }, { "epoch": 0.8, "grad_norm": 1.2194650173187256, "learning_rate": 1.0475149340041906e-06, "loss": 0.493, "step": 12339 }, { "epoch": 0.8, "grad_norm": 1.200368881225586, "learning_rate": 1.0468747999969354e-06, "loss": 0.5167, "step": 12340 }, { "epoch": 0.8, "grad_norm": 1.281440258026123, "learning_rate": 1.046234838770584e-06, "loss": 0.5467, "step": 12341 }, { "epoch": 0.8, "grad_norm": 1.1676963567733765, "learning_rate": 1.0455950503531058e-06, "loss": 0.4912, "step": 12342 }, { "epoch": 0.8, "grad_norm": 1.202378511428833, "learning_rate": 1.0449554347724655e-06, "loss": 0.5168, "step": 12343 }, { "epoch": 0.8, "grad_norm": 1.1995350122451782, "learning_rate": 1.0443159920566192e-06, "loss": 0.5355, "step": 12344 }, { "epoch": 0.8, "grad_norm": 1.1624248027801514, "learning_rate": 1.0436767222335148e-06, "loss": 0.4956, "step": 12345 }, { "epoch": 0.8, "grad_norm": 1.2064236402511597, "learning_rate": 1.0430376253310935e-06, "loss": 0.4708, "step": 12346 }, { "epoch": 0.8, "grad_norm": 1.128794550895691, "learning_rate": 1.0423987013772897e-06, "loss": 0.525, "step": 12347 }, { "epoch": 0.8, "grad_norm": 1.1921652555465698, "learning_rate": 1.0417599504000269e-06, "loss": 0.5115, "step": 12348 }, { "epoch": 0.8, "grad_norm": 1.2513492107391357, "learning_rate": 1.041121372427224e-06, "loss": 0.4756, "step": 12349 }, { "epoch": 0.8, "grad_norm": 1.3582829236984253, "learning_rate": 1.0404829674867921e-06, "loss": 0.5196, "step": 12350 }, { "epoch": 0.8, "grad_norm": 1.1798782348632812, "learning_rate": 1.0398447356066337e-06, "loss": 0.5084, "step": 12351 }, { "epoch": 0.8, "grad_norm": 1.1412689685821533, "learning_rate": 1.0392066768146447e-06, "loss": 0.5065, "step": 12352 }, { "epoch": 0.8, "grad_norm": 1.2415624856948853, "learning_rate": 1.0385687911387144e-06, "loss": 0.505, "step": 12353 }, { "epoch": 0.8, "grad_norm": 1.1307848691940308, "learning_rate": 1.0379310786067203e-06, "loss": 0.5137, "step": 12354 }, { "epoch": 0.8, "grad_norm": 1.2415462732315063, "learning_rate": 1.037293539246536e-06, "loss": 0.5165, "step": 12355 }, { "epoch": 0.8, "grad_norm": 1.1777105331420898, "learning_rate": 1.0366561730860275e-06, "loss": 0.5142, "step": 12356 }, { "epoch": 0.8, "grad_norm": 1.1728191375732422, "learning_rate": 1.0360189801530524e-06, "loss": 0.4542, "step": 12357 }, { "epoch": 0.8, "grad_norm": 1.1357043981552124, "learning_rate": 1.03538196047546e-06, "loss": 0.5012, "step": 12358 }, { "epoch": 0.8, "grad_norm": 1.2551279067993164, "learning_rate": 1.034745114081095e-06, "loss": 0.5328, "step": 12359 }, { "epoch": 0.8, "grad_norm": 1.2132456302642822, "learning_rate": 1.0341084409977886e-06, "loss": 0.4817, "step": 12360 }, { "epoch": 0.8, "grad_norm": 1.2141560316085815, "learning_rate": 1.0334719412533705e-06, "loss": 0.5378, "step": 12361 }, { "epoch": 0.8, "grad_norm": 1.1618473529815674, "learning_rate": 1.0328356148756602e-06, "loss": 0.4972, "step": 12362 }, { "epoch": 0.8, "grad_norm": 1.2355566024780273, "learning_rate": 1.0321994618924696e-06, "loss": 0.5056, "step": 12363 }, { "epoch": 0.8, "grad_norm": 1.188349723815918, "learning_rate": 1.0315634823316033e-06, "loss": 0.4924, "step": 12364 }, { "epoch": 0.8, "grad_norm": 1.1648483276367188, "learning_rate": 1.0309276762208604e-06, "loss": 0.5257, "step": 12365 }, { "epoch": 0.8, "grad_norm": 1.197574496269226, "learning_rate": 1.0302920435880265e-06, "loss": 0.5322, "step": 12366 }, { "epoch": 0.8, "grad_norm": 1.2752832174301147, "learning_rate": 1.0296565844608858e-06, "loss": 0.5447, "step": 12367 }, { "epoch": 0.8, "grad_norm": 1.120090126991272, "learning_rate": 1.0290212988672116e-06, "loss": 0.5477, "step": 12368 }, { "epoch": 0.8, "grad_norm": 1.1836888790130615, "learning_rate": 1.0283861868347716e-06, "loss": 0.5041, "step": 12369 }, { "epoch": 0.8, "grad_norm": 1.1711686849594116, "learning_rate": 1.027751248391326e-06, "loss": 0.5221, "step": 12370 }, { "epoch": 0.8, "grad_norm": 1.2066786289215088, "learning_rate": 1.0271164835646231e-06, "loss": 0.5257, "step": 12371 }, { "epoch": 0.8, "grad_norm": 1.1441757678985596, "learning_rate": 1.0264818923824083e-06, "loss": 0.4777, "step": 12372 }, { "epoch": 0.8, "grad_norm": 1.1831631660461426, "learning_rate": 1.0258474748724184e-06, "loss": 0.5198, "step": 12373 }, { "epoch": 0.8, "grad_norm": 1.1347532272338867, "learning_rate": 1.0252132310623825e-06, "loss": 0.5006, "step": 12374 }, { "epoch": 0.8, "grad_norm": 1.1714057922363281, "learning_rate": 1.0245791609800204e-06, "loss": 0.5499, "step": 12375 }, { "epoch": 0.8, "grad_norm": 1.188279390335083, "learning_rate": 1.023945264653048e-06, "loss": 0.5279, "step": 12376 }, { "epoch": 0.8, "grad_norm": 1.191027045249939, "learning_rate": 1.0233115421091687e-06, "loss": 0.5045, "step": 12377 }, { "epoch": 0.8, "grad_norm": 1.1557989120483398, "learning_rate": 1.0226779933760816e-06, "loss": 0.5935, "step": 12378 }, { "epoch": 0.8, "grad_norm": 1.244519591331482, "learning_rate": 1.022044618481478e-06, "loss": 0.4913, "step": 12379 }, { "epoch": 0.8, "grad_norm": 1.1915388107299805, "learning_rate": 1.0214114174530403e-06, "loss": 0.5276, "step": 12380 }, { "epoch": 0.8, "grad_norm": 1.2358416318893433, "learning_rate": 1.0207783903184465e-06, "loss": 0.5113, "step": 12381 }, { "epoch": 0.8, "grad_norm": 1.3465923070907593, "learning_rate": 1.0201455371053615e-06, "loss": 0.5005, "step": 12382 }, { "epoch": 0.8, "grad_norm": 1.1690424680709839, "learning_rate": 1.0195128578414464e-06, "loss": 0.5279, "step": 12383 }, { "epoch": 0.8, "grad_norm": 1.2165509462356567, "learning_rate": 1.018880352554355e-06, "loss": 0.4864, "step": 12384 }, { "epoch": 0.8, "grad_norm": 1.2310611009597778, "learning_rate": 1.0182480212717333e-06, "loss": 0.5113, "step": 12385 }, { "epoch": 0.8, "grad_norm": 1.1210347414016724, "learning_rate": 1.0176158640212158e-06, "loss": 0.5789, "step": 12386 }, { "epoch": 0.8, "grad_norm": 1.2033723592758179, "learning_rate": 1.0169838808304344e-06, "loss": 0.4729, "step": 12387 }, { "epoch": 0.8, "grad_norm": 1.2784755229949951, "learning_rate": 1.0163520717270114e-06, "loss": 0.5572, "step": 12388 }, { "epoch": 0.8, "grad_norm": 1.2942836284637451, "learning_rate": 1.015720436738561e-06, "loss": 0.5047, "step": 12389 }, { "epoch": 0.8, "grad_norm": 1.3342751264572144, "learning_rate": 1.0150889758926924e-06, "loss": 0.5335, "step": 12390 }, { "epoch": 0.8, "grad_norm": 1.1633421182632446, "learning_rate": 1.0144576892170021e-06, "loss": 0.4884, "step": 12391 }, { "epoch": 0.8, "grad_norm": 1.1164084672927856, "learning_rate": 1.013826576739083e-06, "loss": 0.4869, "step": 12392 }, { "epoch": 0.8, "grad_norm": 1.2017810344696045, "learning_rate": 1.0131956384865204e-06, "loss": 0.4853, "step": 12393 }, { "epoch": 0.8, "grad_norm": 1.307615041732788, "learning_rate": 1.012564874486891e-06, "loss": 0.5311, "step": 12394 }, { "epoch": 0.8, "grad_norm": 1.1383390426635742, "learning_rate": 1.0119342847677626e-06, "loss": 0.5158, "step": 12395 }, { "epoch": 0.8, "grad_norm": 1.1285009384155273, "learning_rate": 1.0113038693566967e-06, "loss": 0.4923, "step": 12396 }, { "epoch": 0.8, "grad_norm": 1.2863867282867432, "learning_rate": 1.010673628281248e-06, "loss": 0.5416, "step": 12397 }, { "epoch": 0.8, "grad_norm": 1.2821191549301147, "learning_rate": 1.0100435615689624e-06, "loss": 0.515, "step": 12398 }, { "epoch": 0.8, "grad_norm": 1.2492355108261108, "learning_rate": 1.0094136692473783e-06, "loss": 0.5279, "step": 12399 }, { "epoch": 0.8, "grad_norm": 1.135340690612793, "learning_rate": 1.0087839513440285e-06, "loss": 0.4792, "step": 12400 }, { "epoch": 0.8, "grad_norm": 1.2265816926956177, "learning_rate": 1.0081544078864331e-06, "loss": 0.5639, "step": 12401 }, { "epoch": 0.8, "grad_norm": 1.0814872980117798, "learning_rate": 1.007525038902109e-06, "loss": 0.5069, "step": 12402 }, { "epoch": 0.8, "grad_norm": 1.1830906867980957, "learning_rate": 1.0068958444185656e-06, "loss": 0.5618, "step": 12403 }, { "epoch": 0.8, "grad_norm": 1.2127007246017456, "learning_rate": 1.0062668244633022e-06, "loss": 0.4975, "step": 12404 }, { "epoch": 0.8, "grad_norm": 1.1561946868896484, "learning_rate": 1.0056379790638116e-06, "loss": 0.4786, "step": 12405 }, { "epoch": 0.8, "grad_norm": 1.1544829607009888, "learning_rate": 1.005009308247581e-06, "loss": 0.4966, "step": 12406 }, { "epoch": 0.8, "grad_norm": 1.2257059812545776, "learning_rate": 1.004380812042085e-06, "loss": 0.4967, "step": 12407 }, { "epoch": 0.8, "grad_norm": 1.2457278966903687, "learning_rate": 1.0037524904747946e-06, "loss": 0.4992, "step": 12408 }, { "epoch": 0.8, "grad_norm": 1.1800047159194946, "learning_rate": 1.003124343573173e-06, "loss": 0.5168, "step": 12409 }, { "epoch": 0.8, "grad_norm": 1.2139772176742554, "learning_rate": 1.0024963713646735e-06, "loss": 0.4902, "step": 12410 }, { "epoch": 0.8, "grad_norm": 1.3025966882705688, "learning_rate": 1.001868573876746e-06, "loss": 0.4873, "step": 12411 }, { "epoch": 0.8, "grad_norm": 1.2285410165786743, "learning_rate": 1.001240951136826e-06, "loss": 0.494, "step": 12412 }, { "epoch": 0.8, "grad_norm": 1.1696313619613647, "learning_rate": 1.0006135031723468e-06, "loss": 0.5021, "step": 12413 }, { "epoch": 0.8, "grad_norm": 1.405064344406128, "learning_rate": 9.999862300107332e-07, "loss": 0.539, "step": 12414 }, { "epoch": 0.8, "grad_norm": 1.2202783823013306, "learning_rate": 9.993591316794015e-07, "loss": 0.5256, "step": 12415 }, { "epoch": 0.8, "grad_norm": 1.148065209388733, "learning_rate": 9.9873220820576e-07, "loss": 0.5371, "step": 12416 }, { "epoch": 0.8, "grad_norm": 1.2093113660812378, "learning_rate": 9.981054596172118e-07, "loss": 0.4769, "step": 12417 }, { "epoch": 0.8, "grad_norm": 1.1340442895889282, "learning_rate": 9.974788859411478e-07, "loss": 0.5259, "step": 12418 }, { "epoch": 0.8, "grad_norm": 1.1635088920593262, "learning_rate": 9.968524872049545e-07, "loss": 0.5291, "step": 12419 }, { "epoch": 0.8, "grad_norm": 1.233289361000061, "learning_rate": 9.962262634360114e-07, "loss": 0.5192, "step": 12420 }, { "epoch": 0.8, "grad_norm": 1.2614854574203491, "learning_rate": 9.956002146616877e-07, "loss": 0.5447, "step": 12421 }, { "epoch": 0.8, "grad_norm": 1.249100923538208, "learning_rate": 9.949743409093476e-07, "loss": 0.5306, "step": 12422 }, { "epoch": 0.8, "grad_norm": 1.2414579391479492, "learning_rate": 9.943486422063476e-07, "loss": 0.501, "step": 12423 }, { "epoch": 0.8, "grad_norm": 1.0705302953720093, "learning_rate": 9.937231185800318e-07, "loss": 0.5356, "step": 12424 }, { "epoch": 0.8, "grad_norm": 1.2736282348632812, "learning_rate": 9.930977700577427e-07, "loss": 0.5456, "step": 12425 }, { "epoch": 0.8, "grad_norm": 1.1932493448257446, "learning_rate": 9.924725966668114e-07, "loss": 0.5677, "step": 12426 }, { "epoch": 0.8, "grad_norm": 1.094190239906311, "learning_rate": 9.918475984345637e-07, "loss": 0.5105, "step": 12427 }, { "epoch": 0.8, "grad_norm": 1.387352466583252, "learning_rate": 9.912227753883164e-07, "loss": 0.5588, "step": 12428 }, { "epoch": 0.8, "grad_norm": 1.2111414670944214, "learning_rate": 9.9059812755538e-07, "loss": 0.5422, "step": 12429 }, { "epoch": 0.8, "grad_norm": 1.2141313552856445, "learning_rate": 9.89973654963054e-07, "loss": 0.4458, "step": 12430 }, { "epoch": 0.8, "grad_norm": 1.3131195306777954, "learning_rate": 9.893493576386332e-07, "loss": 0.5223, "step": 12431 }, { "epoch": 0.8, "grad_norm": 1.166204810142517, "learning_rate": 9.887252356094045e-07, "loss": 0.5162, "step": 12432 }, { "epoch": 0.8, "grad_norm": 1.0909031629562378, "learning_rate": 9.881012889026465e-07, "loss": 0.5224, "step": 12433 }, { "epoch": 0.8, "grad_norm": 1.2448983192443848, "learning_rate": 9.874775175456302e-07, "loss": 0.5454, "step": 12434 }, { "epoch": 0.8, "grad_norm": 1.192007303237915, "learning_rate": 9.86853921565621e-07, "loss": 0.5067, "step": 12435 }, { "epoch": 0.8, "grad_norm": 1.199819564819336, "learning_rate": 9.86230500989871e-07, "loss": 0.5052, "step": 12436 }, { "epoch": 0.8, "grad_norm": 1.2634843587875366, "learning_rate": 9.856072558456304e-07, "loss": 0.4801, "step": 12437 }, { "epoch": 0.8, "grad_norm": 1.232539176940918, "learning_rate": 9.84984186160139e-07, "loss": 0.5152, "step": 12438 }, { "epoch": 0.8, "grad_norm": 1.2613450288772583, "learning_rate": 9.843612919606304e-07, "loss": 0.4945, "step": 12439 }, { "epoch": 0.8, "grad_norm": 1.2684054374694824, "learning_rate": 9.837385732743288e-07, "loss": 0.5381, "step": 12440 }, { "epoch": 0.8, "grad_norm": 1.1684802770614624, "learning_rate": 9.831160301284537e-07, "loss": 0.5082, "step": 12441 }, { "epoch": 0.8, "grad_norm": 1.216524362564087, "learning_rate": 9.824936625502119e-07, "loss": 0.5355, "step": 12442 }, { "epoch": 0.8, "grad_norm": 1.208569884300232, "learning_rate": 9.81871470566807e-07, "loss": 0.5101, "step": 12443 }, { "epoch": 0.8, "grad_norm": 1.0836726427078247, "learning_rate": 9.812494542054329e-07, "loss": 0.5196, "step": 12444 }, { "epoch": 0.8, "grad_norm": 1.1760594844818115, "learning_rate": 9.806276134932763e-07, "loss": 0.4838, "step": 12445 }, { "epoch": 0.8, "grad_norm": 1.1713696718215942, "learning_rate": 9.800059484575176e-07, "loss": 0.5314, "step": 12446 }, { "epoch": 0.8, "grad_norm": 1.2052453756332397, "learning_rate": 9.793844591253276e-07, "loss": 0.5272, "step": 12447 }, { "epoch": 0.8, "grad_norm": 1.1702351570129395, "learning_rate": 9.787631455238684e-07, "loss": 0.511, "step": 12448 }, { "epoch": 0.8, "grad_norm": 1.2208919525146484, "learning_rate": 9.781420076802973e-07, "loss": 0.487, "step": 12449 }, { "epoch": 0.8, "grad_norm": 1.1753394603729248, "learning_rate": 9.775210456217626e-07, "loss": 0.5337, "step": 12450 }, { "epoch": 0.8, "grad_norm": 1.1603866815567017, "learning_rate": 9.769002593754045e-07, "loss": 0.5142, "step": 12451 }, { "epoch": 0.8, "grad_norm": 1.2402204275131226, "learning_rate": 9.762796489683568e-07, "loss": 0.4984, "step": 12452 }, { "epoch": 0.8, "grad_norm": 1.2360457181930542, "learning_rate": 9.756592144277454e-07, "loss": 0.5141, "step": 12453 }, { "epoch": 0.8, "grad_norm": 1.1803343296051025, "learning_rate": 9.750389557806854e-07, "loss": 0.4812, "step": 12454 }, { "epoch": 0.8, "grad_norm": 1.1961785554885864, "learning_rate": 9.744188730542886e-07, "loss": 0.527, "step": 12455 }, { "epoch": 0.8, "grad_norm": 1.1193710565567017, "learning_rate": 9.737989662756564e-07, "loss": 0.438, "step": 12456 }, { "epoch": 0.8, "grad_norm": 1.2800159454345703, "learning_rate": 9.731792354718833e-07, "loss": 0.5425, "step": 12457 }, { "epoch": 0.8, "grad_norm": 1.0319100618362427, "learning_rate": 9.725596806700582e-07, "loss": 0.4913, "step": 12458 }, { "epoch": 0.8, "grad_norm": 1.093320608139038, "learning_rate": 9.719403018972566e-07, "loss": 0.4639, "step": 12459 }, { "epoch": 0.8, "grad_norm": 1.2324881553649902, "learning_rate": 9.713210991805528e-07, "loss": 0.5263, "step": 12460 }, { "epoch": 0.8, "grad_norm": 1.2825443744659424, "learning_rate": 9.70702072547009e-07, "loss": 0.5302, "step": 12461 }, { "epoch": 0.8, "grad_norm": 1.130718469619751, "learning_rate": 9.700832220236821e-07, "loss": 0.491, "step": 12462 }, { "epoch": 0.8, "grad_norm": 1.177242398262024, "learning_rate": 9.694645476376202e-07, "loss": 0.5214, "step": 12463 }, { "epoch": 0.8, "grad_norm": 1.2151143550872803, "learning_rate": 9.688460494158652e-07, "loss": 0.5059, "step": 12464 }, { "epoch": 0.8, "grad_norm": 1.1699512004852295, "learning_rate": 9.682277273854478e-07, "loss": 0.538, "step": 12465 }, { "epoch": 0.8, "grad_norm": 1.1969504356384277, "learning_rate": 9.676095815733943e-07, "loss": 0.5405, "step": 12466 }, { "epoch": 0.8, "grad_norm": 1.1875643730163574, "learning_rate": 9.669916120067223e-07, "loss": 0.5013, "step": 12467 }, { "epoch": 0.8, "grad_norm": 1.3775718212127686, "learning_rate": 9.663738187124416e-07, "loss": 0.5063, "step": 12468 }, { "epoch": 0.8, "grad_norm": 1.229931354522705, "learning_rate": 9.657562017175542e-07, "loss": 0.5757, "step": 12469 }, { "epoch": 0.8, "grad_norm": 1.165999174118042, "learning_rate": 9.651387610490559e-07, "loss": 0.5482, "step": 12470 }, { "epoch": 0.8, "grad_norm": 1.3010220527648926, "learning_rate": 9.64521496733931e-07, "loss": 0.5693, "step": 12471 }, { "epoch": 0.81, "grad_norm": 1.1140789985656738, "learning_rate": 9.639044087991607e-07, "loss": 0.5016, "step": 12472 }, { "epoch": 0.81, "grad_norm": 1.1499733924865723, "learning_rate": 9.632874972717144e-07, "loss": 0.4785, "step": 12473 }, { "epoch": 0.81, "grad_norm": 1.1898306608200073, "learning_rate": 9.626707621785585e-07, "loss": 0.542, "step": 12474 }, { "epoch": 0.81, "grad_norm": 1.107660174369812, "learning_rate": 9.620542035466457e-07, "loss": 0.497, "step": 12475 }, { "epoch": 0.81, "grad_norm": 1.2188642024993896, "learning_rate": 9.614378214029258e-07, "loss": 0.4626, "step": 12476 }, { "epoch": 0.81, "grad_norm": 1.1648160219192505, "learning_rate": 9.608216157743388e-07, "loss": 0.5008, "step": 12477 }, { "epoch": 0.81, "grad_norm": 1.2773667573928833, "learning_rate": 9.60205586687818e-07, "loss": 0.5084, "step": 12478 }, { "epoch": 0.81, "grad_norm": 1.218509316444397, "learning_rate": 9.595897341702893e-07, "loss": 0.4851, "step": 12479 }, { "epoch": 0.81, "grad_norm": 1.2323731184005737, "learning_rate": 9.589740582486679e-07, "loss": 0.551, "step": 12480 }, { "epoch": 0.81, "grad_norm": 1.2243293523788452, "learning_rate": 9.58358558949864e-07, "loss": 0.5083, "step": 12481 }, { "epoch": 0.81, "grad_norm": 1.2135041952133179, "learning_rate": 9.577432363007804e-07, "loss": 0.5486, "step": 12482 }, { "epoch": 0.81, "grad_norm": 1.2279492616653442, "learning_rate": 9.571280903283114e-07, "loss": 0.4728, "step": 12483 }, { "epoch": 0.81, "grad_norm": 1.0574015378952026, "learning_rate": 9.56513121059342e-07, "loss": 0.4695, "step": 12484 }, { "epoch": 0.81, "grad_norm": 1.1485661268234253, "learning_rate": 9.558983285207517e-07, "loss": 0.501, "step": 12485 }, { "epoch": 0.81, "grad_norm": 1.1253286600112915, "learning_rate": 9.552837127394115e-07, "loss": 0.5494, "step": 12486 }, { "epoch": 0.81, "grad_norm": 1.13064444065094, "learning_rate": 9.546692737421853e-07, "loss": 0.5448, "step": 12487 }, { "epoch": 0.81, "grad_norm": 1.2155967950820923, "learning_rate": 9.540550115559288e-07, "loss": 0.5684, "step": 12488 }, { "epoch": 0.81, "grad_norm": 1.279335379600525, "learning_rate": 9.534409262074879e-07, "loss": 0.541, "step": 12489 }, { "epoch": 0.81, "grad_norm": 1.1510096788406372, "learning_rate": 9.528270177237036e-07, "loss": 0.5504, "step": 12490 }, { "epoch": 0.81, "grad_norm": 1.2832368612289429, "learning_rate": 9.522132861314088e-07, "loss": 0.5222, "step": 12491 }, { "epoch": 0.81, "grad_norm": 1.3008390665054321, "learning_rate": 9.515997314574271e-07, "loss": 0.4822, "step": 12492 }, { "epoch": 0.81, "grad_norm": 1.202696442604065, "learning_rate": 9.509863537285768e-07, "loss": 0.5415, "step": 12493 }, { "epoch": 0.81, "grad_norm": 1.1700165271759033, "learning_rate": 9.503731529716676e-07, "loss": 0.5365, "step": 12494 }, { "epoch": 0.81, "grad_norm": 1.1357568502426147, "learning_rate": 9.497601292134984e-07, "loss": 0.5161, "step": 12495 }, { "epoch": 0.81, "grad_norm": 1.1601505279541016, "learning_rate": 9.491472824808646e-07, "loss": 0.5191, "step": 12496 }, { "epoch": 0.81, "grad_norm": 1.150714635848999, "learning_rate": 9.485346128005513e-07, "loss": 0.504, "step": 12497 }, { "epoch": 0.81, "grad_norm": 1.1936070919036865, "learning_rate": 9.479221201993372e-07, "loss": 0.5938, "step": 12498 }, { "epoch": 0.81, "grad_norm": 1.1207889318466187, "learning_rate": 9.473098047039935e-07, "loss": 0.5058, "step": 12499 }, { "epoch": 0.81, "grad_norm": 1.134727954864502, "learning_rate": 9.466976663412813e-07, "loss": 0.5089, "step": 12500 }, { "epoch": 0.81, "grad_norm": 1.2413079738616943, "learning_rate": 9.460857051379568e-07, "loss": 0.5134, "step": 12501 }, { "epoch": 0.81, "grad_norm": 1.2467176914215088, "learning_rate": 9.454739211207664e-07, "loss": 0.4765, "step": 12502 }, { "epoch": 0.81, "grad_norm": 1.1089297533035278, "learning_rate": 9.448623143164504e-07, "loss": 0.5134, "step": 12503 }, { "epoch": 0.81, "grad_norm": 1.1719685792922974, "learning_rate": 9.442508847517401e-07, "loss": 0.5187, "step": 12504 }, { "epoch": 0.81, "grad_norm": 1.206447958946228, "learning_rate": 9.436396324533614e-07, "loss": 0.5064, "step": 12505 }, { "epoch": 0.81, "grad_norm": 1.1709370613098145, "learning_rate": 9.430285574480269e-07, "loss": 0.4825, "step": 12506 }, { "epoch": 0.81, "grad_norm": 1.1613030433654785, "learning_rate": 9.424176597624474e-07, "loss": 0.5339, "step": 12507 }, { "epoch": 0.81, "grad_norm": 1.1543810367584229, "learning_rate": 9.418069394233231e-07, "loss": 0.5292, "step": 12508 }, { "epoch": 0.81, "grad_norm": 1.2152824401855469, "learning_rate": 9.411963964573473e-07, "loss": 0.5226, "step": 12509 }, { "epoch": 0.81, "grad_norm": 1.2140886783599854, "learning_rate": 9.405860308912046e-07, "loss": 0.5186, "step": 12510 }, { "epoch": 0.81, "grad_norm": 1.0730963945388794, "learning_rate": 9.399758427515748e-07, "loss": 0.5031, "step": 12511 }, { "epoch": 0.81, "grad_norm": 1.2665927410125732, "learning_rate": 9.39365832065125e-07, "loss": 0.5152, "step": 12512 }, { "epoch": 0.81, "grad_norm": 1.2060946226119995, "learning_rate": 9.387559988585176e-07, "loss": 0.5448, "step": 12513 }, { "epoch": 0.81, "grad_norm": 1.1080623865127563, "learning_rate": 9.381463431584076e-07, "loss": 0.4623, "step": 12514 }, { "epoch": 0.81, "grad_norm": 1.1995514631271362, "learning_rate": 9.375368649914413e-07, "loss": 0.5235, "step": 12515 }, { "epoch": 0.81, "grad_norm": 1.246528148651123, "learning_rate": 9.369275643842568e-07, "loss": 0.5432, "step": 12516 }, { "epoch": 0.81, "grad_norm": 1.2687513828277588, "learning_rate": 9.363184413634874e-07, "loss": 0.5295, "step": 12517 }, { "epoch": 0.81, "grad_norm": 1.2221988439559937, "learning_rate": 9.357094959557534e-07, "loss": 0.549, "step": 12518 }, { "epoch": 0.81, "grad_norm": 1.1665458679199219, "learning_rate": 9.351007281876706e-07, "loss": 0.5524, "step": 12519 }, { "epoch": 0.81, "grad_norm": 1.1384321451187134, "learning_rate": 9.344921380858479e-07, "loss": 0.5069, "step": 12520 }, { "epoch": 0.81, "grad_norm": 1.3239710330963135, "learning_rate": 9.338837256768846e-07, "loss": 0.521, "step": 12521 }, { "epoch": 0.81, "grad_norm": 1.1626596450805664, "learning_rate": 9.332754909873726e-07, "loss": 0.465, "step": 12522 }, { "epoch": 0.81, "grad_norm": 1.1810518503189087, "learning_rate": 9.326674340438984e-07, "loss": 0.4753, "step": 12523 }, { "epoch": 0.81, "grad_norm": 1.252454400062561, "learning_rate": 9.320595548730354e-07, "loss": 0.5401, "step": 12524 }, { "epoch": 0.81, "grad_norm": 1.166062355041504, "learning_rate": 9.314518535013533e-07, "loss": 0.4903, "step": 12525 }, { "epoch": 0.81, "grad_norm": 1.2952477931976318, "learning_rate": 9.308443299554143e-07, "loss": 0.4938, "step": 12526 }, { "epoch": 0.81, "grad_norm": 1.1866681575775146, "learning_rate": 9.302369842617709e-07, "loss": 0.4889, "step": 12527 }, { "epoch": 0.81, "grad_norm": 1.1296221017837524, "learning_rate": 9.296298164469686e-07, "loss": 0.4949, "step": 12528 }, { "epoch": 0.81, "grad_norm": 1.2603429555892944, "learning_rate": 9.290228265375472e-07, "loss": 0.4943, "step": 12529 }, { "epoch": 0.81, "grad_norm": 1.1670564413070679, "learning_rate": 9.284160145600329e-07, "loss": 0.4832, "step": 12530 }, { "epoch": 0.81, "grad_norm": 1.2214441299438477, "learning_rate": 9.278093805409499e-07, "loss": 0.5581, "step": 12531 }, { "epoch": 0.81, "grad_norm": 1.2363721132278442, "learning_rate": 9.272029245068126e-07, "loss": 0.5001, "step": 12532 }, { "epoch": 0.81, "grad_norm": 1.1344246864318848, "learning_rate": 9.265966464841275e-07, "loss": 0.5118, "step": 12533 }, { "epoch": 0.81, "grad_norm": 1.1121529340744019, "learning_rate": 9.259905464993935e-07, "loss": 0.5088, "step": 12534 }, { "epoch": 0.81, "grad_norm": 1.1485044956207275, "learning_rate": 9.253846245791026e-07, "loss": 0.495, "step": 12535 }, { "epoch": 0.81, "grad_norm": 1.2576172351837158, "learning_rate": 9.24778880749736e-07, "loss": 0.4961, "step": 12536 }, { "epoch": 0.81, "grad_norm": 1.1817177534103394, "learning_rate": 9.241733150377707e-07, "loss": 0.5861, "step": 12537 }, { "epoch": 0.81, "grad_norm": 1.1311677694320679, "learning_rate": 9.235679274696735e-07, "loss": 0.5115, "step": 12538 }, { "epoch": 0.81, "grad_norm": 1.316467046737671, "learning_rate": 9.229627180719053e-07, "loss": 0.5209, "step": 12539 }, { "epoch": 0.81, "grad_norm": 1.112846851348877, "learning_rate": 9.223576868709188e-07, "loss": 0.5255, "step": 12540 }, { "epoch": 0.81, "grad_norm": 1.1167383193969727, "learning_rate": 9.217528338931558e-07, "loss": 0.5233, "step": 12541 }, { "epoch": 0.81, "grad_norm": 1.1687496900558472, "learning_rate": 9.21148159165055e-07, "loss": 0.4592, "step": 12542 }, { "epoch": 0.81, "grad_norm": 1.27400541305542, "learning_rate": 9.205436627130443e-07, "loss": 0.524, "step": 12543 }, { "epoch": 0.81, "grad_norm": 1.1197208166122437, "learning_rate": 9.199393445635452e-07, "loss": 0.5124, "step": 12544 }, { "epoch": 0.81, "grad_norm": 1.1155433654785156, "learning_rate": 9.193352047429699e-07, "loss": 0.538, "step": 12545 }, { "epoch": 0.81, "grad_norm": 1.1583220958709717, "learning_rate": 9.187312432777268e-07, "loss": 0.4788, "step": 12546 }, { "epoch": 0.81, "grad_norm": 1.1710834503173828, "learning_rate": 9.181274601942092e-07, "loss": 0.5223, "step": 12547 }, { "epoch": 0.81, "grad_norm": 1.1726101636886597, "learning_rate": 9.175238555188093e-07, "loss": 0.4956, "step": 12548 }, { "epoch": 0.81, "grad_norm": 1.2117125988006592, "learning_rate": 9.169204292779088e-07, "loss": 0.5399, "step": 12549 }, { "epoch": 0.81, "grad_norm": 1.232404112815857, "learning_rate": 9.163171814978816e-07, "loss": 0.4998, "step": 12550 }, { "epoch": 0.81, "grad_norm": 1.1645088195800781, "learning_rate": 9.157141122050944e-07, "loss": 0.4951, "step": 12551 }, { "epoch": 0.81, "grad_norm": 1.1879945993423462, "learning_rate": 9.151112214259072e-07, "loss": 0.5273, "step": 12552 }, { "epoch": 0.81, "grad_norm": 1.1594632863998413, "learning_rate": 9.145085091866679e-07, "loss": 0.4901, "step": 12553 }, { "epoch": 0.81, "grad_norm": 1.1354613304138184, "learning_rate": 9.139059755137214e-07, "loss": 0.4995, "step": 12554 }, { "epoch": 0.81, "grad_norm": 1.2195121049880981, "learning_rate": 9.133036204334023e-07, "loss": 0.501, "step": 12555 }, { "epoch": 0.81, "grad_norm": 1.231748342514038, "learning_rate": 9.127014439720378e-07, "loss": 0.4873, "step": 12556 }, { "epoch": 0.81, "grad_norm": 1.2274914979934692, "learning_rate": 9.120994461559479e-07, "loss": 0.5227, "step": 12557 }, { "epoch": 0.81, "grad_norm": 1.114144206047058, "learning_rate": 9.114976270114456e-07, "loss": 0.4849, "step": 12558 }, { "epoch": 0.81, "grad_norm": 1.1788475513458252, "learning_rate": 9.108959865648326e-07, "loss": 0.5457, "step": 12559 }, { "epoch": 0.81, "grad_norm": 1.266687035560608, "learning_rate": 9.102945248424055e-07, "loss": 0.5594, "step": 12560 }, { "epoch": 0.81, "grad_norm": 1.267632007598877, "learning_rate": 9.096932418704535e-07, "loss": 0.5338, "step": 12561 }, { "epoch": 0.81, "grad_norm": 1.1598576307296753, "learning_rate": 9.090921376752565e-07, "loss": 0.462, "step": 12562 }, { "epoch": 0.81, "grad_norm": 1.3148469924926758, "learning_rate": 9.084912122830885e-07, "loss": 0.5613, "step": 12563 }, { "epoch": 0.81, "grad_norm": 1.324208378791809, "learning_rate": 9.078904657202126e-07, "loss": 0.5442, "step": 12564 }, { "epoch": 0.81, "grad_norm": 1.0494660139083862, "learning_rate": 9.072898980128864e-07, "loss": 0.4799, "step": 12565 }, { "epoch": 0.81, "grad_norm": 1.2179325819015503, "learning_rate": 9.066895091873596e-07, "loss": 0.5418, "step": 12566 }, { "epoch": 0.81, "grad_norm": 1.125240683555603, "learning_rate": 9.060892992698733e-07, "loss": 0.4761, "step": 12567 }, { "epoch": 0.81, "grad_norm": 1.1668726205825806, "learning_rate": 9.054892682866628e-07, "loss": 0.4792, "step": 12568 }, { "epoch": 0.81, "grad_norm": 1.1798032522201538, "learning_rate": 9.04889416263951e-07, "loss": 0.5202, "step": 12569 }, { "epoch": 0.81, "grad_norm": 1.1798591613769531, "learning_rate": 9.042897432279573e-07, "loss": 0.5258, "step": 12570 }, { "epoch": 0.81, "grad_norm": 1.1747788190841675, "learning_rate": 9.036902492048921e-07, "loss": 0.482, "step": 12571 }, { "epoch": 0.81, "grad_norm": 1.513009786605835, "learning_rate": 9.030909342209587e-07, "loss": 0.525, "step": 12572 }, { "epoch": 0.81, "grad_norm": 1.253914713859558, "learning_rate": 9.024917983023485e-07, "loss": 0.5541, "step": 12573 }, { "epoch": 0.81, "grad_norm": 1.1943681240081787, "learning_rate": 9.018928414752509e-07, "loss": 0.5148, "step": 12574 }, { "epoch": 0.81, "grad_norm": 1.2543848752975464, "learning_rate": 9.012940637658435e-07, "loss": 0.5253, "step": 12575 }, { "epoch": 0.81, "grad_norm": 1.1201013326644897, "learning_rate": 9.006954652002975e-07, "loss": 0.496, "step": 12576 }, { "epoch": 0.81, "grad_norm": 1.2461951971054077, "learning_rate": 9.000970458047781e-07, "loss": 0.5141, "step": 12577 }, { "epoch": 0.81, "grad_norm": 1.1853209733963013, "learning_rate": 8.994988056054377e-07, "loss": 0.5239, "step": 12578 }, { "epoch": 0.81, "grad_norm": 1.2537821531295776, "learning_rate": 8.989007446284248e-07, "loss": 0.5444, "step": 12579 }, { "epoch": 0.81, "grad_norm": 1.257197380065918, "learning_rate": 8.983028628998797e-07, "loss": 0.5292, "step": 12580 }, { "epoch": 0.81, "grad_norm": 1.2683217525482178, "learning_rate": 8.977051604459347e-07, "loss": 0.4951, "step": 12581 }, { "epoch": 0.81, "grad_norm": 1.1350109577178955, "learning_rate": 8.971076372927124e-07, "loss": 0.531, "step": 12582 }, { "epoch": 0.81, "grad_norm": 1.1756598949432373, "learning_rate": 8.965102934663294e-07, "loss": 0.5145, "step": 12583 }, { "epoch": 0.81, "grad_norm": 1.1082723140716553, "learning_rate": 8.959131289928941e-07, "loss": 0.5175, "step": 12584 }, { "epoch": 0.81, "grad_norm": 1.1272125244140625, "learning_rate": 8.953161438985075e-07, "loss": 0.492, "step": 12585 }, { "epoch": 0.81, "grad_norm": 1.283219337463379, "learning_rate": 8.947193382092623e-07, "loss": 0.5179, "step": 12586 }, { "epoch": 0.81, "grad_norm": 1.134782314300537, "learning_rate": 8.941227119512436e-07, "loss": 0.4672, "step": 12587 }, { "epoch": 0.81, "grad_norm": 1.1613552570343018, "learning_rate": 8.935262651505272e-07, "loss": 0.485, "step": 12588 }, { "epoch": 0.81, "grad_norm": 1.1509957313537598, "learning_rate": 8.929299978331829e-07, "loss": 0.5378, "step": 12589 }, { "epoch": 0.81, "grad_norm": 1.2257870435714722, "learning_rate": 8.923339100252714e-07, "loss": 0.5381, "step": 12590 }, { "epoch": 0.81, "grad_norm": 1.2080590724945068, "learning_rate": 8.917380017528476e-07, "loss": 0.4919, "step": 12591 }, { "epoch": 0.81, "grad_norm": 1.4110140800476074, "learning_rate": 8.911422730419555e-07, "loss": 0.53, "step": 12592 }, { "epoch": 0.81, "grad_norm": 1.234687089920044, "learning_rate": 8.905467239186355e-07, "loss": 0.5036, "step": 12593 }, { "epoch": 0.81, "grad_norm": 1.167095422744751, "learning_rate": 8.899513544089139e-07, "loss": 0.508, "step": 12594 }, { "epoch": 0.81, "grad_norm": 1.280957579612732, "learning_rate": 8.893561645388149e-07, "loss": 0.5831, "step": 12595 }, { "epoch": 0.81, "grad_norm": 1.141226887702942, "learning_rate": 8.887611543343527e-07, "loss": 0.5006, "step": 12596 }, { "epoch": 0.81, "grad_norm": 1.17335844039917, "learning_rate": 8.881663238215326e-07, "loss": 0.5037, "step": 12597 }, { "epoch": 0.81, "grad_norm": 1.1950252056121826, "learning_rate": 8.87571673026354e-07, "loss": 0.5099, "step": 12598 }, { "epoch": 0.81, "grad_norm": 1.1947938203811646, "learning_rate": 8.869772019748091e-07, "loss": 0.5264, "step": 12599 }, { "epoch": 0.81, "grad_norm": 1.147994041442871, "learning_rate": 8.863829106928773e-07, "loss": 0.4964, "step": 12600 }, { "epoch": 0.81, "grad_norm": 1.2394646406173706, "learning_rate": 8.857887992065351e-07, "loss": 0.5001, "step": 12601 }, { "epoch": 0.81, "grad_norm": 1.0759949684143066, "learning_rate": 8.851948675417499e-07, "loss": 0.4882, "step": 12602 }, { "epoch": 0.81, "grad_norm": 1.1385003328323364, "learning_rate": 8.846011157244805e-07, "loss": 0.4799, "step": 12603 }, { "epoch": 0.81, "grad_norm": 1.1908601522445679, "learning_rate": 8.840075437806784e-07, "loss": 0.5338, "step": 12604 }, { "epoch": 0.81, "grad_norm": 1.1568058729171753, "learning_rate": 8.834141517362888e-07, "loss": 0.5127, "step": 12605 }, { "epoch": 0.81, "grad_norm": 1.1646028757095337, "learning_rate": 8.828209396172443e-07, "loss": 0.5066, "step": 12606 }, { "epoch": 0.81, "grad_norm": 1.249694585800171, "learning_rate": 8.822279074494738e-07, "loss": 0.4979, "step": 12607 }, { "epoch": 0.81, "grad_norm": 1.1848671436309814, "learning_rate": 8.816350552588976e-07, "loss": 0.502, "step": 12608 }, { "epoch": 0.81, "grad_norm": 1.2094799280166626, "learning_rate": 8.810423830714277e-07, "loss": 0.5095, "step": 12609 }, { "epoch": 0.81, "grad_norm": 1.1747586727142334, "learning_rate": 8.804498909129683e-07, "loss": 0.5482, "step": 12610 }, { "epoch": 0.81, "grad_norm": 1.110028624534607, "learning_rate": 8.798575788094166e-07, "loss": 0.4955, "step": 12611 }, { "epoch": 0.81, "grad_norm": 1.3285610675811768, "learning_rate": 8.79265446786659e-07, "loss": 0.504, "step": 12612 }, { "epoch": 0.81, "grad_norm": 1.1982682943344116, "learning_rate": 8.786734948705766e-07, "loss": 0.5229, "step": 12613 }, { "epoch": 0.81, "grad_norm": 1.1678409576416016, "learning_rate": 8.780817230870431e-07, "loss": 0.4623, "step": 12614 }, { "epoch": 0.81, "grad_norm": 1.1771471500396729, "learning_rate": 8.774901314619227e-07, "loss": 0.4674, "step": 12615 }, { "epoch": 0.81, "grad_norm": 1.026249647140503, "learning_rate": 8.768987200210721e-07, "loss": 0.4308, "step": 12616 }, { "epoch": 0.81, "grad_norm": 1.1657695770263672, "learning_rate": 8.763074887903422e-07, "loss": 0.5245, "step": 12617 }, { "epoch": 0.81, "grad_norm": 1.2257652282714844, "learning_rate": 8.757164377955718e-07, "loss": 0.514, "step": 12618 }, { "epoch": 0.81, "grad_norm": 1.1743050813674927, "learning_rate": 8.751255670625947e-07, "loss": 0.4976, "step": 12619 }, { "epoch": 0.81, "grad_norm": 1.2469207048416138, "learning_rate": 8.745348766172368e-07, "loss": 0.5046, "step": 12620 }, { "epoch": 0.81, "grad_norm": 1.2238940000534058, "learning_rate": 8.739443664853154e-07, "loss": 0.5769, "step": 12621 }, { "epoch": 0.81, "grad_norm": 1.2589079141616821, "learning_rate": 8.733540366926408e-07, "loss": 0.4934, "step": 12622 }, { "epoch": 0.81, "grad_norm": 1.1018285751342773, "learning_rate": 8.727638872650151e-07, "loss": 0.5164, "step": 12623 }, { "epoch": 0.81, "grad_norm": 1.231801986694336, "learning_rate": 8.721739182282307e-07, "loss": 0.4903, "step": 12624 }, { "epoch": 0.81, "grad_norm": 1.273984432220459, "learning_rate": 8.71584129608074e-07, "loss": 0.4944, "step": 12625 }, { "epoch": 0.81, "grad_norm": 1.2269258499145508, "learning_rate": 8.709945214303239e-07, "loss": 0.5233, "step": 12626 }, { "epoch": 0.82, "grad_norm": 1.1680506467819214, "learning_rate": 8.704050937207503e-07, "loss": 0.5106, "step": 12627 }, { "epoch": 0.82, "grad_norm": 1.2234420776367188, "learning_rate": 8.698158465051166e-07, "loss": 0.5847, "step": 12628 }, { "epoch": 0.82, "grad_norm": 1.1627352237701416, "learning_rate": 8.692267798091753e-07, "loss": 0.547, "step": 12629 }, { "epoch": 0.82, "grad_norm": 1.1909936666488647, "learning_rate": 8.686378936586736e-07, "loss": 0.5663, "step": 12630 }, { "epoch": 0.82, "grad_norm": 1.1910535097122192, "learning_rate": 8.680491880793507e-07, "loss": 0.4982, "step": 12631 }, { "epoch": 0.82, "grad_norm": 1.1163344383239746, "learning_rate": 8.674606630969368e-07, "loss": 0.4866, "step": 12632 }, { "epoch": 0.82, "grad_norm": 1.1418123245239258, "learning_rate": 8.668723187371558e-07, "loss": 0.4973, "step": 12633 }, { "epoch": 0.82, "grad_norm": 1.2098463773727417, "learning_rate": 8.662841550257228e-07, "loss": 0.5446, "step": 12634 }, { "epoch": 0.82, "grad_norm": 1.3649415969848633, "learning_rate": 8.656961719883434e-07, "loss": 0.5469, "step": 12635 }, { "epoch": 0.82, "grad_norm": 1.0907953977584839, "learning_rate": 8.651083696507173e-07, "loss": 0.4771, "step": 12636 }, { "epoch": 0.82, "grad_norm": 1.1233389377593994, "learning_rate": 8.645207480385364e-07, "loss": 0.4958, "step": 12637 }, { "epoch": 0.82, "grad_norm": 1.2613131999969482, "learning_rate": 8.639333071774841e-07, "loss": 0.5599, "step": 12638 }, { "epoch": 0.82, "grad_norm": 1.2236906290054321, "learning_rate": 8.633460470932359e-07, "loss": 0.4787, "step": 12639 }, { "epoch": 0.82, "grad_norm": 1.2219270467758179, "learning_rate": 8.6275896781146e-07, "loss": 0.5329, "step": 12640 }, { "epoch": 0.82, "grad_norm": 1.20073401927948, "learning_rate": 8.621720693578145e-07, "loss": 0.5473, "step": 12641 }, { "epoch": 0.82, "grad_norm": 1.2106726169586182, "learning_rate": 8.61585351757952e-07, "loss": 0.4823, "step": 12642 }, { "epoch": 0.82, "grad_norm": 1.0762656927108765, "learning_rate": 8.60998815037517e-07, "loss": 0.4934, "step": 12643 }, { "epoch": 0.82, "grad_norm": 1.2431317567825317, "learning_rate": 8.604124592221441e-07, "loss": 0.5237, "step": 12644 }, { "epoch": 0.82, "grad_norm": 1.2014691829681396, "learning_rate": 8.598262843374633e-07, "loss": 0.5115, "step": 12645 }, { "epoch": 0.82, "grad_norm": 1.2616130113601685, "learning_rate": 8.592402904090946e-07, "loss": 0.5626, "step": 12646 }, { "epoch": 0.82, "grad_norm": 1.3094249963760376, "learning_rate": 8.586544774626482e-07, "loss": 0.4947, "step": 12647 }, { "epoch": 0.82, "grad_norm": 1.2422057390213013, "learning_rate": 8.580688455237296e-07, "loss": 0.5346, "step": 12648 }, { "epoch": 0.82, "grad_norm": 1.1202564239501953, "learning_rate": 8.574833946179356e-07, "loss": 0.5175, "step": 12649 }, { "epoch": 0.82, "grad_norm": 1.1829955577850342, "learning_rate": 8.568981247708547e-07, "loss": 0.5053, "step": 12650 }, { "epoch": 0.82, "grad_norm": 1.1324769258499146, "learning_rate": 8.563130360080674e-07, "loss": 0.4878, "step": 12651 }, { "epoch": 0.82, "grad_norm": 1.1219736337661743, "learning_rate": 8.557281283551478e-07, "loss": 0.5215, "step": 12652 }, { "epoch": 0.82, "grad_norm": 1.2173808813095093, "learning_rate": 8.551434018376575e-07, "loss": 0.5021, "step": 12653 }, { "epoch": 0.82, "grad_norm": 1.1812161207199097, "learning_rate": 8.545588564811558e-07, "loss": 0.5094, "step": 12654 }, { "epoch": 0.82, "grad_norm": 1.166925311088562, "learning_rate": 8.539744923111909e-07, "loss": 0.4841, "step": 12655 }, { "epoch": 0.82, "grad_norm": 1.16317617893219, "learning_rate": 8.533903093533042e-07, "loss": 0.459, "step": 12656 }, { "epoch": 0.82, "grad_norm": 1.1119375228881836, "learning_rate": 8.528063076330301e-07, "loss": 0.522, "step": 12657 }, { "epoch": 0.82, "grad_norm": 1.136955738067627, "learning_rate": 8.522224871758911e-07, "loss": 0.4545, "step": 12658 }, { "epoch": 0.82, "grad_norm": 1.232373833656311, "learning_rate": 8.51638848007405e-07, "loss": 0.4955, "step": 12659 }, { "epoch": 0.82, "grad_norm": 1.1868547201156616, "learning_rate": 8.510553901530827e-07, "loss": 0.4749, "step": 12660 }, { "epoch": 0.82, "grad_norm": 1.120537281036377, "learning_rate": 8.504721136384259e-07, "loss": 0.5182, "step": 12661 }, { "epoch": 0.82, "grad_norm": 1.1562122106552124, "learning_rate": 8.498890184889258e-07, "loss": 0.5346, "step": 12662 }, { "epoch": 0.82, "grad_norm": 1.3338490724563599, "learning_rate": 8.493061047300693e-07, "loss": 0.5244, "step": 12663 }, { "epoch": 0.82, "grad_norm": 1.204783320426941, "learning_rate": 8.487233723873345e-07, "loss": 0.5305, "step": 12664 }, { "epoch": 0.82, "grad_norm": 1.1211961507797241, "learning_rate": 8.481408214861903e-07, "loss": 0.5171, "step": 12665 }, { "epoch": 0.82, "grad_norm": 1.2636579275131226, "learning_rate": 8.475584520520996e-07, "loss": 0.5452, "step": 12666 }, { "epoch": 0.82, "grad_norm": 1.0789942741394043, "learning_rate": 8.46976264110515e-07, "loss": 0.4875, "step": 12667 }, { "epoch": 0.82, "grad_norm": 1.2358227968215942, "learning_rate": 8.463942576868828e-07, "loss": 0.5112, "step": 12668 }, { "epoch": 0.82, "grad_norm": 1.2768694162368774, "learning_rate": 8.458124328066408e-07, "loss": 0.5165, "step": 12669 }, { "epoch": 0.82, "grad_norm": 1.1379411220550537, "learning_rate": 8.45230789495221e-07, "loss": 0.4924, "step": 12670 }, { "epoch": 0.82, "grad_norm": 1.1495550870895386, "learning_rate": 8.446493277780427e-07, "loss": 0.5324, "step": 12671 }, { "epoch": 0.82, "grad_norm": 1.2140841484069824, "learning_rate": 8.440680476805213e-07, "loss": 0.5098, "step": 12672 }, { "epoch": 0.82, "grad_norm": 1.1915205717086792, "learning_rate": 8.434869492280628e-07, "loss": 0.5191, "step": 12673 }, { "epoch": 0.82, "grad_norm": 1.230688452720642, "learning_rate": 8.429060324460658e-07, "loss": 0.5041, "step": 12674 }, { "epoch": 0.82, "grad_norm": 1.2912167310714722, "learning_rate": 8.423252973599222e-07, "loss": 0.5348, "step": 12675 }, { "epoch": 0.82, "grad_norm": 1.2183645963668823, "learning_rate": 8.417447439950116e-07, "loss": 0.5235, "step": 12676 }, { "epoch": 0.82, "grad_norm": 1.2609732151031494, "learning_rate": 8.411643723767099e-07, "loss": 0.5164, "step": 12677 }, { "epoch": 0.82, "grad_norm": 1.1786702871322632, "learning_rate": 8.405841825303834e-07, "loss": 0.57, "step": 12678 }, { "epoch": 0.82, "grad_norm": 1.0649516582489014, "learning_rate": 8.400041744813909e-07, "loss": 0.489, "step": 12679 }, { "epoch": 0.82, "grad_norm": 1.1493688821792603, "learning_rate": 8.394243482550829e-07, "loss": 0.486, "step": 12680 }, { "epoch": 0.82, "grad_norm": 1.2449747323989868, "learning_rate": 8.388447038768038e-07, "loss": 0.5408, "step": 12681 }, { "epoch": 0.82, "grad_norm": 1.1226146221160889, "learning_rate": 8.38265241371885e-07, "loss": 0.4767, "step": 12682 }, { "epoch": 0.82, "grad_norm": 1.1056411266326904, "learning_rate": 8.376859607656557e-07, "loss": 0.4976, "step": 12683 }, { "epoch": 0.82, "grad_norm": 1.1919156312942505, "learning_rate": 8.371068620834333e-07, "loss": 0.5053, "step": 12684 }, { "epoch": 0.82, "grad_norm": 1.270458698272705, "learning_rate": 8.365279453505304e-07, "loss": 0.5564, "step": 12685 }, { "epoch": 0.82, "grad_norm": 1.1427292823791504, "learning_rate": 8.359492105922484e-07, "loss": 0.4902, "step": 12686 }, { "epoch": 0.82, "grad_norm": 1.1450626850128174, "learning_rate": 8.353706578338849e-07, "loss": 0.542, "step": 12687 }, { "epoch": 0.82, "grad_norm": 1.1900336742401123, "learning_rate": 8.347922871007236e-07, "loss": 0.5222, "step": 12688 }, { "epoch": 0.82, "grad_norm": 1.1286118030548096, "learning_rate": 8.342140984180446e-07, "loss": 0.5187, "step": 12689 }, { "epoch": 0.82, "grad_norm": 1.1950706243515015, "learning_rate": 8.336360918111202e-07, "loss": 0.4681, "step": 12690 }, { "epoch": 0.82, "grad_norm": 1.3015718460083008, "learning_rate": 8.330582673052124e-07, "loss": 0.595, "step": 12691 }, { "epoch": 0.82, "grad_norm": 1.1583551168441772, "learning_rate": 8.324806249255768e-07, "loss": 0.4634, "step": 12692 }, { "epoch": 0.82, "grad_norm": 1.1842066049575806, "learning_rate": 8.319031646974624e-07, "loss": 0.5223, "step": 12693 }, { "epoch": 0.82, "grad_norm": 1.2050070762634277, "learning_rate": 8.313258866461055e-07, "loss": 0.4797, "step": 12694 }, { "epoch": 0.82, "grad_norm": 1.1746621131896973, "learning_rate": 8.307487907967383e-07, "loss": 0.5372, "step": 12695 }, { "epoch": 0.82, "grad_norm": 1.1652072668075562, "learning_rate": 8.301718771745854e-07, "loss": 0.5827, "step": 12696 }, { "epoch": 0.82, "grad_norm": 1.1852585077285767, "learning_rate": 8.295951458048607e-07, "loss": 0.5104, "step": 12697 }, { "epoch": 0.82, "grad_norm": 1.2193083763122559, "learning_rate": 8.290185967127729e-07, "loss": 0.5131, "step": 12698 }, { "epoch": 0.82, "grad_norm": 1.1800190210342407, "learning_rate": 8.284422299235218e-07, "loss": 0.459, "step": 12699 }, { "epoch": 0.82, "grad_norm": 1.1415421962738037, "learning_rate": 8.278660454622973e-07, "loss": 0.5191, "step": 12700 }, { "epoch": 0.82, "grad_norm": 1.1071538925170898, "learning_rate": 8.272900433542835e-07, "loss": 0.5231, "step": 12701 }, { "epoch": 0.82, "grad_norm": 1.2447600364685059, "learning_rate": 8.267142236246562e-07, "loss": 0.5545, "step": 12702 }, { "epoch": 0.82, "grad_norm": 1.1420241594314575, "learning_rate": 8.261385862985827e-07, "loss": 0.5289, "step": 12703 }, { "epoch": 0.82, "grad_norm": 1.2244149446487427, "learning_rate": 8.255631314012231e-07, "loss": 0.5363, "step": 12704 }, { "epoch": 0.82, "grad_norm": 1.2154275178909302, "learning_rate": 8.249878589577298e-07, "loss": 0.5354, "step": 12705 }, { "epoch": 0.82, "grad_norm": 1.1227766275405884, "learning_rate": 8.244127689932446e-07, "loss": 0.4616, "step": 12706 }, { "epoch": 0.82, "grad_norm": 1.2087570428848267, "learning_rate": 8.23837861532904e-07, "loss": 0.512, "step": 12707 }, { "epoch": 0.82, "grad_norm": 1.0835784673690796, "learning_rate": 8.232631366018362e-07, "loss": 0.4738, "step": 12708 }, { "epoch": 0.82, "grad_norm": 1.0668379068374634, "learning_rate": 8.2268859422516e-07, "loss": 0.4608, "step": 12709 }, { "epoch": 0.82, "grad_norm": 1.147648811340332, "learning_rate": 8.221142344279892e-07, "loss": 0.5082, "step": 12710 }, { "epoch": 0.82, "grad_norm": 1.0749698877334595, "learning_rate": 8.215400572354249e-07, "loss": 0.5276, "step": 12711 }, { "epoch": 0.82, "grad_norm": 1.1776182651519775, "learning_rate": 8.20966062672564e-07, "loss": 0.5374, "step": 12712 }, { "epoch": 0.82, "grad_norm": 1.181884765625, "learning_rate": 8.203922507644946e-07, "loss": 0.4586, "step": 12713 }, { "epoch": 0.82, "grad_norm": 1.1199836730957031, "learning_rate": 8.198186215362969e-07, "loss": 0.4765, "step": 12714 }, { "epoch": 0.82, "grad_norm": 1.2236438989639282, "learning_rate": 8.192451750130414e-07, "loss": 0.5364, "step": 12715 }, { "epoch": 0.82, "grad_norm": 1.164984941482544, "learning_rate": 8.186719112197944e-07, "loss": 0.5381, "step": 12716 }, { "epoch": 0.82, "grad_norm": 1.3134676218032837, "learning_rate": 8.18098830181609e-07, "loss": 0.5415, "step": 12717 }, { "epoch": 0.82, "grad_norm": 1.1873540878295898, "learning_rate": 8.17525931923534e-07, "loss": 0.4679, "step": 12718 }, { "epoch": 0.82, "grad_norm": 1.0943182706832886, "learning_rate": 8.169532164706101e-07, "loss": 0.4704, "step": 12719 }, { "epoch": 0.82, "grad_norm": 1.1894391775131226, "learning_rate": 8.163806838478683e-07, "loss": 0.5175, "step": 12720 }, { "epoch": 0.82, "grad_norm": 1.0992931127548218, "learning_rate": 8.15808334080333e-07, "loss": 0.4683, "step": 12721 }, { "epoch": 0.82, "grad_norm": 1.1293472051620483, "learning_rate": 8.152361671930209e-07, "loss": 0.5182, "step": 12722 }, { "epoch": 0.82, "grad_norm": 1.270003318786621, "learning_rate": 8.146641832109386e-07, "loss": 0.4949, "step": 12723 }, { "epoch": 0.82, "grad_norm": 1.1993203163146973, "learning_rate": 8.140923821590863e-07, "loss": 0.5488, "step": 12724 }, { "epoch": 0.82, "grad_norm": 1.2264235019683838, "learning_rate": 8.135207640624559e-07, "loss": 0.5103, "step": 12725 }, { "epoch": 0.82, "grad_norm": 1.0949060916900635, "learning_rate": 8.129493289460321e-07, "loss": 0.5016, "step": 12726 }, { "epoch": 0.82, "grad_norm": 1.1065988540649414, "learning_rate": 8.1237807683479e-07, "loss": 0.4734, "step": 12727 }, { "epoch": 0.82, "grad_norm": 1.2214738130569458, "learning_rate": 8.118070077536993e-07, "loss": 0.514, "step": 12728 }, { "epoch": 0.82, "grad_norm": 1.2610445022583008, "learning_rate": 8.112361217277176e-07, "loss": 0.4869, "step": 12729 }, { "epoch": 0.82, "grad_norm": 1.1101888418197632, "learning_rate": 8.106654187817975e-07, "loss": 0.4786, "step": 12730 }, { "epoch": 0.82, "grad_norm": 1.1924558877944946, "learning_rate": 8.100948989408836e-07, "loss": 0.5162, "step": 12731 }, { "epoch": 0.82, "grad_norm": 1.2592412233352661, "learning_rate": 8.095245622299114e-07, "loss": 0.5431, "step": 12732 }, { "epoch": 0.82, "grad_norm": 1.3196241855621338, "learning_rate": 8.089544086738088e-07, "loss": 0.531, "step": 12733 }, { "epoch": 0.82, "grad_norm": 1.2336342334747314, "learning_rate": 8.083844382974976e-07, "loss": 0.5004, "step": 12734 }, { "epoch": 0.82, "grad_norm": 1.1595462560653687, "learning_rate": 8.078146511258867e-07, "loss": 0.5499, "step": 12735 }, { "epoch": 0.82, "grad_norm": 1.0764163732528687, "learning_rate": 8.072450471838817e-07, "loss": 0.4759, "step": 12736 }, { "epoch": 0.82, "grad_norm": 1.2369037866592407, "learning_rate": 8.066756264963776e-07, "loss": 0.5161, "step": 12737 }, { "epoch": 0.82, "grad_norm": 1.216835856437683, "learning_rate": 8.061063890882637e-07, "loss": 0.5078, "step": 12738 }, { "epoch": 0.82, "grad_norm": 1.29033625125885, "learning_rate": 8.055373349844187e-07, "loss": 0.5237, "step": 12739 }, { "epoch": 0.82, "grad_norm": 1.1519659757614136, "learning_rate": 8.049684642097161e-07, "loss": 0.4975, "step": 12740 }, { "epoch": 0.82, "grad_norm": 1.182916522026062, "learning_rate": 8.043997767890171e-07, "loss": 0.5037, "step": 12741 }, { "epoch": 0.82, "grad_norm": 1.1540255546569824, "learning_rate": 8.038312727471798e-07, "loss": 0.4974, "step": 12742 }, { "epoch": 0.82, "grad_norm": 1.2148935794830322, "learning_rate": 8.032629521090507e-07, "loss": 0.4908, "step": 12743 }, { "epoch": 0.82, "grad_norm": 1.2604386806488037, "learning_rate": 8.026948148994706e-07, "loss": 0.5116, "step": 12744 }, { "epoch": 0.82, "grad_norm": 1.1313287019729614, "learning_rate": 8.021268611432709e-07, "loss": 0.548, "step": 12745 }, { "epoch": 0.82, "grad_norm": 1.2541718482971191, "learning_rate": 8.015590908652765e-07, "loss": 0.5224, "step": 12746 }, { "epoch": 0.82, "grad_norm": 1.1678529977798462, "learning_rate": 8.00991504090301e-07, "loss": 0.4962, "step": 12747 }, { "epoch": 0.82, "grad_norm": 1.3224760293960571, "learning_rate": 8.004241008431535e-07, "loss": 0.5716, "step": 12748 }, { "epoch": 0.82, "grad_norm": 1.2142022848129272, "learning_rate": 7.998568811486335e-07, "loss": 0.5385, "step": 12749 }, { "epoch": 0.82, "grad_norm": 1.27951979637146, "learning_rate": 7.992898450315334e-07, "loss": 0.4576, "step": 12750 }, { "epoch": 0.82, "grad_norm": 1.1202735900878906, "learning_rate": 7.987229925166351e-07, "loss": 0.5073, "step": 12751 }, { "epoch": 0.82, "grad_norm": 1.1727418899536133, "learning_rate": 7.98156323628716e-07, "loss": 0.5148, "step": 12752 }, { "epoch": 0.82, "grad_norm": 1.0876365900039673, "learning_rate": 7.975898383925424e-07, "loss": 0.4965, "step": 12753 }, { "epoch": 0.82, "grad_norm": 1.1493706703186035, "learning_rate": 7.970235368328749e-07, "loss": 0.5056, "step": 12754 }, { "epoch": 0.82, "grad_norm": 1.2366125583648682, "learning_rate": 7.964574189744661e-07, "loss": 0.5351, "step": 12755 }, { "epoch": 0.82, "grad_norm": 1.1735343933105469, "learning_rate": 7.958914848420568e-07, "loss": 0.5163, "step": 12756 }, { "epoch": 0.82, "grad_norm": 1.1145256757736206, "learning_rate": 7.953257344603838e-07, "loss": 0.5011, "step": 12757 }, { "epoch": 0.82, "grad_norm": 1.1200112104415894, "learning_rate": 7.947601678541749e-07, "loss": 0.509, "step": 12758 }, { "epoch": 0.82, "grad_norm": 1.1539689302444458, "learning_rate": 7.941947850481507e-07, "loss": 0.4438, "step": 12759 }, { "epoch": 0.82, "grad_norm": 1.189202070236206, "learning_rate": 7.936295860670201e-07, "loss": 0.5094, "step": 12760 }, { "epoch": 0.82, "grad_norm": 1.1239217519760132, "learning_rate": 7.930645709354878e-07, "loss": 0.5022, "step": 12761 }, { "epoch": 0.82, "grad_norm": 1.1893768310546875, "learning_rate": 7.924997396782491e-07, "loss": 0.5459, "step": 12762 }, { "epoch": 0.82, "grad_norm": 1.197762370109558, "learning_rate": 7.919350923199909e-07, "loss": 0.5141, "step": 12763 }, { "epoch": 0.82, "grad_norm": 1.1988215446472168, "learning_rate": 7.913706288853945e-07, "loss": 0.5134, "step": 12764 }, { "epoch": 0.82, "grad_norm": 1.1121941804885864, "learning_rate": 7.90806349399128e-07, "loss": 0.5242, "step": 12765 }, { "epoch": 0.82, "grad_norm": 1.2670094966888428, "learning_rate": 7.902422538858562e-07, "loss": 0.5125, "step": 12766 }, { "epoch": 0.82, "grad_norm": 1.0538413524627686, "learning_rate": 7.896783423702342e-07, "loss": 0.4595, "step": 12767 }, { "epoch": 0.82, "grad_norm": 1.2673113346099854, "learning_rate": 7.891146148769091e-07, "loss": 0.5546, "step": 12768 }, { "epoch": 0.82, "grad_norm": 1.167482614517212, "learning_rate": 7.885510714305211e-07, "loss": 0.4952, "step": 12769 }, { "epoch": 0.82, "grad_norm": 1.1384426355361938, "learning_rate": 7.879877120556989e-07, "loss": 0.5116, "step": 12770 }, { "epoch": 0.82, "grad_norm": 1.145924687385559, "learning_rate": 7.874245367770667e-07, "loss": 0.5238, "step": 12771 }, { "epoch": 0.82, "grad_norm": 1.1774953603744507, "learning_rate": 7.868615456192391e-07, "loss": 0.5218, "step": 12772 }, { "epoch": 0.82, "grad_norm": 1.2540818452835083, "learning_rate": 7.862987386068238e-07, "loss": 0.5264, "step": 12773 }, { "epoch": 0.82, "grad_norm": 1.1066744327545166, "learning_rate": 7.857361157644188e-07, "loss": 0.4937, "step": 12774 }, { "epoch": 0.82, "grad_norm": 1.1150554418563843, "learning_rate": 7.851736771166162e-07, "loss": 0.5111, "step": 12775 }, { "epoch": 0.82, "grad_norm": 1.1358654499053955, "learning_rate": 7.846114226879969e-07, "loss": 0.513, "step": 12776 }, { "epoch": 0.82, "grad_norm": 1.2548326253890991, "learning_rate": 7.840493525031367e-07, "loss": 0.5541, "step": 12777 }, { "epoch": 0.82, "grad_norm": 1.0888433456420898, "learning_rate": 7.834874665866021e-07, "loss": 0.5127, "step": 12778 }, { "epoch": 0.82, "grad_norm": 1.241919994354248, "learning_rate": 7.829257649629512e-07, "loss": 0.5222, "step": 12779 }, { "epoch": 0.82, "grad_norm": 1.235962152481079, "learning_rate": 7.823642476567351e-07, "loss": 0.553, "step": 12780 }, { "epoch": 0.82, "grad_norm": 1.141884446144104, "learning_rate": 7.818029146924971e-07, "loss": 0.5229, "step": 12781 }, { "epoch": 0.83, "grad_norm": 1.2485352754592896, "learning_rate": 7.812417660947691e-07, "loss": 0.5001, "step": 12782 }, { "epoch": 0.83, "grad_norm": 1.136861801147461, "learning_rate": 7.806808018880795e-07, "loss": 0.5206, "step": 12783 }, { "epoch": 0.83, "grad_norm": 1.277768611907959, "learning_rate": 7.801200220969457e-07, "loss": 0.5065, "step": 12784 }, { "epoch": 0.83, "grad_norm": 1.2167969942092896, "learning_rate": 7.795594267458789e-07, "loss": 0.4561, "step": 12785 }, { "epoch": 0.83, "grad_norm": 1.1987589597702026, "learning_rate": 7.789990158593803e-07, "loss": 0.4628, "step": 12786 }, { "epoch": 0.83, "grad_norm": 1.1226694583892822, "learning_rate": 7.784387894619455e-07, "loss": 0.5103, "step": 12787 }, { "epoch": 0.83, "grad_norm": 1.1684173345565796, "learning_rate": 7.77878747578058e-07, "loss": 0.4662, "step": 12788 }, { "epoch": 0.83, "grad_norm": 1.3051916360855103, "learning_rate": 7.773188902321976e-07, "loss": 0.5612, "step": 12789 }, { "epoch": 0.83, "grad_norm": 1.1633864641189575, "learning_rate": 7.767592174488342e-07, "loss": 0.5174, "step": 12790 }, { "epoch": 0.83, "grad_norm": 1.2031208276748657, "learning_rate": 7.761997292524287e-07, "loss": 0.4848, "step": 12791 }, { "epoch": 0.83, "grad_norm": 1.1145131587982178, "learning_rate": 7.756404256674354e-07, "loss": 0.4882, "step": 12792 }, { "epoch": 0.83, "grad_norm": 1.1585031747817993, "learning_rate": 7.750813067183016e-07, "loss": 0.4812, "step": 12793 }, { "epoch": 0.83, "grad_norm": 1.2016440629959106, "learning_rate": 7.745223724294626e-07, "loss": 0.5649, "step": 12794 }, { "epoch": 0.83, "grad_norm": 1.2057157754898071, "learning_rate": 7.739636228253484e-07, "loss": 0.5193, "step": 12795 }, { "epoch": 0.83, "grad_norm": 1.1693161725997925, "learning_rate": 7.734050579303809e-07, "loss": 0.52, "step": 12796 }, { "epoch": 0.83, "grad_norm": 1.1926023960113525, "learning_rate": 7.728466777689736e-07, "loss": 0.5262, "step": 12797 }, { "epoch": 0.83, "grad_norm": 1.1173738241195679, "learning_rate": 7.72288482365533e-07, "loss": 0.5044, "step": 12798 }, { "epoch": 0.83, "grad_norm": 1.1814897060394287, "learning_rate": 7.71730471744454e-07, "loss": 0.5654, "step": 12799 }, { "epoch": 0.83, "grad_norm": 1.1585288047790527, "learning_rate": 7.711726459301272e-07, "loss": 0.4982, "step": 12800 }, { "epoch": 0.83, "grad_norm": 1.2244020700454712, "learning_rate": 7.70615004946933e-07, "loss": 0.4796, "step": 12801 }, { "epoch": 0.83, "grad_norm": 1.3209877014160156, "learning_rate": 7.700575488192458e-07, "loss": 0.5615, "step": 12802 }, { "epoch": 0.83, "grad_norm": 1.1907258033752441, "learning_rate": 7.695002775714289e-07, "loss": 0.5223, "step": 12803 }, { "epoch": 0.83, "grad_norm": 1.1618411540985107, "learning_rate": 7.689431912278416e-07, "loss": 0.4887, "step": 12804 }, { "epoch": 0.83, "grad_norm": 1.2035021781921387, "learning_rate": 7.683862898128302e-07, "loss": 0.5403, "step": 12805 }, { "epoch": 0.83, "grad_norm": 1.2033259868621826, "learning_rate": 7.678295733507357e-07, "loss": 0.4912, "step": 12806 }, { "epoch": 0.83, "grad_norm": 1.2327417135238647, "learning_rate": 7.672730418658919e-07, "loss": 0.5021, "step": 12807 }, { "epoch": 0.83, "grad_norm": 1.1597930192947388, "learning_rate": 7.667166953826227e-07, "loss": 0.5154, "step": 12808 }, { "epoch": 0.83, "grad_norm": 1.1577107906341553, "learning_rate": 7.661605339252448e-07, "loss": 0.5299, "step": 12809 }, { "epoch": 0.83, "grad_norm": 1.1501641273498535, "learning_rate": 7.656045575180682e-07, "loss": 0.4996, "step": 12810 }, { "epoch": 0.83, "grad_norm": 1.1789697408676147, "learning_rate": 7.650487661853895e-07, "loss": 0.4885, "step": 12811 }, { "epoch": 0.83, "grad_norm": 1.22719407081604, "learning_rate": 7.644931599515031e-07, "loss": 0.4979, "step": 12812 }, { "epoch": 0.83, "grad_norm": 1.2232353687286377, "learning_rate": 7.639377388406932e-07, "loss": 0.4849, "step": 12813 }, { "epoch": 0.83, "grad_norm": 1.334710955619812, "learning_rate": 7.633825028772357e-07, "loss": 0.5248, "step": 12814 }, { "epoch": 0.83, "grad_norm": 1.1855688095092773, "learning_rate": 7.628274520853978e-07, "loss": 0.5249, "step": 12815 }, { "epoch": 0.83, "grad_norm": 1.2033751010894775, "learning_rate": 7.622725864894414e-07, "loss": 0.5093, "step": 12816 }, { "epoch": 0.83, "grad_norm": 1.2559860944747925, "learning_rate": 7.617179061136154e-07, "loss": 0.511, "step": 12817 }, { "epoch": 0.83, "grad_norm": 1.3058509826660156, "learning_rate": 7.611634109821653e-07, "loss": 0.5635, "step": 12818 }, { "epoch": 0.83, "grad_norm": 1.1883482933044434, "learning_rate": 7.606091011193256e-07, "loss": 0.5149, "step": 12819 }, { "epoch": 0.83, "grad_norm": 1.2133374214172363, "learning_rate": 7.600549765493248e-07, "loss": 0.5341, "step": 12820 }, { "epoch": 0.83, "grad_norm": 1.2431252002716064, "learning_rate": 7.595010372963812e-07, "loss": 0.5058, "step": 12821 }, { "epoch": 0.83, "grad_norm": 1.1221997737884521, "learning_rate": 7.589472833847083e-07, "loss": 0.5487, "step": 12822 }, { "epoch": 0.83, "grad_norm": 1.2842774391174316, "learning_rate": 7.583937148385062e-07, "loss": 0.5279, "step": 12823 }, { "epoch": 0.83, "grad_norm": 1.0790331363677979, "learning_rate": 7.578403316819716e-07, "loss": 0.4909, "step": 12824 }, { "epoch": 0.83, "grad_norm": 1.2450724840164185, "learning_rate": 7.572871339392907e-07, "loss": 0.5176, "step": 12825 }, { "epoch": 0.83, "grad_norm": 1.2098091840744019, "learning_rate": 7.56734121634643e-07, "loss": 0.4886, "step": 12826 }, { "epoch": 0.83, "grad_norm": 1.1345962285995483, "learning_rate": 7.561812947921993e-07, "loss": 0.5155, "step": 12827 }, { "epoch": 0.83, "grad_norm": 1.155343770980835, "learning_rate": 7.55628653436123e-07, "loss": 0.5036, "step": 12828 }, { "epoch": 0.83, "grad_norm": 1.22857666015625, "learning_rate": 7.550761975905669e-07, "loss": 0.5256, "step": 12829 }, { "epoch": 0.83, "grad_norm": 1.2255806922912598, "learning_rate": 7.545239272796773e-07, "loss": 0.5448, "step": 12830 }, { "epoch": 0.83, "grad_norm": 1.1354814767837524, "learning_rate": 7.539718425275938e-07, "loss": 0.4739, "step": 12831 }, { "epoch": 0.83, "grad_norm": 1.2314867973327637, "learning_rate": 7.53419943358446e-07, "loss": 0.545, "step": 12832 }, { "epoch": 0.83, "grad_norm": 1.2936513423919678, "learning_rate": 7.528682297963568e-07, "loss": 0.5296, "step": 12833 }, { "epoch": 0.83, "grad_norm": 1.1953978538513184, "learning_rate": 7.523167018654398e-07, "loss": 0.483, "step": 12834 }, { "epoch": 0.83, "grad_norm": 1.1376725435256958, "learning_rate": 7.517653595898e-07, "loss": 0.5119, "step": 12835 }, { "epoch": 0.83, "grad_norm": 1.1696823835372925, "learning_rate": 7.512142029935359e-07, "loss": 0.5058, "step": 12836 }, { "epoch": 0.83, "grad_norm": 1.264439582824707, "learning_rate": 7.506632321007362e-07, "loss": 0.4826, "step": 12837 }, { "epoch": 0.83, "grad_norm": 1.13455069065094, "learning_rate": 7.501124469354837e-07, "loss": 0.5291, "step": 12838 }, { "epoch": 0.83, "grad_norm": 1.2977081537246704, "learning_rate": 7.495618475218524e-07, "loss": 0.4999, "step": 12839 }, { "epoch": 0.83, "grad_norm": 1.3417960405349731, "learning_rate": 7.490114338839055e-07, "loss": 0.5254, "step": 12840 }, { "epoch": 0.83, "grad_norm": 1.139760136604309, "learning_rate": 7.484612060457008e-07, "loss": 0.5108, "step": 12841 }, { "epoch": 0.83, "grad_norm": 1.1738587617874146, "learning_rate": 7.47911164031288e-07, "loss": 0.4975, "step": 12842 }, { "epoch": 0.83, "grad_norm": 1.283083200454712, "learning_rate": 7.473613078647074e-07, "loss": 0.519, "step": 12843 }, { "epoch": 0.83, "grad_norm": 1.2438558340072632, "learning_rate": 7.468116375699935e-07, "loss": 0.4961, "step": 12844 }, { "epoch": 0.83, "grad_norm": 1.2147746086120605, "learning_rate": 7.462621531711683e-07, "loss": 0.5216, "step": 12845 }, { "epoch": 0.83, "grad_norm": 1.1798005104064941, "learning_rate": 7.457128546922493e-07, "loss": 0.5101, "step": 12846 }, { "epoch": 0.83, "grad_norm": 1.1251192092895508, "learning_rate": 7.451637421572455e-07, "loss": 0.4843, "step": 12847 }, { "epoch": 0.83, "grad_norm": 1.161012887954712, "learning_rate": 7.446148155901578e-07, "loss": 0.5126, "step": 12848 }, { "epoch": 0.83, "grad_norm": 1.2086652517318726, "learning_rate": 7.44066075014977e-07, "loss": 0.5349, "step": 12849 }, { "epoch": 0.83, "grad_norm": 1.1100406646728516, "learning_rate": 7.435175204556872e-07, "loss": 0.4758, "step": 12850 }, { "epoch": 0.83, "grad_norm": 1.2411400079727173, "learning_rate": 7.429691519362642e-07, "loss": 0.5532, "step": 12851 }, { "epoch": 0.83, "grad_norm": 1.1467727422714233, "learning_rate": 7.424209694806766e-07, "loss": 0.4836, "step": 12852 }, { "epoch": 0.83, "grad_norm": 1.1544157266616821, "learning_rate": 7.418729731128854e-07, "loss": 0.506, "step": 12853 }, { "epoch": 0.83, "grad_norm": 1.3315136432647705, "learning_rate": 7.413251628568385e-07, "loss": 0.5141, "step": 12854 }, { "epoch": 0.83, "grad_norm": 1.2520833015441895, "learning_rate": 7.407775387364818e-07, "loss": 0.4999, "step": 12855 }, { "epoch": 0.83, "grad_norm": 1.1891452074050903, "learning_rate": 7.402301007757496e-07, "loss": 0.4937, "step": 12856 }, { "epoch": 0.83, "grad_norm": 1.2132823467254639, "learning_rate": 7.396828489985708e-07, "loss": 0.479, "step": 12857 }, { "epoch": 0.83, "grad_norm": 1.150643229484558, "learning_rate": 7.391357834288615e-07, "loss": 0.4996, "step": 12858 }, { "epoch": 0.83, "grad_norm": 1.2646846771240234, "learning_rate": 7.385889040905336e-07, "loss": 0.5463, "step": 12859 }, { "epoch": 0.83, "grad_norm": 1.0902022123336792, "learning_rate": 7.380422110074908e-07, "loss": 0.5175, "step": 12860 }, { "epoch": 0.83, "grad_norm": 1.25526762008667, "learning_rate": 7.374957042036268e-07, "loss": 0.5256, "step": 12861 }, { "epoch": 0.83, "grad_norm": 1.2455823421478271, "learning_rate": 7.36949383702828e-07, "loss": 0.5004, "step": 12862 }, { "epoch": 0.83, "grad_norm": 1.188513994216919, "learning_rate": 7.364032495289742e-07, "loss": 0.46, "step": 12863 }, { "epoch": 0.83, "grad_norm": 1.270830750465393, "learning_rate": 7.358573017059323e-07, "loss": 0.573, "step": 12864 }, { "epoch": 0.83, "grad_norm": 1.0953984260559082, "learning_rate": 7.353115402575666e-07, "loss": 0.4622, "step": 12865 }, { "epoch": 0.83, "grad_norm": 1.2315243482589722, "learning_rate": 7.347659652077304e-07, "loss": 0.5028, "step": 12866 }, { "epoch": 0.83, "grad_norm": 1.1782057285308838, "learning_rate": 7.342205765802695e-07, "loss": 0.519, "step": 12867 }, { "epoch": 0.83, "grad_norm": 1.0279755592346191, "learning_rate": 7.336753743990215e-07, "loss": 0.4733, "step": 12868 }, { "epoch": 0.83, "grad_norm": 1.193036675453186, "learning_rate": 7.331303586878163e-07, "loss": 0.4686, "step": 12869 }, { "epoch": 0.83, "grad_norm": 1.325628638267517, "learning_rate": 7.325855294704737e-07, "loss": 0.5263, "step": 12870 }, { "epoch": 0.83, "grad_norm": 1.2387909889221191, "learning_rate": 7.320408867708073e-07, "loss": 0.5359, "step": 12871 }, { "epoch": 0.83, "grad_norm": 1.11496102809906, "learning_rate": 7.314964306126227e-07, "loss": 0.5034, "step": 12872 }, { "epoch": 0.83, "grad_norm": 1.1868314743041992, "learning_rate": 7.309521610197157e-07, "loss": 0.5105, "step": 12873 }, { "epoch": 0.83, "grad_norm": 1.1350139379501343, "learning_rate": 7.304080780158756e-07, "loss": 0.5327, "step": 12874 }, { "epoch": 0.83, "grad_norm": 1.17665696144104, "learning_rate": 7.298641816248836e-07, "loss": 0.5492, "step": 12875 }, { "epoch": 0.83, "grad_norm": 1.2237064838409424, "learning_rate": 7.293204718705105e-07, "loss": 0.5314, "step": 12876 }, { "epoch": 0.83, "grad_norm": 1.2150423526763916, "learning_rate": 7.287769487765206e-07, "loss": 0.4695, "step": 12877 }, { "epoch": 0.83, "grad_norm": 1.1710515022277832, "learning_rate": 7.282336123666705e-07, "loss": 0.4683, "step": 12878 }, { "epoch": 0.83, "grad_norm": 1.2008159160614014, "learning_rate": 7.27690462664708e-07, "loss": 0.5496, "step": 12879 }, { "epoch": 0.83, "grad_norm": 1.1199748516082764, "learning_rate": 7.271474996943729e-07, "loss": 0.5051, "step": 12880 }, { "epoch": 0.83, "grad_norm": 1.2199069261550903, "learning_rate": 7.266047234793972e-07, "loss": 0.5466, "step": 12881 }, { "epoch": 0.83, "grad_norm": 1.2338314056396484, "learning_rate": 7.260621340435025e-07, "loss": 0.5432, "step": 12882 }, { "epoch": 0.83, "grad_norm": 1.1917157173156738, "learning_rate": 7.255197314104051e-07, "loss": 0.4786, "step": 12883 }, { "epoch": 0.83, "grad_norm": 1.1956301927566528, "learning_rate": 7.249775156038124e-07, "loss": 0.5054, "step": 12884 }, { "epoch": 0.83, "grad_norm": 1.1507395505905151, "learning_rate": 7.244354866474224e-07, "loss": 0.5004, "step": 12885 }, { "epoch": 0.83, "grad_norm": 1.1885859966278076, "learning_rate": 7.238936445649269e-07, "loss": 0.4889, "step": 12886 }, { "epoch": 0.83, "grad_norm": 1.1472814083099365, "learning_rate": 7.233519893800068e-07, "loss": 0.4535, "step": 12887 }, { "epoch": 0.83, "grad_norm": 1.2091407775878906, "learning_rate": 7.228105211163378e-07, "loss": 0.5038, "step": 12888 }, { "epoch": 0.83, "grad_norm": 1.0941641330718994, "learning_rate": 7.22269239797585e-07, "loss": 0.4999, "step": 12889 }, { "epoch": 0.83, "grad_norm": 1.1656758785247803, "learning_rate": 7.217281454474073e-07, "loss": 0.4814, "step": 12890 }, { "epoch": 0.83, "grad_norm": 1.1298006772994995, "learning_rate": 7.211872380894541e-07, "loss": 0.4923, "step": 12891 }, { "epoch": 0.83, "grad_norm": 1.20437753200531, "learning_rate": 7.206465177473687e-07, "loss": 0.508, "step": 12892 }, { "epoch": 0.83, "grad_norm": 1.1334565877914429, "learning_rate": 7.201059844447817e-07, "loss": 0.4875, "step": 12893 }, { "epoch": 0.83, "grad_norm": 1.1967601776123047, "learning_rate": 7.195656382053201e-07, "loss": 0.536, "step": 12894 }, { "epoch": 0.83, "grad_norm": 1.1927504539489746, "learning_rate": 7.190254790526008e-07, "loss": 0.4791, "step": 12895 }, { "epoch": 0.83, "grad_norm": 1.296291708946228, "learning_rate": 7.184855070102326e-07, "loss": 0.5297, "step": 12896 }, { "epoch": 0.83, "grad_norm": 1.1899672746658325, "learning_rate": 7.179457221018165e-07, "loss": 0.5344, "step": 12897 }, { "epoch": 0.83, "grad_norm": 1.1792644262313843, "learning_rate": 7.174061243509462e-07, "loss": 0.5412, "step": 12898 }, { "epoch": 0.83, "grad_norm": 1.2349562644958496, "learning_rate": 7.168667137812036e-07, "loss": 0.5099, "step": 12899 }, { "epoch": 0.83, "grad_norm": 1.122420072555542, "learning_rate": 7.163274904161666e-07, "loss": 0.5163, "step": 12900 }, { "epoch": 0.83, "grad_norm": 1.2045154571533203, "learning_rate": 7.157884542794024e-07, "loss": 0.5046, "step": 12901 }, { "epoch": 0.83, "grad_norm": 1.1743874549865723, "learning_rate": 7.152496053944725e-07, "loss": 0.4891, "step": 12902 }, { "epoch": 0.83, "grad_norm": 1.3329931497573853, "learning_rate": 7.147109437849265e-07, "loss": 0.5254, "step": 12903 }, { "epoch": 0.83, "grad_norm": 1.2567211389541626, "learning_rate": 7.141724694743108e-07, "loss": 0.5483, "step": 12904 }, { "epoch": 0.83, "grad_norm": 1.2197378873825073, "learning_rate": 7.136341824861576e-07, "loss": 0.5162, "step": 12905 }, { "epoch": 0.83, "grad_norm": 1.1862242221832275, "learning_rate": 7.130960828439948e-07, "loss": 0.5127, "step": 12906 }, { "epoch": 0.83, "grad_norm": 1.1512260437011719, "learning_rate": 7.125581705713425e-07, "loss": 0.4876, "step": 12907 }, { "epoch": 0.83, "grad_norm": 1.1598856449127197, "learning_rate": 7.120204456917102e-07, "loss": 0.525, "step": 12908 }, { "epoch": 0.83, "grad_norm": 1.1043267250061035, "learning_rate": 7.114829082286017e-07, "loss": 0.4916, "step": 12909 }, { "epoch": 0.83, "grad_norm": 1.1422978639602661, "learning_rate": 7.109455582055114e-07, "loss": 0.5254, "step": 12910 }, { "epoch": 0.83, "grad_norm": 1.1950064897537231, "learning_rate": 7.10408395645924e-07, "loss": 0.5118, "step": 12911 }, { "epoch": 0.83, "grad_norm": 1.138892412185669, "learning_rate": 7.098714205733181e-07, "loss": 0.5194, "step": 12912 }, { "epoch": 0.83, "grad_norm": 1.1453297138214111, "learning_rate": 7.093346330111639e-07, "loss": 0.5145, "step": 12913 }, { "epoch": 0.83, "grad_norm": 1.1015431880950928, "learning_rate": 7.08798032982923e-07, "loss": 0.4925, "step": 12914 }, { "epoch": 0.83, "grad_norm": 1.1785759925842285, "learning_rate": 7.082616205120485e-07, "loss": 0.5581, "step": 12915 }, { "epoch": 0.83, "grad_norm": 1.1814227104187012, "learning_rate": 7.07725395621987e-07, "loss": 0.5646, "step": 12916 }, { "epoch": 0.83, "grad_norm": 1.2555959224700928, "learning_rate": 7.071893583361728e-07, "loss": 0.4883, "step": 12917 }, { "epoch": 0.83, "grad_norm": 1.2505998611450195, "learning_rate": 7.066535086780363e-07, "loss": 0.5113, "step": 12918 }, { "epoch": 0.83, "grad_norm": 1.19558584690094, "learning_rate": 7.061178466709978e-07, "loss": 0.5464, "step": 12919 }, { "epoch": 0.83, "grad_norm": 1.2025349140167236, "learning_rate": 7.055823723384703e-07, "loss": 0.4881, "step": 12920 }, { "epoch": 0.83, "grad_norm": 1.1151503324508667, "learning_rate": 7.050470857038572e-07, "loss": 0.4906, "step": 12921 }, { "epoch": 0.83, "grad_norm": 1.1387423276901245, "learning_rate": 7.045119867905559e-07, "loss": 0.5402, "step": 12922 }, { "epoch": 0.83, "grad_norm": 1.1978704929351807, "learning_rate": 7.039770756219522e-07, "loss": 0.5459, "step": 12923 }, { "epoch": 0.83, "grad_norm": 1.21336829662323, "learning_rate": 7.034423522214268e-07, "loss": 0.5348, "step": 12924 }, { "epoch": 0.83, "grad_norm": 1.1371787786483765, "learning_rate": 7.029078166123505e-07, "loss": 0.4604, "step": 12925 }, { "epoch": 0.83, "grad_norm": 1.1892492771148682, "learning_rate": 7.023734688180872e-07, "loss": 0.5047, "step": 12926 }, { "epoch": 0.83, "grad_norm": 1.211349606513977, "learning_rate": 7.018393088619924e-07, "loss": 0.4983, "step": 12927 }, { "epoch": 0.83, "grad_norm": 1.231672763824463, "learning_rate": 7.01305336767411e-07, "loss": 0.5604, "step": 12928 }, { "epoch": 0.83, "grad_norm": 1.136507272720337, "learning_rate": 7.007715525576831e-07, "loss": 0.5126, "step": 12929 }, { "epoch": 0.83, "grad_norm": 1.3617515563964844, "learning_rate": 7.002379562561379e-07, "loss": 0.5289, "step": 12930 }, { "epoch": 0.83, "grad_norm": 1.1702545881271362, "learning_rate": 6.997045478860981e-07, "loss": 0.4856, "step": 12931 }, { "epoch": 0.83, "grad_norm": 1.159443736076355, "learning_rate": 6.991713274708778e-07, "loss": 0.5059, "step": 12932 }, { "epoch": 0.83, "grad_norm": 1.1969963312149048, "learning_rate": 6.986382950337834e-07, "loss": 0.4906, "step": 12933 }, { "epoch": 0.83, "grad_norm": 1.323120355606079, "learning_rate": 6.981054505981105e-07, "loss": 0.5983, "step": 12934 }, { "epoch": 0.83, "grad_norm": 1.2318979501724243, "learning_rate": 6.975727941871491e-07, "loss": 0.5189, "step": 12935 }, { "epoch": 0.83, "grad_norm": 1.2655014991760254, "learning_rate": 6.970403258241809e-07, "loss": 0.537, "step": 12936 }, { "epoch": 0.84, "grad_norm": 1.2639210224151611, "learning_rate": 6.965080455324791e-07, "loss": 0.4414, "step": 12937 }, { "epoch": 0.84, "grad_norm": 1.1003788709640503, "learning_rate": 6.959759533353061e-07, "loss": 0.5145, "step": 12938 }, { "epoch": 0.84, "grad_norm": 1.1168156862258911, "learning_rate": 6.954440492559201e-07, "loss": 0.4456, "step": 12939 }, { "epoch": 0.84, "grad_norm": 1.3431921005249023, "learning_rate": 6.949123333175684e-07, "loss": 0.5125, "step": 12940 }, { "epoch": 0.84, "grad_norm": 1.1211388111114502, "learning_rate": 6.943808055434914e-07, "loss": 0.4522, "step": 12941 }, { "epoch": 0.84, "grad_norm": 1.2137876749038696, "learning_rate": 6.938494659569222e-07, "loss": 0.5112, "step": 12942 }, { "epoch": 0.84, "grad_norm": 1.1610548496246338, "learning_rate": 6.933183145810818e-07, "loss": 0.4949, "step": 12943 }, { "epoch": 0.84, "grad_norm": 1.0797441005706787, "learning_rate": 6.927873514391859e-07, "loss": 0.4716, "step": 12944 }, { "epoch": 0.84, "grad_norm": 1.1651877164840698, "learning_rate": 6.922565765544425e-07, "loss": 0.5467, "step": 12945 }, { "epoch": 0.84, "grad_norm": 1.173113226890564, "learning_rate": 6.917259899500511e-07, "loss": 0.5104, "step": 12946 }, { "epoch": 0.84, "grad_norm": 1.1202436685562134, "learning_rate": 6.911955916492003e-07, "loss": 0.4875, "step": 12947 }, { "epoch": 0.84, "grad_norm": 1.1695359945297241, "learning_rate": 6.90665381675073e-07, "loss": 0.5241, "step": 12948 }, { "epoch": 0.84, "grad_norm": 1.1083106994628906, "learning_rate": 6.901353600508437e-07, "loss": 0.5002, "step": 12949 }, { "epoch": 0.84, "grad_norm": 1.2812225818634033, "learning_rate": 6.896055267996782e-07, "loss": 0.5208, "step": 12950 }, { "epoch": 0.84, "grad_norm": 1.1548129320144653, "learning_rate": 6.890758819447352e-07, "loss": 0.4952, "step": 12951 }, { "epoch": 0.84, "grad_norm": 1.212378740310669, "learning_rate": 6.885464255091617e-07, "loss": 0.5313, "step": 12952 }, { "epoch": 0.84, "grad_norm": 1.1275502443313599, "learning_rate": 6.88017157516101e-07, "loss": 0.497, "step": 12953 }, { "epoch": 0.84, "grad_norm": 1.1544641256332397, "learning_rate": 6.874880779886844e-07, "loss": 0.4528, "step": 12954 }, { "epoch": 0.84, "grad_norm": 1.2072246074676514, "learning_rate": 6.869591869500381e-07, "loss": 0.4857, "step": 12955 }, { "epoch": 0.84, "grad_norm": 1.245444416999817, "learning_rate": 6.864304844232778e-07, "loss": 0.4895, "step": 12956 }, { "epoch": 0.84, "grad_norm": 1.1883583068847656, "learning_rate": 6.859019704315128e-07, "loss": 0.4617, "step": 12957 }, { "epoch": 0.84, "grad_norm": 1.227873682975769, "learning_rate": 6.853736449978415e-07, "loss": 0.5298, "step": 12958 }, { "epoch": 0.84, "grad_norm": 1.1798590421676636, "learning_rate": 6.848455081453558e-07, "loss": 0.5301, "step": 12959 }, { "epoch": 0.84, "grad_norm": 1.2316851615905762, "learning_rate": 6.843175598971403e-07, "loss": 0.5332, "step": 12960 }, { "epoch": 0.84, "grad_norm": 1.2357043027877808, "learning_rate": 6.837898002762689e-07, "loss": 0.5096, "step": 12961 }, { "epoch": 0.84, "grad_norm": 1.1271873712539673, "learning_rate": 6.832622293058095e-07, "loss": 0.4513, "step": 12962 }, { "epoch": 0.84, "grad_norm": 1.2818070650100708, "learning_rate": 6.827348470088224e-07, "loss": 0.5301, "step": 12963 }, { "epoch": 0.84, "grad_norm": 1.282200813293457, "learning_rate": 6.822076534083549e-07, "loss": 0.5188, "step": 12964 }, { "epoch": 0.84, "grad_norm": 1.375000238418579, "learning_rate": 6.816806485274508e-07, "loss": 0.528, "step": 12965 }, { "epoch": 0.84, "grad_norm": 1.1861562728881836, "learning_rate": 6.811538323891443e-07, "loss": 0.5417, "step": 12966 }, { "epoch": 0.84, "grad_norm": 1.2091253995895386, "learning_rate": 6.806272050164608e-07, "loss": 0.5221, "step": 12967 }, { "epoch": 0.84, "grad_norm": 1.181399941444397, "learning_rate": 6.801007664324194e-07, "loss": 0.5033, "step": 12968 }, { "epoch": 0.84, "grad_norm": 1.1404392719268799, "learning_rate": 6.795745166600265e-07, "loss": 0.4947, "step": 12969 }, { "epoch": 0.84, "grad_norm": 1.24881911277771, "learning_rate": 6.790484557222849e-07, "loss": 0.5596, "step": 12970 }, { "epoch": 0.84, "grad_norm": 1.23920738697052, "learning_rate": 6.785225836421872e-07, "loss": 0.4801, "step": 12971 }, { "epoch": 0.84, "grad_norm": 1.2996147871017456, "learning_rate": 6.779969004427179e-07, "loss": 0.5488, "step": 12972 }, { "epoch": 0.84, "grad_norm": 1.1674855947494507, "learning_rate": 6.77471406146853e-07, "loss": 0.5536, "step": 12973 }, { "epoch": 0.84, "grad_norm": 1.2241108417510986, "learning_rate": 6.769461007775618e-07, "loss": 0.5382, "step": 12974 }, { "epoch": 0.84, "grad_norm": 1.2852447032928467, "learning_rate": 6.764209843578017e-07, "loss": 0.5751, "step": 12975 }, { "epoch": 0.84, "grad_norm": 1.206821084022522, "learning_rate": 6.758960569105255e-07, "loss": 0.4697, "step": 12976 }, { "epoch": 0.84, "grad_norm": 1.1361300945281982, "learning_rate": 6.753713184586763e-07, "loss": 0.4895, "step": 12977 }, { "epoch": 0.84, "grad_norm": 1.2630929946899414, "learning_rate": 6.748467690251887e-07, "loss": 0.5235, "step": 12978 }, { "epoch": 0.84, "grad_norm": 1.3193535804748535, "learning_rate": 6.743224086329903e-07, "loss": 0.5547, "step": 12979 }, { "epoch": 0.84, "grad_norm": 1.1307263374328613, "learning_rate": 6.73798237305e-07, "loss": 0.4564, "step": 12980 }, { "epoch": 0.84, "grad_norm": 1.1768670082092285, "learning_rate": 6.732742550641258e-07, "loss": 0.5136, "step": 12981 }, { "epoch": 0.84, "grad_norm": 1.0991102457046509, "learning_rate": 6.727504619332709e-07, "loss": 0.4942, "step": 12982 }, { "epoch": 0.84, "grad_norm": 1.226701021194458, "learning_rate": 6.722268579353286e-07, "loss": 0.494, "step": 12983 }, { "epoch": 0.84, "grad_norm": 1.1974421739578247, "learning_rate": 6.717034430931846e-07, "loss": 0.5045, "step": 12984 }, { "epoch": 0.84, "grad_norm": 1.2811203002929688, "learning_rate": 6.711802174297161e-07, "loss": 0.5033, "step": 12985 }, { "epoch": 0.84, "grad_norm": 1.1958789825439453, "learning_rate": 6.70657180967793e-07, "loss": 0.522, "step": 12986 }, { "epoch": 0.84, "grad_norm": 1.1273061037063599, "learning_rate": 6.701343337302735e-07, "loss": 0.5381, "step": 12987 }, { "epoch": 0.84, "grad_norm": 1.1477584838867188, "learning_rate": 6.696116757400112e-07, "loss": 0.4744, "step": 12988 }, { "epoch": 0.84, "grad_norm": 1.2282682657241821, "learning_rate": 6.690892070198496e-07, "loss": 0.559, "step": 12989 }, { "epoch": 0.84, "grad_norm": 1.1204336881637573, "learning_rate": 6.685669275926255e-07, "loss": 0.5089, "step": 12990 }, { "epoch": 0.84, "grad_norm": 1.2118279933929443, "learning_rate": 6.680448374811649e-07, "loss": 0.519, "step": 12991 }, { "epoch": 0.84, "grad_norm": 1.2201037406921387, "learning_rate": 6.675229367082897e-07, "loss": 0.5579, "step": 12992 }, { "epoch": 0.84, "grad_norm": 1.0963150262832642, "learning_rate": 6.670012252968073e-07, "loss": 0.487, "step": 12993 }, { "epoch": 0.84, "grad_norm": 1.1725174188613892, "learning_rate": 6.664797032695224e-07, "loss": 0.5386, "step": 12994 }, { "epoch": 0.84, "grad_norm": 1.310176134109497, "learning_rate": 6.65958370649229e-07, "loss": 0.4887, "step": 12995 }, { "epoch": 0.84, "grad_norm": 1.1738684177398682, "learning_rate": 6.654372274587134e-07, "loss": 0.5115, "step": 12996 }, { "epoch": 0.84, "grad_norm": 1.2846996784210205, "learning_rate": 6.649162737207532e-07, "loss": 0.5518, "step": 12997 }, { "epoch": 0.84, "grad_norm": 1.1444461345672607, "learning_rate": 6.643955094581189e-07, "loss": 0.5279, "step": 12998 }, { "epoch": 0.84, "grad_norm": 1.2780908346176147, "learning_rate": 6.638749346935697e-07, "loss": 0.5522, "step": 12999 }, { "epoch": 0.84, "grad_norm": 1.2141287326812744, "learning_rate": 6.6335454944986e-07, "loss": 0.4944, "step": 13000 }, { "epoch": 0.84, "grad_norm": 1.1813455820083618, "learning_rate": 6.628343537497339e-07, "loss": 0.5568, "step": 13001 }, { "epoch": 0.84, "grad_norm": 1.2651472091674805, "learning_rate": 6.623143476159288e-07, "loss": 0.5309, "step": 13002 }, { "epoch": 0.84, "grad_norm": 1.144870400428772, "learning_rate": 6.617945310711715e-07, "loss": 0.4921, "step": 13003 }, { "epoch": 0.84, "grad_norm": 1.2282475233078003, "learning_rate": 6.612749041381844e-07, "loss": 0.4741, "step": 13004 }, { "epoch": 0.84, "grad_norm": 1.1163734197616577, "learning_rate": 6.607554668396754e-07, "loss": 0.5243, "step": 13005 }, { "epoch": 0.84, "grad_norm": 1.156551718711853, "learning_rate": 6.602362191983496e-07, "loss": 0.5448, "step": 13006 }, { "epoch": 0.84, "grad_norm": 1.175415277481079, "learning_rate": 6.597171612369024e-07, "loss": 0.556, "step": 13007 }, { "epoch": 0.84, "grad_norm": 1.216935157775879, "learning_rate": 6.591982929780194e-07, "loss": 0.5354, "step": 13008 }, { "epoch": 0.84, "grad_norm": 1.1180870532989502, "learning_rate": 6.586796144443813e-07, "loss": 0.4629, "step": 13009 }, { "epoch": 0.84, "grad_norm": 1.2284783124923706, "learning_rate": 6.581611256586551e-07, "loss": 0.5039, "step": 13010 }, { "epoch": 0.84, "grad_norm": 1.3344508409500122, "learning_rate": 6.576428266435043e-07, "loss": 0.5237, "step": 13011 }, { "epoch": 0.84, "grad_norm": 1.2045726776123047, "learning_rate": 6.571247174215816e-07, "loss": 0.5434, "step": 13012 }, { "epoch": 0.84, "grad_norm": 1.220732569694519, "learning_rate": 6.566067980155328e-07, "loss": 0.5037, "step": 13013 }, { "epoch": 0.84, "grad_norm": 1.299457311630249, "learning_rate": 6.560890684479948e-07, "loss": 0.5513, "step": 13014 }, { "epoch": 0.84, "grad_norm": 1.2044495344161987, "learning_rate": 6.555715287415975e-07, "loss": 0.4972, "step": 13015 }, { "epoch": 0.84, "grad_norm": 1.149815320968628, "learning_rate": 6.550541789189585e-07, "loss": 0.5388, "step": 13016 }, { "epoch": 0.84, "grad_norm": 1.2210783958435059, "learning_rate": 6.545370190026917e-07, "loss": 0.5321, "step": 13017 }, { "epoch": 0.84, "grad_norm": 1.087894320487976, "learning_rate": 6.540200490153997e-07, "loss": 0.5494, "step": 13018 }, { "epoch": 0.84, "grad_norm": 1.2438708543777466, "learning_rate": 6.535032689796794e-07, "loss": 0.5316, "step": 13019 }, { "epoch": 0.84, "grad_norm": 1.1307976245880127, "learning_rate": 6.529866789181166e-07, "loss": 0.4895, "step": 13020 }, { "epoch": 0.84, "grad_norm": 1.1377856731414795, "learning_rate": 6.524702788532916e-07, "loss": 0.5164, "step": 13021 }, { "epoch": 0.84, "grad_norm": 1.2354838848114014, "learning_rate": 6.51954068807773e-07, "loss": 0.5593, "step": 13022 }, { "epoch": 0.84, "grad_norm": 1.2594127655029297, "learning_rate": 6.514380488041238e-07, "loss": 0.4906, "step": 13023 }, { "epoch": 0.84, "grad_norm": 1.140580415725708, "learning_rate": 6.509222188648984e-07, "loss": 0.4846, "step": 13024 }, { "epoch": 0.84, "grad_norm": 1.1986815929412842, "learning_rate": 6.504065790126419e-07, "loss": 0.5204, "step": 13025 }, { "epoch": 0.84, "grad_norm": 1.2280993461608887, "learning_rate": 6.498911292698929e-07, "loss": 0.4954, "step": 13026 }, { "epoch": 0.84, "grad_norm": 1.2306495904922485, "learning_rate": 6.493758696591779e-07, "loss": 0.4994, "step": 13027 }, { "epoch": 0.84, "grad_norm": 1.141158103942871, "learning_rate": 6.488608002030189e-07, "loss": 0.4811, "step": 13028 }, { "epoch": 0.84, "grad_norm": 1.2927900552749634, "learning_rate": 6.483459209239284e-07, "loss": 0.5326, "step": 13029 }, { "epoch": 0.84, "grad_norm": 1.188785433769226, "learning_rate": 6.478312318444102e-07, "loss": 0.5051, "step": 13030 }, { "epoch": 0.84, "grad_norm": 1.1926976442337036, "learning_rate": 6.473167329869612e-07, "loss": 0.5081, "step": 13031 }, { "epoch": 0.84, "grad_norm": 1.1897841691970825, "learning_rate": 6.468024243740667e-07, "loss": 0.5183, "step": 13032 }, { "epoch": 0.84, "grad_norm": 1.199693202972412, "learning_rate": 6.462883060282066e-07, "loss": 0.507, "step": 13033 }, { "epoch": 0.84, "grad_norm": 1.1463044881820679, "learning_rate": 6.457743779718523e-07, "loss": 0.4616, "step": 13034 }, { "epoch": 0.84, "grad_norm": 1.165962815284729, "learning_rate": 6.452606402274652e-07, "loss": 0.5014, "step": 13035 }, { "epoch": 0.84, "grad_norm": 1.1206934452056885, "learning_rate": 6.44747092817502e-07, "loss": 0.4774, "step": 13036 }, { "epoch": 0.84, "grad_norm": 1.1813689470291138, "learning_rate": 6.442337357644052e-07, "loss": 0.5096, "step": 13037 }, { "epoch": 0.84, "grad_norm": 1.1568963527679443, "learning_rate": 6.437205690906139e-07, "loss": 0.4904, "step": 13038 }, { "epoch": 0.84, "grad_norm": 1.1487423181533813, "learning_rate": 6.432075928185571e-07, "loss": 0.4873, "step": 13039 }, { "epoch": 0.84, "grad_norm": 1.2771307229995728, "learning_rate": 6.426948069706568e-07, "loss": 0.5288, "step": 13040 }, { "epoch": 0.84, "grad_norm": 1.1595126390457153, "learning_rate": 6.421822115693233e-07, "loss": 0.5061, "step": 13041 }, { "epoch": 0.84, "grad_norm": 1.204485297203064, "learning_rate": 6.41669806636962e-07, "loss": 0.576, "step": 13042 }, { "epoch": 0.84, "grad_norm": 1.130940556526184, "learning_rate": 6.411575921959689e-07, "loss": 0.5338, "step": 13043 }, { "epoch": 0.84, "grad_norm": 1.232926607131958, "learning_rate": 6.406455682687318e-07, "loss": 0.4996, "step": 13044 }, { "epoch": 0.84, "grad_norm": 1.0886918306350708, "learning_rate": 6.401337348776304e-07, "loss": 0.5405, "step": 13045 }, { "epoch": 0.84, "grad_norm": 1.1661828756332397, "learning_rate": 6.396220920450341e-07, "loss": 0.5007, "step": 13046 }, { "epoch": 0.84, "grad_norm": 1.1235337257385254, "learning_rate": 6.391106397933055e-07, "loss": 0.5081, "step": 13047 }, { "epoch": 0.84, "grad_norm": 1.1686605215072632, "learning_rate": 6.385993781448003e-07, "loss": 0.4871, "step": 13048 }, { "epoch": 0.84, "grad_norm": 1.1024147272109985, "learning_rate": 6.380883071218635e-07, "loss": 0.469, "step": 13049 }, { "epoch": 0.84, "grad_norm": 1.25594162940979, "learning_rate": 6.375774267468332e-07, "loss": 0.5564, "step": 13050 }, { "epoch": 0.84, "grad_norm": 1.1828256845474243, "learning_rate": 6.370667370420391e-07, "loss": 0.4834, "step": 13051 }, { "epoch": 0.84, "grad_norm": 1.2005915641784668, "learning_rate": 6.365562380298001e-07, "loss": 0.4545, "step": 13052 }, { "epoch": 0.84, "grad_norm": 1.1945769786834717, "learning_rate": 6.360459297324306e-07, "loss": 0.5445, "step": 13053 }, { "epoch": 0.84, "grad_norm": 1.2886704206466675, "learning_rate": 6.355358121722338e-07, "loss": 0.479, "step": 13054 }, { "epoch": 0.84, "grad_norm": 1.2058748006820679, "learning_rate": 6.350258853715069e-07, "loss": 0.5598, "step": 13055 }, { "epoch": 0.84, "grad_norm": 1.2333095073699951, "learning_rate": 6.345161493525371e-07, "loss": 0.5346, "step": 13056 }, { "epoch": 0.84, "grad_norm": 1.3033430576324463, "learning_rate": 6.340066041376025e-07, "loss": 0.5197, "step": 13057 }, { "epoch": 0.84, "grad_norm": 1.2854021787643433, "learning_rate": 6.334972497489749e-07, "loss": 0.4927, "step": 13058 }, { "epoch": 0.84, "grad_norm": 1.1603960990905762, "learning_rate": 6.329880862089172e-07, "loss": 0.5463, "step": 13059 }, { "epoch": 0.84, "grad_norm": 1.2220228910446167, "learning_rate": 6.324791135396824e-07, "loss": 0.4764, "step": 13060 }, { "epoch": 0.84, "grad_norm": 1.2247929573059082, "learning_rate": 6.319703317635178e-07, "loss": 0.522, "step": 13061 }, { "epoch": 0.84, "grad_norm": 1.2294995784759521, "learning_rate": 6.314617409026613e-07, "loss": 0.5302, "step": 13062 }, { "epoch": 0.84, "grad_norm": 1.1588408946990967, "learning_rate": 6.3095334097934e-07, "loss": 0.5312, "step": 13063 }, { "epoch": 0.84, "grad_norm": 1.2431800365447998, "learning_rate": 6.304451320157756e-07, "loss": 0.4912, "step": 13064 }, { "epoch": 0.84, "grad_norm": 1.1710364818572998, "learning_rate": 6.299371140341814e-07, "loss": 0.4804, "step": 13065 }, { "epoch": 0.84, "grad_norm": 1.1268348693847656, "learning_rate": 6.294292870567603e-07, "loss": 0.4904, "step": 13066 }, { "epoch": 0.84, "grad_norm": 1.1966627836227417, "learning_rate": 6.289216511057095e-07, "loss": 0.5431, "step": 13067 }, { "epoch": 0.84, "grad_norm": 1.0926772356033325, "learning_rate": 6.284142062032167e-07, "loss": 0.538, "step": 13068 }, { "epoch": 0.84, "grad_norm": 1.163811206817627, "learning_rate": 6.279069523714588e-07, "loss": 0.5035, "step": 13069 }, { "epoch": 0.84, "grad_norm": 1.143019676208496, "learning_rate": 6.273998896326083e-07, "loss": 0.4963, "step": 13070 }, { "epoch": 0.84, "grad_norm": 1.24593985080719, "learning_rate": 6.268930180088268e-07, "loss": 0.5708, "step": 13071 }, { "epoch": 0.84, "grad_norm": 1.125479817390442, "learning_rate": 6.26386337522269e-07, "loss": 0.4871, "step": 13072 }, { "epoch": 0.84, "grad_norm": 1.3027262687683105, "learning_rate": 6.258798481950801e-07, "loss": 0.529, "step": 13073 }, { "epoch": 0.84, "grad_norm": 1.190049171447754, "learning_rate": 6.253735500493985e-07, "loss": 0.515, "step": 13074 }, { "epoch": 0.84, "grad_norm": 1.3035434484481812, "learning_rate": 6.248674431073515e-07, "loss": 0.5346, "step": 13075 }, { "epoch": 0.84, "grad_norm": 1.1821845769882202, "learning_rate": 6.243615273910608e-07, "loss": 0.5088, "step": 13076 }, { "epoch": 0.84, "grad_norm": 1.2127681970596313, "learning_rate": 6.238558029226382e-07, "loss": 0.5169, "step": 13077 }, { "epoch": 0.84, "grad_norm": 1.2989193201065063, "learning_rate": 6.233502697241878e-07, "loss": 0.5513, "step": 13078 }, { "epoch": 0.84, "grad_norm": 1.1639013290405273, "learning_rate": 6.228449278178056e-07, "loss": 0.4877, "step": 13079 }, { "epoch": 0.84, "grad_norm": 1.125450611114502, "learning_rate": 6.22339777225579e-07, "loss": 0.5408, "step": 13080 }, { "epoch": 0.84, "grad_norm": 1.2068513631820679, "learning_rate": 6.218348179695855e-07, "loss": 0.536, "step": 13081 }, { "epoch": 0.84, "grad_norm": 1.1312943696975708, "learning_rate": 6.213300500718961e-07, "loss": 0.5103, "step": 13082 }, { "epoch": 0.84, "grad_norm": 1.4486464262008667, "learning_rate": 6.208254735545732e-07, "loss": 0.5318, "step": 13083 }, { "epoch": 0.84, "grad_norm": 1.1401269435882568, "learning_rate": 6.203210884396699e-07, "loss": 0.4987, "step": 13084 }, { "epoch": 0.84, "grad_norm": 1.23550546169281, "learning_rate": 6.198168947492328e-07, "loss": 0.5245, "step": 13085 }, { "epoch": 0.84, "grad_norm": 1.2216850519180298, "learning_rate": 6.193128925052988e-07, "loss": 0.5369, "step": 13086 }, { "epoch": 0.84, "grad_norm": 1.2030673027038574, "learning_rate": 6.188090817298953e-07, "loss": 0.4927, "step": 13087 }, { "epoch": 0.84, "grad_norm": 1.1460838317871094, "learning_rate": 6.18305462445043e-07, "loss": 0.5096, "step": 13088 }, { "epoch": 0.84, "grad_norm": 1.223771333694458, "learning_rate": 6.178020346727537e-07, "loss": 0.5035, "step": 13089 }, { "epoch": 0.84, "grad_norm": 1.208231806755066, "learning_rate": 6.172987984350321e-07, "loss": 0.4927, "step": 13090 }, { "epoch": 0.85, "grad_norm": 1.148630142211914, "learning_rate": 6.167957537538716e-07, "loss": 0.5121, "step": 13091 }, { "epoch": 0.85, "grad_norm": 1.1273993253707886, "learning_rate": 6.162929006512613e-07, "loss": 0.4916, "step": 13092 }, { "epoch": 0.85, "grad_norm": 1.243481993675232, "learning_rate": 6.157902391491772e-07, "loss": 0.5812, "step": 13093 }, { "epoch": 0.85, "grad_norm": 1.1732887029647827, "learning_rate": 6.152877692695902e-07, "loss": 0.4785, "step": 13094 }, { "epoch": 0.85, "grad_norm": 1.1635113954544067, "learning_rate": 6.147854910344625e-07, "loss": 0.5276, "step": 13095 }, { "epoch": 0.85, "grad_norm": 1.225266695022583, "learning_rate": 6.142834044657464e-07, "loss": 0.5138, "step": 13096 }, { "epoch": 0.85, "grad_norm": 1.1970101594924927, "learning_rate": 6.137815095853888e-07, "loss": 0.5435, "step": 13097 }, { "epoch": 0.85, "grad_norm": 1.0990991592407227, "learning_rate": 6.132798064153234e-07, "loss": 0.5103, "step": 13098 }, { "epoch": 0.85, "grad_norm": 1.2200649976730347, "learning_rate": 6.127782949774802e-07, "loss": 0.5203, "step": 13099 }, { "epoch": 0.85, "grad_norm": 1.1427059173583984, "learning_rate": 6.122769752937785e-07, "loss": 0.4836, "step": 13100 }, { "epoch": 0.85, "grad_norm": 1.1070722341537476, "learning_rate": 6.117758473861296e-07, "loss": 0.4931, "step": 13101 }, { "epoch": 0.85, "grad_norm": 1.1571201086044312, "learning_rate": 6.112749112764365e-07, "loss": 0.5055, "step": 13102 }, { "epoch": 0.85, "grad_norm": 1.182263970375061, "learning_rate": 6.107741669865952e-07, "loss": 0.5362, "step": 13103 }, { "epoch": 0.85, "grad_norm": 1.1917028427124023, "learning_rate": 6.102736145384897e-07, "loss": 0.5495, "step": 13104 }, { "epoch": 0.85, "grad_norm": 1.1707874536514282, "learning_rate": 6.097732539539992e-07, "loss": 0.4939, "step": 13105 }, { "epoch": 0.85, "grad_norm": 1.2151974439620972, "learning_rate": 6.092730852549922e-07, "loss": 0.4817, "step": 13106 }, { "epoch": 0.85, "grad_norm": 1.117382287979126, "learning_rate": 6.087731084633303e-07, "loss": 0.4781, "step": 13107 }, { "epoch": 0.85, "grad_norm": 1.1624380350112915, "learning_rate": 6.08273323600867e-07, "loss": 0.4885, "step": 13108 }, { "epoch": 0.85, "grad_norm": 1.1893572807312012, "learning_rate": 6.077737306894465e-07, "loss": 0.5059, "step": 13109 }, { "epoch": 0.85, "grad_norm": 1.1449096202850342, "learning_rate": 6.072743297509031e-07, "loss": 0.5259, "step": 13110 }, { "epoch": 0.85, "grad_norm": 1.1240276098251343, "learning_rate": 6.067751208070655e-07, "loss": 0.5201, "step": 13111 }, { "epoch": 0.85, "grad_norm": 1.0626753568649292, "learning_rate": 6.062761038797527e-07, "loss": 0.4825, "step": 13112 }, { "epoch": 0.85, "grad_norm": 1.1167383193969727, "learning_rate": 6.057772789907756e-07, "loss": 0.5126, "step": 13113 }, { "epoch": 0.85, "grad_norm": 1.1188124418258667, "learning_rate": 6.052786461619359e-07, "loss": 0.5088, "step": 13114 }, { "epoch": 0.85, "grad_norm": 1.1721664667129517, "learning_rate": 6.047802054150298e-07, "loss": 0.4836, "step": 13115 }, { "epoch": 0.85, "grad_norm": 1.1223976612091064, "learning_rate": 6.042819567718395e-07, "loss": 0.4882, "step": 13116 }, { "epoch": 0.85, "grad_norm": 1.1606619358062744, "learning_rate": 6.037839002541441e-07, "loss": 0.4847, "step": 13117 }, { "epoch": 0.85, "grad_norm": 1.1284427642822266, "learning_rate": 6.032860358837117e-07, "loss": 0.5414, "step": 13118 }, { "epoch": 0.85, "grad_norm": 1.2334686517715454, "learning_rate": 6.027883636823035e-07, "loss": 0.5228, "step": 13119 }, { "epoch": 0.85, "grad_norm": 1.2075221538543701, "learning_rate": 6.022908836716712e-07, "loss": 0.5745, "step": 13120 }, { "epoch": 0.85, "grad_norm": 1.2901196479797363, "learning_rate": 6.017935958735576e-07, "loss": 0.5341, "step": 13121 }, { "epoch": 0.85, "grad_norm": 1.2036502361297607, "learning_rate": 6.012965003096982e-07, "loss": 0.4741, "step": 13122 }, { "epoch": 0.85, "grad_norm": 1.3092833757400513, "learning_rate": 6.007995970018204e-07, "loss": 0.506, "step": 13123 }, { "epoch": 0.85, "grad_norm": 1.2776219844818115, "learning_rate": 6.00302885971642e-07, "loss": 0.5226, "step": 13124 }, { "epoch": 0.85, "grad_norm": 1.2543091773986816, "learning_rate": 5.998063672408738e-07, "loss": 0.5292, "step": 13125 }, { "epoch": 0.85, "grad_norm": 1.228637456893921, "learning_rate": 5.993100408312158e-07, "loss": 0.5457, "step": 13126 }, { "epoch": 0.85, "grad_norm": 1.1430944204330444, "learning_rate": 5.988139067643617e-07, "loss": 0.4969, "step": 13127 }, { "epoch": 0.85, "grad_norm": 1.1921567916870117, "learning_rate": 5.983179650619969e-07, "loss": 0.5471, "step": 13128 }, { "epoch": 0.85, "grad_norm": 1.1671472787857056, "learning_rate": 5.978222157457986e-07, "loss": 0.4984, "step": 13129 }, { "epoch": 0.85, "grad_norm": 1.2496230602264404, "learning_rate": 5.973266588374322e-07, "loss": 0.5019, "step": 13130 }, { "epoch": 0.85, "grad_norm": 1.35451340675354, "learning_rate": 5.96831294358558e-07, "loss": 0.5036, "step": 13131 }, { "epoch": 0.85, "grad_norm": 1.0721487998962402, "learning_rate": 5.963361223308278e-07, "loss": 0.5225, "step": 13132 }, { "epoch": 0.85, "grad_norm": 1.245505928993225, "learning_rate": 5.958411427758848e-07, "loss": 0.5029, "step": 13133 }, { "epoch": 0.85, "grad_norm": 1.0362194776535034, "learning_rate": 5.953463557153627e-07, "loss": 0.4761, "step": 13134 }, { "epoch": 0.85, "grad_norm": 1.213899850845337, "learning_rate": 5.94851761170887e-07, "loss": 0.5648, "step": 13135 }, { "epoch": 0.85, "grad_norm": 1.1607606410980225, "learning_rate": 5.94357359164075e-07, "loss": 0.5021, "step": 13136 }, { "epoch": 0.85, "grad_norm": 1.1338059902191162, "learning_rate": 5.938631497165359e-07, "loss": 0.4506, "step": 13137 }, { "epoch": 0.85, "grad_norm": 1.2448499202728271, "learning_rate": 5.933691328498719e-07, "loss": 0.5349, "step": 13138 }, { "epoch": 0.85, "grad_norm": 1.0856735706329346, "learning_rate": 5.928753085856725e-07, "loss": 0.4978, "step": 13139 }, { "epoch": 0.85, "grad_norm": 1.216173768043518, "learning_rate": 5.923816769455231e-07, "loss": 0.5473, "step": 13140 }, { "epoch": 0.85, "grad_norm": 1.2376561164855957, "learning_rate": 5.918882379509988e-07, "loss": 0.5565, "step": 13141 }, { "epoch": 0.85, "grad_norm": 1.1568304300308228, "learning_rate": 5.913949916236661e-07, "loss": 0.5177, "step": 13142 }, { "epoch": 0.85, "grad_norm": 1.164033055305481, "learning_rate": 5.909019379850845e-07, "loss": 0.5055, "step": 13143 }, { "epoch": 0.85, "grad_norm": 1.192802906036377, "learning_rate": 5.904090770568043e-07, "loss": 0.4738, "step": 13144 }, { "epoch": 0.85, "grad_norm": 1.2564573287963867, "learning_rate": 5.899164088603654e-07, "loss": 0.5578, "step": 13145 }, { "epoch": 0.85, "grad_norm": 1.0917987823486328, "learning_rate": 5.894239334173024e-07, "loss": 0.5062, "step": 13146 }, { "epoch": 0.85, "grad_norm": 1.1550207138061523, "learning_rate": 5.889316507491399e-07, "loss": 0.5296, "step": 13147 }, { "epoch": 0.85, "grad_norm": 1.244316816329956, "learning_rate": 5.884395608773941e-07, "loss": 0.4954, "step": 13148 }, { "epoch": 0.85, "grad_norm": 1.143396019935608, "learning_rate": 5.879476638235726e-07, "loss": 0.5008, "step": 13149 }, { "epoch": 0.85, "grad_norm": 1.1504112482070923, "learning_rate": 5.874559596091772e-07, "loss": 0.4944, "step": 13150 }, { "epoch": 0.85, "grad_norm": 1.2025543451309204, "learning_rate": 5.869644482556958e-07, "loss": 0.5087, "step": 13151 }, { "epoch": 0.85, "grad_norm": 1.120805025100708, "learning_rate": 5.864731297846127e-07, "loss": 0.4752, "step": 13152 }, { "epoch": 0.85, "grad_norm": 1.1120352745056152, "learning_rate": 5.859820042174019e-07, "loss": 0.4846, "step": 13153 }, { "epoch": 0.85, "grad_norm": 1.2645894289016724, "learning_rate": 5.854910715755296e-07, "loss": 0.5063, "step": 13154 }, { "epoch": 0.85, "grad_norm": 1.2657217979431152, "learning_rate": 5.850003318804531e-07, "loss": 0.5275, "step": 13155 }, { "epoch": 0.85, "grad_norm": 1.2258720397949219, "learning_rate": 5.845097851536224e-07, "loss": 0.5293, "step": 13156 }, { "epoch": 0.85, "grad_norm": 1.210503339767456, "learning_rate": 5.840194314164754e-07, "loss": 0.5026, "step": 13157 }, { "epoch": 0.85, "grad_norm": 1.1740957498550415, "learning_rate": 5.835292706904461e-07, "loss": 0.4871, "step": 13158 }, { "epoch": 0.85, "grad_norm": 1.147486925125122, "learning_rate": 5.830393029969572e-07, "loss": 0.4976, "step": 13159 }, { "epoch": 0.85, "grad_norm": 1.143352746963501, "learning_rate": 5.825495283574245e-07, "loss": 0.4718, "step": 13160 }, { "epoch": 0.85, "grad_norm": 1.3385478258132935, "learning_rate": 5.820599467932553e-07, "loss": 0.5524, "step": 13161 }, { "epoch": 0.85, "grad_norm": 1.1627106666564941, "learning_rate": 5.81570558325848e-07, "loss": 0.5515, "step": 13162 }, { "epoch": 0.85, "grad_norm": 1.204034686088562, "learning_rate": 5.810813629765911e-07, "loss": 0.4899, "step": 13163 }, { "epoch": 0.85, "grad_norm": 1.2252027988433838, "learning_rate": 5.805923607668668e-07, "loss": 0.506, "step": 13164 }, { "epoch": 0.85, "grad_norm": 1.3051679134368896, "learning_rate": 5.801035517180481e-07, "loss": 0.5264, "step": 13165 }, { "epoch": 0.85, "grad_norm": 1.226073145866394, "learning_rate": 5.796149358514997e-07, "loss": 0.4824, "step": 13166 }, { "epoch": 0.85, "grad_norm": 1.1614912748336792, "learning_rate": 5.791265131885776e-07, "loss": 0.4815, "step": 13167 }, { "epoch": 0.85, "grad_norm": 1.2253903150558472, "learning_rate": 5.786382837506305e-07, "loss": 0.5508, "step": 13168 }, { "epoch": 0.85, "grad_norm": 1.122607946395874, "learning_rate": 5.781502475589962e-07, "loss": 0.4945, "step": 13169 }, { "epoch": 0.85, "grad_norm": 1.2714588642120361, "learning_rate": 5.776624046350054e-07, "loss": 0.5335, "step": 13170 }, { "epoch": 0.85, "grad_norm": 1.1940257549285889, "learning_rate": 5.771747549999818e-07, "loss": 0.5173, "step": 13171 }, { "epoch": 0.85, "grad_norm": 1.1495202779769897, "learning_rate": 5.76687298675238e-07, "loss": 0.536, "step": 13172 }, { "epoch": 0.85, "grad_norm": 1.1361294984817505, "learning_rate": 5.762000356820807e-07, "loss": 0.5269, "step": 13173 }, { "epoch": 0.85, "grad_norm": 1.2177327871322632, "learning_rate": 5.757129660418071e-07, "loss": 0.5016, "step": 13174 }, { "epoch": 0.85, "grad_norm": 1.1013473272323608, "learning_rate": 5.752260897757033e-07, "loss": 0.5016, "step": 13175 }, { "epoch": 0.85, "grad_norm": 1.2534446716308594, "learning_rate": 5.747394069050516e-07, "loss": 0.5316, "step": 13176 }, { "epoch": 0.85, "grad_norm": 1.1304030418395996, "learning_rate": 5.742529174511235e-07, "loss": 0.4915, "step": 13177 }, { "epoch": 0.85, "grad_norm": 1.2447913885116577, "learning_rate": 5.737666214351811e-07, "loss": 0.4808, "step": 13178 }, { "epoch": 0.85, "grad_norm": 1.1721535921096802, "learning_rate": 5.732805188784801e-07, "loss": 0.4773, "step": 13179 }, { "epoch": 0.85, "grad_norm": 1.1002906560897827, "learning_rate": 5.727946098022674e-07, "loss": 0.4805, "step": 13180 }, { "epoch": 0.85, "grad_norm": 1.2370814085006714, "learning_rate": 5.723088942277793e-07, "loss": 0.5237, "step": 13181 }, { "epoch": 0.85, "grad_norm": 1.164317011833191, "learning_rate": 5.718233721762456e-07, "loss": 0.4926, "step": 13182 }, { "epoch": 0.85, "grad_norm": 1.1094019412994385, "learning_rate": 5.713380436688876e-07, "loss": 0.5494, "step": 13183 }, { "epoch": 0.85, "grad_norm": 1.191394329071045, "learning_rate": 5.708529087269177e-07, "loss": 0.5574, "step": 13184 }, { "epoch": 0.85, "grad_norm": 1.1165177822113037, "learning_rate": 5.703679673715407e-07, "loss": 0.5191, "step": 13185 }, { "epoch": 0.85, "grad_norm": 1.3184391260147095, "learning_rate": 5.698832196239501e-07, "loss": 0.5141, "step": 13186 }, { "epoch": 0.85, "grad_norm": 1.118840217590332, "learning_rate": 5.693986655053341e-07, "loss": 0.513, "step": 13187 }, { "epoch": 0.85, "grad_norm": 1.1878379583358765, "learning_rate": 5.689143050368712e-07, "loss": 0.5699, "step": 13188 }, { "epoch": 0.85, "grad_norm": 1.2779223918914795, "learning_rate": 5.684301382397317e-07, "loss": 0.5335, "step": 13189 }, { "epoch": 0.85, "grad_norm": 1.2603366374969482, "learning_rate": 5.679461651350776e-07, "loss": 0.5112, "step": 13190 }, { "epoch": 0.85, "grad_norm": 1.15440034866333, "learning_rate": 5.674623857440625e-07, "loss": 0.5384, "step": 13191 }, { "epoch": 0.85, "grad_norm": 1.1213529109954834, "learning_rate": 5.669788000878296e-07, "loss": 0.5069, "step": 13192 }, { "epoch": 0.85, "grad_norm": 1.1803550720214844, "learning_rate": 5.664954081875162e-07, "loss": 0.5243, "step": 13193 }, { "epoch": 0.85, "grad_norm": 1.1674375534057617, "learning_rate": 5.660122100642496e-07, "loss": 0.497, "step": 13194 }, { "epoch": 0.85, "grad_norm": 1.2184895277023315, "learning_rate": 5.655292057391493e-07, "loss": 0.4878, "step": 13195 }, { "epoch": 0.85, "grad_norm": 1.1093963384628296, "learning_rate": 5.650463952333268e-07, "loss": 0.509, "step": 13196 }, { "epoch": 0.85, "grad_norm": 1.2048574686050415, "learning_rate": 5.645637785678848e-07, "loss": 0.4592, "step": 13197 }, { "epoch": 0.85, "grad_norm": 1.195857048034668, "learning_rate": 5.640813557639152e-07, "loss": 0.5162, "step": 13198 }, { "epoch": 0.85, "grad_norm": 1.186370611190796, "learning_rate": 5.63599126842505e-07, "loss": 0.5152, "step": 13199 }, { "epoch": 0.85, "grad_norm": 1.1685278415679932, "learning_rate": 5.631170918247308e-07, "loss": 0.5049, "step": 13200 }, { "epoch": 0.85, "grad_norm": 1.2296881675720215, "learning_rate": 5.626352507316612e-07, "loss": 0.5422, "step": 13201 }, { "epoch": 0.85, "grad_norm": 1.14113450050354, "learning_rate": 5.621536035843561e-07, "loss": 0.4985, "step": 13202 }, { "epoch": 0.85, "grad_norm": 1.1240513324737549, "learning_rate": 5.616721504038685e-07, "loss": 0.5119, "step": 13203 }, { "epoch": 0.85, "grad_norm": 1.1167658567428589, "learning_rate": 5.61190891211239e-07, "loss": 0.476, "step": 13204 }, { "epoch": 0.85, "grad_norm": 1.2575117349624634, "learning_rate": 5.607098260275034e-07, "loss": 0.5451, "step": 13205 }, { "epoch": 0.85, "grad_norm": 1.0590157508850098, "learning_rate": 5.602289548736877e-07, "loss": 0.4564, "step": 13206 }, { "epoch": 0.85, "grad_norm": 1.1822590827941895, "learning_rate": 5.597482777708096e-07, "loss": 0.5048, "step": 13207 }, { "epoch": 0.85, "grad_norm": 1.2018271684646606, "learning_rate": 5.592677947398784e-07, "loss": 0.5092, "step": 13208 }, { "epoch": 0.85, "grad_norm": 1.1350082159042358, "learning_rate": 5.58787505801896e-07, "loss": 0.4993, "step": 13209 }, { "epoch": 0.85, "grad_norm": 1.2144044637680054, "learning_rate": 5.583074109778519e-07, "loss": 0.5699, "step": 13210 }, { "epoch": 0.85, "grad_norm": 1.2640578746795654, "learning_rate": 5.578275102887309e-07, "loss": 0.5255, "step": 13211 }, { "epoch": 0.85, "grad_norm": 1.23492431640625, "learning_rate": 5.573478037555085e-07, "loss": 0.4776, "step": 13212 }, { "epoch": 0.85, "grad_norm": 1.177364468574524, "learning_rate": 5.568682913991519e-07, "loss": 0.5328, "step": 13213 }, { "epoch": 0.85, "grad_norm": 1.2072473764419556, "learning_rate": 5.563889732406197e-07, "loss": 0.5222, "step": 13214 }, { "epoch": 0.85, "grad_norm": 1.1548314094543457, "learning_rate": 5.559098493008591e-07, "loss": 0.5565, "step": 13215 }, { "epoch": 0.85, "grad_norm": 1.225757122039795, "learning_rate": 5.55430919600814e-07, "loss": 0.5397, "step": 13216 }, { "epoch": 0.85, "grad_norm": 1.1447722911834717, "learning_rate": 5.549521841614159e-07, "loss": 0.5069, "step": 13217 }, { "epoch": 0.85, "grad_norm": 1.1875181198120117, "learning_rate": 5.544736430035902e-07, "loss": 0.5179, "step": 13218 }, { "epoch": 0.85, "grad_norm": 1.1544697284698486, "learning_rate": 5.539952961482514e-07, "loss": 0.5347, "step": 13219 }, { "epoch": 0.85, "grad_norm": 1.1783727407455444, "learning_rate": 5.535171436163072e-07, "loss": 0.5088, "step": 13220 }, { "epoch": 0.85, "grad_norm": 1.2432225942611694, "learning_rate": 5.530391854286566e-07, "loss": 0.5091, "step": 13221 }, { "epoch": 0.85, "grad_norm": 1.171609878540039, "learning_rate": 5.525614216061898e-07, "loss": 0.5131, "step": 13222 }, { "epoch": 0.85, "grad_norm": 1.2474509477615356, "learning_rate": 5.520838521697896e-07, "loss": 0.5379, "step": 13223 }, { "epoch": 0.85, "grad_norm": 1.189043641090393, "learning_rate": 5.516064771403273e-07, "loss": 0.5159, "step": 13224 }, { "epoch": 0.85, "grad_norm": 1.1770364046096802, "learning_rate": 5.511292965386694e-07, "loss": 0.5094, "step": 13225 }, { "epoch": 0.85, "grad_norm": 1.247882604598999, "learning_rate": 5.506523103856715e-07, "loss": 0.5501, "step": 13226 }, { "epoch": 0.85, "grad_norm": 1.2170188426971436, "learning_rate": 5.501755187021829e-07, "loss": 0.5207, "step": 13227 }, { "epoch": 0.85, "grad_norm": 1.2171844244003296, "learning_rate": 5.496989215090403e-07, "loss": 0.5303, "step": 13228 }, { "epoch": 0.85, "grad_norm": 1.1873931884765625, "learning_rate": 5.492225188270756e-07, "loss": 0.499, "step": 13229 }, { "epoch": 0.85, "grad_norm": 1.2837679386138916, "learning_rate": 5.487463106771118e-07, "loss": 0.5004, "step": 13230 }, { "epoch": 0.85, "grad_norm": 1.2330418825149536, "learning_rate": 5.482702970799625e-07, "loss": 0.5055, "step": 13231 }, { "epoch": 0.85, "grad_norm": 1.0996071100234985, "learning_rate": 5.477944780564343e-07, "loss": 0.5045, "step": 13232 }, { "epoch": 0.85, "grad_norm": 1.1909266710281372, "learning_rate": 5.473188536273211e-07, "loss": 0.5349, "step": 13233 }, { "epoch": 0.85, "grad_norm": 1.1707035303115845, "learning_rate": 5.468434238134124e-07, "loss": 0.5005, "step": 13234 }, { "epoch": 0.85, "grad_norm": 1.1698323488235474, "learning_rate": 5.463681886354888e-07, "loss": 0.4691, "step": 13235 }, { "epoch": 0.85, "grad_norm": 1.1995694637298584, "learning_rate": 5.458931481143209e-07, "loss": 0.4844, "step": 13236 }, { "epoch": 0.85, "grad_norm": 1.2063798904418945, "learning_rate": 5.454183022706721e-07, "loss": 0.5086, "step": 13237 }, { "epoch": 0.85, "grad_norm": 1.1807669401168823, "learning_rate": 5.44943651125297e-07, "loss": 0.5247, "step": 13238 }, { "epoch": 0.85, "grad_norm": 1.3365274667739868, "learning_rate": 5.4446919469894e-07, "loss": 0.5316, "step": 13239 }, { "epoch": 0.85, "grad_norm": 1.1693998575210571, "learning_rate": 5.43994933012339e-07, "loss": 0.4976, "step": 13240 }, { "epoch": 0.85, "grad_norm": 1.2074698209762573, "learning_rate": 5.435208660862229e-07, "loss": 0.524, "step": 13241 }, { "epoch": 0.85, "grad_norm": 1.2594034671783447, "learning_rate": 5.430469939413119e-07, "loss": 0.5367, "step": 13242 }, { "epoch": 0.85, "grad_norm": 1.2962465286254883, "learning_rate": 5.425733165983172e-07, "loss": 0.5065, "step": 13243 }, { "epoch": 0.85, "grad_norm": 1.2009998559951782, "learning_rate": 5.420998340779438e-07, "loss": 0.4771, "step": 13244 }, { "epoch": 0.85, "grad_norm": 1.1617707014083862, "learning_rate": 5.41626546400884e-07, "loss": 0.5102, "step": 13245 }, { "epoch": 0.86, "grad_norm": 1.1318126916885376, "learning_rate": 5.411534535878254e-07, "loss": 0.5334, "step": 13246 }, { "epoch": 0.86, "grad_norm": 1.1636918783187866, "learning_rate": 5.406805556594453e-07, "loss": 0.4801, "step": 13247 }, { "epoch": 0.86, "grad_norm": 1.1037850379943848, "learning_rate": 5.402078526364129e-07, "loss": 0.502, "step": 13248 }, { "epoch": 0.86, "grad_norm": 1.1607601642608643, "learning_rate": 5.397353445393888e-07, "loss": 0.5188, "step": 13249 }, { "epoch": 0.86, "grad_norm": 1.1343590021133423, "learning_rate": 5.392630313890263e-07, "loss": 0.4751, "step": 13250 }, { "epoch": 0.86, "grad_norm": 1.0733451843261719, "learning_rate": 5.387909132059665e-07, "loss": 0.5296, "step": 13251 }, { "epoch": 0.86, "grad_norm": 1.260229468345642, "learning_rate": 5.383189900108465e-07, "loss": 0.5359, "step": 13252 }, { "epoch": 0.86, "grad_norm": 1.216071605682373, "learning_rate": 5.378472618242914e-07, "loss": 0.4856, "step": 13253 }, { "epoch": 0.86, "grad_norm": 1.1730760335922241, "learning_rate": 5.373757286669202e-07, "loss": 0.5279, "step": 13254 }, { "epoch": 0.86, "grad_norm": 1.0878535509109497, "learning_rate": 5.369043905593424e-07, "loss": 0.4534, "step": 13255 }, { "epoch": 0.86, "grad_norm": 1.2755287885665894, "learning_rate": 5.364332475221596e-07, "loss": 0.5906, "step": 13256 }, { "epoch": 0.86, "grad_norm": 1.1478545665740967, "learning_rate": 5.359622995759623e-07, "loss": 0.5136, "step": 13257 }, { "epoch": 0.86, "grad_norm": 1.133834719657898, "learning_rate": 5.354915467413358e-07, "loss": 0.4891, "step": 13258 }, { "epoch": 0.86, "grad_norm": 1.1422505378723145, "learning_rate": 5.350209890388546e-07, "loss": 0.5012, "step": 13259 }, { "epoch": 0.86, "grad_norm": 1.1453986167907715, "learning_rate": 5.345506264890865e-07, "loss": 0.4929, "step": 13260 }, { "epoch": 0.86, "grad_norm": 1.0702338218688965, "learning_rate": 5.340804591125898e-07, "loss": 0.466, "step": 13261 }, { "epoch": 0.86, "grad_norm": 1.2405582666397095, "learning_rate": 5.336104869299147e-07, "loss": 0.5138, "step": 13262 }, { "epoch": 0.86, "grad_norm": 1.1276628971099854, "learning_rate": 5.331407099616009e-07, "loss": 0.5177, "step": 13263 }, { "epoch": 0.86, "grad_norm": 1.1657439470291138, "learning_rate": 5.326711282281821e-07, "loss": 0.506, "step": 13264 }, { "epoch": 0.86, "grad_norm": 1.1201355457305908, "learning_rate": 5.322017417501824e-07, "loss": 0.5179, "step": 13265 }, { "epoch": 0.86, "grad_norm": 1.2411329746246338, "learning_rate": 5.317325505481169e-07, "loss": 0.5215, "step": 13266 }, { "epoch": 0.86, "grad_norm": 1.1540769338607788, "learning_rate": 5.312635546424949e-07, "loss": 0.5182, "step": 13267 }, { "epoch": 0.86, "grad_norm": 1.2039750814437866, "learning_rate": 5.307947540538127e-07, "loss": 0.506, "step": 13268 }, { "epoch": 0.86, "grad_norm": 1.2482895851135254, "learning_rate": 5.303261488025608e-07, "loss": 0.5401, "step": 13269 }, { "epoch": 0.86, "grad_norm": 1.096146821975708, "learning_rate": 5.298577389092207e-07, "loss": 0.4851, "step": 13270 }, { "epoch": 0.86, "grad_norm": 1.1723248958587646, "learning_rate": 5.29389524394266e-07, "loss": 0.5125, "step": 13271 }, { "epoch": 0.86, "grad_norm": 1.1872270107269287, "learning_rate": 5.28921505278161e-07, "loss": 0.5299, "step": 13272 }, { "epoch": 0.86, "grad_norm": 1.3037300109863281, "learning_rate": 5.284536815813623e-07, "loss": 0.5254, "step": 13273 }, { "epoch": 0.86, "grad_norm": 1.219462275505066, "learning_rate": 5.279860533243153e-07, "loss": 0.5822, "step": 13274 }, { "epoch": 0.86, "grad_norm": 1.0461701154708862, "learning_rate": 5.275186205274601e-07, "loss": 0.4528, "step": 13275 }, { "epoch": 0.86, "grad_norm": 1.1831341981887817, "learning_rate": 5.270513832112268e-07, "loss": 0.5197, "step": 13276 }, { "epoch": 0.86, "grad_norm": 1.1069766283035278, "learning_rate": 5.265843413960376e-07, "loss": 0.478, "step": 13277 }, { "epoch": 0.86, "grad_norm": 1.2622864246368408, "learning_rate": 5.261174951023046e-07, "loss": 0.5295, "step": 13278 }, { "epoch": 0.86, "grad_norm": 1.144487977027893, "learning_rate": 5.256508443504344e-07, "loss": 0.524, "step": 13279 }, { "epoch": 0.86, "grad_norm": 1.1000093221664429, "learning_rate": 5.25184389160821e-07, "loss": 0.4831, "step": 13280 }, { "epoch": 0.86, "grad_norm": 1.1860158443450928, "learning_rate": 5.247181295538522e-07, "loss": 0.5567, "step": 13281 }, { "epoch": 0.86, "grad_norm": 1.2116295099258423, "learning_rate": 5.242520655499084e-07, "loss": 0.5268, "step": 13282 }, { "epoch": 0.86, "grad_norm": 1.1600840091705322, "learning_rate": 5.237861971693586e-07, "loss": 0.4707, "step": 13283 }, { "epoch": 0.86, "grad_norm": 1.214060664176941, "learning_rate": 5.233205244325657e-07, "loss": 0.4974, "step": 13284 }, { "epoch": 0.86, "grad_norm": 1.101527452468872, "learning_rate": 5.228550473598831e-07, "loss": 0.4806, "step": 13285 }, { "epoch": 0.86, "grad_norm": 1.139914870262146, "learning_rate": 5.223897659716548e-07, "loss": 0.4651, "step": 13286 }, { "epoch": 0.86, "grad_norm": 1.1823692321777344, "learning_rate": 5.219246802882172e-07, "loss": 0.481, "step": 13287 }, { "epoch": 0.86, "grad_norm": 1.2383424043655396, "learning_rate": 5.214597903298984e-07, "loss": 0.5345, "step": 13288 }, { "epoch": 0.86, "grad_norm": 1.30972421169281, "learning_rate": 5.209950961170174e-07, "loss": 0.546, "step": 13289 }, { "epoch": 0.86, "grad_norm": 1.2550290822982788, "learning_rate": 5.205305976698849e-07, "loss": 0.4827, "step": 13290 }, { "epoch": 0.86, "grad_norm": 1.1180943250656128, "learning_rate": 5.200662950088031e-07, "loss": 0.5059, "step": 13291 }, { "epoch": 0.86, "grad_norm": 1.144538164138794, "learning_rate": 5.196021881540647e-07, "loss": 0.5279, "step": 13292 }, { "epoch": 0.86, "grad_norm": 1.0920612812042236, "learning_rate": 5.191382771259551e-07, "loss": 0.4768, "step": 13293 }, { "epoch": 0.86, "grad_norm": 1.1589308977127075, "learning_rate": 5.186745619447503e-07, "loss": 0.5253, "step": 13294 }, { "epoch": 0.86, "grad_norm": 1.152697205543518, "learning_rate": 5.182110426307185e-07, "loss": 0.4735, "step": 13295 }, { "epoch": 0.86, "grad_norm": 1.1941287517547607, "learning_rate": 5.177477192041192e-07, "loss": 0.5157, "step": 13296 }, { "epoch": 0.86, "grad_norm": 1.341147541999817, "learning_rate": 5.172845916852037e-07, "loss": 0.4992, "step": 13297 }, { "epoch": 0.86, "grad_norm": 1.251189112663269, "learning_rate": 5.168216600942116e-07, "loss": 0.4635, "step": 13298 }, { "epoch": 0.86, "grad_norm": 1.315173625946045, "learning_rate": 5.163589244513784e-07, "loss": 0.4937, "step": 13299 }, { "epoch": 0.86, "grad_norm": 1.1895853281021118, "learning_rate": 5.158963847769288e-07, "loss": 0.5184, "step": 13300 }, { "epoch": 0.86, "grad_norm": 1.3193788528442383, "learning_rate": 5.154340410910791e-07, "loss": 0.5103, "step": 13301 }, { "epoch": 0.86, "grad_norm": 1.229539394378662, "learning_rate": 5.149718934140368e-07, "loss": 0.5211, "step": 13302 }, { "epoch": 0.86, "grad_norm": 1.2699154615402222, "learning_rate": 5.145099417660027e-07, "loss": 0.5047, "step": 13303 }, { "epoch": 0.86, "grad_norm": 1.1161799430847168, "learning_rate": 5.140481861671648e-07, "loss": 0.5015, "step": 13304 }, { "epoch": 0.86, "grad_norm": 1.1402664184570312, "learning_rate": 5.135866266377076e-07, "loss": 0.4924, "step": 13305 }, { "epoch": 0.86, "grad_norm": 1.1247493028640747, "learning_rate": 5.131252631978034e-07, "loss": 0.5593, "step": 13306 }, { "epoch": 0.86, "grad_norm": 1.1762157678604126, "learning_rate": 5.126640958676188e-07, "loss": 0.4963, "step": 13307 }, { "epoch": 0.86, "grad_norm": 1.2227439880371094, "learning_rate": 5.122031246673076e-07, "loss": 0.5558, "step": 13308 }, { "epoch": 0.86, "grad_norm": 1.1281851530075073, "learning_rate": 5.117423496170199e-07, "loss": 0.5282, "step": 13309 }, { "epoch": 0.86, "grad_norm": 1.2906659841537476, "learning_rate": 5.112817707368939e-07, "loss": 0.4983, "step": 13310 }, { "epoch": 0.86, "grad_norm": 1.1289093494415283, "learning_rate": 5.108213880470603e-07, "loss": 0.4929, "step": 13311 }, { "epoch": 0.86, "grad_norm": 1.2172417640686035, "learning_rate": 5.103612015676429e-07, "loss": 0.5079, "step": 13312 }, { "epoch": 0.86, "grad_norm": 1.163833498954773, "learning_rate": 5.099012113187535e-07, "loss": 0.5068, "step": 13313 }, { "epoch": 0.86, "grad_norm": 1.2628393173217773, "learning_rate": 5.09441417320497e-07, "loss": 0.5363, "step": 13314 }, { "epoch": 0.86, "grad_norm": 1.1396763324737549, "learning_rate": 5.089818195929702e-07, "loss": 0.5528, "step": 13315 }, { "epoch": 0.86, "grad_norm": 1.2578797340393066, "learning_rate": 5.08522418156262e-07, "loss": 0.5148, "step": 13316 }, { "epoch": 0.86, "grad_norm": 1.1591341495513916, "learning_rate": 5.080632130304502e-07, "loss": 0.5453, "step": 13317 }, { "epoch": 0.86, "grad_norm": 1.2108540534973145, "learning_rate": 5.07604204235606e-07, "loss": 0.5449, "step": 13318 }, { "epoch": 0.86, "grad_norm": 1.1942079067230225, "learning_rate": 5.071453917917913e-07, "loss": 0.5314, "step": 13319 }, { "epoch": 0.86, "grad_norm": 1.1266677379608154, "learning_rate": 5.066867757190597e-07, "loss": 0.4627, "step": 13320 }, { "epoch": 0.86, "grad_norm": 1.1765695810317993, "learning_rate": 5.062283560374576e-07, "loss": 0.5629, "step": 13321 }, { "epoch": 0.86, "grad_norm": 1.1851415634155273, "learning_rate": 5.057701327670184e-07, "loss": 0.4924, "step": 13322 }, { "epoch": 0.86, "grad_norm": 1.2909389734268188, "learning_rate": 5.053121059277722e-07, "loss": 0.5341, "step": 13323 }, { "epoch": 0.86, "grad_norm": 1.2157131433486938, "learning_rate": 5.048542755397368e-07, "loss": 0.5231, "step": 13324 }, { "epoch": 0.86, "grad_norm": 1.149127721786499, "learning_rate": 5.043966416229235e-07, "loss": 0.4891, "step": 13325 }, { "epoch": 0.86, "grad_norm": 1.3299616575241089, "learning_rate": 5.039392041973351e-07, "loss": 0.5067, "step": 13326 }, { "epoch": 0.86, "grad_norm": 1.276295781135559, "learning_rate": 5.034819632829635e-07, "loss": 0.5428, "step": 13327 }, { "epoch": 0.86, "grad_norm": 1.1311911344528198, "learning_rate": 5.030249188997937e-07, "loss": 0.51, "step": 13328 }, { "epoch": 0.86, "grad_norm": 1.242551565170288, "learning_rate": 5.025680710678021e-07, "loss": 0.5348, "step": 13329 }, { "epoch": 0.86, "grad_norm": 1.1625192165374756, "learning_rate": 5.021114198069571e-07, "loss": 0.5183, "step": 13330 }, { "epoch": 0.86, "grad_norm": 1.283227801322937, "learning_rate": 5.016549651372171e-07, "loss": 0.5415, "step": 13331 }, { "epoch": 0.86, "grad_norm": 1.1576533317565918, "learning_rate": 5.011987070785341e-07, "loss": 0.5345, "step": 13332 }, { "epoch": 0.86, "grad_norm": 1.394727349281311, "learning_rate": 5.007426456508468e-07, "loss": 0.5089, "step": 13333 }, { "epoch": 0.86, "grad_norm": 1.2200074195861816, "learning_rate": 5.002867808740908e-07, "loss": 0.4884, "step": 13334 }, { "epoch": 0.86, "grad_norm": 1.186159372329712, "learning_rate": 4.998311127681898e-07, "loss": 0.5328, "step": 13335 }, { "epoch": 0.86, "grad_norm": 1.1509203910827637, "learning_rate": 4.993756413530604e-07, "loss": 0.5073, "step": 13336 }, { "epoch": 0.86, "grad_norm": 1.3250713348388672, "learning_rate": 4.989203666486097e-07, "loss": 0.5449, "step": 13337 }, { "epoch": 0.86, "grad_norm": 1.3171641826629639, "learning_rate": 4.984652886747382e-07, "loss": 0.5984, "step": 13338 }, { "epoch": 0.86, "grad_norm": 1.2473363876342773, "learning_rate": 4.980104074513337e-07, "loss": 0.4825, "step": 13339 }, { "epoch": 0.86, "grad_norm": 1.1245323419570923, "learning_rate": 4.975557229982792e-07, "loss": 0.4833, "step": 13340 }, { "epoch": 0.86, "grad_norm": 1.2543822526931763, "learning_rate": 4.971012353354476e-07, "loss": 0.4874, "step": 13341 }, { "epoch": 0.86, "grad_norm": 1.2452774047851562, "learning_rate": 4.966469444827032e-07, "loss": 0.5927, "step": 13342 }, { "epoch": 0.86, "grad_norm": 1.3496628999710083, "learning_rate": 4.961928504599017e-07, "loss": 0.5502, "step": 13343 }, { "epoch": 0.86, "grad_norm": 1.2912954092025757, "learning_rate": 4.957389532868922e-07, "loss": 0.5407, "step": 13344 }, { "epoch": 0.86, "grad_norm": 1.2509523630142212, "learning_rate": 4.952852529835106e-07, "loss": 0.5417, "step": 13345 }, { "epoch": 0.86, "grad_norm": 1.1697983741760254, "learning_rate": 4.948317495695887e-07, "loss": 0.5569, "step": 13346 }, { "epoch": 0.86, "grad_norm": 1.2927149534225464, "learning_rate": 4.943784430649473e-07, "loss": 0.5039, "step": 13347 }, { "epoch": 0.86, "grad_norm": 1.112404465675354, "learning_rate": 4.939253334893995e-07, "loss": 0.4904, "step": 13348 }, { "epoch": 0.86, "grad_norm": 1.2381950616836548, "learning_rate": 4.934724208627495e-07, "loss": 0.5452, "step": 13349 }, { "epoch": 0.86, "grad_norm": 1.2199596166610718, "learning_rate": 4.930197052047941e-07, "loss": 0.5164, "step": 13350 }, { "epoch": 0.86, "grad_norm": 1.2015329599380493, "learning_rate": 4.925671865353182e-07, "loss": 0.4813, "step": 13351 }, { "epoch": 0.86, "grad_norm": 1.185684084892273, "learning_rate": 4.921148648741009e-07, "loss": 0.4946, "step": 13352 }, { "epoch": 0.86, "grad_norm": 1.1768648624420166, "learning_rate": 4.916627402409124e-07, "loss": 0.5441, "step": 13353 }, { "epoch": 0.86, "grad_norm": 1.3075257539749146, "learning_rate": 4.912108126555142e-07, "loss": 0.4834, "step": 13354 }, { "epoch": 0.86, "grad_norm": 1.2838975191116333, "learning_rate": 4.907590821376595e-07, "loss": 0.5459, "step": 13355 }, { "epoch": 0.86, "grad_norm": 1.2124460935592651, "learning_rate": 4.903075487070901e-07, "loss": 0.5185, "step": 13356 }, { "epoch": 0.86, "grad_norm": 1.2610046863555908, "learning_rate": 4.898562123835432e-07, "loss": 0.5133, "step": 13357 }, { "epoch": 0.86, "grad_norm": 1.1140766143798828, "learning_rate": 4.894050731867445e-07, "loss": 0.5027, "step": 13358 }, { "epoch": 0.86, "grad_norm": 1.1565136909484863, "learning_rate": 4.889541311364121e-07, "loss": 0.5077, "step": 13359 }, { "epoch": 0.86, "grad_norm": 1.1852920055389404, "learning_rate": 4.885033862522564e-07, "loss": 0.5382, "step": 13360 }, { "epoch": 0.86, "grad_norm": 1.1711703538894653, "learning_rate": 4.88052838553979e-07, "loss": 0.5251, "step": 13361 }, { "epoch": 0.86, "grad_norm": 1.16727876663208, "learning_rate": 4.8760248806127e-07, "loss": 0.5229, "step": 13362 }, { "epoch": 0.86, "grad_norm": 1.1321611404418945, "learning_rate": 4.871523347938139e-07, "loss": 0.4909, "step": 13363 }, { "epoch": 0.86, "grad_norm": 1.0939140319824219, "learning_rate": 4.867023787712861e-07, "loss": 0.485, "step": 13364 }, { "epoch": 0.86, "grad_norm": 1.260401725769043, "learning_rate": 4.862526200133527e-07, "loss": 0.5074, "step": 13365 }, { "epoch": 0.86, "grad_norm": 1.195072054862976, "learning_rate": 4.858030585396723e-07, "loss": 0.5245, "step": 13366 }, { "epoch": 0.86, "grad_norm": 1.2511425018310547, "learning_rate": 4.853536943698939e-07, "loss": 0.5104, "step": 13367 }, { "epoch": 0.86, "grad_norm": 1.0848675966262817, "learning_rate": 4.849045275236563e-07, "loss": 0.4723, "step": 13368 }, { "epoch": 0.86, "grad_norm": 1.1107370853424072, "learning_rate": 4.844555580205929e-07, "loss": 0.4845, "step": 13369 }, { "epoch": 0.86, "grad_norm": 1.228452205657959, "learning_rate": 4.840067858803272e-07, "loss": 0.5169, "step": 13370 }, { "epoch": 0.86, "grad_norm": 1.2151987552642822, "learning_rate": 4.83558211122473e-07, "loss": 0.5251, "step": 13371 }, { "epoch": 0.86, "grad_norm": 1.1480085849761963, "learning_rate": 4.831098337666368e-07, "loss": 0.5108, "step": 13372 }, { "epoch": 0.86, "grad_norm": 1.2528884410858154, "learning_rate": 4.826616538324175e-07, "loss": 0.5366, "step": 13373 }, { "epoch": 0.86, "grad_norm": 1.1098910570144653, "learning_rate": 4.82213671339401e-07, "loss": 0.4771, "step": 13374 }, { "epoch": 0.86, "grad_norm": 1.1011178493499756, "learning_rate": 4.817658863071689e-07, "loss": 0.4679, "step": 13375 }, { "epoch": 0.86, "grad_norm": 1.1828199625015259, "learning_rate": 4.813182987552928e-07, "loss": 0.5213, "step": 13376 }, { "epoch": 0.86, "grad_norm": 1.0876950025558472, "learning_rate": 4.808709087033359e-07, "loss": 0.4753, "step": 13377 }, { "epoch": 0.86, "grad_norm": 1.1215490102767944, "learning_rate": 4.804237161708514e-07, "loss": 0.5282, "step": 13378 }, { "epoch": 0.86, "grad_norm": 1.1738466024398804, "learning_rate": 4.79976721177387e-07, "loss": 0.5029, "step": 13379 }, { "epoch": 0.86, "grad_norm": 1.230905294418335, "learning_rate": 4.795299237424772e-07, "loss": 0.5071, "step": 13380 }, { "epoch": 0.86, "grad_norm": 1.1927107572555542, "learning_rate": 4.790833238856518e-07, "loss": 0.5087, "step": 13381 }, { "epoch": 0.86, "grad_norm": 1.0951076745986938, "learning_rate": 4.786369216264297e-07, "loss": 0.4716, "step": 13382 }, { "epoch": 0.86, "grad_norm": 1.1806894540786743, "learning_rate": 4.781907169843225e-07, "loss": 0.5147, "step": 13383 }, { "epoch": 0.86, "grad_norm": 1.12941312789917, "learning_rate": 4.777447099788329e-07, "loss": 0.4871, "step": 13384 }, { "epoch": 0.86, "grad_norm": 1.144736409187317, "learning_rate": 4.772989006294554e-07, "loss": 0.5058, "step": 13385 }, { "epoch": 0.86, "grad_norm": 1.1860133409500122, "learning_rate": 4.768532889556732e-07, "loss": 0.4905, "step": 13386 }, { "epoch": 0.86, "grad_norm": 1.123937964439392, "learning_rate": 4.764078749769641e-07, "loss": 0.5001, "step": 13387 }, { "epoch": 0.86, "grad_norm": 1.1770209074020386, "learning_rate": 4.759626587127952e-07, "loss": 0.5001, "step": 13388 }, { "epoch": 0.86, "grad_norm": 1.1784336566925049, "learning_rate": 4.755176401826267e-07, "loss": 0.5038, "step": 13389 }, { "epoch": 0.86, "grad_norm": 1.1327909231185913, "learning_rate": 4.75072819405909e-07, "loss": 0.4909, "step": 13390 }, { "epoch": 0.86, "grad_norm": 1.1818463802337646, "learning_rate": 4.7462819640208435e-07, "loss": 0.5022, "step": 13391 }, { "epoch": 0.86, "grad_norm": 1.2139347791671753, "learning_rate": 4.74183771190585e-07, "loss": 0.5077, "step": 13392 }, { "epoch": 0.86, "grad_norm": 1.1315193176269531, "learning_rate": 4.7373954379083595e-07, "loss": 0.4727, "step": 13393 }, { "epoch": 0.86, "grad_norm": 1.084742546081543, "learning_rate": 4.7329551422225394e-07, "loss": 0.5211, "step": 13394 }, { "epoch": 0.86, "grad_norm": 1.1581858396530151, "learning_rate": 4.728516825042456e-07, "loss": 0.5091, "step": 13395 }, { "epoch": 0.86, "grad_norm": 1.2315560579299927, "learning_rate": 4.7240804865621103e-07, "loss": 0.5267, "step": 13396 }, { "epoch": 0.86, "grad_norm": 1.3106415271759033, "learning_rate": 4.7196461269753857e-07, "loss": 0.5507, "step": 13397 }, { "epoch": 0.86, "grad_norm": 1.1235612630844116, "learning_rate": 4.715213746476105e-07, "loss": 0.4942, "step": 13398 }, { "epoch": 0.86, "grad_norm": 1.1229091882705688, "learning_rate": 4.710783345257991e-07, "loss": 0.5276, "step": 13399 }, { "epoch": 0.86, "grad_norm": 1.1081349849700928, "learning_rate": 4.7063549235146943e-07, "loss": 0.4812, "step": 13400 }, { "epoch": 0.87, "grad_norm": 1.209183931350708, "learning_rate": 4.7019284814397714e-07, "loss": 0.5126, "step": 13401 }, { "epoch": 0.87, "grad_norm": 1.223951816558838, "learning_rate": 4.697504019226673e-07, "loss": 0.5094, "step": 13402 }, { "epoch": 0.87, "grad_norm": 1.1716457605361938, "learning_rate": 4.693081537068794e-07, "loss": 0.5277, "step": 13403 }, { "epoch": 0.87, "grad_norm": 1.1943570375442505, "learning_rate": 4.688661035159425e-07, "loss": 0.4974, "step": 13404 }, { "epoch": 0.87, "grad_norm": 1.266922950744629, "learning_rate": 4.684242513691789e-07, "loss": 0.5239, "step": 13405 }, { "epoch": 0.87, "grad_norm": 1.2447952032089233, "learning_rate": 4.679825972858987e-07, "loss": 0.5117, "step": 13406 }, { "epoch": 0.87, "grad_norm": 1.2028331756591797, "learning_rate": 4.675411412854064e-07, "loss": 0.5381, "step": 13407 }, { "epoch": 0.87, "grad_norm": 1.1477909088134766, "learning_rate": 4.6709988338699717e-07, "loss": 0.4507, "step": 13408 }, { "epoch": 0.87, "grad_norm": 1.087428092956543, "learning_rate": 4.6665882360995673e-07, "loss": 0.5181, "step": 13409 }, { "epoch": 0.87, "grad_norm": 1.270556926727295, "learning_rate": 4.662179619735635e-07, "loss": 0.5274, "step": 13410 }, { "epoch": 0.87, "grad_norm": 1.1539185047149658, "learning_rate": 4.6577729849708544e-07, "loss": 0.5094, "step": 13411 }, { "epoch": 0.87, "grad_norm": 1.076921820640564, "learning_rate": 4.6533683319978316e-07, "loss": 0.5153, "step": 13412 }, { "epoch": 0.87, "grad_norm": 1.1158971786499023, "learning_rate": 4.6489656610090807e-07, "loss": 0.4939, "step": 13413 }, { "epoch": 0.87, "grad_norm": 1.140028476715088, "learning_rate": 4.6445649721970464e-07, "loss": 0.5313, "step": 13414 }, { "epoch": 0.87, "grad_norm": 1.207027554512024, "learning_rate": 4.6401662657540424e-07, "loss": 0.5297, "step": 13415 }, { "epoch": 0.87, "grad_norm": 1.1136577129364014, "learning_rate": 4.635769541872348e-07, "loss": 0.4906, "step": 13416 }, { "epoch": 0.87, "grad_norm": 1.227049708366394, "learning_rate": 4.631374800744121e-07, "loss": 0.5285, "step": 13417 }, { "epoch": 0.87, "grad_norm": 1.2237117290496826, "learning_rate": 4.6269820425614507e-07, "loss": 0.519, "step": 13418 }, { "epoch": 0.87, "grad_norm": 1.1459652185440063, "learning_rate": 4.6225912675163355e-07, "loss": 0.5004, "step": 13419 }, { "epoch": 0.87, "grad_norm": 1.140592336654663, "learning_rate": 4.6182024758006874e-07, "loss": 0.476, "step": 13420 }, { "epoch": 0.87, "grad_norm": 1.093625783920288, "learning_rate": 4.6138156676063086e-07, "loss": 0.4955, "step": 13421 }, { "epoch": 0.87, "grad_norm": 1.1993838548660278, "learning_rate": 4.6094308431249567e-07, "loss": 0.4768, "step": 13422 }, { "epoch": 0.87, "grad_norm": 1.1942938566207886, "learning_rate": 4.605048002548268e-07, "loss": 0.4978, "step": 13423 }, { "epoch": 0.87, "grad_norm": 1.0909136533737183, "learning_rate": 4.600667146067811e-07, "loss": 0.5213, "step": 13424 }, { "epoch": 0.87, "grad_norm": 1.2351185083389282, "learning_rate": 4.59628827387506e-07, "loss": 0.5275, "step": 13425 }, { "epoch": 0.87, "grad_norm": 1.1436622142791748, "learning_rate": 4.5919113861614185e-07, "loss": 0.4929, "step": 13426 }, { "epoch": 0.87, "grad_norm": 1.1430509090423584, "learning_rate": 4.5875364831181654e-07, "loss": 0.531, "step": 13427 }, { "epoch": 0.87, "grad_norm": 1.2516363859176636, "learning_rate": 4.583163564936527e-07, "loss": 0.525, "step": 13428 }, { "epoch": 0.87, "grad_norm": 1.1840845346450806, "learning_rate": 4.5787926318076323e-07, "loss": 0.5552, "step": 13429 }, { "epoch": 0.87, "grad_norm": 1.0922118425369263, "learning_rate": 4.5744236839225186e-07, "loss": 0.472, "step": 13430 }, { "epoch": 0.87, "grad_norm": 1.2119165658950806, "learning_rate": 4.5700567214721545e-07, "loss": 0.5472, "step": 13431 }, { "epoch": 0.87, "grad_norm": 1.1605602502822876, "learning_rate": 4.565691744647405e-07, "loss": 0.5086, "step": 13432 }, { "epoch": 0.87, "grad_norm": 1.0807965993881226, "learning_rate": 4.561328753639038e-07, "loss": 0.4663, "step": 13433 }, { "epoch": 0.87, "grad_norm": 1.2102335691452026, "learning_rate": 4.5569677486377586e-07, "loss": 0.5203, "step": 13434 }, { "epoch": 0.87, "grad_norm": 1.1119290590286255, "learning_rate": 4.552608729834174e-07, "loss": 0.4897, "step": 13435 }, { "epoch": 0.87, "grad_norm": 1.1768238544464111, "learning_rate": 4.5482516974188043e-07, "loss": 0.4838, "step": 13436 }, { "epoch": 0.87, "grad_norm": 1.2679917812347412, "learning_rate": 4.543896651582086e-07, "loss": 0.5518, "step": 13437 }, { "epoch": 0.87, "grad_norm": 1.2301539182662964, "learning_rate": 4.539543592514378e-07, "loss": 0.5436, "step": 13438 }, { "epoch": 0.87, "grad_norm": 1.2274912595748901, "learning_rate": 4.5351925204059176e-07, "loss": 0.5262, "step": 13439 }, { "epoch": 0.87, "grad_norm": 1.139882206916809, "learning_rate": 4.530843435446897e-07, "loss": 0.523, "step": 13440 }, { "epoch": 0.87, "grad_norm": 1.1249737739562988, "learning_rate": 4.526496337827391e-07, "loss": 0.4941, "step": 13441 }, { "epoch": 0.87, "grad_norm": 1.158082365989685, "learning_rate": 4.5221512277374146e-07, "loss": 0.4828, "step": 13442 }, { "epoch": 0.87, "grad_norm": 1.0664944648742676, "learning_rate": 4.5178081053668776e-07, "loss": 0.4891, "step": 13443 }, { "epoch": 0.87, "grad_norm": 1.2246958017349243, "learning_rate": 4.5134669709055943e-07, "loss": 0.4935, "step": 13444 }, { "epoch": 0.87, "grad_norm": 1.175700068473816, "learning_rate": 4.5091278245433136e-07, "loss": 0.5136, "step": 13445 }, { "epoch": 0.87, "grad_norm": 1.3662097454071045, "learning_rate": 4.5047906664696884e-07, "loss": 0.507, "step": 13446 }, { "epoch": 0.87, "grad_norm": 1.2716223001480103, "learning_rate": 4.5004554968742784e-07, "loss": 0.5387, "step": 13447 }, { "epoch": 0.87, "grad_norm": 1.1776188611984253, "learning_rate": 4.4961223159465774e-07, "loss": 0.5085, "step": 13448 }, { "epoch": 0.87, "grad_norm": 1.2405524253845215, "learning_rate": 4.4917911238759715e-07, "loss": 0.5023, "step": 13449 }, { "epoch": 0.87, "grad_norm": 1.1352359056472778, "learning_rate": 4.4874619208517546e-07, "loss": 0.4901, "step": 13450 }, { "epoch": 0.87, "grad_norm": 1.135846734046936, "learning_rate": 4.4831347070631527e-07, "loss": 0.4877, "step": 13451 }, { "epoch": 0.87, "grad_norm": 1.1485638618469238, "learning_rate": 4.4788094826992977e-07, "loss": 0.4822, "step": 13452 }, { "epoch": 0.87, "grad_norm": 1.253798007965088, "learning_rate": 4.474486247949239e-07, "loss": 0.4988, "step": 13453 }, { "epoch": 0.87, "grad_norm": 1.2600109577178955, "learning_rate": 4.470165003001925e-07, "loss": 0.5065, "step": 13454 }, { "epoch": 0.87, "grad_norm": 1.0930639505386353, "learning_rate": 4.465845748046238e-07, "loss": 0.5357, "step": 13455 }, { "epoch": 0.87, "grad_norm": 1.3022667169570923, "learning_rate": 4.461528483270944e-07, "loss": 0.5151, "step": 13456 }, { "epoch": 0.87, "grad_norm": 1.243973731994629, "learning_rate": 4.457213208864752e-07, "loss": 0.5478, "step": 13457 }, { "epoch": 0.87, "grad_norm": 1.191149115562439, "learning_rate": 4.4528999250162684e-07, "loss": 0.5387, "step": 13458 }, { "epoch": 0.87, "grad_norm": 1.314581036567688, "learning_rate": 4.4485886319140137e-07, "loss": 0.5338, "step": 13459 }, { "epoch": 0.87, "grad_norm": 1.161492109298706, "learning_rate": 4.4442793297464257e-07, "loss": 0.5133, "step": 13460 }, { "epoch": 0.87, "grad_norm": 1.2410633563995361, "learning_rate": 4.4399720187018656e-07, "loss": 0.5327, "step": 13461 }, { "epoch": 0.87, "grad_norm": 1.260724425315857, "learning_rate": 4.4356666989685715e-07, "loss": 0.4624, "step": 13462 }, { "epoch": 0.87, "grad_norm": 1.2286975383758545, "learning_rate": 4.431363370734726e-07, "loss": 0.5102, "step": 13463 }, { "epoch": 0.87, "grad_norm": 1.2234550714492798, "learning_rate": 4.427062034188423e-07, "loss": 0.5135, "step": 13464 }, { "epoch": 0.87, "grad_norm": 1.3203493356704712, "learning_rate": 4.422762689517657e-07, "loss": 0.5712, "step": 13465 }, { "epoch": 0.87, "grad_norm": 1.2598862648010254, "learning_rate": 4.418465336910344e-07, "loss": 0.5167, "step": 13466 }, { "epoch": 0.87, "grad_norm": 1.1781028509140015, "learning_rate": 4.4141699765543166e-07, "loss": 0.4687, "step": 13467 }, { "epoch": 0.87, "grad_norm": 1.1543866395950317, "learning_rate": 4.409876608637298e-07, "loss": 0.4758, "step": 13468 }, { "epoch": 0.87, "grad_norm": 1.1704339981079102, "learning_rate": 4.4055852333469486e-07, "loss": 0.4789, "step": 13469 }, { "epoch": 0.87, "grad_norm": 1.242016315460205, "learning_rate": 4.401295850870829e-07, "loss": 0.4994, "step": 13470 }, { "epoch": 0.87, "grad_norm": 1.1751784086227417, "learning_rate": 4.397008461396429e-07, "loss": 0.4801, "step": 13471 }, { "epoch": 0.87, "grad_norm": 1.09357488155365, "learning_rate": 4.3927230651111264e-07, "loss": 0.4619, "step": 13472 }, { "epoch": 0.87, "grad_norm": 1.3201922178268433, "learning_rate": 4.388439662202243e-07, "loss": 0.524, "step": 13473 }, { "epoch": 0.87, "grad_norm": 1.2726768255233765, "learning_rate": 4.3841582528569736e-07, "loss": 0.542, "step": 13474 }, { "epoch": 0.87, "grad_norm": 1.1844794750213623, "learning_rate": 4.379878837262452e-07, "loss": 0.5044, "step": 13475 }, { "epoch": 0.87, "grad_norm": 1.207722783088684, "learning_rate": 4.375601415605729e-07, "loss": 0.4702, "step": 13476 }, { "epoch": 0.87, "grad_norm": 1.0772745609283447, "learning_rate": 4.3713259880737545e-07, "loss": 0.487, "step": 13477 }, { "epoch": 0.87, "grad_norm": 1.169793963432312, "learning_rate": 4.367052554853396e-07, "loss": 0.5119, "step": 13478 }, { "epoch": 0.87, "grad_norm": 1.1543288230895996, "learning_rate": 4.362781116131448e-07, "loss": 0.533, "step": 13479 }, { "epoch": 0.87, "grad_norm": 1.2034837007522583, "learning_rate": 4.358511672094584e-07, "loss": 0.5581, "step": 13480 }, { "epoch": 0.87, "grad_norm": 1.08714759349823, "learning_rate": 4.354244222929416e-07, "loss": 0.4982, "step": 13481 }, { "epoch": 0.87, "grad_norm": 1.1187154054641724, "learning_rate": 4.3499787688224605e-07, "loss": 0.5127, "step": 13482 }, { "epoch": 0.87, "grad_norm": 1.159059762954712, "learning_rate": 4.3457153099601577e-07, "loss": 0.4862, "step": 13483 }, { "epoch": 0.87, "grad_norm": 1.2566851377487183, "learning_rate": 4.341453846528859e-07, "loss": 0.5574, "step": 13484 }, { "epoch": 0.87, "grad_norm": 1.203255295753479, "learning_rate": 4.337194378714804e-07, "loss": 0.4747, "step": 13485 }, { "epoch": 0.87, "grad_norm": 1.1557658910751343, "learning_rate": 4.3329369067041713e-07, "loss": 0.5133, "step": 13486 }, { "epoch": 0.87, "grad_norm": 1.2172025442123413, "learning_rate": 4.328681430683046e-07, "loss": 0.4867, "step": 13487 }, { "epoch": 0.87, "grad_norm": 1.2433580160140991, "learning_rate": 4.3244279508374175e-07, "loss": 0.5118, "step": 13488 }, { "epoch": 0.87, "grad_norm": 1.221097469329834, "learning_rate": 4.3201764673531985e-07, "loss": 0.4878, "step": 13489 }, { "epoch": 0.87, "grad_norm": 1.1664372682571411, "learning_rate": 4.3159269804162183e-07, "loss": 0.4852, "step": 13490 }, { "epoch": 0.87, "grad_norm": 1.2453529834747314, "learning_rate": 4.3116794902122007e-07, "loss": 0.5391, "step": 13491 }, { "epoch": 0.87, "grad_norm": 1.3084722757339478, "learning_rate": 4.307433996926791e-07, "loss": 0.4904, "step": 13492 }, { "epoch": 0.87, "grad_norm": 1.180437445640564, "learning_rate": 4.303190500745552e-07, "loss": 0.5047, "step": 13493 }, { "epoch": 0.87, "grad_norm": 1.2235872745513916, "learning_rate": 4.298949001853969e-07, "loss": 0.4874, "step": 13494 }, { "epoch": 0.87, "grad_norm": 1.2165518999099731, "learning_rate": 4.2947095004373993e-07, "loss": 0.5327, "step": 13495 }, { "epoch": 0.87, "grad_norm": 1.069720983505249, "learning_rate": 4.2904719966811613e-07, "loss": 0.4728, "step": 13496 }, { "epoch": 0.87, "grad_norm": 1.1395440101623535, "learning_rate": 4.286236490770462e-07, "loss": 0.4632, "step": 13497 }, { "epoch": 0.87, "grad_norm": 1.2299500703811646, "learning_rate": 4.2820029828904206e-07, "loss": 0.5357, "step": 13498 }, { "epoch": 0.87, "grad_norm": 1.3136649131774902, "learning_rate": 4.2777714732260834e-07, "loss": 0.5659, "step": 13499 }, { "epoch": 0.87, "grad_norm": 1.1323069334030151, "learning_rate": 4.2735419619623795e-07, "loss": 0.4991, "step": 13500 }, { "epoch": 0.87, "grad_norm": 1.1659973859786987, "learning_rate": 4.2693144492841844e-07, "loss": 0.5128, "step": 13501 }, { "epoch": 0.87, "grad_norm": 1.196520447731018, "learning_rate": 4.265088935376266e-07, "loss": 0.509, "step": 13502 }, { "epoch": 0.87, "grad_norm": 1.2189562320709229, "learning_rate": 4.2608654204233214e-07, "loss": 0.5432, "step": 13503 }, { "epoch": 0.87, "grad_norm": 1.1698744297027588, "learning_rate": 4.256643904609931e-07, "loss": 0.5595, "step": 13504 }, { "epoch": 0.87, "grad_norm": 1.30955171585083, "learning_rate": 4.2524243881206183e-07, "loss": 0.5073, "step": 13505 }, { "epoch": 0.87, "grad_norm": 1.1588507890701294, "learning_rate": 4.2482068711398037e-07, "loss": 0.5578, "step": 13506 }, { "epoch": 0.87, "grad_norm": 1.2830028533935547, "learning_rate": 4.243991353851823e-07, "loss": 0.4973, "step": 13507 }, { "epoch": 0.87, "grad_norm": 1.0880497694015503, "learning_rate": 4.2397778364409393e-07, "loss": 0.4593, "step": 13508 }, { "epoch": 0.87, "grad_norm": 1.2488362789154053, "learning_rate": 4.2355663190913e-07, "loss": 0.5001, "step": 13509 }, { "epoch": 0.87, "grad_norm": 1.1566100120544434, "learning_rate": 4.2313568019869743e-07, "loss": 0.4825, "step": 13510 }, { "epoch": 0.87, "grad_norm": 1.0369919538497925, "learning_rate": 4.2271492853119653e-07, "loss": 0.5083, "step": 13511 }, { "epoch": 0.87, "grad_norm": 1.2328622341156006, "learning_rate": 4.2229437692501593e-07, "loss": 0.5303, "step": 13512 }, { "epoch": 0.87, "grad_norm": 1.140038251876831, "learning_rate": 4.218740253985376e-07, "loss": 0.5172, "step": 13513 }, { "epoch": 0.87, "grad_norm": 1.2310678958892822, "learning_rate": 4.2145387397013513e-07, "loss": 0.4942, "step": 13514 }, { "epoch": 0.87, "grad_norm": 1.1438895463943481, "learning_rate": 4.2103392265817e-07, "loss": 0.5501, "step": 13515 }, { "epoch": 0.87, "grad_norm": 1.2378205060958862, "learning_rate": 4.2061417148099803e-07, "loss": 0.5027, "step": 13516 }, { "epoch": 0.87, "grad_norm": 1.2164286375045776, "learning_rate": 4.201946204569657e-07, "loss": 0.52, "step": 13517 }, { "epoch": 0.87, "grad_norm": 1.1355171203613281, "learning_rate": 4.1977526960441053e-07, "loss": 0.526, "step": 13518 }, { "epoch": 0.87, "grad_norm": 1.2501243352890015, "learning_rate": 4.1935611894166116e-07, "loss": 0.5368, "step": 13519 }, { "epoch": 0.87, "grad_norm": 1.1996676921844482, "learning_rate": 4.1893716848703856e-07, "loss": 0.4834, "step": 13520 }, { "epoch": 0.87, "grad_norm": 1.1957204341888428, "learning_rate": 4.185184182588525e-07, "loss": 0.4803, "step": 13521 }, { "epoch": 0.87, "grad_norm": 1.2563315629959106, "learning_rate": 4.1809986827540563e-07, "loss": 0.4656, "step": 13522 }, { "epoch": 0.87, "grad_norm": 1.3376275300979614, "learning_rate": 4.1768151855499205e-07, "loss": 0.5299, "step": 13523 }, { "epoch": 0.87, "grad_norm": 1.1094673871994019, "learning_rate": 4.172633691158967e-07, "loss": 0.4942, "step": 13524 }, { "epoch": 0.87, "grad_norm": 1.4053292274475098, "learning_rate": 4.168454199763966e-07, "loss": 0.5482, "step": 13525 }, { "epoch": 0.87, "grad_norm": 1.2394968271255493, "learning_rate": 4.1642767115475827e-07, "loss": 0.5831, "step": 13526 }, { "epoch": 0.87, "grad_norm": 1.166232705116272, "learning_rate": 4.160101226692398e-07, "loss": 0.5121, "step": 13527 }, { "epoch": 0.87, "grad_norm": 1.1156126260757446, "learning_rate": 4.1559277453809275e-07, "loss": 0.419, "step": 13528 }, { "epoch": 0.87, "grad_norm": 1.2987704277038574, "learning_rate": 4.1517562677955704e-07, "loss": 0.5117, "step": 13529 }, { "epoch": 0.87, "grad_norm": 1.1641013622283936, "learning_rate": 4.1475867941186576e-07, "loss": 0.5331, "step": 13530 }, { "epoch": 0.87, "grad_norm": 1.1855624914169312, "learning_rate": 4.1434193245324385e-07, "loss": 0.5478, "step": 13531 }, { "epoch": 0.87, "grad_norm": 1.1727274656295776, "learning_rate": 4.139253859219039e-07, "loss": 0.5054, "step": 13532 }, { "epoch": 0.87, "grad_norm": 1.2161448001861572, "learning_rate": 4.135090398360525e-07, "loss": 0.5184, "step": 13533 }, { "epoch": 0.87, "grad_norm": 1.1935186386108398, "learning_rate": 4.1309289421388844e-07, "loss": 0.4925, "step": 13534 }, { "epoch": 0.87, "grad_norm": 1.2453522682189941, "learning_rate": 4.1267694907359934e-07, "loss": 0.5098, "step": 13535 }, { "epoch": 0.87, "grad_norm": 1.2733134031295776, "learning_rate": 4.122612044333651e-07, "loss": 0.5369, "step": 13536 }, { "epoch": 0.87, "grad_norm": 1.204177975654602, "learning_rate": 4.11845660311358e-07, "loss": 0.5366, "step": 13537 }, { "epoch": 0.87, "grad_norm": 1.2097707986831665, "learning_rate": 4.1143031672573884e-07, "loss": 0.5387, "step": 13538 }, { "epoch": 0.87, "grad_norm": 1.2087887525558472, "learning_rate": 4.110151736946616e-07, "loss": 0.5082, "step": 13539 }, { "epoch": 0.87, "grad_norm": 1.1935982704162598, "learning_rate": 4.1060023123627166e-07, "loss": 0.48, "step": 13540 }, { "epoch": 0.87, "grad_norm": 1.1489313840866089, "learning_rate": 4.101854893687046e-07, "loss": 0.5116, "step": 13541 }, { "epoch": 0.87, "grad_norm": 1.1586779356002808, "learning_rate": 4.097709481100881e-07, "loss": 0.4771, "step": 13542 }, { "epoch": 0.87, "grad_norm": 1.2990249395370483, "learning_rate": 4.0935660747854157e-07, "loss": 0.5359, "step": 13543 }, { "epoch": 0.87, "grad_norm": 1.2001036405563354, "learning_rate": 4.0894246749217216e-07, "loss": 0.5596, "step": 13544 }, { "epoch": 0.87, "grad_norm": 1.1787053346633911, "learning_rate": 4.0852852816908316e-07, "loss": 0.5391, "step": 13545 }, { "epoch": 0.87, "grad_norm": 1.153523564338684, "learning_rate": 4.081147895273657e-07, "loss": 0.5181, "step": 13546 }, { "epoch": 0.87, "grad_norm": 1.324127197265625, "learning_rate": 4.0770125158510364e-07, "loss": 0.5639, "step": 13547 }, { "epoch": 0.87, "grad_norm": 1.2577461004257202, "learning_rate": 4.0728791436037143e-07, "loss": 0.5879, "step": 13548 }, { "epoch": 0.87, "grad_norm": 1.1624962091445923, "learning_rate": 4.068747778712362e-07, "loss": 0.5212, "step": 13549 }, { "epoch": 0.87, "grad_norm": 1.136658787727356, "learning_rate": 4.0646184213575303e-07, "loss": 0.569, "step": 13550 }, { "epoch": 0.87, "grad_norm": 1.1852269172668457, "learning_rate": 4.0604910717197077e-07, "loss": 0.4986, "step": 13551 }, { "epoch": 0.87, "grad_norm": 1.290738821029663, "learning_rate": 4.0563657299793004e-07, "loss": 0.5788, "step": 13552 }, { "epoch": 0.87, "grad_norm": 1.186819314956665, "learning_rate": 4.0522423963166137e-07, "loss": 0.5036, "step": 13553 }, { "epoch": 0.87, "grad_norm": 1.2285012006759644, "learning_rate": 4.048121070911859e-07, "loss": 0.5445, "step": 13554 }, { "epoch": 0.87, "grad_norm": 1.16264009475708, "learning_rate": 4.044001753945187e-07, "loss": 0.4923, "step": 13555 }, { "epoch": 0.88, "grad_norm": 1.2286561727523804, "learning_rate": 4.03988444559662e-07, "loss": 0.4882, "step": 13556 }, { "epoch": 0.88, "grad_norm": 1.2223690748214722, "learning_rate": 4.035769146046126e-07, "loss": 0.5033, "step": 13557 }, { "epoch": 0.88, "grad_norm": 1.2400261163711548, "learning_rate": 4.031655855473571e-07, "loss": 0.5375, "step": 13558 }, { "epoch": 0.88, "grad_norm": 1.2280542850494385, "learning_rate": 4.0275445740587447e-07, "loss": 0.4991, "step": 13559 }, { "epoch": 0.88, "grad_norm": 1.3973908424377441, "learning_rate": 4.0234353019813264e-07, "loss": 0.5525, "step": 13560 }, { "epoch": 0.88, "grad_norm": 1.2368475198745728, "learning_rate": 4.0193280394209445e-07, "loss": 0.5241, "step": 13561 }, { "epoch": 0.88, "grad_norm": 1.2517296075820923, "learning_rate": 4.0152227865570934e-07, "loss": 0.5319, "step": 13562 }, { "epoch": 0.88, "grad_norm": 1.329067587852478, "learning_rate": 4.0111195435692085e-07, "loss": 0.5332, "step": 13563 }, { "epoch": 0.88, "grad_norm": 1.144036889076233, "learning_rate": 4.00701831063664e-07, "loss": 0.5322, "step": 13564 }, { "epoch": 0.88, "grad_norm": 1.236920952796936, "learning_rate": 4.0029190879386334e-07, "loss": 0.5188, "step": 13565 }, { "epoch": 0.88, "grad_norm": 1.191750168800354, "learning_rate": 3.9988218756543684e-07, "loss": 0.513, "step": 13566 }, { "epoch": 0.88, "grad_norm": 1.203543782234192, "learning_rate": 3.9947266739629067e-07, "loss": 0.5436, "step": 13567 }, { "epoch": 0.88, "grad_norm": 1.1750085353851318, "learning_rate": 3.990633483043249e-07, "loss": 0.4815, "step": 13568 }, { "epoch": 0.88, "grad_norm": 1.032132863998413, "learning_rate": 3.986542303074298e-07, "loss": 0.4645, "step": 13569 }, { "epoch": 0.88, "grad_norm": 1.1262633800506592, "learning_rate": 3.9824531342348593e-07, "loss": 0.4903, "step": 13570 }, { "epoch": 0.88, "grad_norm": 1.1460121870040894, "learning_rate": 3.978365976703674e-07, "loss": 0.5032, "step": 13571 }, { "epoch": 0.88, "grad_norm": 1.2330594062805176, "learning_rate": 3.974280830659377e-07, "loss": 0.5331, "step": 13572 }, { "epoch": 0.88, "grad_norm": 1.1660574674606323, "learning_rate": 3.970197696280514e-07, "loss": 0.5212, "step": 13573 }, { "epoch": 0.88, "grad_norm": 1.1148889064788818, "learning_rate": 3.966116573745549e-07, "loss": 0.4827, "step": 13574 }, { "epoch": 0.88, "grad_norm": 1.1029987335205078, "learning_rate": 3.9620374632328595e-07, "loss": 0.4975, "step": 13575 }, { "epoch": 0.88, "grad_norm": 1.1922855377197266, "learning_rate": 3.957960364920732e-07, "loss": 0.562, "step": 13576 }, { "epoch": 0.88, "grad_norm": 1.1910290718078613, "learning_rate": 3.9538852789873684e-07, "loss": 0.5564, "step": 13577 }, { "epoch": 0.88, "grad_norm": 1.1383068561553955, "learning_rate": 3.949812205610887e-07, "loss": 0.4743, "step": 13578 }, { "epoch": 0.88, "grad_norm": 1.093475580215454, "learning_rate": 3.9457411449692894e-07, "loss": 0.4703, "step": 13579 }, { "epoch": 0.88, "grad_norm": 1.3012025356292725, "learning_rate": 3.9416720972405343e-07, "loss": 0.5047, "step": 13580 }, { "epoch": 0.88, "grad_norm": 1.1232019662857056, "learning_rate": 3.937605062602451e-07, "loss": 0.5307, "step": 13581 }, { "epoch": 0.88, "grad_norm": 1.1799182891845703, "learning_rate": 3.933540041232814e-07, "loss": 0.5113, "step": 13582 }, { "epoch": 0.88, "grad_norm": 1.1806554794311523, "learning_rate": 3.929477033309292e-07, "loss": 0.5108, "step": 13583 }, { "epoch": 0.88, "grad_norm": 1.3325737714767456, "learning_rate": 3.925416039009461e-07, "loss": 0.4902, "step": 13584 }, { "epoch": 0.88, "grad_norm": 1.149518609046936, "learning_rate": 3.921357058510822e-07, "loss": 0.4996, "step": 13585 }, { "epoch": 0.88, "grad_norm": 1.2175174951553345, "learning_rate": 3.917300091990778e-07, "loss": 0.503, "step": 13586 }, { "epoch": 0.88, "grad_norm": 1.1091883182525635, "learning_rate": 3.9132451396266533e-07, "loss": 0.4759, "step": 13587 }, { "epoch": 0.88, "grad_norm": 1.3754966259002686, "learning_rate": 3.909192201595685e-07, "loss": 0.5465, "step": 13588 }, { "epoch": 0.88, "grad_norm": 1.2283835411071777, "learning_rate": 3.905141278075003e-07, "loss": 0.5133, "step": 13589 }, { "epoch": 0.88, "grad_norm": 1.2834558486938477, "learning_rate": 3.9010923692416713e-07, "loss": 0.5576, "step": 13590 }, { "epoch": 0.88, "grad_norm": 1.089769721031189, "learning_rate": 3.897045475272654e-07, "loss": 0.5137, "step": 13591 }, { "epoch": 0.88, "grad_norm": 1.1603809595108032, "learning_rate": 3.893000596344837e-07, "loss": 0.4858, "step": 13592 }, { "epoch": 0.88, "grad_norm": 1.2086448669433594, "learning_rate": 3.8889577326350016e-07, "loss": 0.5429, "step": 13593 }, { "epoch": 0.88, "grad_norm": 1.2109618186950684, "learning_rate": 3.884916884319856e-07, "loss": 0.5056, "step": 13594 }, { "epoch": 0.88, "grad_norm": 1.344431757926941, "learning_rate": 3.880878051576009e-07, "loss": 0.5511, "step": 13595 }, { "epoch": 0.88, "grad_norm": 1.2826755046844482, "learning_rate": 3.876841234579998e-07, "loss": 0.5526, "step": 13596 }, { "epoch": 0.88, "grad_norm": 1.1532702445983887, "learning_rate": 3.872806433508269e-07, "loss": 0.4999, "step": 13597 }, { "epoch": 0.88, "grad_norm": 1.2110278606414795, "learning_rate": 3.86877364853715e-07, "loss": 0.5259, "step": 13598 }, { "epoch": 0.88, "grad_norm": 1.2151485681533813, "learning_rate": 3.8647428798429086e-07, "loss": 0.5654, "step": 13599 }, { "epoch": 0.88, "grad_norm": 1.1835545301437378, "learning_rate": 3.860714127601734e-07, "loss": 0.488, "step": 13600 }, { "epoch": 0.88, "grad_norm": 1.2129106521606445, "learning_rate": 3.8566873919896996e-07, "loss": 0.5023, "step": 13601 }, { "epoch": 0.88, "grad_norm": 1.2466188669204712, "learning_rate": 3.8526626731828165e-07, "loss": 0.5403, "step": 13602 }, { "epoch": 0.88, "grad_norm": 1.152394413948059, "learning_rate": 3.8486399713569766e-07, "loss": 0.5112, "step": 13603 }, { "epoch": 0.88, "grad_norm": 1.2010045051574707, "learning_rate": 3.844619286688017e-07, "loss": 0.5418, "step": 13604 }, { "epoch": 0.88, "grad_norm": 1.114688515663147, "learning_rate": 3.8406006193516585e-07, "loss": 0.5209, "step": 13605 }, { "epoch": 0.88, "grad_norm": 1.1733810901641846, "learning_rate": 3.836583969523561e-07, "loss": 0.4929, "step": 13606 }, { "epoch": 0.88, "grad_norm": 1.2268848419189453, "learning_rate": 3.8325693373792716e-07, "loss": 0.521, "step": 13607 }, { "epoch": 0.88, "grad_norm": 1.125911831855774, "learning_rate": 3.8285567230942733e-07, "loss": 0.5344, "step": 13608 }, { "epoch": 0.88, "grad_norm": 1.1505047082901, "learning_rate": 3.8245461268439255e-07, "loss": 0.4989, "step": 13609 }, { "epoch": 0.88, "grad_norm": 1.203421711921692, "learning_rate": 3.8205375488035323e-07, "loss": 0.5052, "step": 13610 }, { "epoch": 0.88, "grad_norm": 1.1686850786209106, "learning_rate": 3.8165309891482983e-07, "loss": 0.5161, "step": 13611 }, { "epoch": 0.88, "grad_norm": 1.1969554424285889, "learning_rate": 3.8125264480533443e-07, "loss": 0.4448, "step": 13612 }, { "epoch": 0.88, "grad_norm": 1.2295433282852173, "learning_rate": 3.808523925693697e-07, "loss": 0.5037, "step": 13613 }, { "epoch": 0.88, "grad_norm": 1.2060754299163818, "learning_rate": 3.8045234222442886e-07, "loss": 0.5671, "step": 13614 }, { "epoch": 0.88, "grad_norm": 1.3105688095092773, "learning_rate": 3.8005249378799735e-07, "loss": 0.5567, "step": 13615 }, { "epoch": 0.88, "grad_norm": 1.2162383794784546, "learning_rate": 3.7965284727755125e-07, "loss": 0.5197, "step": 13616 }, { "epoch": 0.88, "grad_norm": 1.1171780824661255, "learning_rate": 3.7925340271055933e-07, "loss": 0.521, "step": 13617 }, { "epoch": 0.88, "grad_norm": 1.1567567586898804, "learning_rate": 3.788541601044793e-07, "loss": 0.5475, "step": 13618 }, { "epoch": 0.88, "grad_norm": 1.0971366167068481, "learning_rate": 3.784551194767616e-07, "loss": 0.5073, "step": 13619 }, { "epoch": 0.88, "grad_norm": 1.272931694984436, "learning_rate": 3.780562808448468e-07, "loss": 0.5038, "step": 13620 }, { "epoch": 0.88, "grad_norm": 1.2253326177597046, "learning_rate": 3.7765764422616646e-07, "loss": 0.5004, "step": 13621 }, { "epoch": 0.88, "grad_norm": 1.222263216972351, "learning_rate": 3.77259209638145e-07, "loss": 0.5302, "step": 13622 }, { "epoch": 0.88, "grad_norm": 1.1324801445007324, "learning_rate": 3.768609770981968e-07, "loss": 0.4697, "step": 13623 }, { "epoch": 0.88, "grad_norm": 1.177712082862854, "learning_rate": 3.764629466237274e-07, "loss": 0.5498, "step": 13624 }, { "epoch": 0.88, "grad_norm": 1.2387759685516357, "learning_rate": 3.7606511823213454e-07, "loss": 0.5341, "step": 13625 }, { "epoch": 0.88, "grad_norm": 1.1787749528884888, "learning_rate": 3.7566749194080434e-07, "loss": 0.5402, "step": 13626 }, { "epoch": 0.88, "grad_norm": 1.277891993522644, "learning_rate": 3.7527006776711727e-07, "loss": 0.4745, "step": 13627 }, { "epoch": 0.88, "grad_norm": 1.1710929870605469, "learning_rate": 3.748728457284434e-07, "loss": 0.5025, "step": 13628 }, { "epoch": 0.88, "grad_norm": 1.1479541063308716, "learning_rate": 3.7447582584214493e-07, "loss": 0.499, "step": 13629 }, { "epoch": 0.88, "grad_norm": 1.2807998657226562, "learning_rate": 3.740790081255735e-07, "loss": 0.4997, "step": 13630 }, { "epoch": 0.88, "grad_norm": 1.2050421237945557, "learning_rate": 3.736823925960753e-07, "loss": 0.5244, "step": 13631 }, { "epoch": 0.88, "grad_norm": 1.1552059650421143, "learning_rate": 3.73285979270982e-07, "loss": 0.5238, "step": 13632 }, { "epoch": 0.88, "grad_norm": 1.1434671878814697, "learning_rate": 3.728897681676219e-07, "loss": 0.5344, "step": 13633 }, { "epoch": 0.88, "grad_norm": 1.1064001321792603, "learning_rate": 3.7249375930331175e-07, "loss": 0.4924, "step": 13634 }, { "epoch": 0.88, "grad_norm": 1.1864560842514038, "learning_rate": 3.7209795269536e-07, "loss": 0.5075, "step": 13635 }, { "epoch": 0.88, "grad_norm": 1.1064401865005493, "learning_rate": 3.7170234836106714e-07, "loss": 0.4681, "step": 13636 }, { "epoch": 0.88, "grad_norm": 1.1632475852966309, "learning_rate": 3.713069463177238e-07, "loss": 0.4883, "step": 13637 }, { "epoch": 0.88, "grad_norm": 1.1746221780776978, "learning_rate": 3.709117465826112e-07, "loss": 0.5011, "step": 13638 }, { "epoch": 0.88, "grad_norm": 1.1629835367202759, "learning_rate": 3.7051674917300275e-07, "loss": 0.4841, "step": 13639 }, { "epoch": 0.88, "grad_norm": 1.1856032609939575, "learning_rate": 3.701219541061629e-07, "loss": 0.5259, "step": 13640 }, { "epoch": 0.88, "grad_norm": 1.201904058456421, "learning_rate": 3.697273613993474e-07, "loss": 0.5358, "step": 13641 }, { "epoch": 0.88, "grad_norm": 1.175506353378296, "learning_rate": 3.6933297106980294e-07, "loss": 0.5232, "step": 13642 }, { "epoch": 0.88, "grad_norm": 1.2123281955718994, "learning_rate": 3.689387831347674e-07, "loss": 0.5178, "step": 13643 }, { "epoch": 0.88, "grad_norm": 1.1990325450897217, "learning_rate": 3.6854479761146866e-07, "loss": 0.5013, "step": 13644 }, { "epoch": 0.88, "grad_norm": 1.2588768005371094, "learning_rate": 3.6815101451712743e-07, "loss": 0.5013, "step": 13645 }, { "epoch": 0.88, "grad_norm": 1.1905690431594849, "learning_rate": 3.677574338689549e-07, "loss": 0.4614, "step": 13646 }, { "epoch": 0.88, "grad_norm": 1.173228144645691, "learning_rate": 3.67364055684154e-07, "loss": 0.5085, "step": 13647 }, { "epoch": 0.88, "grad_norm": 1.1626923084259033, "learning_rate": 3.6697087997991767e-07, "loss": 0.476, "step": 13648 }, { "epoch": 0.88, "grad_norm": 1.089497447013855, "learning_rate": 3.665779067734315e-07, "loss": 0.5052, "step": 13649 }, { "epoch": 0.88, "grad_norm": 1.0954272747039795, "learning_rate": 3.6618513608186966e-07, "loss": 0.4612, "step": 13650 }, { "epoch": 0.88, "grad_norm": 1.1685599088668823, "learning_rate": 3.657925679224006e-07, "loss": 0.5034, "step": 13651 }, { "epoch": 0.88, "grad_norm": 1.241445541381836, "learning_rate": 3.6540020231218164e-07, "loss": 0.5123, "step": 13652 }, { "epoch": 0.88, "grad_norm": 1.148032546043396, "learning_rate": 3.650080392683625e-07, "loss": 0.5336, "step": 13653 }, { "epoch": 0.88, "grad_norm": 1.2717469930648804, "learning_rate": 3.6461607880808437e-07, "loss": 0.5277, "step": 13654 }, { "epoch": 0.88, "grad_norm": 1.2945181131362915, "learning_rate": 3.642243209484775e-07, "loss": 0.5715, "step": 13655 }, { "epoch": 0.88, "grad_norm": 1.1190528869628906, "learning_rate": 3.6383276570666484e-07, "loss": 0.4688, "step": 13656 }, { "epoch": 0.88, "grad_norm": 1.1019346714019775, "learning_rate": 3.634414130997599e-07, "loss": 0.4749, "step": 13657 }, { "epoch": 0.88, "grad_norm": 1.2366602420806885, "learning_rate": 3.63050263144869e-07, "loss": 0.5068, "step": 13658 }, { "epoch": 0.88, "grad_norm": 1.1756333112716675, "learning_rate": 3.6265931585908744e-07, "loss": 0.5325, "step": 13659 }, { "epoch": 0.88, "grad_norm": 1.15007483959198, "learning_rate": 3.6226857125950366e-07, "loss": 0.4798, "step": 13660 }, { "epoch": 0.88, "grad_norm": 1.1882210969924927, "learning_rate": 3.618780293631941e-07, "loss": 0.504, "step": 13661 }, { "epoch": 0.88, "grad_norm": 1.1830345392227173, "learning_rate": 3.614876901872294e-07, "loss": 0.5225, "step": 13662 }, { "epoch": 0.88, "grad_norm": 1.1601831912994385, "learning_rate": 3.610975537486705e-07, "loss": 0.4792, "step": 13663 }, { "epoch": 0.88, "grad_norm": 1.2168598175048828, "learning_rate": 3.6070762006456874e-07, "loss": 0.5253, "step": 13664 }, { "epoch": 0.88, "grad_norm": 1.316890835762024, "learning_rate": 3.603178891519671e-07, "loss": 0.5182, "step": 13665 }, { "epoch": 0.88, "grad_norm": 1.1887203454971313, "learning_rate": 3.5992836102790143e-07, "loss": 0.4884, "step": 13666 }, { "epoch": 0.88, "grad_norm": 1.322113275527954, "learning_rate": 3.5953903570939417e-07, "loss": 0.5215, "step": 13667 }, { "epoch": 0.88, "grad_norm": 1.1496193408966064, "learning_rate": 3.591499132134635e-07, "loss": 0.5074, "step": 13668 }, { "epoch": 0.88, "grad_norm": 1.1945703029632568, "learning_rate": 3.5876099355711625e-07, "loss": 0.5084, "step": 13669 }, { "epoch": 0.88, "grad_norm": 1.245119571685791, "learning_rate": 3.5837227675735164e-07, "loss": 0.5171, "step": 13670 }, { "epoch": 0.88, "grad_norm": 1.3209314346313477, "learning_rate": 3.579837628311594e-07, "loss": 0.5172, "step": 13671 }, { "epoch": 0.88, "grad_norm": 1.3447445631027222, "learning_rate": 3.57595451795521e-07, "loss": 0.4971, "step": 13672 }, { "epoch": 0.88, "grad_norm": 1.1049996614456177, "learning_rate": 3.5720734366740675e-07, "loss": 0.5204, "step": 13673 }, { "epoch": 0.88, "grad_norm": 1.1619460582733154, "learning_rate": 3.568194384637813e-07, "loss": 0.4814, "step": 13674 }, { "epoch": 0.88, "grad_norm": 1.2057257890701294, "learning_rate": 3.564317362015984e-07, "loss": 0.5214, "step": 13675 }, { "epoch": 0.88, "grad_norm": 1.1981713771820068, "learning_rate": 3.5604423689780396e-07, "loss": 0.5337, "step": 13676 }, { "epoch": 0.88, "grad_norm": 1.1384906768798828, "learning_rate": 3.5565694056933496e-07, "loss": 0.4774, "step": 13677 }, { "epoch": 0.88, "grad_norm": 1.2420971393585205, "learning_rate": 3.5526984723311787e-07, "loss": 0.5495, "step": 13678 }, { "epoch": 0.88, "grad_norm": 1.1921236515045166, "learning_rate": 3.5488295690607187e-07, "loss": 0.4664, "step": 13679 }, { "epoch": 0.88, "grad_norm": 1.1415058374404907, "learning_rate": 3.544962696051074e-07, "loss": 0.4946, "step": 13680 }, { "epoch": 0.88, "grad_norm": 1.2712664604187012, "learning_rate": 3.541097853471265e-07, "loss": 0.5207, "step": 13681 }, { "epoch": 0.88, "grad_norm": 1.1793938875198364, "learning_rate": 3.537235041490195e-07, "loss": 0.544, "step": 13682 }, { "epoch": 0.88, "grad_norm": 1.2434804439544678, "learning_rate": 3.533374260276706e-07, "loss": 0.5065, "step": 13683 }, { "epoch": 0.88, "grad_norm": 1.1148592233657837, "learning_rate": 3.529515509999537e-07, "loss": 0.4795, "step": 13684 }, { "epoch": 0.88, "grad_norm": 1.1689112186431885, "learning_rate": 3.5256587908273576e-07, "loss": 0.4693, "step": 13685 }, { "epoch": 0.88, "grad_norm": 1.2455896139144897, "learning_rate": 3.5218041029287273e-07, "loss": 0.5377, "step": 13686 }, { "epoch": 0.88, "grad_norm": 1.1948429346084595, "learning_rate": 3.517951446472123e-07, "loss": 0.5239, "step": 13687 }, { "epoch": 0.88, "grad_norm": 1.192997694015503, "learning_rate": 3.514100821625932e-07, "loss": 0.5139, "step": 13688 }, { "epoch": 0.88, "grad_norm": 1.1709786653518677, "learning_rate": 3.510252228558458e-07, "loss": 0.5311, "step": 13689 }, { "epoch": 0.88, "grad_norm": 1.1467210054397583, "learning_rate": 3.5064056674379176e-07, "loss": 0.5128, "step": 13690 }, { "epoch": 0.88, "grad_norm": 1.1531795263290405, "learning_rate": 3.5025611384324364e-07, "loss": 0.5243, "step": 13691 }, { "epoch": 0.88, "grad_norm": 1.172487735748291, "learning_rate": 3.498718641710036e-07, "loss": 0.5109, "step": 13692 }, { "epoch": 0.88, "grad_norm": 1.2280433177947998, "learning_rate": 3.494878177438665e-07, "loss": 0.546, "step": 13693 }, { "epoch": 0.88, "grad_norm": 1.0971863269805908, "learning_rate": 3.49103974578619e-07, "loss": 0.4924, "step": 13694 }, { "epoch": 0.88, "grad_norm": 1.178505301475525, "learning_rate": 3.487203346920376e-07, "loss": 0.5495, "step": 13695 }, { "epoch": 0.88, "grad_norm": 1.1633095741271973, "learning_rate": 3.4833689810088944e-07, "loss": 0.5314, "step": 13696 }, { "epoch": 0.88, "grad_norm": 1.1219666004180908, "learning_rate": 3.479536648219339e-07, "loss": 0.5033, "step": 13697 }, { "epoch": 0.88, "grad_norm": 1.2004536390304565, "learning_rate": 3.47570634871921e-07, "loss": 0.5537, "step": 13698 }, { "epoch": 0.88, "grad_norm": 1.292019248008728, "learning_rate": 3.4718780826759223e-07, "loss": 0.5528, "step": 13699 }, { "epoch": 0.88, "grad_norm": 1.2106884717941284, "learning_rate": 3.4680518502568037e-07, "loss": 0.4912, "step": 13700 }, { "epoch": 0.88, "grad_norm": 1.2503970861434937, "learning_rate": 3.4642276516290876e-07, "loss": 0.5206, "step": 13701 }, { "epoch": 0.88, "grad_norm": 1.4223641157150269, "learning_rate": 3.460405486959911e-07, "loss": 0.5419, "step": 13702 }, { "epoch": 0.88, "grad_norm": 1.152565836906433, "learning_rate": 3.456585356416342e-07, "loss": 0.535, "step": 13703 }, { "epoch": 0.88, "grad_norm": 1.2502801418304443, "learning_rate": 3.452767260165335e-07, "loss": 0.5326, "step": 13704 }, { "epoch": 0.88, "grad_norm": 1.1876312494277954, "learning_rate": 3.4489511983737847e-07, "loss": 0.4697, "step": 13705 }, { "epoch": 0.88, "grad_norm": 1.3211103677749634, "learning_rate": 3.445137171208468e-07, "loss": 0.5544, "step": 13706 }, { "epoch": 0.88, "grad_norm": 1.257440209388733, "learning_rate": 3.4413251788361024e-07, "loss": 0.4854, "step": 13707 }, { "epoch": 0.88, "grad_norm": 1.1734845638275146, "learning_rate": 3.4375152214232875e-07, "loss": 0.5102, "step": 13708 }, { "epoch": 0.88, "grad_norm": 1.1359611749649048, "learning_rate": 3.433707299136546e-07, "loss": 0.5172, "step": 13709 }, { "epoch": 0.88, "grad_norm": 1.1317307949066162, "learning_rate": 3.429901412142311e-07, "loss": 0.4715, "step": 13710 }, { "epoch": 0.89, "grad_norm": 1.1608623266220093, "learning_rate": 3.426097560606939e-07, "loss": 0.5334, "step": 13711 }, { "epoch": 0.89, "grad_norm": 1.165963888168335, "learning_rate": 3.4222957446966797e-07, "loss": 0.5766, "step": 13712 }, { "epoch": 0.89, "grad_norm": 1.155942440032959, "learning_rate": 3.418495964577706e-07, "loss": 0.5032, "step": 13713 }, { "epoch": 0.89, "grad_norm": 1.085985541343689, "learning_rate": 3.414698220416085e-07, "loss": 0.4744, "step": 13714 }, { "epoch": 0.89, "grad_norm": 1.2356982231140137, "learning_rate": 3.4109025123778174e-07, "loss": 0.5193, "step": 13715 }, { "epoch": 0.89, "grad_norm": 1.1600008010864258, "learning_rate": 3.4071088406287924e-07, "loss": 0.4519, "step": 13716 }, { "epoch": 0.89, "grad_norm": 1.1850653886795044, "learning_rate": 3.403317205334833e-07, "loss": 0.5088, "step": 13717 }, { "epoch": 0.89, "grad_norm": 1.0859107971191406, "learning_rate": 3.3995276066616566e-07, "loss": 0.4994, "step": 13718 }, { "epoch": 0.89, "grad_norm": 1.1288479566574097, "learning_rate": 3.3957400447749035e-07, "loss": 0.4642, "step": 13719 }, { "epoch": 0.89, "grad_norm": 1.0922387838363647, "learning_rate": 3.391954519840107e-07, "loss": 0.5179, "step": 13720 }, { "epoch": 0.89, "grad_norm": 1.0503617525100708, "learning_rate": 3.38817103202273e-07, "loss": 0.4879, "step": 13721 }, { "epoch": 0.89, "grad_norm": 1.1235381364822388, "learning_rate": 3.3843895814881346e-07, "loss": 0.4969, "step": 13722 }, { "epoch": 0.89, "grad_norm": 1.2159080505371094, "learning_rate": 3.380610168401599e-07, "loss": 0.5371, "step": 13723 }, { "epoch": 0.89, "grad_norm": 1.2244144678115845, "learning_rate": 3.3768327929283197e-07, "loss": 0.5071, "step": 13724 }, { "epoch": 0.89, "grad_norm": 1.2317641973495483, "learning_rate": 3.3730574552333917e-07, "loss": 0.5383, "step": 13725 }, { "epoch": 0.89, "grad_norm": 1.2136590480804443, "learning_rate": 3.369284155481817e-07, "loss": 0.5258, "step": 13726 }, { "epoch": 0.89, "grad_norm": 1.147668480873108, "learning_rate": 3.365512893838524e-07, "loss": 0.5349, "step": 13727 }, { "epoch": 0.89, "grad_norm": 1.064252257347107, "learning_rate": 3.3617436704683424e-07, "loss": 0.4443, "step": 13728 }, { "epoch": 0.89, "grad_norm": 1.0950183868408203, "learning_rate": 3.357976485536013e-07, "loss": 0.4751, "step": 13729 }, { "epoch": 0.89, "grad_norm": 1.168601155281067, "learning_rate": 3.3542113392061984e-07, "loss": 0.5718, "step": 13730 }, { "epoch": 0.89, "grad_norm": 1.2018147706985474, "learning_rate": 3.350448231643466e-07, "loss": 0.5162, "step": 13731 }, { "epoch": 0.89, "grad_norm": 1.3310881853103638, "learning_rate": 3.3466871630122743e-07, "loss": 0.5422, "step": 13732 }, { "epoch": 0.89, "grad_norm": 1.1607375144958496, "learning_rate": 3.3429281334770194e-07, "loss": 0.5321, "step": 13733 }, { "epoch": 0.89, "grad_norm": 1.1259928941726685, "learning_rate": 3.3391711432020024e-07, "loss": 0.5113, "step": 13734 }, { "epoch": 0.89, "grad_norm": 1.2823522090911865, "learning_rate": 3.335416192351426e-07, "loss": 0.5146, "step": 13735 }, { "epoch": 0.89, "grad_norm": 1.2014738321304321, "learning_rate": 3.331663281089409e-07, "loss": 0.5087, "step": 13736 }, { "epoch": 0.89, "grad_norm": 1.1751209497451782, "learning_rate": 3.3279124095799977e-07, "loss": 0.5348, "step": 13737 }, { "epoch": 0.89, "grad_norm": 1.193255066871643, "learning_rate": 3.324163577987105e-07, "loss": 0.5001, "step": 13738 }, { "epoch": 0.89, "grad_norm": 1.224603533744812, "learning_rate": 3.3204167864746007e-07, "loss": 0.5083, "step": 13739 }, { "epoch": 0.89, "grad_norm": 1.1024659872055054, "learning_rate": 3.316672035206242e-07, "loss": 0.4664, "step": 13740 }, { "epoch": 0.89, "grad_norm": 1.0817946195602417, "learning_rate": 3.3129293243457093e-07, "loss": 0.4559, "step": 13741 }, { "epoch": 0.89, "grad_norm": 1.253654956817627, "learning_rate": 3.3091886540565833e-07, "loss": 0.5071, "step": 13742 }, { "epoch": 0.89, "grad_norm": 1.160882592201233, "learning_rate": 3.3054500245023547e-07, "loss": 0.4575, "step": 13743 }, { "epoch": 0.89, "grad_norm": 1.1780587434768677, "learning_rate": 3.3017134358464263e-07, "loss": 0.5186, "step": 13744 }, { "epoch": 0.89, "grad_norm": 1.1363427639007568, "learning_rate": 3.2979788882521234e-07, "loss": 0.546, "step": 13745 }, { "epoch": 0.89, "grad_norm": 1.140117883682251, "learning_rate": 3.294246381882671e-07, "loss": 0.4991, "step": 13746 }, { "epoch": 0.89, "grad_norm": 1.2281562089920044, "learning_rate": 3.2905159169012046e-07, "loss": 0.5346, "step": 13747 }, { "epoch": 0.89, "grad_norm": 1.1364285945892334, "learning_rate": 3.2867874934707833e-07, "loss": 0.4939, "step": 13748 }, { "epoch": 0.89, "grad_norm": 1.2029441595077515, "learning_rate": 3.2830611117543543e-07, "loss": 0.4942, "step": 13749 }, { "epoch": 0.89, "grad_norm": 1.2206429243087769, "learning_rate": 3.2793367719147926e-07, "loss": 0.5334, "step": 13750 }, { "epoch": 0.89, "grad_norm": 1.1677557229995728, "learning_rate": 3.2756144741148745e-07, "loss": 0.4779, "step": 13751 }, { "epoch": 0.89, "grad_norm": 1.2200592756271362, "learning_rate": 3.271894218517302e-07, "loss": 0.5484, "step": 13752 }, { "epoch": 0.89, "grad_norm": 1.3026880025863647, "learning_rate": 3.2681760052846734e-07, "loss": 0.5197, "step": 13753 }, { "epoch": 0.89, "grad_norm": 1.2043896913528442, "learning_rate": 3.2644598345795085e-07, "loss": 0.5008, "step": 13754 }, { "epoch": 0.89, "grad_norm": 1.1767544746398926, "learning_rate": 3.260745706564217e-07, "loss": 0.4832, "step": 13755 }, { "epoch": 0.89, "grad_norm": 1.2324004173278809, "learning_rate": 3.25703362140114e-07, "loss": 0.5126, "step": 13756 }, { "epoch": 0.89, "grad_norm": 1.1786056756973267, "learning_rate": 3.253323579252526e-07, "loss": 0.5036, "step": 13757 }, { "epoch": 0.89, "grad_norm": 1.279882550239563, "learning_rate": 3.2496155802805294e-07, "loss": 0.5168, "step": 13758 }, { "epoch": 0.89, "grad_norm": 1.0981446504592896, "learning_rate": 3.24590962464722e-07, "loss": 0.5227, "step": 13759 }, { "epoch": 0.89, "grad_norm": 1.1316688060760498, "learning_rate": 3.242205712514579e-07, "loss": 0.4982, "step": 13760 }, { "epoch": 0.89, "grad_norm": 1.2215815782546997, "learning_rate": 3.238503844044488e-07, "loss": 0.5283, "step": 13761 }, { "epoch": 0.89, "grad_norm": 1.1731927394866943, "learning_rate": 3.2348040193987407e-07, "loss": 0.5122, "step": 13762 }, { "epoch": 0.89, "grad_norm": 1.1469234228134155, "learning_rate": 3.2311062387390567e-07, "loss": 0.5034, "step": 13763 }, { "epoch": 0.89, "grad_norm": 1.1431243419647217, "learning_rate": 3.2274105022270575e-07, "loss": 0.4847, "step": 13764 }, { "epoch": 0.89, "grad_norm": 1.1542823314666748, "learning_rate": 3.2237168100242633e-07, "loss": 0.5093, "step": 13765 }, { "epoch": 0.89, "grad_norm": 1.2023483514785767, "learning_rate": 3.2200251622921343e-07, "loss": 0.5077, "step": 13766 }, { "epoch": 0.89, "grad_norm": 1.2535055875778198, "learning_rate": 3.216335559192002e-07, "loss": 0.5379, "step": 13767 }, { "epoch": 0.89, "grad_norm": 1.1880682706832886, "learning_rate": 3.2126480008851436e-07, "loss": 0.52, "step": 13768 }, { "epoch": 0.89, "grad_norm": 1.1535061597824097, "learning_rate": 3.208962487532724e-07, "loss": 0.497, "step": 13769 }, { "epoch": 0.89, "grad_norm": 1.1605932712554932, "learning_rate": 3.2052790192958317e-07, "loss": 0.528, "step": 13770 }, { "epoch": 0.89, "grad_norm": 1.3095632791519165, "learning_rate": 3.201597596335471e-07, "loss": 0.5325, "step": 13771 }, { "epoch": 0.89, "grad_norm": 1.1884770393371582, "learning_rate": 3.197918218812529e-07, "loss": 0.4672, "step": 13772 }, { "epoch": 0.89, "grad_norm": 1.194132924079895, "learning_rate": 3.1942408868878283e-07, "loss": 0.524, "step": 13773 }, { "epoch": 0.89, "grad_norm": 1.2533149719238281, "learning_rate": 3.190565600722101e-07, "loss": 0.5085, "step": 13774 }, { "epoch": 0.89, "grad_norm": 1.097813606262207, "learning_rate": 3.1868923604759905e-07, "loss": 0.5046, "step": 13775 }, { "epoch": 0.89, "grad_norm": 1.1599901914596558, "learning_rate": 3.1832211663100244e-07, "loss": 0.4815, "step": 13776 }, { "epoch": 0.89, "grad_norm": 1.1025402545928955, "learning_rate": 3.179552018384674e-07, "loss": 0.4788, "step": 13777 }, { "epoch": 0.89, "grad_norm": 1.161708950996399, "learning_rate": 3.1758849168603057e-07, "loss": 0.5229, "step": 13778 }, { "epoch": 0.89, "grad_norm": 1.1704767942428589, "learning_rate": 3.172219861897202e-07, "loss": 0.49, "step": 13779 }, { "epoch": 0.89, "grad_norm": 1.1627309322357178, "learning_rate": 3.1685568536555577e-07, "loss": 0.4939, "step": 13780 }, { "epoch": 0.89, "grad_norm": 1.1849435567855835, "learning_rate": 3.1648958922954555e-07, "loss": 0.5211, "step": 13781 }, { "epoch": 0.89, "grad_norm": 1.162845492362976, "learning_rate": 3.1612369779769224e-07, "loss": 0.5041, "step": 13782 }, { "epoch": 0.89, "grad_norm": 1.1615980863571167, "learning_rate": 3.1575801108598703e-07, "loss": 0.5059, "step": 13783 }, { "epoch": 0.89, "grad_norm": 1.1711314916610718, "learning_rate": 3.1539252911041486e-07, "loss": 0.4959, "step": 13784 }, { "epoch": 0.89, "grad_norm": 1.1869909763336182, "learning_rate": 3.1502725188694796e-07, "loss": 0.5035, "step": 13785 }, { "epoch": 0.89, "grad_norm": 1.1396265029907227, "learning_rate": 3.1466217943155244e-07, "loss": 0.4722, "step": 13786 }, { "epoch": 0.89, "grad_norm": 1.231234073638916, "learning_rate": 3.142973117601844e-07, "loss": 0.5498, "step": 13787 }, { "epoch": 0.89, "grad_norm": 1.2429072856903076, "learning_rate": 3.139326488887917e-07, "loss": 0.5202, "step": 13788 }, { "epoch": 0.89, "grad_norm": 1.2371054887771606, "learning_rate": 3.135681908333138e-07, "loss": 0.5277, "step": 13789 }, { "epoch": 0.89, "grad_norm": 1.1551004648208618, "learning_rate": 3.132039376096785e-07, "loss": 0.5207, "step": 13790 }, { "epoch": 0.89, "grad_norm": 1.2615634202957153, "learning_rate": 3.1283988923380635e-07, "loss": 0.4852, "step": 13791 }, { "epoch": 0.89, "grad_norm": 1.2115908861160278, "learning_rate": 3.124760457216103e-07, "loss": 0.4841, "step": 13792 }, { "epoch": 0.89, "grad_norm": 1.1133205890655518, "learning_rate": 3.1211240708899193e-07, "loss": 0.4891, "step": 13793 }, { "epoch": 0.89, "grad_norm": 1.2265796661376953, "learning_rate": 3.1174897335184526e-07, "loss": 0.5432, "step": 13794 }, { "epoch": 0.89, "grad_norm": 1.2856289148330688, "learning_rate": 3.1138574452605596e-07, "loss": 0.5002, "step": 13795 }, { "epoch": 0.89, "grad_norm": 1.19088876247406, "learning_rate": 3.110227206274985e-07, "loss": 0.465, "step": 13796 }, { "epoch": 0.89, "grad_norm": 1.2014846801757812, "learning_rate": 3.106599016720396e-07, "loss": 0.4648, "step": 13797 }, { "epoch": 0.89, "grad_norm": 1.3709098100662231, "learning_rate": 3.1029728767553834e-07, "loss": 0.496, "step": 13798 }, { "epoch": 0.89, "grad_norm": 1.2193834781646729, "learning_rate": 3.0993487865384256e-07, "loss": 0.561, "step": 13799 }, { "epoch": 0.89, "grad_norm": 1.113492488861084, "learning_rate": 3.0957267462279283e-07, "loss": 0.5379, "step": 13800 }, { "epoch": 0.89, "grad_norm": 1.2320572137832642, "learning_rate": 3.0921067559822106e-07, "loss": 0.544, "step": 13801 }, { "epoch": 0.89, "grad_norm": 1.245546817779541, "learning_rate": 3.0884888159594727e-07, "loss": 0.5266, "step": 13802 }, { "epoch": 0.89, "grad_norm": 1.2421350479125977, "learning_rate": 3.0848729263178557e-07, "loss": 0.5184, "step": 13803 }, { "epoch": 0.89, "grad_norm": 1.191317081451416, "learning_rate": 3.081259087215399e-07, "loss": 0.491, "step": 13804 }, { "epoch": 0.89, "grad_norm": 1.1952438354492188, "learning_rate": 3.07764729881006e-07, "loss": 0.5065, "step": 13805 }, { "epoch": 0.89, "grad_norm": 1.2953370809555054, "learning_rate": 3.074037561259691e-07, "loss": 0.4986, "step": 13806 }, { "epoch": 0.89, "grad_norm": 1.1667869091033936, "learning_rate": 3.0704298747220807e-07, "loss": 0.5125, "step": 13807 }, { "epoch": 0.89, "grad_norm": 1.2986326217651367, "learning_rate": 3.066824239354893e-07, "loss": 0.5312, "step": 13808 }, { "epoch": 0.89, "grad_norm": 1.1714547872543335, "learning_rate": 3.06322065531573e-07, "loss": 0.5296, "step": 13809 }, { "epoch": 0.89, "grad_norm": 1.4063915014266968, "learning_rate": 3.059619122762092e-07, "loss": 0.532, "step": 13810 }, { "epoch": 0.89, "grad_norm": 1.1564927101135254, "learning_rate": 3.0560196418513934e-07, "loss": 0.5052, "step": 13811 }, { "epoch": 0.89, "grad_norm": 1.1094261407852173, "learning_rate": 3.0524222127409574e-07, "loss": 0.4817, "step": 13812 }, { "epoch": 0.89, "grad_norm": 1.1426383256912231, "learning_rate": 3.0488268355880315e-07, "loss": 0.4666, "step": 13813 }, { "epoch": 0.89, "grad_norm": 1.2208348512649536, "learning_rate": 3.045233510549739e-07, "loss": 0.5047, "step": 13814 }, { "epoch": 0.89, "grad_norm": 1.2213382720947266, "learning_rate": 3.0416422377831434e-07, "loss": 0.4739, "step": 13815 }, { "epoch": 0.89, "grad_norm": 1.1674158573150635, "learning_rate": 3.038053017445214e-07, "loss": 0.5165, "step": 13816 }, { "epoch": 0.89, "grad_norm": 1.178635835647583, "learning_rate": 3.034465849692825e-07, "loss": 0.5122, "step": 13817 }, { "epoch": 0.89, "grad_norm": 1.1725940704345703, "learning_rate": 3.030880734682762e-07, "loss": 0.5267, "step": 13818 }, { "epoch": 0.89, "grad_norm": 1.2616955041885376, "learning_rate": 3.027297672571722e-07, "loss": 0.5121, "step": 13819 }, { "epoch": 0.89, "grad_norm": 1.1802406311035156, "learning_rate": 3.0237166635163073e-07, "loss": 0.5285, "step": 13820 }, { "epoch": 0.89, "grad_norm": 1.2152776718139648, "learning_rate": 3.020137707673032e-07, "loss": 0.5315, "step": 13821 }, { "epoch": 0.89, "grad_norm": 1.1893444061279297, "learning_rate": 3.016560805198332e-07, "loss": 0.482, "step": 13822 }, { "epoch": 0.89, "grad_norm": 1.15053391456604, "learning_rate": 3.0129859562485373e-07, "loss": 0.4885, "step": 13823 }, { "epoch": 0.89, "grad_norm": 1.154031753540039, "learning_rate": 3.009413160979907e-07, "loss": 0.5299, "step": 13824 }, { "epoch": 0.89, "grad_norm": 1.3414814472198486, "learning_rate": 3.0058424195485827e-07, "loss": 0.5597, "step": 13825 }, { "epoch": 0.89, "grad_norm": 1.2029175758361816, "learning_rate": 3.0022737321106386e-07, "loss": 0.5143, "step": 13826 }, { "epoch": 0.89, "grad_norm": 1.1168544292449951, "learning_rate": 2.998707098822057e-07, "loss": 0.4819, "step": 13827 }, { "epoch": 0.89, "grad_norm": 1.1853668689727783, "learning_rate": 2.995142519838723e-07, "loss": 0.5223, "step": 13828 }, { "epoch": 0.89, "grad_norm": 1.1698604822158813, "learning_rate": 2.991579995316435e-07, "loss": 0.5078, "step": 13829 }, { "epoch": 0.89, "grad_norm": 1.075711965560913, "learning_rate": 2.9880195254109067e-07, "loss": 0.4722, "step": 13830 }, { "epoch": 0.89, "grad_norm": 1.254145860671997, "learning_rate": 2.9844611102777474e-07, "loss": 0.5145, "step": 13831 }, { "epoch": 0.89, "grad_norm": 1.1937569379806519, "learning_rate": 2.980904750072489e-07, "loss": 0.5097, "step": 13832 }, { "epoch": 0.89, "grad_norm": 1.1913727521896362, "learning_rate": 2.977350444950572e-07, "loss": 0.5276, "step": 13833 }, { "epoch": 0.89, "grad_norm": 1.2688734531402588, "learning_rate": 2.973798195067351e-07, "loss": 0.5418, "step": 13834 }, { "epoch": 0.89, "grad_norm": 1.2252488136291504, "learning_rate": 2.97024800057808e-07, "loss": 0.4697, "step": 13835 }, { "epoch": 0.89, "grad_norm": 1.2041293382644653, "learning_rate": 2.9666998616379396e-07, "loss": 0.5067, "step": 13836 }, { "epoch": 0.89, "grad_norm": 1.1942254304885864, "learning_rate": 2.9631537784019893e-07, "loss": 0.4744, "step": 13837 }, { "epoch": 0.89, "grad_norm": 1.2915805578231812, "learning_rate": 2.959609751025233e-07, "loss": 0.5698, "step": 13838 }, { "epoch": 0.89, "grad_norm": 1.1581605672836304, "learning_rate": 2.956067779662569e-07, "loss": 0.5025, "step": 13839 }, { "epoch": 0.89, "grad_norm": 1.1180235147476196, "learning_rate": 2.952527864468807e-07, "loss": 0.4713, "step": 13840 }, { "epoch": 0.89, "grad_norm": 1.172144889831543, "learning_rate": 2.9489900055986675e-07, "loss": 0.5094, "step": 13841 }, { "epoch": 0.89, "grad_norm": 1.1909606456756592, "learning_rate": 2.945454203206793e-07, "loss": 0.4733, "step": 13842 }, { "epoch": 0.89, "grad_norm": 1.1813453435897827, "learning_rate": 2.9419204574477e-07, "loss": 0.5208, "step": 13843 }, { "epoch": 0.89, "grad_norm": 1.230119228363037, "learning_rate": 2.9383887684758525e-07, "loss": 0.5088, "step": 13844 }, { "epoch": 0.89, "grad_norm": 1.2564836740493774, "learning_rate": 2.934859136445617e-07, "loss": 0.5572, "step": 13845 }, { "epoch": 0.89, "grad_norm": 1.2373117208480835, "learning_rate": 2.9313315615112523e-07, "loss": 0.5354, "step": 13846 }, { "epoch": 0.89, "grad_norm": 1.1829209327697754, "learning_rate": 2.927806043826953e-07, "loss": 0.5055, "step": 13847 }, { "epoch": 0.89, "grad_norm": 1.1741943359375, "learning_rate": 2.924282583546806e-07, "loss": 0.5137, "step": 13848 }, { "epoch": 0.89, "grad_norm": 1.0677934885025024, "learning_rate": 2.920761180824805e-07, "loss": 0.4843, "step": 13849 }, { "epoch": 0.89, "grad_norm": 1.1643116474151611, "learning_rate": 2.917241835814866e-07, "loss": 0.4838, "step": 13850 }, { "epoch": 0.89, "grad_norm": 1.1966087818145752, "learning_rate": 2.91372454867081e-07, "loss": 0.5394, "step": 13851 }, { "epoch": 0.89, "grad_norm": 1.1544336080551147, "learning_rate": 2.91020931954637e-07, "loss": 0.5509, "step": 13852 }, { "epoch": 0.89, "grad_norm": 1.1760783195495605, "learning_rate": 2.906696148595189e-07, "loss": 0.4927, "step": 13853 }, { "epoch": 0.89, "grad_norm": 1.275532841682434, "learning_rate": 2.9031850359708236e-07, "loss": 0.5495, "step": 13854 }, { "epoch": 0.89, "grad_norm": 1.1342089176177979, "learning_rate": 2.8996759818267215e-07, "loss": 0.5035, "step": 13855 }, { "epoch": 0.89, "grad_norm": 1.1235759258270264, "learning_rate": 2.896168986316261e-07, "loss": 0.4332, "step": 13856 }, { "epoch": 0.89, "grad_norm": 1.1957019567489624, "learning_rate": 2.892664049592725e-07, "loss": 0.5237, "step": 13857 }, { "epoch": 0.89, "grad_norm": 1.1071919202804565, "learning_rate": 2.889161171809307e-07, "loss": 0.5073, "step": 13858 }, { "epoch": 0.89, "grad_norm": 1.1627020835876465, "learning_rate": 2.8856603531191074e-07, "loss": 0.4778, "step": 13859 }, { "epoch": 0.89, "grad_norm": 1.1406993865966797, "learning_rate": 2.882161593675142e-07, "loss": 0.4995, "step": 13860 }, { "epoch": 0.89, "grad_norm": 1.0679844617843628, "learning_rate": 2.878664893630317e-07, "loss": 0.452, "step": 13861 }, { "epoch": 0.89, "grad_norm": 1.1357288360595703, "learning_rate": 2.875170253137477e-07, "loss": 0.5029, "step": 13862 }, { "epoch": 0.89, "grad_norm": 1.1775383949279785, "learning_rate": 2.8716776723493655e-07, "loss": 0.4901, "step": 13863 }, { "epoch": 0.89, "grad_norm": 1.0896592140197754, "learning_rate": 2.868187151418633e-07, "loss": 0.4499, "step": 13864 }, { "epoch": 0.89, "grad_norm": 1.2081650495529175, "learning_rate": 2.8646986904978357e-07, "loss": 0.4407, "step": 13865 }, { "epoch": 0.9, "grad_norm": 1.0731788873672485, "learning_rate": 2.8612122897394456e-07, "loss": 0.4893, "step": 13866 }, { "epoch": 0.9, "grad_norm": 1.0816445350646973, "learning_rate": 2.8577279492958464e-07, "loss": 0.4817, "step": 13867 }, { "epoch": 0.9, "grad_norm": 1.2257732152938843, "learning_rate": 2.8542456693193277e-07, "loss": 0.5491, "step": 13868 }, { "epoch": 0.9, "grad_norm": 1.0546021461486816, "learning_rate": 2.850765449962101e-07, "loss": 0.4916, "step": 13869 }, { "epoch": 0.9, "grad_norm": 1.1691370010375977, "learning_rate": 2.8472872913762663e-07, "loss": 0.509, "step": 13870 }, { "epoch": 0.9, "grad_norm": 1.1194766759872437, "learning_rate": 2.8438111937138466e-07, "loss": 0.5057, "step": 13871 }, { "epoch": 0.9, "grad_norm": 1.195917010307312, "learning_rate": 2.840337157126771e-07, "loss": 0.479, "step": 13872 }, { "epoch": 0.9, "grad_norm": 1.1101754903793335, "learning_rate": 2.836865181766896e-07, "loss": 0.5143, "step": 13873 }, { "epoch": 0.9, "grad_norm": 1.2383137941360474, "learning_rate": 2.8333952677859546e-07, "loss": 0.5515, "step": 13874 }, { "epoch": 0.9, "grad_norm": 1.110277771949768, "learning_rate": 2.8299274153356147e-07, "loss": 0.4717, "step": 13875 }, { "epoch": 0.9, "grad_norm": 1.1284366846084595, "learning_rate": 2.8264616245674447e-07, "loss": 0.4879, "step": 13876 }, { "epoch": 0.9, "grad_norm": 1.1098594665527344, "learning_rate": 2.8229978956329285e-07, "loss": 0.5224, "step": 13877 }, { "epoch": 0.9, "grad_norm": 1.2286912202835083, "learning_rate": 2.819536228683467e-07, "loss": 0.5289, "step": 13878 }, { "epoch": 0.9, "grad_norm": 1.2333850860595703, "learning_rate": 2.816076623870334e-07, "loss": 0.5446, "step": 13879 }, { "epoch": 0.9, "grad_norm": 1.1801100969314575, "learning_rate": 2.812619081344764e-07, "loss": 0.5225, "step": 13880 }, { "epoch": 0.9, "grad_norm": 1.1981505155563354, "learning_rate": 2.809163601257864e-07, "loss": 0.5254, "step": 13881 }, { "epoch": 0.9, "grad_norm": 1.2263354063034058, "learning_rate": 2.8057101837606735e-07, "loss": 0.5411, "step": 13882 }, { "epoch": 0.9, "grad_norm": 1.228425145149231, "learning_rate": 2.8022588290041286e-07, "loss": 0.4731, "step": 13883 }, { "epoch": 0.9, "grad_norm": 1.1235860586166382, "learning_rate": 2.79880953713908e-07, "loss": 0.5104, "step": 13884 }, { "epoch": 0.9, "grad_norm": 1.185212254524231, "learning_rate": 2.7953623083162797e-07, "loss": 0.5104, "step": 13885 }, { "epoch": 0.9, "grad_norm": 1.158925175666809, "learning_rate": 2.791917142686401e-07, "loss": 0.4625, "step": 13886 }, { "epoch": 0.9, "grad_norm": 1.0649241209030151, "learning_rate": 2.78847404040003e-07, "loss": 0.4979, "step": 13887 }, { "epoch": 0.9, "grad_norm": 1.2091504335403442, "learning_rate": 2.785033001607651e-07, "loss": 0.4768, "step": 13888 }, { "epoch": 0.9, "grad_norm": 1.1398591995239258, "learning_rate": 2.781594026459672e-07, "loss": 0.4833, "step": 13889 }, { "epoch": 0.9, "grad_norm": 1.1553936004638672, "learning_rate": 2.7781571151063835e-07, "loss": 0.5238, "step": 13890 }, { "epoch": 0.9, "grad_norm": 1.1215113401412964, "learning_rate": 2.77472226769801e-07, "loss": 0.4919, "step": 13891 }, { "epoch": 0.9, "grad_norm": 1.1454012393951416, "learning_rate": 2.771289484384687e-07, "loss": 0.4765, "step": 13892 }, { "epoch": 0.9, "grad_norm": 1.142523169517517, "learning_rate": 2.76785876531645e-07, "loss": 0.5045, "step": 13893 }, { "epoch": 0.9, "grad_norm": 1.2906568050384521, "learning_rate": 2.764430110643246e-07, "loss": 0.5479, "step": 13894 }, { "epoch": 0.9, "grad_norm": 1.1791491508483887, "learning_rate": 2.761003520514938e-07, "loss": 0.5673, "step": 13895 }, { "epoch": 0.9, "grad_norm": 1.1618095636367798, "learning_rate": 2.7575789950812837e-07, "loss": 0.4975, "step": 13896 }, { "epoch": 0.9, "grad_norm": 1.3086804151535034, "learning_rate": 2.7541565344919583e-07, "loss": 0.5742, "step": 13897 }, { "epoch": 0.9, "grad_norm": 1.1505616903305054, "learning_rate": 2.7507361388965593e-07, "loss": 0.4899, "step": 13898 }, { "epoch": 0.9, "grad_norm": 1.2002054452896118, "learning_rate": 2.747317808444577e-07, "loss": 0.5342, "step": 13899 }, { "epoch": 0.9, "grad_norm": 1.1604485511779785, "learning_rate": 2.743901543285421e-07, "loss": 0.5186, "step": 13900 }, { "epoch": 0.9, "grad_norm": 1.131201982498169, "learning_rate": 2.7404873435684154e-07, "loss": 0.406, "step": 13901 }, { "epoch": 0.9, "grad_norm": 1.3774895668029785, "learning_rate": 2.7370752094427687e-07, "loss": 0.5401, "step": 13902 }, { "epoch": 0.9, "grad_norm": 1.1849708557128906, "learning_rate": 2.733665141057623e-07, "loss": 0.5039, "step": 13903 }, { "epoch": 0.9, "grad_norm": 1.1882725954055786, "learning_rate": 2.7302571385620257e-07, "loss": 0.5609, "step": 13904 }, { "epoch": 0.9, "grad_norm": 1.2895601987838745, "learning_rate": 2.72685120210493e-07, "loss": 0.5232, "step": 13905 }, { "epoch": 0.9, "grad_norm": 1.2723398208618164, "learning_rate": 2.723447331835205e-07, "loss": 0.5127, "step": 13906 }, { "epoch": 0.9, "grad_norm": 1.1471062898635864, "learning_rate": 2.720045527901627e-07, "loss": 0.5221, "step": 13907 }, { "epoch": 0.9, "grad_norm": 1.138093113899231, "learning_rate": 2.7166457904528654e-07, "loss": 0.533, "step": 13908 }, { "epoch": 0.9, "grad_norm": 1.2220382690429688, "learning_rate": 2.7132481196375293e-07, "loss": 0.5285, "step": 13909 }, { "epoch": 0.9, "grad_norm": 1.2516745328903198, "learning_rate": 2.7098525156041113e-07, "loss": 0.5541, "step": 13910 }, { "epoch": 0.9, "grad_norm": 1.3485629558563232, "learning_rate": 2.7064589785010307e-07, "loss": 0.5611, "step": 13911 }, { "epoch": 0.9, "grad_norm": 1.0686882734298706, "learning_rate": 2.70306750847662e-07, "loss": 0.4835, "step": 13912 }, { "epoch": 0.9, "grad_norm": 1.1231920719146729, "learning_rate": 2.699678105679093e-07, "loss": 0.5127, "step": 13913 }, { "epoch": 0.9, "grad_norm": 1.072851538658142, "learning_rate": 2.6962907702565933e-07, "loss": 0.4995, "step": 13914 }, { "epoch": 0.9, "grad_norm": 1.2290334701538086, "learning_rate": 2.692905502357185e-07, "loss": 0.4928, "step": 13915 }, { "epoch": 0.9, "grad_norm": 1.2131047248840332, "learning_rate": 2.6895223021288217e-07, "loss": 0.4955, "step": 13916 }, { "epoch": 0.9, "grad_norm": 1.141421914100647, "learning_rate": 2.686141169719375e-07, "loss": 0.4945, "step": 13917 }, { "epoch": 0.9, "grad_norm": 1.1862176656723022, "learning_rate": 2.682762105276637e-07, "loss": 0.5434, "step": 13918 }, { "epoch": 0.9, "grad_norm": 1.0618245601654053, "learning_rate": 2.6793851089482736e-07, "loss": 0.47, "step": 13919 }, { "epoch": 0.9, "grad_norm": 1.1336003541946411, "learning_rate": 2.676010180881905e-07, "loss": 0.4966, "step": 13920 }, { "epoch": 0.9, "grad_norm": 1.2103036642074585, "learning_rate": 2.67263732122503e-07, "loss": 0.5219, "step": 13921 }, { "epoch": 0.9, "grad_norm": 1.2304176092147827, "learning_rate": 2.66926653012507e-07, "loss": 0.4963, "step": 13922 }, { "epoch": 0.9, "grad_norm": 1.0437599420547485, "learning_rate": 2.665897807729362e-07, "loss": 0.4725, "step": 13923 }, { "epoch": 0.9, "grad_norm": 1.1607612371444702, "learning_rate": 2.6625311541851386e-07, "loss": 0.515, "step": 13924 }, { "epoch": 0.9, "grad_norm": 1.2739448547363281, "learning_rate": 2.6591665696395376e-07, "loss": 0.4817, "step": 13925 }, { "epoch": 0.9, "grad_norm": 1.22567880153656, "learning_rate": 2.6558040542396303e-07, "loss": 0.5412, "step": 13926 }, { "epoch": 0.9, "grad_norm": 1.2031540870666504, "learning_rate": 2.6524436081323765e-07, "loss": 0.4759, "step": 13927 }, { "epoch": 0.9, "grad_norm": 1.1912204027175903, "learning_rate": 2.6490852314646486e-07, "loss": 0.5068, "step": 13928 }, { "epoch": 0.9, "grad_norm": 1.1360117197036743, "learning_rate": 2.645728924383245e-07, "loss": 0.4887, "step": 13929 }, { "epoch": 0.9, "grad_norm": 1.3293074369430542, "learning_rate": 2.6423746870348597e-07, "loss": 0.5383, "step": 13930 }, { "epoch": 0.9, "grad_norm": 1.1863974332809448, "learning_rate": 2.6390225195660866e-07, "loss": 0.5038, "step": 13931 }, { "epoch": 0.9, "grad_norm": 1.1420499086380005, "learning_rate": 2.635672422123442e-07, "loss": 0.5254, "step": 13932 }, { "epoch": 0.9, "grad_norm": 1.1796947717666626, "learning_rate": 2.632324394853358e-07, "loss": 0.4599, "step": 13933 }, { "epoch": 0.9, "grad_norm": 1.2671949863433838, "learning_rate": 2.628978437902163e-07, "loss": 0.5336, "step": 13934 }, { "epoch": 0.9, "grad_norm": 1.247153878211975, "learning_rate": 2.6256345514161007e-07, "loss": 0.5246, "step": 13935 }, { "epoch": 0.9, "grad_norm": 1.2549304962158203, "learning_rate": 2.622292735541332e-07, "loss": 0.4708, "step": 13936 }, { "epoch": 0.9, "grad_norm": 1.2447484731674194, "learning_rate": 2.6189529904239064e-07, "loss": 0.5142, "step": 13937 }, { "epoch": 0.9, "grad_norm": 1.1777377128601074, "learning_rate": 2.615615316209802e-07, "loss": 0.5533, "step": 13938 }, { "epoch": 0.9, "grad_norm": 1.337388038635254, "learning_rate": 2.612279713044896e-07, "loss": 0.5477, "step": 13939 }, { "epoch": 0.9, "grad_norm": 1.1553866863250732, "learning_rate": 2.608946181074984e-07, "loss": 0.5452, "step": 13940 }, { "epoch": 0.9, "grad_norm": 1.3111838102340698, "learning_rate": 2.6056147204457595e-07, "loss": 0.4858, "step": 13941 }, { "epoch": 0.9, "grad_norm": 1.1105419397354126, "learning_rate": 2.6022853313028507e-07, "loss": 0.4555, "step": 13942 }, { "epoch": 0.9, "grad_norm": 1.1580286026000977, "learning_rate": 2.598958013791747e-07, "loss": 0.5001, "step": 13943 }, { "epoch": 0.9, "grad_norm": 1.2165169715881348, "learning_rate": 2.5956327680578986e-07, "loss": 0.57, "step": 13944 }, { "epoch": 0.9, "grad_norm": 1.169993281364441, "learning_rate": 2.5923095942466337e-07, "loss": 0.4824, "step": 13945 }, { "epoch": 0.9, "grad_norm": 1.1606519222259521, "learning_rate": 2.588988492503203e-07, "loss": 0.5517, "step": 13946 }, { "epoch": 0.9, "grad_norm": 1.126280426979065, "learning_rate": 2.5856694629727675e-07, "loss": 0.5145, "step": 13947 }, { "epoch": 0.9, "grad_norm": 1.2396581172943115, "learning_rate": 2.5823525058003894e-07, "loss": 0.4834, "step": 13948 }, { "epoch": 0.9, "grad_norm": 1.1773669719696045, "learning_rate": 2.579037621131042e-07, "loss": 0.4845, "step": 13949 }, { "epoch": 0.9, "grad_norm": 1.102213740348816, "learning_rate": 2.5757248091096143e-07, "loss": 0.4835, "step": 13950 }, { "epoch": 0.9, "grad_norm": 1.196169137954712, "learning_rate": 2.572414069880891e-07, "loss": 0.4641, "step": 13951 }, { "epoch": 0.9, "grad_norm": 1.1592867374420166, "learning_rate": 2.569105403589589e-07, "loss": 0.4879, "step": 13952 }, { "epoch": 0.9, "grad_norm": 1.1808096170425415, "learning_rate": 2.5657988103803266e-07, "loss": 0.4453, "step": 13953 }, { "epoch": 0.9, "grad_norm": 1.27729070186615, "learning_rate": 2.5624942903976044e-07, "loss": 0.5214, "step": 13954 }, { "epoch": 0.9, "grad_norm": 1.1587356328964233, "learning_rate": 2.5591918437858686e-07, "loss": 0.4971, "step": 13955 }, { "epoch": 0.9, "grad_norm": 1.2521001100540161, "learning_rate": 2.555891470689459e-07, "loss": 0.5373, "step": 13956 }, { "epoch": 0.9, "grad_norm": 1.2315552234649658, "learning_rate": 2.5525931712526207e-07, "loss": 0.5268, "step": 13957 }, { "epoch": 0.9, "grad_norm": 1.2172292470932007, "learning_rate": 2.549296945619534e-07, "loss": 0.5317, "step": 13958 }, { "epoch": 0.9, "grad_norm": 1.1390666961669922, "learning_rate": 2.546002793934238e-07, "loss": 0.4977, "step": 13959 }, { "epoch": 0.9, "grad_norm": 1.1224958896636963, "learning_rate": 2.5427107163407296e-07, "loss": 0.5235, "step": 13960 }, { "epoch": 0.9, "grad_norm": 1.3042041063308716, "learning_rate": 2.5394207129828986e-07, "loss": 0.5319, "step": 13961 }, { "epoch": 0.9, "grad_norm": 1.1362221240997314, "learning_rate": 2.5361327840045413e-07, "loss": 0.4842, "step": 13962 }, { "epoch": 0.9, "grad_norm": 1.1650757789611816, "learning_rate": 2.532846929549354e-07, "loss": 0.5283, "step": 13963 }, { "epoch": 0.9, "grad_norm": 1.2232553958892822, "learning_rate": 2.529563149760966e-07, "loss": 0.5583, "step": 13964 }, { "epoch": 0.9, "grad_norm": 1.246281385421753, "learning_rate": 2.526281444782891e-07, "loss": 0.4782, "step": 13965 }, { "epoch": 0.9, "grad_norm": 1.1129833459854126, "learning_rate": 2.523001814758574e-07, "loss": 0.4827, "step": 13966 }, { "epoch": 0.9, "grad_norm": 1.2266935110092163, "learning_rate": 2.5197242598313633e-07, "loss": 0.451, "step": 13967 }, { "epoch": 0.9, "grad_norm": 1.1266909837722778, "learning_rate": 2.516448780144493e-07, "loss": 0.4839, "step": 13968 }, { "epoch": 0.9, "grad_norm": 1.201364517211914, "learning_rate": 2.5131753758411425e-07, "loss": 0.4895, "step": 13969 }, { "epoch": 0.9, "grad_norm": 1.3419089317321777, "learning_rate": 2.509904047064371e-07, "loss": 0.5064, "step": 13970 }, { "epoch": 0.9, "grad_norm": 1.1528456211090088, "learning_rate": 2.50663479395718e-07, "loss": 0.5276, "step": 13971 }, { "epoch": 0.9, "grad_norm": 1.1700785160064697, "learning_rate": 2.503367616662439e-07, "loss": 0.5043, "step": 13972 }, { "epoch": 0.9, "grad_norm": 1.1724621057510376, "learning_rate": 2.5001025153229606e-07, "loss": 0.4649, "step": 13973 }, { "epoch": 0.9, "grad_norm": 1.0989794731140137, "learning_rate": 2.4968394900814483e-07, "loss": 0.5, "step": 13974 }, { "epoch": 0.9, "grad_norm": 1.186466097831726, "learning_rate": 2.4935785410805213e-07, "loss": 0.5217, "step": 13975 }, { "epoch": 0.9, "grad_norm": 1.1596019268035889, "learning_rate": 2.490319668462704e-07, "loss": 0.5362, "step": 13976 }, { "epoch": 0.9, "grad_norm": 1.1621594429016113, "learning_rate": 2.487062872370449e-07, "loss": 0.5208, "step": 13977 }, { "epoch": 0.9, "grad_norm": 1.141664981842041, "learning_rate": 2.483808152946082e-07, "loss": 0.4851, "step": 13978 }, { "epoch": 0.9, "grad_norm": 1.0692198276519775, "learning_rate": 2.4805555103318667e-07, "loss": 0.4934, "step": 13979 }, { "epoch": 0.9, "grad_norm": 1.2457890510559082, "learning_rate": 2.4773049446699727e-07, "loss": 0.5178, "step": 13980 }, { "epoch": 0.9, "grad_norm": 1.211946964263916, "learning_rate": 2.4740564561024636e-07, "loss": 0.4874, "step": 13981 }, { "epoch": 0.9, "grad_norm": 1.2954038381576538, "learning_rate": 2.4708100447713324e-07, "loss": 0.5309, "step": 13982 }, { "epoch": 0.9, "grad_norm": 1.2447878122329712, "learning_rate": 2.46756571081847e-07, "loss": 0.5236, "step": 13983 }, { "epoch": 0.9, "grad_norm": 1.1813713312149048, "learning_rate": 2.46432345438567e-07, "loss": 0.5112, "step": 13984 }, { "epoch": 0.9, "grad_norm": 1.249594807624817, "learning_rate": 2.461083275614651e-07, "loss": 0.4874, "step": 13985 }, { "epoch": 0.9, "grad_norm": 1.176855444908142, "learning_rate": 2.4578451746470276e-07, "loss": 0.5683, "step": 13986 }, { "epoch": 0.9, "grad_norm": 1.1637203693389893, "learning_rate": 2.454609151624332e-07, "loss": 0.4912, "step": 13987 }, { "epoch": 0.9, "grad_norm": 1.1837362051010132, "learning_rate": 2.4513752066880005e-07, "loss": 0.493, "step": 13988 }, { "epoch": 0.9, "grad_norm": 1.2102210521697998, "learning_rate": 2.448143339979386e-07, "loss": 0.5353, "step": 13989 }, { "epoch": 0.9, "grad_norm": 1.1559077501296997, "learning_rate": 2.444913551639738e-07, "loss": 0.5436, "step": 13990 }, { "epoch": 0.9, "grad_norm": 1.2089871168136597, "learning_rate": 2.4416858418102263e-07, "loss": 0.5127, "step": 13991 }, { "epoch": 0.9, "grad_norm": 1.3249828815460205, "learning_rate": 2.438460210631921e-07, "loss": 0.4643, "step": 13992 }, { "epoch": 0.9, "grad_norm": 1.1510088443756104, "learning_rate": 2.4352366582458097e-07, "loss": 0.5141, "step": 13993 }, { "epoch": 0.9, "grad_norm": 1.191760540008545, "learning_rate": 2.432015184792791e-07, "loss": 0.5257, "step": 13994 }, { "epoch": 0.9, "grad_norm": 1.3397421836853027, "learning_rate": 2.428795790413657e-07, "loss": 0.5281, "step": 13995 }, { "epoch": 0.9, "grad_norm": 1.1436456441879272, "learning_rate": 2.425578475249124e-07, "loss": 0.4804, "step": 13996 }, { "epoch": 0.9, "grad_norm": 1.174777626991272, "learning_rate": 2.4223632394398125e-07, "loss": 0.499, "step": 13997 }, { "epoch": 0.9, "grad_norm": 1.129786729812622, "learning_rate": 2.419150083126248e-07, "loss": 0.5438, "step": 13998 }, { "epoch": 0.9, "grad_norm": 1.1325609683990479, "learning_rate": 2.415939006448875e-07, "loss": 0.5256, "step": 13999 }, { "epoch": 0.9, "grad_norm": 1.217781662940979, "learning_rate": 2.412730009548048e-07, "loss": 0.4986, "step": 14000 }, { "epoch": 0.9, "grad_norm": 1.148209571838379, "learning_rate": 2.409523092564009e-07, "loss": 0.4835, "step": 14001 }, { "epoch": 0.9, "grad_norm": 1.1189913749694824, "learning_rate": 2.40631825563693e-07, "loss": 0.5036, "step": 14002 }, { "epoch": 0.9, "grad_norm": 1.2130813598632812, "learning_rate": 2.403115498906883e-07, "loss": 0.5368, "step": 14003 }, { "epoch": 0.9, "grad_norm": 1.1651971340179443, "learning_rate": 2.3999148225138603e-07, "loss": 0.5019, "step": 14004 }, { "epoch": 0.9, "grad_norm": 1.2149478197097778, "learning_rate": 2.3967162265977507e-07, "loss": 0.4935, "step": 14005 }, { "epoch": 0.9, "grad_norm": 1.21309232711792, "learning_rate": 2.393519711298364e-07, "loss": 0.5544, "step": 14006 }, { "epoch": 0.9, "grad_norm": 1.1448718309402466, "learning_rate": 2.3903252767553953e-07, "loss": 0.5191, "step": 14007 }, { "epoch": 0.9, "grad_norm": 1.1689789295196533, "learning_rate": 2.3871329231084704e-07, "loss": 0.5227, "step": 14008 }, { "epoch": 0.9, "grad_norm": 1.1773388385772705, "learning_rate": 2.3839426504971286e-07, "loss": 0.5128, "step": 14009 }, { "epoch": 0.9, "grad_norm": 1.158264398574829, "learning_rate": 2.3807544590608022e-07, "loss": 0.511, "step": 14010 }, { "epoch": 0.9, "grad_norm": 1.2531564235687256, "learning_rate": 2.3775683489388357e-07, "loss": 0.5253, "step": 14011 }, { "epoch": 0.9, "grad_norm": 1.129441738128662, "learning_rate": 2.374384320270501e-07, "loss": 0.4947, "step": 14012 }, { "epoch": 0.9, "grad_norm": 1.2915040254592896, "learning_rate": 2.371202373194942e-07, "loss": 0.4935, "step": 14013 }, { "epoch": 0.9, "grad_norm": 1.110861897468567, "learning_rate": 2.368022507851242e-07, "loss": 0.4826, "step": 14014 }, { "epoch": 0.9, "grad_norm": 1.2901906967163086, "learning_rate": 2.3648447243783901e-07, "loss": 0.5322, "step": 14015 }, { "epoch": 0.9, "grad_norm": 1.2134491205215454, "learning_rate": 2.3616690229152694e-07, "loss": 0.5722, "step": 14016 }, { "epoch": 0.9, "grad_norm": 1.122747778892517, "learning_rate": 2.3584954036006914e-07, "loss": 0.4969, "step": 14017 }, { "epoch": 0.9, "grad_norm": 1.1194905042648315, "learning_rate": 2.3553238665733668e-07, "loss": 0.5181, "step": 14018 }, { "epoch": 0.9, "grad_norm": 1.1332672834396362, "learning_rate": 2.3521544119719074e-07, "loss": 0.521, "step": 14019 }, { "epoch": 0.9, "grad_norm": 1.2465568780899048, "learning_rate": 2.3489870399348412e-07, "loss": 0.5403, "step": 14020 }, { "epoch": 0.91, "grad_norm": 1.2743234634399414, "learning_rate": 2.345821750600613e-07, "loss": 0.5254, "step": 14021 }, { "epoch": 0.91, "grad_norm": 1.1443555355072021, "learning_rate": 2.3426585441075678e-07, "loss": 0.4551, "step": 14022 }, { "epoch": 0.91, "grad_norm": 1.2984111309051514, "learning_rate": 2.3394974205939614e-07, "loss": 0.5085, "step": 14023 }, { "epoch": 0.91, "grad_norm": 1.0826183557510376, "learning_rate": 2.3363383801979612e-07, "loss": 0.4558, "step": 14024 }, { "epoch": 0.91, "grad_norm": 1.2671809196472168, "learning_rate": 2.333181423057629e-07, "loss": 0.452, "step": 14025 }, { "epoch": 0.91, "grad_norm": 1.192563533782959, "learning_rate": 2.330026549310954e-07, "loss": 0.517, "step": 14026 }, { "epoch": 0.91, "grad_norm": 1.1045310497283936, "learning_rate": 2.3268737590958269e-07, "loss": 0.4892, "step": 14027 }, { "epoch": 0.91, "grad_norm": 1.1791656017303467, "learning_rate": 2.323723052550053e-07, "loss": 0.5025, "step": 14028 }, { "epoch": 0.91, "grad_norm": 1.1514297723770142, "learning_rate": 2.3205744298113397e-07, "loss": 0.5121, "step": 14029 }, { "epoch": 0.91, "grad_norm": 1.1415702104568481, "learning_rate": 2.3174278910173097e-07, "loss": 0.5144, "step": 14030 }, { "epoch": 0.91, "grad_norm": 1.2374022006988525, "learning_rate": 2.3142834363054755e-07, "loss": 0.4403, "step": 14031 }, { "epoch": 0.91, "grad_norm": 1.113853931427002, "learning_rate": 2.311141065813277e-07, "loss": 0.5337, "step": 14032 }, { "epoch": 0.91, "grad_norm": 1.123853087425232, "learning_rate": 2.3080007796780713e-07, "loss": 0.5044, "step": 14033 }, { "epoch": 0.91, "grad_norm": 1.0640498399734497, "learning_rate": 2.304862578037098e-07, "loss": 0.496, "step": 14034 }, { "epoch": 0.91, "grad_norm": 1.2013016939163208, "learning_rate": 2.3017264610275314e-07, "loss": 0.5174, "step": 14035 }, { "epoch": 0.91, "grad_norm": 1.1405571699142456, "learning_rate": 2.2985924287864448e-07, "loss": 0.5305, "step": 14036 }, { "epoch": 0.91, "grad_norm": 1.1411592960357666, "learning_rate": 2.295460481450801e-07, "loss": 0.4713, "step": 14037 }, { "epoch": 0.91, "grad_norm": 1.1545891761779785, "learning_rate": 2.2923306191575012e-07, "loss": 0.4861, "step": 14038 }, { "epoch": 0.91, "grad_norm": 1.0830408334732056, "learning_rate": 2.2892028420433477e-07, "loss": 0.5289, "step": 14039 }, { "epoch": 0.91, "grad_norm": 1.2621017694473267, "learning_rate": 2.2860771502450363e-07, "loss": 0.5384, "step": 14040 }, { "epoch": 0.91, "grad_norm": 1.217284917831421, "learning_rate": 2.282953543899197e-07, "loss": 0.5175, "step": 14041 }, { "epoch": 0.91, "grad_norm": 1.2369096279144287, "learning_rate": 2.2798320231423422e-07, "loss": 0.5643, "step": 14042 }, { "epoch": 0.91, "grad_norm": 1.1865400075912476, "learning_rate": 2.2767125881109076e-07, "loss": 0.4825, "step": 14043 }, { "epoch": 0.91, "grad_norm": 1.150395393371582, "learning_rate": 2.2735952389412396e-07, "loss": 0.5162, "step": 14044 }, { "epoch": 0.91, "grad_norm": 1.1215182542800903, "learning_rate": 2.2704799757695906e-07, "loss": 0.4804, "step": 14045 }, { "epoch": 0.91, "grad_norm": 1.1720802783966064, "learning_rate": 2.2673667987321124e-07, "loss": 0.4967, "step": 14046 }, { "epoch": 0.91, "grad_norm": 1.2005882263183594, "learning_rate": 2.2642557079648908e-07, "loss": 0.5677, "step": 14047 }, { "epoch": 0.91, "grad_norm": 1.3296570777893066, "learning_rate": 2.2611467036038836e-07, "loss": 0.5712, "step": 14048 }, { "epoch": 0.91, "grad_norm": 1.2765944004058838, "learning_rate": 2.2580397857849878e-07, "loss": 0.5263, "step": 14049 }, { "epoch": 0.91, "grad_norm": 1.1652195453643799, "learning_rate": 2.2549349546439946e-07, "loss": 0.5477, "step": 14050 }, { "epoch": 0.91, "grad_norm": 1.258820652961731, "learning_rate": 2.251832210316618e-07, "loss": 0.5171, "step": 14051 }, { "epoch": 0.91, "grad_norm": 1.2839951515197754, "learning_rate": 2.24873155293846e-07, "loss": 0.4994, "step": 14052 }, { "epoch": 0.91, "grad_norm": 1.1283557415008545, "learning_rate": 2.2456329826450408e-07, "loss": 0.4992, "step": 14053 }, { "epoch": 0.91, "grad_norm": 1.2449977397918701, "learning_rate": 2.2425364995718012e-07, "loss": 0.5344, "step": 14054 }, { "epoch": 0.91, "grad_norm": 1.152259349822998, "learning_rate": 2.2394421038540725e-07, "loss": 0.5754, "step": 14055 }, { "epoch": 0.91, "grad_norm": 1.3238139152526855, "learning_rate": 2.2363497956271073e-07, "loss": 0.5745, "step": 14056 }, { "epoch": 0.91, "grad_norm": 1.2667824029922485, "learning_rate": 2.2332595750260587e-07, "loss": 0.5329, "step": 14057 }, { "epoch": 0.91, "grad_norm": 1.1747971773147583, "learning_rate": 2.2301714421859965e-07, "loss": 0.4873, "step": 14058 }, { "epoch": 0.91, "grad_norm": 1.2007089853286743, "learning_rate": 2.2270853972418904e-07, "loss": 0.4943, "step": 14059 }, { "epoch": 0.91, "grad_norm": 1.1721941232681274, "learning_rate": 2.2240014403286326e-07, "loss": 0.4911, "step": 14060 }, { "epoch": 0.91, "grad_norm": 1.1872365474700928, "learning_rate": 2.220919571581004e-07, "loss": 0.5316, "step": 14061 }, { "epoch": 0.91, "grad_norm": 1.1653409004211426, "learning_rate": 2.217839791133708e-07, "loss": 0.5285, "step": 14062 }, { "epoch": 0.91, "grad_norm": 1.2304251194000244, "learning_rate": 2.2147620991213538e-07, "loss": 0.5226, "step": 14063 }, { "epoch": 0.91, "grad_norm": 1.2122348546981812, "learning_rate": 2.211686495678461e-07, "loss": 0.5158, "step": 14064 }, { "epoch": 0.91, "grad_norm": 1.1761680841445923, "learning_rate": 2.2086129809394618e-07, "loss": 0.522, "step": 14065 }, { "epoch": 0.91, "grad_norm": 1.154533863067627, "learning_rate": 2.2055415550386817e-07, "loss": 0.494, "step": 14066 }, { "epoch": 0.91, "grad_norm": 1.3326045274734497, "learning_rate": 2.202472218110363e-07, "loss": 0.4623, "step": 14067 }, { "epoch": 0.91, "grad_norm": 1.085824728012085, "learning_rate": 2.199404970288671e-07, "loss": 0.4986, "step": 14068 }, { "epoch": 0.91, "grad_norm": 1.2432931661605835, "learning_rate": 2.1963398117076595e-07, "loss": 0.5209, "step": 14069 }, { "epoch": 0.91, "grad_norm": 1.2187492847442627, "learning_rate": 2.1932767425012936e-07, "loss": 0.5112, "step": 14070 }, { "epoch": 0.91, "grad_norm": 1.168251395225525, "learning_rate": 2.1902157628034714e-07, "loss": 0.4839, "step": 14071 }, { "epoch": 0.91, "grad_norm": 1.142080545425415, "learning_rate": 2.1871568727479587e-07, "loss": 0.4603, "step": 14072 }, { "epoch": 0.91, "grad_norm": 1.135210633277893, "learning_rate": 2.1841000724684592e-07, "loss": 0.5289, "step": 14073 }, { "epoch": 0.91, "grad_norm": 1.168790340423584, "learning_rate": 2.1810453620985828e-07, "loss": 0.5041, "step": 14074 }, { "epoch": 0.91, "grad_norm": 1.0859501361846924, "learning_rate": 2.1779927417718338e-07, "loss": 0.4815, "step": 14075 }, { "epoch": 0.91, "grad_norm": 1.1204800605773926, "learning_rate": 2.1749422116216444e-07, "loss": 0.5133, "step": 14076 }, { "epoch": 0.91, "grad_norm": 1.312404751777649, "learning_rate": 2.171893771781347e-07, "loss": 0.5706, "step": 14077 }, { "epoch": 0.91, "grad_norm": 1.0977939367294312, "learning_rate": 2.1688474223841626e-07, "loss": 0.466, "step": 14078 }, { "epoch": 0.91, "grad_norm": 1.1227376461029053, "learning_rate": 2.165803163563257e-07, "loss": 0.4817, "step": 14079 }, { "epoch": 0.91, "grad_norm": 1.1806639432907104, "learning_rate": 2.1627609954516794e-07, "loss": 0.5294, "step": 14080 }, { "epoch": 0.91, "grad_norm": 1.0892856121063232, "learning_rate": 2.1597209181823953e-07, "loss": 0.5346, "step": 14081 }, { "epoch": 0.91, "grad_norm": 1.1397508382797241, "learning_rate": 2.1566829318882876e-07, "loss": 0.4531, "step": 14082 }, { "epoch": 0.91, "grad_norm": 1.3339955806732178, "learning_rate": 2.1536470367021279e-07, "loss": 0.532, "step": 14083 }, { "epoch": 0.91, "grad_norm": 1.2976202964782715, "learning_rate": 2.1506132327566097e-07, "loss": 0.5082, "step": 14084 }, { "epoch": 0.91, "grad_norm": 1.1481022834777832, "learning_rate": 2.147581520184333e-07, "loss": 0.5077, "step": 14085 }, { "epoch": 0.91, "grad_norm": 1.1556059122085571, "learning_rate": 2.1445518991178083e-07, "loss": 0.5157, "step": 14086 }, { "epoch": 0.91, "grad_norm": 1.0937671661376953, "learning_rate": 2.1415243696894462e-07, "loss": 0.5328, "step": 14087 }, { "epoch": 0.91, "grad_norm": 1.176518201828003, "learning_rate": 2.138498932031591e-07, "loss": 0.4975, "step": 14088 }, { "epoch": 0.91, "grad_norm": 1.2302120923995972, "learning_rate": 2.135475586276453e-07, "loss": 0.5553, "step": 14089 }, { "epoch": 0.91, "grad_norm": 1.246689796447754, "learning_rate": 2.132454332556183e-07, "loss": 0.503, "step": 14090 }, { "epoch": 0.91, "grad_norm": 1.205139398574829, "learning_rate": 2.1294351710028415e-07, "loss": 0.5179, "step": 14091 }, { "epoch": 0.91, "grad_norm": 1.2674520015716553, "learning_rate": 2.1264181017483732e-07, "loss": 0.5373, "step": 14092 }, { "epoch": 0.91, "grad_norm": 1.1291388273239136, "learning_rate": 2.1234031249246612e-07, "loss": 0.501, "step": 14093 }, { "epoch": 0.91, "grad_norm": 1.155726432800293, "learning_rate": 2.120390240663478e-07, "loss": 0.5104, "step": 14094 }, { "epoch": 0.91, "grad_norm": 1.0949550867080688, "learning_rate": 2.1173794490964961e-07, "loss": 0.4732, "step": 14095 }, { "epoch": 0.91, "grad_norm": 1.3838505744934082, "learning_rate": 2.1143707503553268e-07, "loss": 0.5262, "step": 14096 }, { "epoch": 0.91, "grad_norm": 1.1312044858932495, "learning_rate": 2.1113641445714594e-07, "loss": 0.5229, "step": 14097 }, { "epoch": 0.91, "grad_norm": 1.1598886251449585, "learning_rate": 2.1083596318763166e-07, "loss": 0.5533, "step": 14098 }, { "epoch": 0.91, "grad_norm": 1.173619031906128, "learning_rate": 2.10535721240121e-07, "loss": 0.502, "step": 14099 }, { "epoch": 0.91, "grad_norm": 1.0996289253234863, "learning_rate": 2.1023568862773735e-07, "loss": 0.4812, "step": 14100 }, { "epoch": 0.91, "grad_norm": 1.054651141166687, "learning_rate": 2.0993586536359357e-07, "loss": 0.5005, "step": 14101 }, { "epoch": 0.91, "grad_norm": 1.20270836353302, "learning_rate": 2.0963625146079468e-07, "loss": 0.4665, "step": 14102 }, { "epoch": 0.91, "grad_norm": 1.221778392791748, "learning_rate": 2.0933684693243583e-07, "loss": 0.4987, "step": 14103 }, { "epoch": 0.91, "grad_norm": 1.4746791124343872, "learning_rate": 2.090376517916032e-07, "loss": 0.4694, "step": 14104 }, { "epoch": 0.91, "grad_norm": 1.18893301486969, "learning_rate": 2.0873866605137404e-07, "loss": 0.5117, "step": 14105 }, { "epoch": 0.91, "grad_norm": 1.2597596645355225, "learning_rate": 2.084398897248169e-07, "loss": 0.5287, "step": 14106 }, { "epoch": 0.91, "grad_norm": 1.2162553071975708, "learning_rate": 2.08141322824989e-07, "loss": 0.4904, "step": 14107 }, { "epoch": 0.91, "grad_norm": 1.087558388710022, "learning_rate": 2.078429653649411e-07, "loss": 0.4867, "step": 14108 }, { "epoch": 0.91, "grad_norm": 1.1884443759918213, "learning_rate": 2.075448173577127e-07, "loss": 0.5178, "step": 14109 }, { "epoch": 0.91, "grad_norm": 1.1659365892410278, "learning_rate": 2.0724687881633566e-07, "loss": 0.5207, "step": 14110 }, { "epoch": 0.91, "grad_norm": 1.112722635269165, "learning_rate": 2.069491497538323e-07, "loss": 0.5072, "step": 14111 }, { "epoch": 0.91, "grad_norm": 1.221673846244812, "learning_rate": 2.066516301832161e-07, "loss": 0.5043, "step": 14112 }, { "epoch": 0.91, "grad_norm": 1.1522833108901978, "learning_rate": 2.0635432011748947e-07, "loss": 0.5056, "step": 14113 }, { "epoch": 0.91, "grad_norm": 1.2892018556594849, "learning_rate": 2.0605721956964808e-07, "loss": 0.4991, "step": 14114 }, { "epoch": 0.91, "grad_norm": 1.1597011089324951, "learning_rate": 2.057603285526766e-07, "loss": 0.505, "step": 14115 }, { "epoch": 0.91, "grad_norm": 1.1462322473526, "learning_rate": 2.054636470795518e-07, "loss": 0.4846, "step": 14116 }, { "epoch": 0.91, "grad_norm": 1.1870702505111694, "learning_rate": 2.0516717516324114e-07, "loss": 0.488, "step": 14117 }, { "epoch": 0.91, "grad_norm": 1.219605565071106, "learning_rate": 2.0487091281670314e-07, "loss": 0.5105, "step": 14118 }, { "epoch": 0.91, "grad_norm": 1.263154149055481, "learning_rate": 2.0457486005288518e-07, "loss": 0.5271, "step": 14119 }, { "epoch": 0.91, "grad_norm": 1.1936160326004028, "learning_rate": 2.042790168847275e-07, "loss": 0.4969, "step": 14120 }, { "epoch": 0.91, "grad_norm": 1.1341558694839478, "learning_rate": 2.0398338332516088e-07, "loss": 0.4691, "step": 14121 }, { "epoch": 0.91, "grad_norm": 1.1710479259490967, "learning_rate": 2.0368795938710716e-07, "loss": 0.5103, "step": 14122 }, { "epoch": 0.91, "grad_norm": 1.065585732460022, "learning_rate": 2.0339274508347827e-07, "loss": 0.5099, "step": 14123 }, { "epoch": 0.91, "grad_norm": 1.292344570159912, "learning_rate": 2.0309774042717668e-07, "loss": 0.5074, "step": 14124 }, { "epoch": 0.91, "grad_norm": 1.281114101409912, "learning_rate": 2.0280294543109647e-07, "loss": 0.4956, "step": 14125 }, { "epoch": 0.91, "grad_norm": 1.3268009424209595, "learning_rate": 2.0250836010812292e-07, "loss": 0.4966, "step": 14126 }, { "epoch": 0.91, "grad_norm": 1.2198272943496704, "learning_rate": 2.0221398447113127e-07, "loss": 0.5249, "step": 14127 }, { "epoch": 0.91, "grad_norm": 1.3312475681304932, "learning_rate": 2.0191981853298735e-07, "loss": 0.4795, "step": 14128 }, { "epoch": 0.91, "grad_norm": 1.2116140127182007, "learning_rate": 2.0162586230655034e-07, "loss": 0.4778, "step": 14129 }, { "epoch": 0.91, "grad_norm": 1.2638696432113647, "learning_rate": 2.0133211580466548e-07, "loss": 0.5588, "step": 14130 }, { "epoch": 0.91, "grad_norm": 1.0839399099349976, "learning_rate": 2.0103857904017365e-07, "loss": 0.4922, "step": 14131 }, { "epoch": 0.91, "grad_norm": 1.1829750537872314, "learning_rate": 2.00745252025904e-07, "loss": 0.4985, "step": 14132 }, { "epoch": 0.91, "grad_norm": 1.2412540912628174, "learning_rate": 2.0045213477467684e-07, "loss": 0.5385, "step": 14133 }, { "epoch": 0.91, "grad_norm": 1.1758965253829956, "learning_rate": 2.0015922729930414e-07, "loss": 0.537, "step": 14134 }, { "epoch": 0.91, "grad_norm": 1.2322065830230713, "learning_rate": 1.9986652961258845e-07, "loss": 0.5377, "step": 14135 }, { "epoch": 0.91, "grad_norm": 1.3352965116500854, "learning_rate": 1.9957404172732176e-07, "loss": 0.5655, "step": 14136 }, { "epoch": 0.91, "grad_norm": 1.2090706825256348, "learning_rate": 1.9928176365628826e-07, "loss": 0.5226, "step": 14137 }, { "epoch": 0.91, "grad_norm": 1.3881269693374634, "learning_rate": 1.9898969541226276e-07, "loss": 0.5301, "step": 14138 }, { "epoch": 0.91, "grad_norm": 1.133984923362732, "learning_rate": 1.9869783700801115e-07, "loss": 0.5157, "step": 14139 }, { "epoch": 0.91, "grad_norm": 1.1382354497909546, "learning_rate": 1.984061884562899e-07, "loss": 0.5145, "step": 14140 }, { "epoch": 0.91, "grad_norm": 1.152904748916626, "learning_rate": 1.9811474976984547e-07, "loss": 0.5051, "step": 14141 }, { "epoch": 0.91, "grad_norm": 1.2306891679763794, "learning_rate": 1.97823520961416e-07, "loss": 0.4913, "step": 14142 }, { "epoch": 0.91, "grad_norm": 1.1792300939559937, "learning_rate": 1.975325020437313e-07, "loss": 0.5139, "step": 14143 }, { "epoch": 0.91, "grad_norm": 1.1582777500152588, "learning_rate": 1.9724169302950958e-07, "loss": 0.4927, "step": 14144 }, { "epoch": 0.91, "grad_norm": 1.1508066654205322, "learning_rate": 1.9695109393146339e-07, "loss": 0.4972, "step": 14145 }, { "epoch": 0.91, "grad_norm": 1.2996517419815063, "learning_rate": 1.9666070476229204e-07, "loss": 0.5703, "step": 14146 }, { "epoch": 0.91, "grad_norm": 1.1548917293548584, "learning_rate": 1.9637052553468817e-07, "loss": 0.4654, "step": 14147 }, { "epoch": 0.91, "grad_norm": 1.2226696014404297, "learning_rate": 1.960805562613355e-07, "loss": 0.475, "step": 14148 }, { "epoch": 0.91, "grad_norm": 1.2166221141815186, "learning_rate": 1.9579079695490832e-07, "loss": 0.4773, "step": 14149 }, { "epoch": 0.91, "grad_norm": 1.2461212873458862, "learning_rate": 1.955012476280693e-07, "loss": 0.5714, "step": 14150 }, { "epoch": 0.91, "grad_norm": 1.17074453830719, "learning_rate": 1.9521190829347502e-07, "loss": 0.5549, "step": 14151 }, { "epoch": 0.91, "grad_norm": 1.2132841348648071, "learning_rate": 1.9492277896377197e-07, "loss": 0.5485, "step": 14152 }, { "epoch": 0.91, "grad_norm": 1.1573076248168945, "learning_rate": 1.9463385965159676e-07, "loss": 0.5239, "step": 14153 }, { "epoch": 0.91, "grad_norm": 1.2188777923583984, "learning_rate": 1.9434515036957813e-07, "loss": 0.5142, "step": 14154 }, { "epoch": 0.91, "grad_norm": 1.120901346206665, "learning_rate": 1.9405665113033379e-07, "loss": 0.4867, "step": 14155 }, { "epoch": 0.91, "grad_norm": 1.1116740703582764, "learning_rate": 1.9376836194647364e-07, "loss": 0.49, "step": 14156 }, { "epoch": 0.91, "grad_norm": 1.2697250843048096, "learning_rate": 1.9348028283059815e-07, "loss": 0.5372, "step": 14157 }, { "epoch": 0.91, "grad_norm": 1.1649622917175293, "learning_rate": 1.9319241379529841e-07, "loss": 0.5286, "step": 14158 }, { "epoch": 0.91, "grad_norm": 1.1841157674789429, "learning_rate": 1.9290475485315708e-07, "loss": 0.5352, "step": 14159 }, { "epoch": 0.91, "grad_norm": 1.1068240404129028, "learning_rate": 1.9261730601674577e-07, "loss": 0.4786, "step": 14160 }, { "epoch": 0.91, "grad_norm": 1.0611878633499146, "learning_rate": 1.9233006729862947e-07, "loss": 0.5082, "step": 14161 }, { "epoch": 0.91, "grad_norm": 1.035322904586792, "learning_rate": 1.9204303871136143e-07, "loss": 0.4988, "step": 14162 }, { "epoch": 0.91, "grad_norm": 1.184883713722229, "learning_rate": 1.917562202674872e-07, "loss": 0.5429, "step": 14163 }, { "epoch": 0.91, "grad_norm": 1.2393707036972046, "learning_rate": 1.9146961197954394e-07, "loss": 0.5423, "step": 14164 }, { "epoch": 0.91, "grad_norm": 1.2137607336044312, "learning_rate": 1.9118321386005778e-07, "loss": 0.4762, "step": 14165 }, { "epoch": 0.91, "grad_norm": 1.2890779972076416, "learning_rate": 1.908970259215459e-07, "loss": 0.5106, "step": 14166 }, { "epoch": 0.91, "grad_norm": 1.1876449584960938, "learning_rate": 1.9061104817651722e-07, "loss": 0.5051, "step": 14167 }, { "epoch": 0.91, "grad_norm": 1.1636055707931519, "learning_rate": 1.903252806374717e-07, "loss": 0.5259, "step": 14168 }, { "epoch": 0.91, "grad_norm": 1.0745221376419067, "learning_rate": 1.900397233168988e-07, "loss": 0.4917, "step": 14169 }, { "epoch": 0.91, "grad_norm": 1.1680817604064941, "learning_rate": 1.897543762272802e-07, "loss": 0.5163, "step": 14170 }, { "epoch": 0.91, "grad_norm": 1.1351087093353271, "learning_rate": 1.8946923938108707e-07, "loss": 0.4966, "step": 14171 }, { "epoch": 0.91, "grad_norm": 1.2374156713485718, "learning_rate": 1.8918431279078165e-07, "loss": 0.5165, "step": 14172 }, { "epoch": 0.91, "grad_norm": 1.3180444240570068, "learning_rate": 1.8889959646881783e-07, "loss": 0.5141, "step": 14173 }, { "epoch": 0.91, "grad_norm": 1.2284008264541626, "learning_rate": 1.8861509042764016e-07, "loss": 0.5098, "step": 14174 }, { "epoch": 0.91, "grad_norm": 1.1807762384414673, "learning_rate": 1.8833079467968307e-07, "loss": 0.5523, "step": 14175 }, { "epoch": 0.92, "grad_norm": 1.388550877571106, "learning_rate": 1.8804670923737333e-07, "loss": 0.5627, "step": 14176 }, { "epoch": 0.92, "grad_norm": 1.2466752529144287, "learning_rate": 1.8776283411312656e-07, "loss": 0.5187, "step": 14177 }, { "epoch": 0.92, "grad_norm": 1.0669373273849487, "learning_rate": 1.8747916931935062e-07, "loss": 0.4898, "step": 14178 }, { "epoch": 0.92, "grad_norm": 1.2963569164276123, "learning_rate": 1.8719571486844335e-07, "loss": 0.5059, "step": 14179 }, { "epoch": 0.92, "grad_norm": 1.1627013683319092, "learning_rate": 1.8691247077279428e-07, "loss": 0.512, "step": 14180 }, { "epoch": 0.92, "grad_norm": 1.3219736814498901, "learning_rate": 1.8662943704478353e-07, "loss": 0.4987, "step": 14181 }, { "epoch": 0.92, "grad_norm": 1.175031304359436, "learning_rate": 1.8634661369678176e-07, "loss": 0.5176, "step": 14182 }, { "epoch": 0.92, "grad_norm": 1.164740800857544, "learning_rate": 1.860640007411496e-07, "loss": 0.5412, "step": 14183 }, { "epoch": 0.92, "grad_norm": 1.3002886772155762, "learning_rate": 1.8578159819023943e-07, "loss": 0.5317, "step": 14184 }, { "epoch": 0.92, "grad_norm": 1.1397050619125366, "learning_rate": 1.8549940605639527e-07, "loss": 0.5015, "step": 14185 }, { "epoch": 0.92, "grad_norm": 1.1544389724731445, "learning_rate": 1.8521742435195057e-07, "loss": 0.4842, "step": 14186 }, { "epoch": 0.92, "grad_norm": 1.1277039051055908, "learning_rate": 1.8493565308922933e-07, "loss": 0.4912, "step": 14187 }, { "epoch": 0.92, "grad_norm": 1.1873699426651, "learning_rate": 1.8465409228054898e-07, "loss": 0.4993, "step": 14188 }, { "epoch": 0.92, "grad_norm": 1.2916053533554077, "learning_rate": 1.8437274193821352e-07, "loss": 0.5069, "step": 14189 }, { "epoch": 0.92, "grad_norm": 1.1545096635818481, "learning_rate": 1.8409160207452092e-07, "loss": 0.5163, "step": 14190 }, { "epoch": 0.92, "grad_norm": 1.164355993270874, "learning_rate": 1.8381067270175913e-07, "loss": 0.4723, "step": 14191 }, { "epoch": 0.92, "grad_norm": 1.207682728767395, "learning_rate": 1.8352995383220716e-07, "loss": 0.4707, "step": 14192 }, { "epoch": 0.92, "grad_norm": 1.3078311681747437, "learning_rate": 1.8324944547813417e-07, "loss": 0.5561, "step": 14193 }, { "epoch": 0.92, "grad_norm": 1.224118709564209, "learning_rate": 1.8296914765180141e-07, "loss": 0.5051, "step": 14194 }, { "epoch": 0.92, "grad_norm": 1.0961976051330566, "learning_rate": 1.8268906036545796e-07, "loss": 0.4706, "step": 14195 }, { "epoch": 0.92, "grad_norm": 1.1552355289459229, "learning_rate": 1.8240918363134742e-07, "loss": 0.4923, "step": 14196 }, { "epoch": 0.92, "grad_norm": 1.2040096521377563, "learning_rate": 1.821295174617016e-07, "loss": 0.5473, "step": 14197 }, { "epoch": 0.92, "grad_norm": 1.2468823194503784, "learning_rate": 1.818500618687441e-07, "loss": 0.5022, "step": 14198 }, { "epoch": 0.92, "grad_norm": 1.2225215435028076, "learning_rate": 1.8157081686468959e-07, "loss": 0.5216, "step": 14199 }, { "epoch": 0.92, "grad_norm": 1.1498790979385376, "learning_rate": 1.8129178246174385e-07, "loss": 0.4884, "step": 14200 }, { "epoch": 0.92, "grad_norm": 1.1702959537506104, "learning_rate": 1.81012958672101e-07, "loss": 0.4939, "step": 14201 }, { "epoch": 0.92, "grad_norm": 1.2690125703811646, "learning_rate": 1.8073434550794855e-07, "loss": 0.5023, "step": 14202 }, { "epoch": 0.92, "grad_norm": 1.2576041221618652, "learning_rate": 1.8045594298146397e-07, "loss": 0.4976, "step": 14203 }, { "epoch": 0.92, "grad_norm": 1.147794246673584, "learning_rate": 1.8017775110481528e-07, "loss": 0.4987, "step": 14204 }, { "epoch": 0.92, "grad_norm": 1.1948719024658203, "learning_rate": 1.7989976989016222e-07, "loss": 0.5243, "step": 14205 }, { "epoch": 0.92, "grad_norm": 1.2030506134033203, "learning_rate": 1.7962199934965453e-07, "loss": 0.5743, "step": 14206 }, { "epoch": 0.92, "grad_norm": 1.1937448978424072, "learning_rate": 1.793444394954319e-07, "loss": 0.5273, "step": 14207 }, { "epoch": 0.92, "grad_norm": 1.194714903831482, "learning_rate": 1.790670903396269e-07, "loss": 0.5038, "step": 14208 }, { "epoch": 0.92, "grad_norm": 1.060546875, "learning_rate": 1.7878995189436087e-07, "loss": 0.5011, "step": 14209 }, { "epoch": 0.92, "grad_norm": 1.182008981704712, "learning_rate": 1.7851302417174698e-07, "loss": 0.5407, "step": 14210 }, { "epoch": 0.92, "grad_norm": 1.201350212097168, "learning_rate": 1.7823630718388996e-07, "loss": 0.4863, "step": 14211 }, { "epoch": 0.92, "grad_norm": 1.035921573638916, "learning_rate": 1.779598009428829e-07, "loss": 0.4821, "step": 14212 }, { "epoch": 0.92, "grad_norm": 1.1519360542297363, "learning_rate": 1.776835054608117e-07, "loss": 0.5035, "step": 14213 }, { "epoch": 0.92, "grad_norm": 1.2680004835128784, "learning_rate": 1.7740742074975337e-07, "loss": 0.5084, "step": 14214 }, { "epoch": 0.92, "grad_norm": 1.1101033687591553, "learning_rate": 1.771315468217738e-07, "loss": 0.4594, "step": 14215 }, { "epoch": 0.92, "grad_norm": 1.2326968908309937, "learning_rate": 1.7685588368893113e-07, "loss": 0.5235, "step": 14216 }, { "epoch": 0.92, "grad_norm": 1.2892707586288452, "learning_rate": 1.7658043136327463e-07, "loss": 0.5362, "step": 14217 }, { "epoch": 0.92, "grad_norm": 1.3278965950012207, "learning_rate": 1.7630518985684242e-07, "loss": 0.5265, "step": 14218 }, { "epoch": 0.92, "grad_norm": 1.19681978225708, "learning_rate": 1.760301591816649e-07, "loss": 0.4927, "step": 14219 }, { "epoch": 0.92, "grad_norm": 1.1141550540924072, "learning_rate": 1.7575533934976297e-07, "loss": 0.5174, "step": 14220 }, { "epoch": 0.92, "grad_norm": 1.2704476118087769, "learning_rate": 1.7548073037314873e-07, "loss": 0.5205, "step": 14221 }, { "epoch": 0.92, "grad_norm": 1.1525352001190186, "learning_rate": 1.7520633226382421e-07, "loss": 0.5112, "step": 14222 }, { "epoch": 0.92, "grad_norm": 1.0910930633544922, "learning_rate": 1.7493214503378319e-07, "loss": 0.4372, "step": 14223 }, { "epoch": 0.92, "grad_norm": 1.1757149696350098, "learning_rate": 1.7465816869500885e-07, "loss": 0.5151, "step": 14224 }, { "epoch": 0.92, "grad_norm": 1.182590365409851, "learning_rate": 1.743844032594766e-07, "loss": 0.4566, "step": 14225 }, { "epoch": 0.92, "grad_norm": 1.2494888305664062, "learning_rate": 1.741108487391513e-07, "loss": 0.5092, "step": 14226 }, { "epoch": 0.92, "grad_norm": 1.1640472412109375, "learning_rate": 1.7383750514599063e-07, "loss": 0.529, "step": 14227 }, { "epoch": 0.92, "grad_norm": 1.0811313390731812, "learning_rate": 1.735643724919406e-07, "loss": 0.5109, "step": 14228 }, { "epoch": 0.92, "grad_norm": 1.316035270690918, "learning_rate": 1.7329145078893994e-07, "loss": 0.5032, "step": 14229 }, { "epoch": 0.92, "grad_norm": 1.1411799192428589, "learning_rate": 1.7301874004891584e-07, "loss": 0.4762, "step": 14230 }, { "epoch": 0.92, "grad_norm": 1.1691434383392334, "learning_rate": 1.7274624028378928e-07, "loss": 0.5515, "step": 14231 }, { "epoch": 0.92, "grad_norm": 1.2451493740081787, "learning_rate": 1.724739515054702e-07, "loss": 0.5331, "step": 14232 }, { "epoch": 0.92, "grad_norm": 1.2015185356140137, "learning_rate": 1.722018737258596e-07, "loss": 0.5241, "step": 14233 }, { "epoch": 0.92, "grad_norm": 1.073866605758667, "learning_rate": 1.7193000695684968e-07, "loss": 0.487, "step": 14234 }, { "epoch": 0.92, "grad_norm": 1.2940959930419922, "learning_rate": 1.7165835121032203e-07, "loss": 0.498, "step": 14235 }, { "epoch": 0.92, "grad_norm": 1.2462437152862549, "learning_rate": 1.7138690649815048e-07, "loss": 0.5307, "step": 14236 }, { "epoch": 0.92, "grad_norm": 1.1777410507202148, "learning_rate": 1.7111567283219887e-07, "loss": 0.4759, "step": 14237 }, { "epoch": 0.92, "grad_norm": 1.2407801151275635, "learning_rate": 1.7084465022432383e-07, "loss": 0.5026, "step": 14238 }, { "epoch": 0.92, "grad_norm": 1.2231627702713013, "learning_rate": 1.705738386863687e-07, "loss": 0.487, "step": 14239 }, { "epoch": 0.92, "grad_norm": 1.153588056564331, "learning_rate": 1.7030323823017115e-07, "loss": 0.5142, "step": 14240 }, { "epoch": 0.92, "grad_norm": 1.1381627321243286, "learning_rate": 1.7003284886755844e-07, "loss": 0.4832, "step": 14241 }, { "epoch": 0.92, "grad_norm": 1.1916290521621704, "learning_rate": 1.6976267061034778e-07, "loss": 0.5376, "step": 14242 }, { "epoch": 0.92, "grad_norm": 1.1675769090652466, "learning_rate": 1.6949270347034975e-07, "loss": 0.5277, "step": 14243 }, { "epoch": 0.92, "grad_norm": 1.1707961559295654, "learning_rate": 1.6922294745936209e-07, "loss": 0.4956, "step": 14244 }, { "epoch": 0.92, "grad_norm": 1.1961970329284668, "learning_rate": 1.6895340258917592e-07, "loss": 0.4863, "step": 14245 }, { "epoch": 0.92, "grad_norm": 1.1354068517684937, "learning_rate": 1.6868406887157185e-07, "loss": 0.4974, "step": 14246 }, { "epoch": 0.92, "grad_norm": 1.0747411251068115, "learning_rate": 1.6841494631832322e-07, "loss": 0.4863, "step": 14247 }, { "epoch": 0.92, "grad_norm": 1.2437679767608643, "learning_rate": 1.6814603494119063e-07, "loss": 0.5065, "step": 14248 }, { "epoch": 0.92, "grad_norm": 1.1954525709152222, "learning_rate": 1.6787733475192913e-07, "loss": 0.5419, "step": 14249 }, { "epoch": 0.92, "grad_norm": 1.281865119934082, "learning_rate": 1.6760884576228153e-07, "loss": 0.5301, "step": 14250 }, { "epoch": 0.92, "grad_norm": 1.22776198387146, "learning_rate": 1.6734056798398402e-07, "loss": 0.5385, "step": 14251 }, { "epoch": 0.92, "grad_norm": 1.21847665309906, "learning_rate": 1.670725014287622e-07, "loss": 0.4646, "step": 14252 }, { "epoch": 0.92, "grad_norm": 1.1326696872711182, "learning_rate": 1.6680464610833168e-07, "loss": 0.5729, "step": 14253 }, { "epoch": 0.92, "grad_norm": 1.1506080627441406, "learning_rate": 1.6653700203440038e-07, "loss": 0.4843, "step": 14254 }, { "epoch": 0.92, "grad_norm": 1.206092119216919, "learning_rate": 1.6626956921866665e-07, "loss": 0.4652, "step": 14255 }, { "epoch": 0.92, "grad_norm": 1.1474075317382812, "learning_rate": 1.660023476728184e-07, "loss": 0.5058, "step": 14256 }, { "epoch": 0.92, "grad_norm": 1.078155755996704, "learning_rate": 1.6573533740853521e-07, "loss": 0.4981, "step": 14257 }, { "epoch": 0.92, "grad_norm": 1.1078134775161743, "learning_rate": 1.6546853843748934e-07, "loss": 0.5302, "step": 14258 }, { "epoch": 0.92, "grad_norm": 1.2395915985107422, "learning_rate": 1.6520195077133928e-07, "loss": 0.5463, "step": 14259 }, { "epoch": 0.92, "grad_norm": 1.189856767654419, "learning_rate": 1.6493557442173792e-07, "loss": 0.5222, "step": 14260 }, { "epoch": 0.92, "grad_norm": 1.2088522911071777, "learning_rate": 1.6466940940032816e-07, "loss": 0.4731, "step": 14261 }, { "epoch": 0.92, "grad_norm": 1.1249276399612427, "learning_rate": 1.6440345571874295e-07, "loss": 0.5287, "step": 14262 }, { "epoch": 0.92, "grad_norm": 1.1150658130645752, "learning_rate": 1.641377133886074e-07, "loss": 0.4763, "step": 14263 }, { "epoch": 0.92, "grad_norm": 1.1802104711532593, "learning_rate": 1.6387218242153558e-07, "loss": 0.5195, "step": 14264 }, { "epoch": 0.92, "grad_norm": 1.2717044353485107, "learning_rate": 1.6360686282913262e-07, "loss": 0.5502, "step": 14265 }, { "epoch": 0.92, "grad_norm": 1.1180323362350464, "learning_rate": 1.6334175462299595e-07, "loss": 0.5242, "step": 14266 }, { "epoch": 0.92, "grad_norm": 1.1669998168945312, "learning_rate": 1.6307685781471238e-07, "loss": 0.5485, "step": 14267 }, { "epoch": 0.92, "grad_norm": 1.2096434831619263, "learning_rate": 1.6281217241586044e-07, "loss": 0.4892, "step": 14268 }, { "epoch": 0.92, "grad_norm": 1.1435966491699219, "learning_rate": 1.6254769843800756e-07, "loss": 0.5368, "step": 14269 }, { "epoch": 0.92, "grad_norm": 1.1773912906646729, "learning_rate": 1.62283435892715e-07, "loss": 0.5656, "step": 14270 }, { "epoch": 0.92, "grad_norm": 1.180701732635498, "learning_rate": 1.6201938479153133e-07, "loss": 0.524, "step": 14271 }, { "epoch": 0.92, "grad_norm": 1.2893290519714355, "learning_rate": 1.617555451459979e-07, "loss": 0.5392, "step": 14272 }, { "epoch": 0.92, "grad_norm": 1.2003610134124756, "learning_rate": 1.614919169676471e-07, "loss": 0.5274, "step": 14273 }, { "epoch": 0.92, "grad_norm": 1.1846044063568115, "learning_rate": 1.6122850026800086e-07, "loss": 0.5115, "step": 14274 }, { "epoch": 0.92, "grad_norm": 1.1363736391067505, "learning_rate": 1.6096529505857272e-07, "loss": 0.4771, "step": 14275 }, { "epoch": 0.92, "grad_norm": 1.1686787605285645, "learning_rate": 1.607023013508674e-07, "loss": 0.5175, "step": 14276 }, { "epoch": 0.92, "grad_norm": 1.21404230594635, "learning_rate": 1.6043951915637846e-07, "loss": 0.484, "step": 14277 }, { "epoch": 0.92, "grad_norm": 1.2334808111190796, "learning_rate": 1.601769484865917e-07, "loss": 0.5336, "step": 14278 }, { "epoch": 0.92, "grad_norm": 1.1633013486862183, "learning_rate": 1.5991458935298354e-07, "loss": 0.5406, "step": 14279 }, { "epoch": 0.92, "grad_norm": 1.1849104166030884, "learning_rate": 1.5965244176702144e-07, "loss": 0.4773, "step": 14280 }, { "epoch": 0.92, "grad_norm": 1.2868092060089111, "learning_rate": 1.5939050574016236e-07, "loss": 0.5108, "step": 14281 }, { "epoch": 0.92, "grad_norm": 1.2273262739181519, "learning_rate": 1.59128781283856e-07, "loss": 0.5385, "step": 14282 }, { "epoch": 0.92, "grad_norm": 1.121576189994812, "learning_rate": 1.5886726840954046e-07, "loss": 0.4932, "step": 14283 }, { "epoch": 0.92, "grad_norm": 1.1114704608917236, "learning_rate": 1.5860596712864662e-07, "loss": 0.4641, "step": 14284 }, { "epoch": 0.92, "grad_norm": 1.1469730138778687, "learning_rate": 1.5834487745259475e-07, "loss": 0.5331, "step": 14285 }, { "epoch": 0.92, "grad_norm": 1.259521484375, "learning_rate": 1.5808399939279684e-07, "loss": 0.544, "step": 14286 }, { "epoch": 0.92, "grad_norm": 1.233479619026184, "learning_rate": 1.5782333296065488e-07, "loss": 0.5002, "step": 14287 }, { "epoch": 0.92, "grad_norm": 1.2663894891738892, "learning_rate": 1.5756287816756255e-07, "loss": 0.475, "step": 14288 }, { "epoch": 0.92, "grad_norm": 1.221474289894104, "learning_rate": 1.5730263502490239e-07, "loss": 0.5878, "step": 14289 }, { "epoch": 0.92, "grad_norm": 1.2486542463302612, "learning_rate": 1.5704260354405032e-07, "loss": 0.581, "step": 14290 }, { "epoch": 0.92, "grad_norm": 1.1839396953582764, "learning_rate": 1.5678278373637057e-07, "loss": 0.5371, "step": 14291 }, { "epoch": 0.92, "grad_norm": 1.2554336786270142, "learning_rate": 1.5652317561321962e-07, "loss": 0.5155, "step": 14292 }, { "epoch": 0.92, "grad_norm": 1.246770977973938, "learning_rate": 1.5626377918594448e-07, "loss": 0.572, "step": 14293 }, { "epoch": 0.92, "grad_norm": 1.1925231218338013, "learning_rate": 1.5600459446588335e-07, "loss": 0.5561, "step": 14294 }, { "epoch": 0.92, "grad_norm": 1.3129581212997437, "learning_rate": 1.5574562146436323e-07, "loss": 0.5165, "step": 14295 }, { "epoch": 0.92, "grad_norm": 1.0770920515060425, "learning_rate": 1.5548686019270343e-07, "loss": 0.5458, "step": 14296 }, { "epoch": 0.92, "grad_norm": 1.3139228820800781, "learning_rate": 1.5522831066221378e-07, "loss": 0.511, "step": 14297 }, { "epoch": 0.92, "grad_norm": 1.2047442197799683, "learning_rate": 1.5496997288419524e-07, "loss": 0.4824, "step": 14298 }, { "epoch": 0.92, "grad_norm": 1.1752315759658813, "learning_rate": 1.5471184686993935e-07, "loss": 0.5049, "step": 14299 }, { "epoch": 0.92, "grad_norm": 1.3269033432006836, "learning_rate": 1.544539326307265e-07, "loss": 0.5231, "step": 14300 }, { "epoch": 0.92, "grad_norm": 1.0566362142562866, "learning_rate": 1.5419623017783158e-07, "loss": 0.5239, "step": 14301 }, { "epoch": 0.92, "grad_norm": 1.1517366170883179, "learning_rate": 1.5393873952251614e-07, "loss": 0.4988, "step": 14302 }, { "epoch": 0.92, "grad_norm": 1.1490830183029175, "learning_rate": 1.5368146067603617e-07, "loss": 0.4826, "step": 14303 }, { "epoch": 0.92, "grad_norm": 1.1537530422210693, "learning_rate": 1.5342439364963547e-07, "loss": 0.494, "step": 14304 }, { "epoch": 0.92, "grad_norm": 1.2048317193984985, "learning_rate": 1.5316753845455113e-07, "loss": 0.5104, "step": 14305 }, { "epoch": 0.92, "grad_norm": 1.1412724256515503, "learning_rate": 1.5291089510200752e-07, "loss": 0.5189, "step": 14306 }, { "epoch": 0.92, "grad_norm": 1.2062512636184692, "learning_rate": 1.5265446360322346e-07, "loss": 0.4968, "step": 14307 }, { "epoch": 0.92, "grad_norm": 1.1621323823928833, "learning_rate": 1.5239824396940662e-07, "loss": 0.49, "step": 14308 }, { "epoch": 0.92, "grad_norm": 1.1479355096817017, "learning_rate": 1.5214223621175527e-07, "loss": 0.5785, "step": 14309 }, { "epoch": 0.92, "grad_norm": 1.0780959129333496, "learning_rate": 1.5188644034145883e-07, "loss": 0.4912, "step": 14310 }, { "epoch": 0.92, "grad_norm": 1.1303811073303223, "learning_rate": 1.5163085636969888e-07, "loss": 0.5104, "step": 14311 }, { "epoch": 0.92, "grad_norm": 1.1190299987792969, "learning_rate": 1.5137548430764427e-07, "loss": 0.4702, "step": 14312 }, { "epoch": 0.92, "grad_norm": 1.153495192527771, "learning_rate": 1.5112032416645828e-07, "loss": 0.5132, "step": 14313 }, { "epoch": 0.92, "grad_norm": 1.1541039943695068, "learning_rate": 1.50865375957292e-07, "loss": 0.4841, "step": 14314 }, { "epoch": 0.92, "grad_norm": 1.106624960899353, "learning_rate": 1.506106396912893e-07, "loss": 0.4537, "step": 14315 }, { "epoch": 0.92, "grad_norm": 1.3075146675109863, "learning_rate": 1.5035611537958404e-07, "loss": 0.5339, "step": 14316 }, { "epoch": 0.92, "grad_norm": 1.251287817955017, "learning_rate": 1.5010180303330124e-07, "loss": 0.5389, "step": 14317 }, { "epoch": 0.92, "grad_norm": 1.2090415954589844, "learning_rate": 1.498477026635553e-07, "loss": 0.5366, "step": 14318 }, { "epoch": 0.92, "grad_norm": 1.2639936208724976, "learning_rate": 1.4959381428145292e-07, "loss": 0.5703, "step": 14319 }, { "epoch": 0.92, "grad_norm": 1.3166217803955078, "learning_rate": 1.4934013789809022e-07, "loss": 0.5309, "step": 14320 }, { "epoch": 0.92, "grad_norm": 1.1650418043136597, "learning_rate": 1.4908667352455554e-07, "loss": 0.4597, "step": 14321 }, { "epoch": 0.92, "grad_norm": 1.1500883102416992, "learning_rate": 1.4883342117192668e-07, "loss": 0.5573, "step": 14322 }, { "epoch": 0.92, "grad_norm": 1.143586277961731, "learning_rate": 1.485803808512737e-07, "loss": 0.4911, "step": 14323 }, { "epoch": 0.92, "grad_norm": 1.1771031618118286, "learning_rate": 1.4832755257365438e-07, "loss": 0.5148, "step": 14324 }, { "epoch": 0.92, "grad_norm": 1.2056976556777954, "learning_rate": 1.480749363501205e-07, "loss": 0.5384, "step": 14325 }, { "epoch": 0.92, "grad_norm": 1.1803754568099976, "learning_rate": 1.4782253219171316e-07, "loss": 0.5219, "step": 14326 }, { "epoch": 0.92, "grad_norm": 1.1550629138946533, "learning_rate": 1.4757034010946414e-07, "loss": 0.5108, "step": 14327 }, { "epoch": 0.92, "grad_norm": 1.1797369718551636, "learning_rate": 1.4731836011439625e-07, "loss": 0.4737, "step": 14328 }, { "epoch": 0.92, "grad_norm": 1.1678967475891113, "learning_rate": 1.470665922175224e-07, "loss": 0.5148, "step": 14329 }, { "epoch": 0.92, "grad_norm": 1.171051025390625, "learning_rate": 1.4681503642984707e-07, "loss": 0.5538, "step": 14330 }, { "epoch": 0.93, "grad_norm": 1.1530851125717163, "learning_rate": 1.4656369276236537e-07, "loss": 0.5551, "step": 14331 }, { "epoch": 0.93, "grad_norm": 1.1603184938430786, "learning_rate": 1.4631256122606297e-07, "loss": 0.5044, "step": 14332 }, { "epoch": 0.93, "grad_norm": 1.232804775238037, "learning_rate": 1.4606164183191552e-07, "loss": 0.5461, "step": 14333 }, { "epoch": 0.93, "grad_norm": 1.1867128610610962, "learning_rate": 1.458109345908898e-07, "loss": 0.5002, "step": 14334 }, { "epoch": 0.93, "grad_norm": 1.5966144800186157, "learning_rate": 1.4556043951394483e-07, "loss": 0.5691, "step": 14335 }, { "epoch": 0.93, "grad_norm": 1.253345012664795, "learning_rate": 1.4531015661202853e-07, "loss": 0.5201, "step": 14336 }, { "epoch": 0.93, "grad_norm": 1.2391935586929321, "learning_rate": 1.4506008589607935e-07, "loss": 0.4969, "step": 14337 }, { "epoch": 0.93, "grad_norm": 1.2838541269302368, "learning_rate": 1.4481022737702854e-07, "loss": 0.4976, "step": 14338 }, { "epoch": 0.93, "grad_norm": 1.1980502605438232, "learning_rate": 1.4456058106579573e-07, "loss": 0.5058, "step": 14339 }, { "epoch": 0.93, "grad_norm": 1.1875810623168945, "learning_rate": 1.4431114697329274e-07, "loss": 0.4724, "step": 14340 }, { "epoch": 0.93, "grad_norm": 1.1304131746292114, "learning_rate": 1.4406192511042194e-07, "loss": 0.5201, "step": 14341 }, { "epoch": 0.93, "grad_norm": 1.1378086805343628, "learning_rate": 1.4381291548807574e-07, "loss": 0.5184, "step": 14342 }, { "epoch": 0.93, "grad_norm": 1.2949894666671753, "learning_rate": 1.4356411811713767e-07, "loss": 0.4761, "step": 14343 }, { "epoch": 0.93, "grad_norm": 1.2359155416488647, "learning_rate": 1.4331553300848233e-07, "loss": 0.5162, "step": 14344 }, { "epoch": 0.93, "grad_norm": 1.120244026184082, "learning_rate": 1.4306716017297495e-07, "loss": 0.4823, "step": 14345 }, { "epoch": 0.93, "grad_norm": 1.2036346197128296, "learning_rate": 1.4281899962147128e-07, "loss": 0.4863, "step": 14346 }, { "epoch": 0.93, "grad_norm": 1.1773402690887451, "learning_rate": 1.4257105136481652e-07, "loss": 0.4763, "step": 14347 }, { "epoch": 0.93, "grad_norm": 1.1945232152938843, "learning_rate": 1.4232331541384926e-07, "loss": 0.5252, "step": 14348 }, { "epoch": 0.93, "grad_norm": 1.128671407699585, "learning_rate": 1.4207579177939635e-07, "loss": 0.4942, "step": 14349 }, { "epoch": 0.93, "grad_norm": 1.1572942733764648, "learning_rate": 1.4182848047227747e-07, "loss": 0.5048, "step": 14350 }, { "epoch": 0.93, "grad_norm": 1.3115710020065308, "learning_rate": 1.4158138150330124e-07, "loss": 0.5487, "step": 14351 }, { "epoch": 0.93, "grad_norm": 1.179742693901062, "learning_rate": 1.4133449488326844e-07, "loss": 0.5104, "step": 14352 }, { "epoch": 0.93, "grad_norm": 1.167728066444397, "learning_rate": 1.4108782062296934e-07, "loss": 0.5178, "step": 14353 }, { "epoch": 0.93, "grad_norm": 1.0993438959121704, "learning_rate": 1.4084135873318472e-07, "loss": 0.4662, "step": 14354 }, { "epoch": 0.93, "grad_norm": 1.1381568908691406, "learning_rate": 1.4059510922468766e-07, "loss": 0.5116, "step": 14355 }, { "epoch": 0.93, "grad_norm": 1.153741478919983, "learning_rate": 1.4034907210824123e-07, "loss": 0.5059, "step": 14356 }, { "epoch": 0.93, "grad_norm": 1.1791688203811646, "learning_rate": 1.4010324739459847e-07, "loss": 0.5422, "step": 14357 }, { "epoch": 0.93, "grad_norm": 1.192578911781311, "learning_rate": 1.3985763509450522e-07, "loss": 0.4966, "step": 14358 }, { "epoch": 0.93, "grad_norm": 1.278976559638977, "learning_rate": 1.3961223521869406e-07, "loss": 0.5652, "step": 14359 }, { "epoch": 0.93, "grad_norm": 1.1549054384231567, "learning_rate": 1.3936704777789245e-07, "loss": 0.5302, "step": 14360 }, { "epoch": 0.93, "grad_norm": 1.0875253677368164, "learning_rate": 1.3912207278281685e-07, "loss": 0.5097, "step": 14361 }, { "epoch": 0.93, "grad_norm": 1.1491678953170776, "learning_rate": 1.3887731024417373e-07, "loss": 0.4641, "step": 14362 }, { "epoch": 0.93, "grad_norm": 1.1589577198028564, "learning_rate": 1.3863276017266224e-07, "loss": 0.4914, "step": 14363 }, { "epoch": 0.93, "grad_norm": 1.0625841617584229, "learning_rate": 1.3838842257897e-07, "loss": 0.5233, "step": 14364 }, { "epoch": 0.93, "grad_norm": 1.0881423950195312, "learning_rate": 1.3814429747377678e-07, "loss": 0.494, "step": 14365 }, { "epoch": 0.93, "grad_norm": 1.2149263620376587, "learning_rate": 1.3790038486775238e-07, "loss": 0.4778, "step": 14366 }, { "epoch": 0.93, "grad_norm": 1.2409480810165405, "learning_rate": 1.376566847715577e-07, "loss": 0.5165, "step": 14367 }, { "epoch": 0.93, "grad_norm": 1.2338690757751465, "learning_rate": 1.3741319719584422e-07, "loss": 0.4899, "step": 14368 }, { "epoch": 0.93, "grad_norm": 1.1814496517181396, "learning_rate": 1.3716992215125458e-07, "loss": 0.4782, "step": 14369 }, { "epoch": 0.93, "grad_norm": 1.1745811700820923, "learning_rate": 1.3692685964842133e-07, "loss": 0.4775, "step": 14370 }, { "epoch": 0.93, "grad_norm": 1.243212103843689, "learning_rate": 1.3668400969796824e-07, "loss": 0.5014, "step": 14371 }, { "epoch": 0.93, "grad_norm": 1.174835205078125, "learning_rate": 1.3644137231050901e-07, "loss": 0.5338, "step": 14372 }, { "epoch": 0.93, "grad_norm": 1.253222942352295, "learning_rate": 1.3619894749664962e-07, "loss": 0.5123, "step": 14373 }, { "epoch": 0.93, "grad_norm": 1.2471320629119873, "learning_rate": 1.359567352669855e-07, "loss": 0.5347, "step": 14374 }, { "epoch": 0.93, "grad_norm": 1.1499691009521484, "learning_rate": 1.3571473563210314e-07, "loss": 0.4942, "step": 14375 }, { "epoch": 0.93, "grad_norm": 1.2923331260681152, "learning_rate": 1.3547294860257964e-07, "loss": 0.507, "step": 14376 }, { "epoch": 0.93, "grad_norm": 1.1640733480453491, "learning_rate": 1.352313741889827e-07, "loss": 0.5199, "step": 14377 }, { "epoch": 0.93, "grad_norm": 1.102841854095459, "learning_rate": 1.3499001240187103e-07, "loss": 0.5093, "step": 14378 }, { "epoch": 0.93, "grad_norm": 1.2701932191848755, "learning_rate": 1.347488632517946e-07, "loss": 0.5607, "step": 14379 }, { "epoch": 0.93, "grad_norm": 1.172053337097168, "learning_rate": 1.3450792674929215e-07, "loss": 0.5154, "step": 14380 }, { "epoch": 0.93, "grad_norm": 1.104082703590393, "learning_rate": 1.3426720290489582e-07, "loss": 0.5266, "step": 14381 }, { "epoch": 0.93, "grad_norm": 1.1461505889892578, "learning_rate": 1.3402669172912607e-07, "loss": 0.4783, "step": 14382 }, { "epoch": 0.93, "grad_norm": 1.17717444896698, "learning_rate": 1.3378639323249509e-07, "loss": 0.4988, "step": 14383 }, { "epoch": 0.93, "grad_norm": 1.133948564529419, "learning_rate": 1.3354630742550557e-07, "loss": 0.5335, "step": 14384 }, { "epoch": 0.93, "grad_norm": 1.2285029888153076, "learning_rate": 1.333064343186513e-07, "loss": 0.532, "step": 14385 }, { "epoch": 0.93, "grad_norm": 1.3214449882507324, "learning_rate": 1.3306677392241674e-07, "loss": 0.5721, "step": 14386 }, { "epoch": 0.93, "grad_norm": 1.217423677444458, "learning_rate": 1.3282732624727734e-07, "loss": 0.4849, "step": 14387 }, { "epoch": 0.93, "grad_norm": 1.22975754737854, "learning_rate": 1.3258809130369753e-07, "loss": 0.5005, "step": 14388 }, { "epoch": 0.93, "grad_norm": 1.1745766401290894, "learning_rate": 1.3234906910213396e-07, "loss": 0.477, "step": 14389 }, { "epoch": 0.93, "grad_norm": 1.0901985168457031, "learning_rate": 1.321102596530338e-07, "loss": 0.4689, "step": 14390 }, { "epoch": 0.93, "grad_norm": 1.1646361351013184, "learning_rate": 1.3187166296683485e-07, "loss": 0.4702, "step": 14391 }, { "epoch": 0.93, "grad_norm": 1.341770052909851, "learning_rate": 1.3163327905396596e-07, "loss": 0.4941, "step": 14392 }, { "epoch": 0.93, "grad_norm": 1.161633014678955, "learning_rate": 1.31395107924846e-07, "loss": 0.4557, "step": 14393 }, { "epoch": 0.93, "grad_norm": 1.1619417667388916, "learning_rate": 1.3115714958988446e-07, "loss": 0.496, "step": 14394 }, { "epoch": 0.93, "grad_norm": 1.1294023990631104, "learning_rate": 1.309194040594819e-07, "loss": 0.4857, "step": 14395 }, { "epoch": 0.93, "grad_norm": 1.0869942903518677, "learning_rate": 1.3068187134403e-07, "loss": 0.4921, "step": 14396 }, { "epoch": 0.93, "grad_norm": 1.245025396347046, "learning_rate": 1.3044455145391044e-07, "loss": 0.486, "step": 14397 }, { "epoch": 0.93, "grad_norm": 1.1319063901901245, "learning_rate": 1.3020744439949607e-07, "loss": 0.4729, "step": 14398 }, { "epoch": 0.93, "grad_norm": 1.2322372198104858, "learning_rate": 1.2997055019115024e-07, "loss": 0.5464, "step": 14399 }, { "epoch": 0.93, "grad_norm": 1.2490136623382568, "learning_rate": 1.2973386883922633e-07, "loss": 0.5344, "step": 14400 }, { "epoch": 0.93, "grad_norm": 1.2083981037139893, "learning_rate": 1.2949740035407e-07, "loss": 0.5356, "step": 14401 }, { "epoch": 0.93, "grad_norm": 1.1619118452072144, "learning_rate": 1.292611447460157e-07, "loss": 0.4965, "step": 14402 }, { "epoch": 0.93, "grad_norm": 1.1944469213485718, "learning_rate": 1.2902510202539021e-07, "loss": 0.5014, "step": 14403 }, { "epoch": 0.93, "grad_norm": 1.198872685432434, "learning_rate": 1.2878927220251026e-07, "loss": 0.4705, "step": 14404 }, { "epoch": 0.93, "grad_norm": 1.213161587715149, "learning_rate": 1.2855365528768425e-07, "loss": 0.4971, "step": 14405 }, { "epoch": 0.93, "grad_norm": 1.2352697849273682, "learning_rate": 1.2831825129120845e-07, "loss": 0.5748, "step": 14406 }, { "epoch": 0.93, "grad_norm": 1.112744927406311, "learning_rate": 1.2808306022337291e-07, "loss": 0.5083, "step": 14407 }, { "epoch": 0.93, "grad_norm": 1.2112224102020264, "learning_rate": 1.278480820944572e-07, "loss": 0.5099, "step": 14408 }, { "epoch": 0.93, "grad_norm": 1.212276816368103, "learning_rate": 1.2761331691473145e-07, "loss": 0.5097, "step": 14409 }, { "epoch": 0.93, "grad_norm": 1.1723859310150146, "learning_rate": 1.273787646944563e-07, "loss": 0.4554, "step": 14410 }, { "epoch": 0.93, "grad_norm": 1.2391095161437988, "learning_rate": 1.2714442544388473e-07, "loss": 0.5019, "step": 14411 }, { "epoch": 0.93, "grad_norm": 1.2471556663513184, "learning_rate": 1.2691029917325793e-07, "loss": 0.5192, "step": 14412 }, { "epoch": 0.93, "grad_norm": 1.2404060363769531, "learning_rate": 1.2667638589280884e-07, "loss": 0.5103, "step": 14413 }, { "epoch": 0.93, "grad_norm": 1.227499008178711, "learning_rate": 1.264426856127615e-07, "loss": 0.4806, "step": 14414 }, { "epoch": 0.93, "grad_norm": 1.0713062286376953, "learning_rate": 1.2620919834333056e-07, "loss": 0.4749, "step": 14415 }, { "epoch": 0.93, "grad_norm": 1.1256611347198486, "learning_rate": 1.2597592409472171e-07, "loss": 0.4782, "step": 14416 }, { "epoch": 0.93, "grad_norm": 1.1670832633972168, "learning_rate": 1.2574286287712956e-07, "loss": 0.5213, "step": 14417 }, { "epoch": 0.93, "grad_norm": 1.1464862823486328, "learning_rate": 1.2551001470074098e-07, "loss": 0.5039, "step": 14418 }, { "epoch": 0.93, "grad_norm": 1.1734933853149414, "learning_rate": 1.252773795757334e-07, "loss": 0.4271, "step": 14419 }, { "epoch": 0.93, "grad_norm": 1.1540861129760742, "learning_rate": 1.250449575122742e-07, "loss": 0.4642, "step": 14420 }, { "epoch": 0.93, "grad_norm": 1.3483219146728516, "learning_rate": 1.2481274852052306e-07, "loss": 0.5045, "step": 14421 }, { "epoch": 0.93, "grad_norm": 1.0932796001434326, "learning_rate": 1.245807526106285e-07, "loss": 0.4671, "step": 14422 }, { "epoch": 0.93, "grad_norm": 1.3022857904434204, "learning_rate": 1.2434896979273026e-07, "loss": 0.5621, "step": 14423 }, { "epoch": 0.93, "grad_norm": 1.118747353553772, "learning_rate": 1.2411740007695905e-07, "loss": 0.5123, "step": 14424 }, { "epoch": 0.93, "grad_norm": 1.1986708641052246, "learning_rate": 1.2388604347343625e-07, "loss": 0.4882, "step": 14425 }, { "epoch": 0.93, "grad_norm": 1.17897367477417, "learning_rate": 1.236548999922743e-07, "loss": 0.5231, "step": 14426 }, { "epoch": 0.93, "grad_norm": 1.2873482704162598, "learning_rate": 1.2342396964357516e-07, "loss": 0.5287, "step": 14427 }, { "epoch": 0.93, "grad_norm": 1.2633006572723389, "learning_rate": 1.2319325243743296e-07, "loss": 0.541, "step": 14428 }, { "epoch": 0.93, "grad_norm": 1.169731616973877, "learning_rate": 1.229627483839313e-07, "loss": 0.5013, "step": 14429 }, { "epoch": 0.93, "grad_norm": 1.1852964162826538, "learning_rate": 1.227324574931449e-07, "loss": 0.5501, "step": 14430 }, { "epoch": 0.93, "grad_norm": 1.1122220754623413, "learning_rate": 1.2250237977513956e-07, "loss": 0.495, "step": 14431 }, { "epoch": 0.93, "grad_norm": 1.1603909730911255, "learning_rate": 1.2227251523997064e-07, "loss": 0.5214, "step": 14432 }, { "epoch": 0.93, "grad_norm": 1.1487762928009033, "learning_rate": 1.2204286389768505e-07, "loss": 0.5022, "step": 14433 }, { "epoch": 0.93, "grad_norm": 1.1787223815917969, "learning_rate": 1.218134257583209e-07, "loss": 0.4743, "step": 14434 }, { "epoch": 0.93, "grad_norm": 1.2097159624099731, "learning_rate": 1.2158420083190681e-07, "loss": 0.5458, "step": 14435 }, { "epoch": 0.93, "grad_norm": 1.1915231943130493, "learning_rate": 1.2135518912846034e-07, "loss": 0.539, "step": 14436 }, { "epoch": 0.93, "grad_norm": 1.1517157554626465, "learning_rate": 1.211263906579907e-07, "loss": 0.5171, "step": 14437 }, { "epoch": 0.93, "grad_norm": 1.3758126497268677, "learning_rate": 1.208978054304999e-07, "loss": 0.4709, "step": 14438 }, { "epoch": 0.93, "grad_norm": 1.2584197521209717, "learning_rate": 1.2066943345597715e-07, "loss": 0.5648, "step": 14439 }, { "epoch": 0.93, "grad_norm": 1.28315269947052, "learning_rate": 1.2044127474440503e-07, "loss": 0.5035, "step": 14440 }, { "epoch": 0.93, "grad_norm": 1.1017671823501587, "learning_rate": 1.20213329305755e-07, "loss": 0.4841, "step": 14441 }, { "epoch": 0.93, "grad_norm": 1.2482033967971802, "learning_rate": 1.1998559714999026e-07, "loss": 0.5264, "step": 14442 }, { "epoch": 0.93, "grad_norm": 1.1152225732803345, "learning_rate": 1.19758078287065e-07, "loss": 0.4915, "step": 14443 }, { "epoch": 0.93, "grad_norm": 1.23225998878479, "learning_rate": 1.1953077272692239e-07, "loss": 0.548, "step": 14444 }, { "epoch": 0.93, "grad_norm": 1.2263448238372803, "learning_rate": 1.1930368047949836e-07, "loss": 0.5414, "step": 14445 }, { "epoch": 0.93, "grad_norm": 1.2898656129837036, "learning_rate": 1.190768015547178e-07, "loss": 0.5766, "step": 14446 }, { "epoch": 0.93, "grad_norm": 1.2387350797653198, "learning_rate": 1.1885013596249772e-07, "loss": 0.4902, "step": 14447 }, { "epoch": 0.93, "grad_norm": 1.2393611669540405, "learning_rate": 1.1862368371274414e-07, "loss": 0.6017, "step": 14448 }, { "epoch": 0.93, "grad_norm": 1.1779143810272217, "learning_rate": 1.183974448153552e-07, "loss": 0.5355, "step": 14449 }, { "epoch": 0.93, "grad_norm": 1.1752679347991943, "learning_rate": 1.1817141928021969e-07, "loss": 0.49, "step": 14450 }, { "epoch": 0.93, "grad_norm": 1.1766318082809448, "learning_rate": 1.1794560711721581e-07, "loss": 0.543, "step": 14451 }, { "epoch": 0.93, "grad_norm": 1.088800072669983, "learning_rate": 1.1772000833621399e-07, "loss": 0.489, "step": 14452 }, { "epoch": 0.93, "grad_norm": 1.0844141244888306, "learning_rate": 1.1749462294707359e-07, "loss": 0.4838, "step": 14453 }, { "epoch": 0.93, "grad_norm": 1.3119335174560547, "learning_rate": 1.172694509596467e-07, "loss": 0.5388, "step": 14454 }, { "epoch": 0.93, "grad_norm": 1.1752243041992188, "learning_rate": 1.170444923837738e-07, "loss": 0.4987, "step": 14455 }, { "epoch": 0.93, "grad_norm": 1.228880763053894, "learning_rate": 1.1681974722928869e-07, "loss": 0.4844, "step": 14456 }, { "epoch": 0.93, "grad_norm": 1.157680869102478, "learning_rate": 1.1659521550601294e-07, "loss": 0.5777, "step": 14457 }, { "epoch": 0.93, "grad_norm": 1.3055082559585571, "learning_rate": 1.1637089722376204e-07, "loss": 0.5111, "step": 14458 }, { "epoch": 0.93, "grad_norm": 1.0561615228652954, "learning_rate": 1.1614679239233817e-07, "loss": 0.4576, "step": 14459 }, { "epoch": 0.93, "grad_norm": 1.2214938402175903, "learning_rate": 1.1592290102153791e-07, "loss": 0.5149, "step": 14460 }, { "epoch": 0.93, "grad_norm": 1.2056450843811035, "learning_rate": 1.1569922312114679e-07, "loss": 0.474, "step": 14461 }, { "epoch": 0.93, "grad_norm": 1.220320463180542, "learning_rate": 1.1547575870094085e-07, "loss": 0.4884, "step": 14462 }, { "epoch": 0.93, "grad_norm": 1.2242612838745117, "learning_rate": 1.1525250777068675e-07, "loss": 0.5208, "step": 14463 }, { "epoch": 0.93, "grad_norm": 1.1998361349105835, "learning_rate": 1.150294703401439e-07, "loss": 0.5185, "step": 14464 }, { "epoch": 0.93, "grad_norm": 1.2482922077178955, "learning_rate": 1.1480664641905837e-07, "loss": 0.5134, "step": 14465 }, { "epoch": 0.93, "grad_norm": 1.1056979894638062, "learning_rate": 1.1458403601717072e-07, "loss": 0.5134, "step": 14466 }, { "epoch": 0.93, "grad_norm": 1.0896481275558472, "learning_rate": 1.1436163914421039e-07, "loss": 0.5235, "step": 14467 }, { "epoch": 0.93, "grad_norm": 1.1941497325897217, "learning_rate": 1.1413945580989738e-07, "loss": 0.5114, "step": 14468 }, { "epoch": 0.93, "grad_norm": 1.1257840394973755, "learning_rate": 1.1391748602394393e-07, "loss": 0.5046, "step": 14469 }, { "epoch": 0.93, "grad_norm": 1.1831773519515991, "learning_rate": 1.1369572979605059e-07, "loss": 0.5157, "step": 14470 }, { "epoch": 0.93, "grad_norm": 1.224000096321106, "learning_rate": 1.1347418713590963e-07, "loss": 0.5173, "step": 14471 }, { "epoch": 0.93, "grad_norm": 1.1416343450546265, "learning_rate": 1.132528580532044e-07, "loss": 0.5032, "step": 14472 }, { "epoch": 0.93, "grad_norm": 1.1168452501296997, "learning_rate": 1.1303174255760885e-07, "loss": 0.5115, "step": 14473 }, { "epoch": 0.93, "grad_norm": 1.1923787593841553, "learning_rate": 1.1281084065878745e-07, "loss": 0.501, "step": 14474 }, { "epoch": 0.93, "grad_norm": 1.2412549257278442, "learning_rate": 1.1259015236639581e-07, "loss": 0.561, "step": 14475 }, { "epoch": 0.93, "grad_norm": 1.0831645727157593, "learning_rate": 1.1236967769007844e-07, "loss": 0.5006, "step": 14476 }, { "epoch": 0.93, "grad_norm": 1.11362624168396, "learning_rate": 1.1214941663947154e-07, "loss": 0.4761, "step": 14477 }, { "epoch": 0.93, "grad_norm": 1.1111513376235962, "learning_rate": 1.119293692242035e-07, "loss": 0.5218, "step": 14478 }, { "epoch": 0.93, "grad_norm": 1.2107263803482056, "learning_rate": 1.1170953545389107e-07, "loss": 0.54, "step": 14479 }, { "epoch": 0.93, "grad_norm": 1.0579304695129395, "learning_rate": 1.1148991533814268e-07, "loss": 0.4685, "step": 14480 }, { "epoch": 0.93, "grad_norm": 1.1802306175231934, "learning_rate": 1.1127050888655844e-07, "loss": 0.5309, "step": 14481 }, { "epoch": 0.93, "grad_norm": 1.3390883207321167, "learning_rate": 1.1105131610872676e-07, "loss": 0.5576, "step": 14482 }, { "epoch": 0.93, "grad_norm": 1.1515713930130005, "learning_rate": 1.108323370142278e-07, "loss": 0.5034, "step": 14483 }, { "epoch": 0.93, "grad_norm": 1.1834447383880615, "learning_rate": 1.1061357161263387e-07, "loss": 0.4927, "step": 14484 }, { "epoch": 0.93, "grad_norm": 1.2220680713653564, "learning_rate": 1.103950199135051e-07, "loss": 0.5025, "step": 14485 }, { "epoch": 0.94, "grad_norm": 1.2384828329086304, "learning_rate": 1.1017668192639553e-07, "loss": 0.5108, "step": 14486 }, { "epoch": 0.94, "grad_norm": 1.1476298570632935, "learning_rate": 1.0995855766084696e-07, "loss": 0.463, "step": 14487 }, { "epoch": 0.94, "grad_norm": 1.09535813331604, "learning_rate": 1.0974064712639343e-07, "loss": 0.4977, "step": 14488 }, { "epoch": 0.94, "grad_norm": 1.1884511709213257, "learning_rate": 1.09522950332559e-07, "loss": 0.5191, "step": 14489 }, { "epoch": 0.94, "grad_norm": 1.161007046699524, "learning_rate": 1.0930546728885883e-07, "loss": 0.496, "step": 14490 }, { "epoch": 0.94, "grad_norm": 1.274764895439148, "learning_rate": 1.0908819800479863e-07, "loss": 0.5155, "step": 14491 }, { "epoch": 0.94, "grad_norm": 1.1266049146652222, "learning_rate": 1.0887114248987474e-07, "loss": 0.5289, "step": 14492 }, { "epoch": 0.94, "grad_norm": 1.1900460720062256, "learning_rate": 1.0865430075357397e-07, "loss": 0.5134, "step": 14493 }, { "epoch": 0.94, "grad_norm": 1.1972377300262451, "learning_rate": 1.0843767280537376e-07, "loss": 0.5161, "step": 14494 }, { "epoch": 0.94, "grad_norm": 1.2233363389968872, "learning_rate": 1.0822125865474209e-07, "loss": 0.533, "step": 14495 }, { "epoch": 0.94, "grad_norm": 1.1743159294128418, "learning_rate": 1.080050583111386e-07, "loss": 0.5572, "step": 14496 }, { "epoch": 0.94, "grad_norm": 1.1201785802841187, "learning_rate": 1.0778907178401243e-07, "loss": 0.527, "step": 14497 }, { "epoch": 0.94, "grad_norm": 1.1550294160842896, "learning_rate": 1.0757329908280379e-07, "loss": 0.5187, "step": 14498 }, { "epoch": 0.94, "grad_norm": 1.2803983688354492, "learning_rate": 1.0735774021694401e-07, "loss": 0.5119, "step": 14499 }, { "epoch": 0.94, "grad_norm": 1.2598382234573364, "learning_rate": 1.071423951958539e-07, "loss": 0.4998, "step": 14500 }, { "epoch": 0.94, "grad_norm": 1.276285171508789, "learning_rate": 1.0692726402894593e-07, "loss": 0.5187, "step": 14501 }, { "epoch": 0.94, "grad_norm": 1.089229941368103, "learning_rate": 1.0671234672562259e-07, "loss": 0.5067, "step": 14502 }, { "epoch": 0.94, "grad_norm": 1.166236162185669, "learning_rate": 1.0649764329527801e-07, "loss": 0.5573, "step": 14503 }, { "epoch": 0.94, "grad_norm": 1.1033246517181396, "learning_rate": 1.0628315374729636e-07, "loss": 0.4689, "step": 14504 }, { "epoch": 0.94, "grad_norm": 1.1110650300979614, "learning_rate": 1.0606887809105238e-07, "loss": 0.5024, "step": 14505 }, { "epoch": 0.94, "grad_norm": 1.2887067794799805, "learning_rate": 1.0585481633591021e-07, "loss": 0.5513, "step": 14506 }, { "epoch": 0.94, "grad_norm": 1.272788405418396, "learning_rate": 1.056409684912274e-07, "loss": 0.5682, "step": 14507 }, { "epoch": 0.94, "grad_norm": 1.1744354963302612, "learning_rate": 1.0542733456635034e-07, "loss": 0.5407, "step": 14508 }, { "epoch": 0.94, "grad_norm": 1.1539874076843262, "learning_rate": 1.0521391457061658e-07, "loss": 0.502, "step": 14509 }, { "epoch": 0.94, "grad_norm": 1.100398063659668, "learning_rate": 1.0500070851335365e-07, "loss": 0.5059, "step": 14510 }, { "epoch": 0.94, "grad_norm": 1.1855311393737793, "learning_rate": 1.0478771640388074e-07, "loss": 0.4936, "step": 14511 }, { "epoch": 0.94, "grad_norm": 1.18034827709198, "learning_rate": 1.0457493825150655e-07, "loss": 0.5095, "step": 14512 }, { "epoch": 0.94, "grad_norm": 1.2893180847167969, "learning_rate": 1.043623740655314e-07, "loss": 0.5524, "step": 14513 }, { "epoch": 0.94, "grad_norm": 1.2248080968856812, "learning_rate": 1.041500238552462e-07, "loss": 0.5035, "step": 14514 }, { "epoch": 0.94, "grad_norm": 1.0751897096633911, "learning_rate": 1.0393788762993184e-07, "loss": 0.4574, "step": 14515 }, { "epoch": 0.94, "grad_norm": 1.2767668962478638, "learning_rate": 1.0372596539886037e-07, "loss": 0.5142, "step": 14516 }, { "epoch": 0.94, "grad_norm": 1.1197038888931274, "learning_rate": 1.0351425717129438e-07, "loss": 0.514, "step": 14517 }, { "epoch": 0.94, "grad_norm": 1.222474455833435, "learning_rate": 1.0330276295648756e-07, "loss": 0.5071, "step": 14518 }, { "epoch": 0.94, "grad_norm": 1.2649403810501099, "learning_rate": 1.0309148276368309e-07, "loss": 0.514, "step": 14519 }, { "epoch": 0.94, "grad_norm": 1.1938912868499756, "learning_rate": 1.0288041660211578e-07, "loss": 0.5012, "step": 14520 }, { "epoch": 0.94, "grad_norm": 1.206900715827942, "learning_rate": 1.0266956448101051e-07, "loss": 0.4654, "step": 14521 }, { "epoch": 0.94, "grad_norm": 1.2241307497024536, "learning_rate": 1.0245892640958322e-07, "loss": 0.5544, "step": 14522 }, { "epoch": 0.94, "grad_norm": 1.2422996759414673, "learning_rate": 1.0224850239704043e-07, "loss": 0.5418, "step": 14523 }, { "epoch": 0.94, "grad_norm": 1.1697802543640137, "learning_rate": 1.0203829245257979e-07, "loss": 0.5042, "step": 14524 }, { "epoch": 0.94, "grad_norm": 1.1475836038589478, "learning_rate": 1.0182829658538784e-07, "loss": 0.534, "step": 14525 }, { "epoch": 0.94, "grad_norm": 1.1689478158950806, "learning_rate": 1.0161851480464391e-07, "loss": 0.5018, "step": 14526 }, { "epoch": 0.94, "grad_norm": 1.1786106824874878, "learning_rate": 1.0140894711951621e-07, "loss": 0.4736, "step": 14527 }, { "epoch": 0.94, "grad_norm": 1.2752827405929565, "learning_rate": 1.0119959353916576e-07, "loss": 0.5278, "step": 14528 }, { "epoch": 0.94, "grad_norm": 1.1935081481933594, "learning_rate": 1.0099045407274132e-07, "loss": 0.5099, "step": 14529 }, { "epoch": 0.94, "grad_norm": 1.2299617528915405, "learning_rate": 1.0078152872938451e-07, "loss": 0.4997, "step": 14530 }, { "epoch": 0.94, "grad_norm": 1.1682640314102173, "learning_rate": 1.0057281751822744e-07, "loss": 0.5194, "step": 14531 }, { "epoch": 0.94, "grad_norm": 1.0895124673843384, "learning_rate": 1.0036432044839118e-07, "loss": 0.5247, "step": 14532 }, { "epoch": 0.94, "grad_norm": 1.125922441482544, "learning_rate": 1.0015603752898951e-07, "loss": 0.4836, "step": 14533 }, { "epoch": 0.94, "grad_norm": 1.209464192390442, "learning_rate": 9.994796876912626e-08, "loss": 0.5365, "step": 14534 }, { "epoch": 0.94, "grad_norm": 1.1889832019805908, "learning_rate": 9.97401141778942e-08, "loss": 0.533, "step": 14535 }, { "epoch": 0.94, "grad_norm": 1.212526559829712, "learning_rate": 9.953247376437936e-08, "loss": 0.5154, "step": 14536 }, { "epoch": 0.94, "grad_norm": 1.1653133630752563, "learning_rate": 9.932504753765614e-08, "loss": 0.5266, "step": 14537 }, { "epoch": 0.94, "grad_norm": 1.137527585029602, "learning_rate": 9.911783550679177e-08, "loss": 0.5039, "step": 14538 }, { "epoch": 0.94, "grad_norm": 1.172082781791687, "learning_rate": 9.891083768084231e-08, "loss": 0.5174, "step": 14539 }, { "epoch": 0.94, "grad_norm": 1.2180163860321045, "learning_rate": 9.870405406885497e-08, "loss": 0.5338, "step": 14540 }, { "epoch": 0.94, "grad_norm": 1.1041316986083984, "learning_rate": 9.849748467986808e-08, "loss": 0.4635, "step": 14541 }, { "epoch": 0.94, "grad_norm": 1.1960511207580566, "learning_rate": 9.829112952290997e-08, "loss": 0.4899, "step": 14542 }, { "epoch": 0.94, "grad_norm": 1.1528481245040894, "learning_rate": 9.80849886070001e-08, "loss": 0.4689, "step": 14543 }, { "epoch": 0.94, "grad_norm": 1.1693780422210693, "learning_rate": 9.787906194114794e-08, "loss": 0.5366, "step": 14544 }, { "epoch": 0.94, "grad_norm": 1.1136072874069214, "learning_rate": 9.767334953435459e-08, "loss": 0.49, "step": 14545 }, { "epoch": 0.94, "grad_norm": 1.2133582830429077, "learning_rate": 9.746785139561121e-08, "loss": 0.5232, "step": 14546 }, { "epoch": 0.94, "grad_norm": 1.1759717464447021, "learning_rate": 9.726256753389895e-08, "loss": 0.5224, "step": 14547 }, { "epoch": 0.94, "grad_norm": 1.114759922027588, "learning_rate": 9.705749795819064e-08, "loss": 0.4759, "step": 14548 }, { "epoch": 0.94, "grad_norm": 1.2043085098266602, "learning_rate": 9.685264267744964e-08, "loss": 0.5163, "step": 14549 }, { "epoch": 0.94, "grad_norm": 1.1182714700698853, "learning_rate": 9.664800170062937e-08, "loss": 0.4868, "step": 14550 }, { "epoch": 0.94, "grad_norm": 1.2294597625732422, "learning_rate": 9.644357503667378e-08, "loss": 0.5253, "step": 14551 }, { "epoch": 0.94, "grad_norm": 1.147133469581604, "learning_rate": 9.62393626945185e-08, "loss": 0.511, "step": 14552 }, { "epoch": 0.94, "grad_norm": 1.1603307723999023, "learning_rate": 9.60353646830886e-08, "loss": 0.5105, "step": 14553 }, { "epoch": 0.94, "grad_norm": 1.2266653776168823, "learning_rate": 9.58315810113003e-08, "loss": 0.5325, "step": 14554 }, { "epoch": 0.94, "grad_norm": 1.1807613372802734, "learning_rate": 9.56280116880609e-08, "loss": 0.5343, "step": 14555 }, { "epoch": 0.94, "grad_norm": 1.2026362419128418, "learning_rate": 9.542465672226775e-08, "loss": 0.5352, "step": 14556 }, { "epoch": 0.94, "grad_norm": 1.231061577796936, "learning_rate": 9.522151612280928e-08, "loss": 0.5068, "step": 14557 }, { "epoch": 0.94, "grad_norm": 1.2101027965545654, "learning_rate": 9.501858989856339e-08, "loss": 0.5392, "step": 14558 }, { "epoch": 0.94, "grad_norm": 1.1819571256637573, "learning_rate": 9.481587805839964e-08, "loss": 0.5156, "step": 14559 }, { "epoch": 0.94, "grad_norm": 1.15861177444458, "learning_rate": 9.461338061117876e-08, "loss": 0.5324, "step": 14560 }, { "epoch": 0.94, "grad_norm": 1.195595383644104, "learning_rate": 9.441109756575029e-08, "loss": 0.4642, "step": 14561 }, { "epoch": 0.94, "grad_norm": 1.2452653646469116, "learning_rate": 9.420902893095663e-08, "loss": 0.5133, "step": 14562 }, { "epoch": 0.94, "grad_norm": 1.349064826965332, "learning_rate": 9.400717471562959e-08, "loss": 0.5225, "step": 14563 }, { "epoch": 0.94, "grad_norm": 1.1904728412628174, "learning_rate": 9.380553492859101e-08, "loss": 0.4976, "step": 14564 }, { "epoch": 0.94, "grad_norm": 1.2382240295410156, "learning_rate": 9.36041095786544e-08, "loss": 0.5261, "step": 14565 }, { "epoch": 0.94, "grad_norm": 1.2443898916244507, "learning_rate": 9.340289867462326e-08, "loss": 0.5408, "step": 14566 }, { "epoch": 0.94, "grad_norm": 1.1647003889083862, "learning_rate": 9.320190222529224e-08, "loss": 0.5488, "step": 14567 }, { "epoch": 0.94, "grad_norm": 1.1896345615386963, "learning_rate": 9.300112023944597e-08, "loss": 0.5569, "step": 14568 }, { "epoch": 0.94, "grad_norm": 1.1232444047927856, "learning_rate": 9.28005527258613e-08, "loss": 0.4679, "step": 14569 }, { "epoch": 0.94, "grad_norm": 1.2212952375411987, "learning_rate": 9.260019969330347e-08, "loss": 0.4914, "step": 14570 }, { "epoch": 0.94, "grad_norm": 1.2057485580444336, "learning_rate": 9.240006115052991e-08, "loss": 0.5459, "step": 14571 }, { "epoch": 0.94, "grad_norm": 1.0870957374572754, "learning_rate": 9.22001371062875e-08, "loss": 0.4878, "step": 14572 }, { "epoch": 0.94, "grad_norm": 1.1267164945602417, "learning_rate": 9.200042756931482e-08, "loss": 0.477, "step": 14573 }, { "epoch": 0.94, "grad_norm": 1.2868167161941528, "learning_rate": 9.180093254834044e-08, "loss": 0.5335, "step": 14574 }, { "epoch": 0.94, "grad_norm": 1.1970984935760498, "learning_rate": 9.16016520520846e-08, "loss": 0.5138, "step": 14575 }, { "epoch": 0.94, "grad_norm": 1.1040102243423462, "learning_rate": 9.140258608925645e-08, "loss": 0.4536, "step": 14576 }, { "epoch": 0.94, "grad_norm": 1.154882788658142, "learning_rate": 9.120373466855681e-08, "loss": 0.5266, "step": 14577 }, { "epoch": 0.94, "grad_norm": 1.1433370113372803, "learning_rate": 9.100509779867706e-08, "loss": 0.4536, "step": 14578 }, { "epoch": 0.94, "grad_norm": 1.233994960784912, "learning_rate": 9.080667548829913e-08, "loss": 0.5276, "step": 14579 }, { "epoch": 0.94, "grad_norm": 1.1991807222366333, "learning_rate": 9.0608467746095e-08, "loss": 0.5064, "step": 14580 }, { "epoch": 0.94, "grad_norm": 1.1672471761703491, "learning_rate": 9.041047458072937e-08, "loss": 0.4603, "step": 14581 }, { "epoch": 0.94, "grad_norm": 1.2366242408752441, "learning_rate": 9.021269600085424e-08, "loss": 0.5327, "step": 14582 }, { "epoch": 0.94, "grad_norm": 1.22855544090271, "learning_rate": 9.001513201511547e-08, "loss": 0.5065, "step": 14583 }, { "epoch": 0.94, "grad_norm": 1.1671638488769531, "learning_rate": 8.981778263214669e-08, "loss": 0.5277, "step": 14584 }, { "epoch": 0.94, "grad_norm": 1.2070329189300537, "learning_rate": 8.962064786057434e-08, "loss": 0.4629, "step": 14585 }, { "epoch": 0.94, "grad_norm": 1.1824315786361694, "learning_rate": 8.942372770901431e-08, "loss": 0.52, "step": 14586 }, { "epoch": 0.94, "grad_norm": 1.1514179706573486, "learning_rate": 8.922702218607416e-08, "loss": 0.5315, "step": 14587 }, { "epoch": 0.94, "grad_norm": 1.1007801294326782, "learning_rate": 8.903053130035089e-08, "loss": 0.5144, "step": 14588 }, { "epoch": 0.94, "grad_norm": 1.1556369066238403, "learning_rate": 8.883425506043208e-08, "loss": 0.572, "step": 14589 }, { "epoch": 0.94, "grad_norm": 1.184110164642334, "learning_rate": 8.863819347489755e-08, "loss": 0.5623, "step": 14590 }, { "epoch": 0.94, "grad_norm": 1.190287709236145, "learning_rate": 8.844234655231599e-08, "loss": 0.5363, "step": 14591 }, { "epoch": 0.94, "grad_norm": 1.25360906124115, "learning_rate": 8.82467143012472e-08, "loss": 0.5406, "step": 14592 }, { "epoch": 0.94, "grad_norm": 1.098633885383606, "learning_rate": 8.805129673024271e-08, "loss": 0.4903, "step": 14593 }, { "epoch": 0.94, "grad_norm": 1.254677414894104, "learning_rate": 8.785609384784232e-08, "loss": 0.5416, "step": 14594 }, { "epoch": 0.94, "grad_norm": 1.1471199989318848, "learning_rate": 8.766110566257869e-08, "loss": 0.5047, "step": 14595 }, { "epoch": 0.94, "grad_norm": 1.089064121246338, "learning_rate": 8.74663321829744e-08, "loss": 0.4976, "step": 14596 }, { "epoch": 0.94, "grad_norm": 1.1579183340072632, "learning_rate": 8.727177341754156e-08, "loss": 0.4882, "step": 14597 }, { "epoch": 0.94, "grad_norm": 1.100051999092102, "learning_rate": 8.707742937478558e-08, "loss": 0.4946, "step": 14598 }, { "epoch": 0.94, "grad_norm": 1.12839937210083, "learning_rate": 8.688330006319911e-08, "loss": 0.4917, "step": 14599 }, { "epoch": 0.94, "grad_norm": 1.2426613569259644, "learning_rate": 8.668938549126759e-08, "loss": 0.4959, "step": 14600 }, { "epoch": 0.94, "grad_norm": 1.2226086854934692, "learning_rate": 8.649568566746591e-08, "loss": 0.5327, "step": 14601 }, { "epoch": 0.94, "grad_norm": 1.1598362922668457, "learning_rate": 8.630220060026173e-08, "loss": 0.5076, "step": 14602 }, { "epoch": 0.94, "grad_norm": 1.177120327949524, "learning_rate": 8.610893029811051e-08, "loss": 0.4441, "step": 14603 }, { "epoch": 0.94, "grad_norm": 1.1781268119812012, "learning_rate": 8.591587476945996e-08, "loss": 0.5158, "step": 14604 }, { "epoch": 0.94, "grad_norm": 1.2555150985717773, "learning_rate": 8.572303402274773e-08, "loss": 0.5232, "step": 14605 }, { "epoch": 0.94, "grad_norm": 1.1061289310455322, "learning_rate": 8.553040806640322e-08, "loss": 0.4545, "step": 14606 }, { "epoch": 0.94, "grad_norm": 1.307481288909912, "learning_rate": 8.53379969088447e-08, "loss": 0.5065, "step": 14607 }, { "epoch": 0.94, "grad_norm": 1.2115033864974976, "learning_rate": 8.514580055848265e-08, "loss": 0.5181, "step": 14608 }, { "epoch": 0.94, "grad_norm": 1.2337615489959717, "learning_rate": 8.495381902371703e-08, "loss": 0.5088, "step": 14609 }, { "epoch": 0.94, "grad_norm": 1.1754605770111084, "learning_rate": 8.476205231293888e-08, "loss": 0.5305, "step": 14610 }, { "epoch": 0.94, "grad_norm": 1.2035572528839111, "learning_rate": 8.457050043452986e-08, "loss": 0.5335, "step": 14611 }, { "epoch": 0.94, "grad_norm": 1.2263611555099487, "learning_rate": 8.437916339686215e-08, "loss": 0.5295, "step": 14612 }, { "epoch": 0.94, "grad_norm": 1.2513291835784912, "learning_rate": 8.418804120829959e-08, "loss": 0.4974, "step": 14613 }, { "epoch": 0.94, "grad_norm": 1.2516099214553833, "learning_rate": 8.399713387719389e-08, "loss": 0.5373, "step": 14614 }, { "epoch": 0.94, "grad_norm": 1.136890172958374, "learning_rate": 8.380644141188998e-08, "loss": 0.4832, "step": 14615 }, { "epoch": 0.94, "grad_norm": 1.140312671661377, "learning_rate": 8.36159638207229e-08, "loss": 0.5222, "step": 14616 }, { "epoch": 0.94, "grad_norm": 1.1361565589904785, "learning_rate": 8.34257011120182e-08, "loss": 0.5247, "step": 14617 }, { "epoch": 0.94, "grad_norm": 1.3089182376861572, "learning_rate": 8.323565329409033e-08, "loss": 0.517, "step": 14618 }, { "epoch": 0.94, "grad_norm": 1.186084508895874, "learning_rate": 8.304582037524655e-08, "loss": 0.5436, "step": 14619 }, { "epoch": 0.94, "grad_norm": 1.2649791240692139, "learning_rate": 8.285620236378467e-08, "loss": 0.5542, "step": 14620 }, { "epoch": 0.94, "grad_norm": 1.2509740591049194, "learning_rate": 8.266679926799137e-08, "loss": 0.4758, "step": 14621 }, { "epoch": 0.94, "grad_norm": 1.225662112236023, "learning_rate": 8.247761109614561e-08, "loss": 0.484, "step": 14622 }, { "epoch": 0.94, "grad_norm": 1.2650935649871826, "learning_rate": 8.228863785651576e-08, "loss": 0.4954, "step": 14623 }, { "epoch": 0.94, "grad_norm": 1.289894461631775, "learning_rate": 8.209987955736188e-08, "loss": 0.5452, "step": 14624 }, { "epoch": 0.94, "grad_norm": 1.1758668422698975, "learning_rate": 8.191133620693404e-08, "loss": 0.4985, "step": 14625 }, { "epoch": 0.94, "grad_norm": 1.147093415260315, "learning_rate": 8.172300781347286e-08, "loss": 0.529, "step": 14626 }, { "epoch": 0.94, "grad_norm": 1.2186498641967773, "learning_rate": 8.153489438520956e-08, "loss": 0.5473, "step": 14627 }, { "epoch": 0.94, "grad_norm": 1.1212399005889893, "learning_rate": 8.134699593036643e-08, "loss": 0.4869, "step": 14628 }, { "epoch": 0.94, "grad_norm": 1.1178433895111084, "learning_rate": 8.115931245715525e-08, "loss": 0.4982, "step": 14629 }, { "epoch": 0.94, "grad_norm": 1.077633023262024, "learning_rate": 8.097184397378e-08, "loss": 0.4709, "step": 14630 }, { "epoch": 0.94, "grad_norm": 1.1099528074264526, "learning_rate": 8.078459048843468e-08, "loss": 0.4853, "step": 14631 }, { "epoch": 0.94, "grad_norm": 1.19955313205719, "learning_rate": 8.059755200930275e-08, "loss": 0.5614, "step": 14632 }, { "epoch": 0.94, "grad_norm": 1.176829218864441, "learning_rate": 8.041072854455933e-08, "loss": 0.5449, "step": 14633 }, { "epoch": 0.94, "grad_norm": 1.2348254919052124, "learning_rate": 8.022412010237068e-08, "loss": 0.5226, "step": 14634 }, { "epoch": 0.94, "grad_norm": 1.1615381240844727, "learning_rate": 8.003772669089249e-08, "loss": 0.512, "step": 14635 }, { "epoch": 0.94, "grad_norm": 1.1619335412979126, "learning_rate": 7.985154831827158e-08, "loss": 0.5261, "step": 14636 }, { "epoch": 0.94, "grad_norm": 1.1113524436950684, "learning_rate": 7.966558499264532e-08, "loss": 0.5126, "step": 14637 }, { "epoch": 0.94, "grad_norm": 1.1314115524291992, "learning_rate": 7.947983672214165e-08, "loss": 0.4857, "step": 14638 }, { "epoch": 0.94, "grad_norm": 1.215000033378601, "learning_rate": 7.92943035148791e-08, "loss": 0.5347, "step": 14639 }, { "epoch": 0.94, "grad_norm": 1.0902501344680786, "learning_rate": 7.910898537896727e-08, "loss": 0.5415, "step": 14640 }, { "epoch": 0.95, "grad_norm": 1.2391811609268188, "learning_rate": 7.892388232250525e-08, "loss": 0.5221, "step": 14641 }, { "epoch": 0.95, "grad_norm": 1.2660232782363892, "learning_rate": 7.873899435358379e-08, "loss": 0.5416, "step": 14642 }, { "epoch": 0.95, "grad_norm": 1.1796610355377197, "learning_rate": 7.855432148028419e-08, "loss": 0.5224, "step": 14643 }, { "epoch": 0.95, "grad_norm": 1.3053147792816162, "learning_rate": 7.836986371067723e-08, "loss": 0.5098, "step": 14644 }, { "epoch": 0.95, "grad_norm": 1.0818451642990112, "learning_rate": 7.818562105282645e-08, "loss": 0.5084, "step": 14645 }, { "epoch": 0.95, "grad_norm": 1.2833824157714844, "learning_rate": 7.800159351478321e-08, "loss": 0.5165, "step": 14646 }, { "epoch": 0.95, "grad_norm": 1.185478687286377, "learning_rate": 7.781778110459103e-08, "loss": 0.5116, "step": 14647 }, { "epoch": 0.95, "grad_norm": 1.1405572891235352, "learning_rate": 7.763418383028465e-08, "loss": 0.5087, "step": 14648 }, { "epoch": 0.95, "grad_norm": 1.1827301979064941, "learning_rate": 7.745080169988817e-08, "loss": 0.5132, "step": 14649 }, { "epoch": 0.95, "grad_norm": 1.0907083749771118, "learning_rate": 7.726763472141629e-08, "loss": 0.4876, "step": 14650 }, { "epoch": 0.95, "grad_norm": 1.2556761503219604, "learning_rate": 7.708468290287597e-08, "loss": 0.4979, "step": 14651 }, { "epoch": 0.95, "grad_norm": 1.2985175848007202, "learning_rate": 7.6901946252263e-08, "loss": 0.5175, "step": 14652 }, { "epoch": 0.95, "grad_norm": 1.1701794862747192, "learning_rate": 7.671942477756378e-08, "loss": 0.53, "step": 14653 }, { "epoch": 0.95, "grad_norm": 1.296088695526123, "learning_rate": 7.653711848675582e-08, "loss": 0.5351, "step": 14654 }, { "epoch": 0.95, "grad_norm": 1.1105722188949585, "learning_rate": 7.635502738780831e-08, "loss": 0.5168, "step": 14655 }, { "epoch": 0.95, "grad_norm": 1.2909351587295532, "learning_rate": 7.617315148867932e-08, "loss": 0.5362, "step": 14656 }, { "epoch": 0.95, "grad_norm": 1.1655482053756714, "learning_rate": 7.59914907973186e-08, "loss": 0.5418, "step": 14657 }, { "epoch": 0.95, "grad_norm": 1.2973048686981201, "learning_rate": 7.58100453216648e-08, "loss": 0.542, "step": 14658 }, { "epoch": 0.95, "grad_norm": 1.172088623046875, "learning_rate": 7.562881506964992e-08, "loss": 0.477, "step": 14659 }, { "epoch": 0.95, "grad_norm": 1.1397874355316162, "learning_rate": 7.544780004919427e-08, "loss": 0.4935, "step": 14660 }, { "epoch": 0.95, "grad_norm": 1.0690498352050781, "learning_rate": 7.526700026820988e-08, "loss": 0.4291, "step": 14661 }, { "epoch": 0.95, "grad_norm": 1.188598871231079, "learning_rate": 7.508641573459874e-08, "loss": 0.5098, "step": 14662 }, { "epoch": 0.95, "grad_norm": 1.1782310009002686, "learning_rate": 7.490604645625454e-08, "loss": 0.5268, "step": 14663 }, { "epoch": 0.95, "grad_norm": 1.1827917098999023, "learning_rate": 7.472589244105987e-08, "loss": 0.5053, "step": 14664 }, { "epoch": 0.95, "grad_norm": 1.2342021465301514, "learning_rate": 7.4545953696889e-08, "loss": 0.5206, "step": 14665 }, { "epoch": 0.95, "grad_norm": 1.1543806791305542, "learning_rate": 7.436623023160616e-08, "loss": 0.4698, "step": 14666 }, { "epoch": 0.95, "grad_norm": 1.1189883947372437, "learning_rate": 7.418672205306731e-08, "loss": 0.5435, "step": 14667 }, { "epoch": 0.95, "grad_norm": 1.1442278623580933, "learning_rate": 7.40074291691184e-08, "loss": 0.4667, "step": 14668 }, { "epoch": 0.95, "grad_norm": 1.2366827726364136, "learning_rate": 7.382835158759593e-08, "loss": 0.5153, "step": 14669 }, { "epoch": 0.95, "grad_norm": 1.1566879749298096, "learning_rate": 7.364948931632587e-08, "loss": 0.4882, "step": 14670 }, { "epoch": 0.95, "grad_norm": 1.1495062112808228, "learning_rate": 7.34708423631264e-08, "loss": 0.4899, "step": 14671 }, { "epoch": 0.95, "grad_norm": 1.2199517488479614, "learning_rate": 7.329241073580573e-08, "loss": 0.5704, "step": 14672 }, { "epoch": 0.95, "grad_norm": 1.2358900308609009, "learning_rate": 7.311419444216317e-08, "loss": 0.4933, "step": 14673 }, { "epoch": 0.95, "grad_norm": 1.3253253698349, "learning_rate": 7.293619348998749e-08, "loss": 0.5197, "step": 14674 }, { "epoch": 0.95, "grad_norm": 1.1860146522521973, "learning_rate": 7.275840788705912e-08, "loss": 0.5543, "step": 14675 }, { "epoch": 0.95, "grad_norm": 1.1140645742416382, "learning_rate": 7.258083764114743e-08, "loss": 0.4643, "step": 14676 }, { "epoch": 0.95, "grad_norm": 1.1879215240478516, "learning_rate": 7.240348276001507e-08, "loss": 0.4993, "step": 14677 }, { "epoch": 0.95, "grad_norm": 1.243780493736267, "learning_rate": 7.222634325141309e-08, "loss": 0.5346, "step": 14678 }, { "epoch": 0.95, "grad_norm": 1.19115149974823, "learning_rate": 7.204941912308306e-08, "loss": 0.511, "step": 14679 }, { "epoch": 0.95, "grad_norm": 1.3064073324203491, "learning_rate": 7.187271038275989e-08, "loss": 0.545, "step": 14680 }, { "epoch": 0.95, "grad_norm": 1.146742582321167, "learning_rate": 7.16962170381652e-08, "loss": 0.5181, "step": 14681 }, { "epoch": 0.95, "grad_norm": 1.2120821475982666, "learning_rate": 7.151993909701338e-08, "loss": 0.5291, "step": 14682 }, { "epoch": 0.95, "grad_norm": 1.163243055343628, "learning_rate": 7.134387656700937e-08, "loss": 0.5013, "step": 14683 }, { "epoch": 0.95, "grad_norm": 1.2556018829345703, "learning_rate": 7.116802945584867e-08, "loss": 0.5455, "step": 14684 }, { "epoch": 0.95, "grad_norm": 1.2602044343948364, "learning_rate": 7.09923977712168e-08, "loss": 0.5272, "step": 14685 }, { "epoch": 0.95, "grad_norm": 1.2510355710983276, "learning_rate": 7.081698152079097e-08, "loss": 0.5468, "step": 14686 }, { "epoch": 0.95, "grad_norm": 1.1510703563690186, "learning_rate": 7.064178071223615e-08, "loss": 0.5269, "step": 14687 }, { "epoch": 0.95, "grad_norm": 1.3825515508651733, "learning_rate": 7.046679535321178e-08, "loss": 0.5448, "step": 14688 }, { "epoch": 0.95, "grad_norm": 1.2017842531204224, "learning_rate": 7.02920254513656e-08, "loss": 0.4961, "step": 14689 }, { "epoch": 0.95, "grad_norm": 1.0936418771743774, "learning_rate": 7.011747101433597e-08, "loss": 0.4964, "step": 14690 }, { "epoch": 0.95, "grad_norm": 1.2119919061660767, "learning_rate": 6.994313204975234e-08, "loss": 0.5496, "step": 14691 }, { "epoch": 0.95, "grad_norm": 1.1746946573257446, "learning_rate": 6.976900856523472e-08, "loss": 0.5086, "step": 14692 }, { "epoch": 0.95, "grad_norm": 1.189781904220581, "learning_rate": 6.959510056839369e-08, "loss": 0.4681, "step": 14693 }, { "epoch": 0.95, "grad_norm": 1.1769182682037354, "learning_rate": 6.942140806682985e-08, "loss": 0.5133, "step": 14694 }, { "epoch": 0.95, "grad_norm": 1.1844569444656372, "learning_rate": 6.924793106813544e-08, "loss": 0.509, "step": 14695 }, { "epoch": 0.95, "grad_norm": 1.4726396799087524, "learning_rate": 6.907466957989272e-08, "loss": 0.5278, "step": 14696 }, { "epoch": 0.95, "grad_norm": 1.1893185377120972, "learning_rate": 6.890162360967401e-08, "loss": 0.4854, "step": 14697 }, { "epoch": 0.95, "grad_norm": 1.2395938634872437, "learning_rate": 6.872879316504321e-08, "loss": 0.515, "step": 14698 }, { "epoch": 0.95, "grad_norm": 1.1639361381530762, "learning_rate": 6.85561782535532e-08, "loss": 0.52, "step": 14699 }, { "epoch": 0.95, "grad_norm": 1.1489652395248413, "learning_rate": 6.838377888275016e-08, "loss": 0.4871, "step": 14700 }, { "epoch": 0.95, "grad_norm": 1.2117767333984375, "learning_rate": 6.821159506016806e-08, "loss": 0.5161, "step": 14701 }, { "epoch": 0.95, "grad_norm": 1.1093320846557617, "learning_rate": 6.80396267933331e-08, "loss": 0.4585, "step": 14702 }, { "epoch": 0.95, "grad_norm": 1.1440812349319458, "learning_rate": 6.786787408976147e-08, "loss": 0.532, "step": 14703 }, { "epoch": 0.95, "grad_norm": 1.2275173664093018, "learning_rate": 6.769633695695943e-08, "loss": 0.4544, "step": 14704 }, { "epoch": 0.95, "grad_norm": 1.2056591510772705, "learning_rate": 6.75250154024254e-08, "loss": 0.5193, "step": 14705 }, { "epoch": 0.95, "grad_norm": 1.2624505758285522, "learning_rate": 6.735390943364728e-08, "loss": 0.5377, "step": 14706 }, { "epoch": 0.95, "grad_norm": 1.037243366241455, "learning_rate": 6.718301905810298e-08, "loss": 0.489, "step": 14707 }, { "epoch": 0.95, "grad_norm": 1.180181622505188, "learning_rate": 6.701234428326209e-08, "loss": 0.5078, "step": 14708 }, { "epoch": 0.95, "grad_norm": 1.1877107620239258, "learning_rate": 6.684188511658473e-08, "loss": 0.5472, "step": 14709 }, { "epoch": 0.95, "grad_norm": 1.1409014463424683, "learning_rate": 6.667164156552053e-08, "loss": 0.5398, "step": 14710 }, { "epoch": 0.95, "grad_norm": 1.1319533586502075, "learning_rate": 6.650161363751073e-08, "loss": 0.507, "step": 14711 }, { "epoch": 0.95, "grad_norm": 1.176599144935608, "learning_rate": 6.633180133998717e-08, "loss": 0.5325, "step": 14712 }, { "epoch": 0.95, "grad_norm": 1.1390279531478882, "learning_rate": 6.616220468037116e-08, "loss": 0.5149, "step": 14713 }, { "epoch": 0.95, "grad_norm": 1.2388670444488525, "learning_rate": 6.599282366607563e-08, "loss": 0.525, "step": 14714 }, { "epoch": 0.95, "grad_norm": 1.2974289655685425, "learning_rate": 6.582365830450411e-08, "loss": 0.5288, "step": 14715 }, { "epoch": 0.95, "grad_norm": 1.2035127878189087, "learning_rate": 6.565470860305012e-08, "loss": 0.5002, "step": 14716 }, { "epoch": 0.95, "grad_norm": 1.2532358169555664, "learning_rate": 6.548597456909778e-08, "loss": 0.477, "step": 14717 }, { "epoch": 0.95, "grad_norm": 1.1048091650009155, "learning_rate": 6.531745621002228e-08, "loss": 0.4612, "step": 14718 }, { "epoch": 0.95, "grad_norm": 1.2482517957687378, "learning_rate": 6.514915353318941e-08, "loss": 0.5546, "step": 14719 }, { "epoch": 0.95, "grad_norm": 1.1561083793640137, "learning_rate": 6.498106654595493e-08, "loss": 0.5075, "step": 14720 }, { "epoch": 0.95, "grad_norm": 1.2129414081573486, "learning_rate": 6.48131952556652e-08, "loss": 0.4956, "step": 14721 }, { "epoch": 0.95, "grad_norm": 1.1461650133132935, "learning_rate": 6.464553966965826e-08, "loss": 0.5279, "step": 14722 }, { "epoch": 0.95, "grad_norm": 1.2191801071166992, "learning_rate": 6.447809979526099e-08, "loss": 0.4968, "step": 14723 }, { "epoch": 0.95, "grad_norm": 1.1426790952682495, "learning_rate": 6.431087563979254e-08, "loss": 0.5054, "step": 14724 }, { "epoch": 0.95, "grad_norm": 1.1686019897460938, "learning_rate": 6.414386721056098e-08, "loss": 0.4713, "step": 14725 }, { "epoch": 0.95, "grad_norm": 1.2463524341583252, "learning_rate": 6.397707451486656e-08, "loss": 0.5486, "step": 14726 }, { "epoch": 0.95, "grad_norm": 1.1385259628295898, "learning_rate": 6.381049755999901e-08, "loss": 0.4847, "step": 14727 }, { "epoch": 0.95, "grad_norm": 1.1446808576583862, "learning_rate": 6.364413635323918e-08, "loss": 0.4848, "step": 14728 }, { "epoch": 0.95, "grad_norm": 1.228689432144165, "learning_rate": 6.347799090185846e-08, "loss": 0.4955, "step": 14729 }, { "epoch": 0.95, "grad_norm": 1.3359788656234741, "learning_rate": 6.331206121311773e-08, "loss": 0.5247, "step": 14730 }, { "epoch": 0.95, "grad_norm": 1.2063828706741333, "learning_rate": 6.314634729427005e-08, "loss": 0.5276, "step": 14731 }, { "epoch": 0.95, "grad_norm": 1.2358664274215698, "learning_rate": 6.298084915255853e-08, "loss": 0.5391, "step": 14732 }, { "epoch": 0.95, "grad_norm": 1.1652534008026123, "learning_rate": 6.281556679521683e-08, "loss": 0.5317, "step": 14733 }, { "epoch": 0.95, "grad_norm": 1.247823715209961, "learning_rate": 6.265050022946806e-08, "loss": 0.5311, "step": 14734 }, { "epoch": 0.95, "grad_norm": 1.1195579767227173, "learning_rate": 6.248564946252755e-08, "loss": 0.4952, "step": 14735 }, { "epoch": 0.95, "grad_norm": 1.2292354106903076, "learning_rate": 6.23210145016001e-08, "loss": 0.5243, "step": 14736 }, { "epoch": 0.95, "grad_norm": 1.172364354133606, "learning_rate": 6.215659535388219e-08, "loss": 0.5045, "step": 14737 }, { "epoch": 0.95, "grad_norm": 1.2568920850753784, "learning_rate": 6.199239202655915e-08, "loss": 0.4987, "step": 14738 }, { "epoch": 0.95, "grad_norm": 1.2331225872039795, "learning_rate": 6.182840452680916e-08, "loss": 0.5105, "step": 14739 }, { "epoch": 0.95, "grad_norm": 1.2413265705108643, "learning_rate": 6.166463286179814e-08, "loss": 0.5147, "step": 14740 }, { "epoch": 0.95, "grad_norm": 1.0927809476852417, "learning_rate": 6.150107703868535e-08, "loss": 0.4957, "step": 14741 }, { "epoch": 0.95, "grad_norm": 1.1182197332382202, "learning_rate": 6.133773706461898e-08, "loss": 0.4776, "step": 14742 }, { "epoch": 0.95, "grad_norm": 1.2560749053955078, "learning_rate": 6.117461294673776e-08, "loss": 0.53, "step": 14743 }, { "epoch": 0.95, "grad_norm": 1.186373233795166, "learning_rate": 6.101170469217266e-08, "loss": 0.5162, "step": 14744 }, { "epoch": 0.95, "grad_norm": 1.1979830265045166, "learning_rate": 6.084901230804297e-08, "loss": 0.5191, "step": 14745 }, { "epoch": 0.95, "grad_norm": 1.1551647186279297, "learning_rate": 6.068653580145966e-08, "loss": 0.5158, "step": 14746 }, { "epoch": 0.95, "grad_norm": 1.2870103120803833, "learning_rate": 6.052427517952375e-08, "loss": 0.5209, "step": 14747 }, { "epoch": 0.95, "grad_norm": 1.1317203044891357, "learning_rate": 6.036223044932843e-08, "loss": 0.5209, "step": 14748 }, { "epoch": 0.95, "grad_norm": 1.3434069156646729, "learning_rate": 6.020040161795526e-08, "loss": 0.531, "step": 14749 }, { "epoch": 0.95, "grad_norm": 1.2784112691879272, "learning_rate": 6.003878869247748e-08, "loss": 0.5657, "step": 14750 }, { "epoch": 0.95, "grad_norm": 1.2612258195877075, "learning_rate": 5.987739167995999e-08, "loss": 0.5378, "step": 14751 }, { "epoch": 0.95, "grad_norm": 1.239347219467163, "learning_rate": 5.971621058745492e-08, "loss": 0.5523, "step": 14752 }, { "epoch": 0.95, "grad_norm": 1.1484206914901733, "learning_rate": 5.9555245422008856e-08, "loss": 0.4893, "step": 14753 }, { "epoch": 0.95, "grad_norm": 1.1123539209365845, "learning_rate": 5.939449619065618e-08, "loss": 0.5208, "step": 14754 }, { "epoch": 0.95, "grad_norm": 1.2162585258483887, "learning_rate": 5.923396290042294e-08, "loss": 0.5364, "step": 14755 }, { "epoch": 0.95, "grad_norm": 1.2965105772018433, "learning_rate": 5.9073645558326286e-08, "loss": 0.5455, "step": 14756 }, { "epoch": 0.95, "grad_norm": 1.1852723360061646, "learning_rate": 5.891354417137285e-08, "loss": 0.5323, "step": 14757 }, { "epoch": 0.95, "grad_norm": 1.1372730731964111, "learning_rate": 5.8753658746559806e-08, "loss": 0.4827, "step": 14758 }, { "epoch": 0.95, "grad_norm": 1.0971176624298096, "learning_rate": 5.8593989290876006e-08, "loss": 0.4975, "step": 14759 }, { "epoch": 0.95, "grad_norm": 1.1784230470657349, "learning_rate": 5.8434535811299765e-08, "loss": 0.5241, "step": 14760 }, { "epoch": 0.95, "grad_norm": 1.2066808938980103, "learning_rate": 5.8275298314800496e-08, "loss": 0.5478, "step": 14761 }, { "epoch": 0.95, "grad_norm": 1.0564924478530884, "learning_rate": 5.8116276808338204e-08, "loss": 0.5114, "step": 14762 }, { "epoch": 0.95, "grad_norm": 1.3051859140396118, "learning_rate": 5.795747129886342e-08, "loss": 0.5293, "step": 14763 }, { "epoch": 0.95, "grad_norm": 1.161500096321106, "learning_rate": 5.779888179331672e-08, "loss": 0.5006, "step": 14764 }, { "epoch": 0.95, "grad_norm": 1.1280491352081299, "learning_rate": 5.764050829863033e-08, "loss": 0.4929, "step": 14765 }, { "epoch": 0.95, "grad_norm": 1.3414669036865234, "learning_rate": 5.7482350821725374e-08, "loss": 0.4725, "step": 14766 }, { "epoch": 0.95, "grad_norm": 1.218626618385315, "learning_rate": 5.732440936951467e-08, "loss": 0.5353, "step": 14767 }, { "epoch": 0.95, "grad_norm": 1.1430281400680542, "learning_rate": 5.716668394890268e-08, "loss": 0.4972, "step": 14768 }, { "epoch": 0.95, "grad_norm": 1.2960587739944458, "learning_rate": 5.7009174566781675e-08, "loss": 0.5194, "step": 14769 }, { "epoch": 0.95, "grad_norm": 1.3156317472457886, "learning_rate": 5.685188123003671e-08, "loss": 0.5295, "step": 14770 }, { "epoch": 0.95, "grad_norm": 1.243584156036377, "learning_rate": 5.6694803945542276e-08, "loss": 0.4858, "step": 14771 }, { "epoch": 0.95, "grad_norm": 1.2021461725234985, "learning_rate": 5.6537942720163996e-08, "loss": 0.4888, "step": 14772 }, { "epoch": 0.95, "grad_norm": 1.1991676092147827, "learning_rate": 5.638129756075805e-08, "loss": 0.5323, "step": 14773 }, { "epoch": 0.95, "grad_norm": 1.2219330072402954, "learning_rate": 5.6224868474171744e-08, "loss": 0.4882, "step": 14774 }, { "epoch": 0.95, "grad_norm": 1.2380695343017578, "learning_rate": 5.6068655467240165e-08, "loss": 0.5235, "step": 14775 }, { "epoch": 0.95, "grad_norm": 1.1582489013671875, "learning_rate": 5.5912658546792844e-08, "loss": 0.5109, "step": 14776 }, { "epoch": 0.95, "grad_norm": 1.253564476966858, "learning_rate": 5.5756877719647105e-08, "loss": 0.5386, "step": 14777 }, { "epoch": 0.95, "grad_norm": 1.0791229009628296, "learning_rate": 5.56013129926114e-08, "loss": 0.4848, "step": 14778 }, { "epoch": 0.95, "grad_norm": 1.1746498346328735, "learning_rate": 5.544596437248639e-08, "loss": 0.5292, "step": 14779 }, { "epoch": 0.95, "grad_norm": 1.2940948009490967, "learning_rate": 5.5290831866061103e-08, "loss": 0.5146, "step": 14780 }, { "epoch": 0.95, "grad_norm": 1.237857699394226, "learning_rate": 5.513591548011565e-08, "loss": 0.5008, "step": 14781 }, { "epoch": 0.95, "grad_norm": 1.317217230796814, "learning_rate": 5.49812152214213e-08, "loss": 0.5514, "step": 14782 }, { "epoch": 0.95, "grad_norm": 1.128033995628357, "learning_rate": 5.482673109673986e-08, "loss": 0.4959, "step": 14783 }, { "epoch": 0.95, "grad_norm": 1.1953407526016235, "learning_rate": 5.467246311282315e-08, "loss": 0.4978, "step": 14784 }, { "epoch": 0.95, "grad_norm": 1.1934696435928345, "learning_rate": 5.451841127641411e-08, "loss": 0.5438, "step": 14785 }, { "epoch": 0.95, "grad_norm": 1.2138561010360718, "learning_rate": 5.436457559424624e-08, "loss": 0.5194, "step": 14786 }, { "epoch": 0.95, "grad_norm": 1.131842851638794, "learning_rate": 5.42109560730425e-08, "loss": 0.5037, "step": 14787 }, { "epoch": 0.95, "grad_norm": 1.2186567783355713, "learning_rate": 5.405755271951751e-08, "loss": 0.511, "step": 14788 }, { "epoch": 0.95, "grad_norm": 1.2204170227050781, "learning_rate": 5.390436554037592e-08, "loss": 0.5383, "step": 14789 }, { "epoch": 0.95, "grad_norm": 1.3221911191940308, "learning_rate": 5.375139454231404e-08, "loss": 0.5417, "step": 14790 }, { "epoch": 0.95, "grad_norm": 1.1141515970230103, "learning_rate": 5.3598639732017066e-08, "loss": 0.4645, "step": 14791 }, { "epoch": 0.95, "grad_norm": 1.2215605974197388, "learning_rate": 5.344610111616133e-08, "loss": 0.5157, "step": 14792 }, { "epoch": 0.95, "grad_norm": 1.1834526062011719, "learning_rate": 5.3293778701414275e-08, "loss": 0.5051, "step": 14793 }, { "epoch": 0.95, "grad_norm": 1.1504602432250977, "learning_rate": 5.314167249443336e-08, "loss": 0.451, "step": 14794 }, { "epoch": 0.95, "grad_norm": 1.0845205783843994, "learning_rate": 5.2989782501867706e-08, "loss": 0.5331, "step": 14795 }, { "epoch": 0.96, "grad_norm": 1.1856902837753296, "learning_rate": 5.283810873035422e-08, "loss": 0.5081, "step": 14796 }, { "epoch": 0.96, "grad_norm": 1.0741567611694336, "learning_rate": 5.268665118652372e-08, "loss": 0.4434, "step": 14797 }, { "epoch": 0.96, "grad_norm": 1.1084682941436768, "learning_rate": 5.253540987699535e-08, "loss": 0.4886, "step": 14798 }, { "epoch": 0.96, "grad_norm": 1.0716522932052612, "learning_rate": 5.238438480837937e-08, "loss": 0.4872, "step": 14799 }, { "epoch": 0.96, "grad_norm": 1.1171718835830688, "learning_rate": 5.223357598727719e-08, "loss": 0.5189, "step": 14800 }, { "epoch": 0.96, "grad_norm": 1.2323888540267944, "learning_rate": 5.208298342027962e-08, "loss": 0.5225, "step": 14801 }, { "epoch": 0.96, "grad_norm": 1.1595373153686523, "learning_rate": 5.193260711396975e-08, "loss": 0.5173, "step": 14802 }, { "epoch": 0.96, "grad_norm": 1.1563812494277954, "learning_rate": 5.178244707491842e-08, "loss": 0.5272, "step": 14803 }, { "epoch": 0.96, "grad_norm": 1.2651389837265015, "learning_rate": 5.163250330969094e-08, "loss": 0.5031, "step": 14804 }, { "epoch": 0.96, "grad_norm": 1.1678133010864258, "learning_rate": 5.1482775824838735e-08, "loss": 0.5297, "step": 14805 }, { "epoch": 0.96, "grad_norm": 1.0589967966079712, "learning_rate": 5.1333264626907685e-08, "loss": 0.5027, "step": 14806 }, { "epoch": 0.96, "grad_norm": 1.1036200523376465, "learning_rate": 5.1183969722431446e-08, "loss": 0.5014, "step": 14807 }, { "epoch": 0.96, "grad_norm": 1.2040443420410156, "learning_rate": 5.103489111793591e-08, "loss": 0.5316, "step": 14808 }, { "epoch": 0.96, "grad_norm": 1.134920358657837, "learning_rate": 5.088602881993754e-08, "loss": 0.4839, "step": 14809 }, { "epoch": 0.96, "grad_norm": 1.3198679685592651, "learning_rate": 5.073738283494112e-08, "loss": 0.498, "step": 14810 }, { "epoch": 0.96, "grad_norm": 1.2061156034469604, "learning_rate": 5.058895316944479e-08, "loss": 0.5016, "step": 14811 }, { "epoch": 0.96, "grad_norm": 1.1814367771148682, "learning_rate": 5.044073982993558e-08, "loss": 0.5377, "step": 14812 }, { "epoch": 0.96, "grad_norm": 1.1262775659561157, "learning_rate": 5.0292742822891095e-08, "loss": 0.4484, "step": 14813 }, { "epoch": 0.96, "grad_norm": 1.1325724124908447, "learning_rate": 5.0144962154781154e-08, "loss": 0.5205, "step": 14814 }, { "epoch": 0.96, "grad_norm": 1.2672646045684814, "learning_rate": 4.999739783206392e-08, "loss": 0.5205, "step": 14815 }, { "epoch": 0.96, "grad_norm": 1.1597851514816284, "learning_rate": 4.985004986118924e-08, "loss": 0.4993, "step": 14816 }, { "epoch": 0.96, "grad_norm": 1.1223853826522827, "learning_rate": 4.970291824859696e-08, "loss": 0.5144, "step": 14817 }, { "epoch": 0.96, "grad_norm": 1.341509461402893, "learning_rate": 4.955600300071861e-08, "loss": 0.5387, "step": 14818 }, { "epoch": 0.96, "grad_norm": 1.1653859615325928, "learning_rate": 4.94093041239746e-08, "loss": 0.5087, "step": 14819 }, { "epoch": 0.96, "grad_norm": 1.1108530759811401, "learning_rate": 4.926282162477758e-08, "loss": 0.4968, "step": 14820 }, { "epoch": 0.96, "grad_norm": 1.1737627983093262, "learning_rate": 4.911655550952965e-08, "loss": 0.5198, "step": 14821 }, { "epoch": 0.96, "grad_norm": 1.2008875608444214, "learning_rate": 4.897050578462348e-08, "loss": 0.5575, "step": 14822 }, { "epoch": 0.96, "grad_norm": 1.0545300245285034, "learning_rate": 4.882467245644229e-08, "loss": 0.4879, "step": 14823 }, { "epoch": 0.96, "grad_norm": 1.1948658227920532, "learning_rate": 4.867905553136098e-08, "loss": 0.5049, "step": 14824 }, { "epoch": 0.96, "grad_norm": 1.1437921524047852, "learning_rate": 4.853365501574337e-08, "loss": 0.4655, "step": 14825 }, { "epoch": 0.96, "grad_norm": 1.2063381671905518, "learning_rate": 4.8388470915944895e-08, "loss": 0.5295, "step": 14826 }, { "epoch": 0.96, "grad_norm": 1.2423408031463623, "learning_rate": 4.824350323831106e-08, "loss": 0.4892, "step": 14827 }, { "epoch": 0.96, "grad_norm": 1.1681747436523438, "learning_rate": 4.80987519891779e-08, "loss": 0.5272, "step": 14828 }, { "epoch": 0.96, "grad_norm": 1.1275367736816406, "learning_rate": 4.795421717487203e-08, "loss": 0.4816, "step": 14829 }, { "epoch": 0.96, "grad_norm": 1.2780121564865112, "learning_rate": 4.780989880171116e-08, "loss": 0.5512, "step": 14830 }, { "epoch": 0.96, "grad_norm": 1.2413156032562256, "learning_rate": 4.766579687600248e-08, "loss": 0.5178, "step": 14831 }, { "epoch": 0.96, "grad_norm": 1.3822821378707886, "learning_rate": 4.752191140404483e-08, "loss": 0.5204, "step": 14832 }, { "epoch": 0.96, "grad_norm": 1.226914644241333, "learning_rate": 4.7378242392127074e-08, "loss": 0.4859, "step": 14833 }, { "epoch": 0.96, "grad_norm": 1.1956392526626587, "learning_rate": 4.723478984652807e-08, "loss": 0.541, "step": 14834 }, { "epoch": 0.96, "grad_norm": 1.1669843196868896, "learning_rate": 4.709155377351837e-08, "loss": 0.5019, "step": 14835 }, { "epoch": 0.96, "grad_norm": 1.1133471727371216, "learning_rate": 4.6948534179357965e-08, "loss": 0.5363, "step": 14836 }, { "epoch": 0.96, "grad_norm": 1.1039732694625854, "learning_rate": 4.680573107029796e-08, "loss": 0.4567, "step": 14837 }, { "epoch": 0.96, "grad_norm": 1.2217435836791992, "learning_rate": 4.6663144452580026e-08, "loss": 0.4785, "step": 14838 }, { "epoch": 0.96, "grad_norm": 1.2895207405090332, "learning_rate": 4.652077433243696e-08, "loss": 0.5088, "step": 14839 }, { "epoch": 0.96, "grad_norm": 1.2172770500183105, "learning_rate": 4.63786207160899e-08, "loss": 0.4703, "step": 14840 }, { "epoch": 0.96, "grad_norm": 1.1545497179031372, "learning_rate": 4.623668360975331e-08, "loss": 0.485, "step": 14841 }, { "epoch": 0.96, "grad_norm": 1.3256046772003174, "learning_rate": 4.609496301963001e-08, "loss": 0.5782, "step": 14842 }, { "epoch": 0.96, "grad_norm": 1.1598981618881226, "learning_rate": 4.59534589519145e-08, "loss": 0.5121, "step": 14843 }, { "epoch": 0.96, "grad_norm": 1.2274858951568604, "learning_rate": 4.581217141279237e-08, "loss": 0.5149, "step": 14844 }, { "epoch": 0.96, "grad_norm": 1.249629020690918, "learning_rate": 4.567110040843814e-08, "loss": 0.5227, "step": 14845 }, { "epoch": 0.96, "grad_norm": 1.268615961074829, "learning_rate": 4.553024594501743e-08, "loss": 0.506, "step": 14846 }, { "epoch": 0.96, "grad_norm": 1.1121591329574585, "learning_rate": 4.538960802868697e-08, "loss": 0.4837, "step": 14847 }, { "epoch": 0.96, "grad_norm": 1.2553057670593262, "learning_rate": 4.5249186665593524e-08, "loss": 0.4709, "step": 14848 }, { "epoch": 0.96, "grad_norm": 1.2776644229888916, "learning_rate": 4.510898186187496e-08, "loss": 0.502, "step": 14849 }, { "epoch": 0.96, "grad_norm": 1.2055302858352661, "learning_rate": 4.496899362365914e-08, "loss": 0.4749, "step": 14850 }, { "epoch": 0.96, "grad_norm": 1.1631975173950195, "learning_rate": 4.482922195706452e-08, "loss": 0.5477, "step": 14851 }, { "epoch": 0.96, "grad_norm": 1.1823556423187256, "learning_rate": 4.468966686819953e-08, "loss": 0.5012, "step": 14852 }, { "epoch": 0.96, "grad_norm": 1.1589369773864746, "learning_rate": 4.4550328363164864e-08, "loss": 0.5078, "step": 14853 }, { "epoch": 0.96, "grad_norm": 1.134806752204895, "learning_rate": 4.441120644805008e-08, "loss": 0.4819, "step": 14854 }, { "epoch": 0.96, "grad_norm": 1.2060376405715942, "learning_rate": 4.427230112893532e-08, "loss": 0.5421, "step": 14855 }, { "epoch": 0.96, "grad_norm": 1.2602567672729492, "learning_rate": 4.413361241189351e-08, "loss": 0.5465, "step": 14856 }, { "epoch": 0.96, "grad_norm": 1.1132304668426514, "learning_rate": 4.399514030298424e-08, "loss": 0.4558, "step": 14857 }, { "epoch": 0.96, "grad_norm": 1.1097303628921509, "learning_rate": 4.3856884808261e-08, "loss": 0.5553, "step": 14858 }, { "epoch": 0.96, "grad_norm": 1.2798190116882324, "learning_rate": 4.3718845933766186e-08, "loss": 0.5432, "step": 14859 }, { "epoch": 0.96, "grad_norm": 1.0414139032363892, "learning_rate": 4.3581023685532744e-08, "loss": 0.4518, "step": 14860 }, { "epoch": 0.96, "grad_norm": 1.211348533630371, "learning_rate": 4.344341806958585e-08, "loss": 0.5156, "step": 14861 }, { "epoch": 0.96, "grad_norm": 1.1106802225112915, "learning_rate": 4.330602909193904e-08, "loss": 0.53, "step": 14862 }, { "epoch": 0.96, "grad_norm": 1.2469918727874756, "learning_rate": 4.3168856758596945e-08, "loss": 0.5358, "step": 14863 }, { "epoch": 0.96, "grad_norm": 1.2054500579833984, "learning_rate": 4.303190107555533e-08, "loss": 0.5077, "step": 14864 }, { "epoch": 0.96, "grad_norm": 1.1720564365386963, "learning_rate": 4.289516204879996e-08, "loss": 0.5283, "step": 14865 }, { "epoch": 0.96, "grad_norm": 1.1182632446289062, "learning_rate": 4.2758639684307714e-08, "loss": 0.5419, "step": 14866 }, { "epoch": 0.96, "grad_norm": 1.072216510772705, "learning_rate": 4.2622333988045496e-08, "loss": 0.4457, "step": 14867 }, { "epoch": 0.96, "grad_norm": 1.2856497764587402, "learning_rate": 4.2486244965971314e-08, "loss": 0.5215, "step": 14868 }, { "epoch": 0.96, "grad_norm": 1.277883768081665, "learning_rate": 4.235037262403263e-08, "loss": 0.4796, "step": 14869 }, { "epoch": 0.96, "grad_norm": 1.2883894443511963, "learning_rate": 4.2214716968168036e-08, "loss": 0.529, "step": 14870 }, { "epoch": 0.96, "grad_norm": 1.3148647546768188, "learning_rate": 4.2079278004306665e-08, "loss": 0.5296, "step": 14871 }, { "epoch": 0.96, "grad_norm": 1.2361572980880737, "learning_rate": 4.1944055738368794e-08, "loss": 0.4807, "step": 14872 }, { "epoch": 0.96, "grad_norm": 1.1830894947052002, "learning_rate": 4.180905017626413e-08, "loss": 0.4998, "step": 14873 }, { "epoch": 0.96, "grad_norm": 1.2213029861450195, "learning_rate": 4.167426132389407e-08, "loss": 0.5092, "step": 14874 }, { "epoch": 0.96, "grad_norm": 1.2274490594863892, "learning_rate": 4.15396891871489e-08, "loss": 0.5262, "step": 14875 }, { "epoch": 0.96, "grad_norm": 1.1518499851226807, "learning_rate": 4.1405333771910584e-08, "loss": 0.5045, "step": 14876 }, { "epoch": 0.96, "grad_norm": 1.2300546169281006, "learning_rate": 4.1271195084052196e-08, "loss": 0.4589, "step": 14877 }, { "epoch": 0.96, "grad_norm": 1.1777299642562866, "learning_rate": 4.1137273129436273e-08, "loss": 0.5044, "step": 14878 }, { "epoch": 0.96, "grad_norm": 1.1669949293136597, "learning_rate": 4.100356791391591e-08, "loss": 0.4775, "step": 14879 }, { "epoch": 0.96, "grad_norm": 1.197335124015808, "learning_rate": 4.087007944333532e-08, "loss": 0.547, "step": 14880 }, { "epoch": 0.96, "grad_norm": 1.2114520072937012, "learning_rate": 4.073680772352928e-08, "loss": 0.467, "step": 14881 }, { "epoch": 0.96, "grad_norm": 1.2539881467819214, "learning_rate": 4.060375276032147e-08, "loss": 0.5199, "step": 14882 }, { "epoch": 0.96, "grad_norm": 1.1926666498184204, "learning_rate": 4.047091455952834e-08, "loss": 0.49, "step": 14883 }, { "epoch": 0.96, "grad_norm": 1.1957588195800781, "learning_rate": 4.033829312695636e-08, "loss": 0.5368, "step": 14884 }, { "epoch": 0.96, "grad_norm": 1.138784646987915, "learning_rate": 4.020588846840146e-08, "loss": 0.4983, "step": 14885 }, { "epoch": 0.96, "grad_norm": 1.1617226600646973, "learning_rate": 4.00737005896501e-08, "loss": 0.5126, "step": 14886 }, { "epoch": 0.96, "grad_norm": 1.128343939781189, "learning_rate": 3.994172949648101e-08, "loss": 0.4692, "step": 14887 }, { "epoch": 0.96, "grad_norm": 1.2215417623519897, "learning_rate": 3.980997519466123e-08, "loss": 0.552, "step": 14888 }, { "epoch": 0.96, "grad_norm": 1.169171690940857, "learning_rate": 3.967843768995117e-08, "loss": 0.5254, "step": 14889 }, { "epoch": 0.96, "grad_norm": 1.1250758171081543, "learning_rate": 3.954711698809788e-08, "loss": 0.4745, "step": 14890 }, { "epoch": 0.96, "grad_norm": 1.1354318857192993, "learning_rate": 3.941601309484178e-08, "loss": 0.4982, "step": 14891 }, { "epoch": 0.96, "grad_norm": 1.198486566543579, "learning_rate": 3.9285126015913835e-08, "loss": 0.5238, "step": 14892 }, { "epoch": 0.96, "grad_norm": 1.2616559267044067, "learning_rate": 3.9154455757033914e-08, "loss": 0.5251, "step": 14893 }, { "epoch": 0.96, "grad_norm": 1.1613221168518066, "learning_rate": 3.9024002323913565e-08, "loss": 0.5235, "step": 14894 }, { "epoch": 0.96, "grad_norm": 1.1675000190734863, "learning_rate": 3.8893765722254895e-08, "loss": 0.4731, "step": 14895 }, { "epoch": 0.96, "grad_norm": 1.1667991876602173, "learning_rate": 3.8763745957749455e-08, "loss": 0.5128, "step": 14896 }, { "epoch": 0.96, "grad_norm": 1.1619971990585327, "learning_rate": 3.863394303608048e-08, "loss": 0.5365, "step": 14897 }, { "epoch": 0.96, "grad_norm": 1.0966451168060303, "learning_rate": 3.8504356962921765e-08, "loss": 0.5179, "step": 14898 }, { "epoch": 0.96, "grad_norm": 1.226192593574524, "learning_rate": 3.8374987743936554e-08, "loss": 0.4877, "step": 14899 }, { "epoch": 0.96, "grad_norm": 1.1405211687088013, "learning_rate": 3.824583538477977e-08, "loss": 0.4635, "step": 14900 }, { "epoch": 0.96, "grad_norm": 1.3177525997161865, "learning_rate": 3.8116899891095794e-08, "loss": 0.569, "step": 14901 }, { "epoch": 0.96, "grad_norm": 1.472253680229187, "learning_rate": 3.798818126852011e-08, "loss": 0.5431, "step": 14902 }, { "epoch": 0.96, "grad_norm": 1.184029459953308, "learning_rate": 3.7859679522679325e-08, "loss": 0.4959, "step": 14903 }, { "epoch": 0.96, "grad_norm": 1.2146997451782227, "learning_rate": 3.773139465918896e-08, "loss": 0.5119, "step": 14904 }, { "epoch": 0.96, "grad_norm": 1.1845920085906982, "learning_rate": 3.760332668365674e-08, "loss": 0.4878, "step": 14905 }, { "epoch": 0.96, "grad_norm": 1.1901813745498657, "learning_rate": 3.7475475601679854e-08, "loss": 0.5153, "step": 14906 }, { "epoch": 0.96, "grad_norm": 1.1063555479049683, "learning_rate": 3.734784141884662e-08, "loss": 0.4507, "step": 14907 }, { "epoch": 0.96, "grad_norm": 1.2190965414047241, "learning_rate": 3.7220424140735346e-08, "loss": 0.5486, "step": 14908 }, { "epoch": 0.96, "grad_norm": 1.1590842008590698, "learning_rate": 3.709322377291547e-08, "loss": 0.5188, "step": 14909 }, { "epoch": 0.96, "grad_norm": 1.1997493505477905, "learning_rate": 3.6966240320945865e-08, "loss": 0.5216, "step": 14910 }, { "epoch": 0.96, "grad_norm": 1.189522385597229, "learning_rate": 3.683947379037711e-08, "loss": 0.5217, "step": 14911 }, { "epoch": 0.96, "grad_norm": 1.1796280145645142, "learning_rate": 3.671292418674976e-08, "loss": 0.4933, "step": 14912 }, { "epoch": 0.96, "grad_norm": 1.0767042636871338, "learning_rate": 3.6586591515595514e-08, "loss": 0.5206, "step": 14913 }, { "epoch": 0.96, "grad_norm": 1.1487278938293457, "learning_rate": 3.6460475782434945e-08, "loss": 0.497, "step": 14914 }, { "epoch": 0.96, "grad_norm": 1.1933066844940186, "learning_rate": 3.633457699278142e-08, "loss": 0.498, "step": 14915 }, { "epoch": 0.96, "grad_norm": 1.1012035608291626, "learning_rate": 3.6208895152137215e-08, "loss": 0.516, "step": 14916 }, { "epoch": 0.96, "grad_norm": 1.1911100149154663, "learning_rate": 3.6083430265995144e-08, "loss": 0.5447, "step": 14917 }, { "epoch": 0.96, "grad_norm": 1.2647587060928345, "learning_rate": 3.595818233983916e-08, "loss": 0.5183, "step": 14918 }, { "epoch": 0.96, "grad_norm": 1.3011085987091064, "learning_rate": 3.5833151379143785e-08, "loss": 0.5248, "step": 14919 }, { "epoch": 0.96, "grad_norm": 1.1619642972946167, "learning_rate": 3.570833738937352e-08, "loss": 0.477, "step": 14920 }, { "epoch": 0.96, "grad_norm": 1.1845523118972778, "learning_rate": 3.5583740375984e-08, "loss": 0.5547, "step": 14921 }, { "epoch": 0.96, "grad_norm": 1.2064732313156128, "learning_rate": 3.545936034442088e-08, "loss": 0.5383, "step": 14922 }, { "epoch": 0.96, "grad_norm": 1.1875882148742676, "learning_rate": 3.5335197300119806e-08, "loss": 0.5315, "step": 14923 }, { "epoch": 0.96, "grad_norm": 1.2310467958450317, "learning_rate": 3.521125124850866e-08, "loss": 0.4926, "step": 14924 }, { "epoch": 0.96, "grad_norm": 1.2134560346603394, "learning_rate": 3.5087522195004775e-08, "loss": 0.4885, "step": 14925 }, { "epoch": 0.96, "grad_norm": 1.2520921230316162, "learning_rate": 3.496401014501494e-08, "loss": 0.5262, "step": 14926 }, { "epoch": 0.96, "grad_norm": 1.1580584049224854, "learning_rate": 3.484071510393927e-08, "loss": 0.4809, "step": 14927 }, { "epoch": 0.96, "grad_norm": 1.1678905487060547, "learning_rate": 3.4717637077164576e-08, "loss": 0.4919, "step": 14928 }, { "epoch": 0.96, "grad_norm": 1.1951359510421753, "learning_rate": 3.459477607007211e-08, "loss": 0.5036, "step": 14929 }, { "epoch": 0.96, "grad_norm": 1.2342770099639893, "learning_rate": 3.447213208803035e-08, "loss": 0.5978, "step": 14930 }, { "epoch": 0.96, "grad_norm": 1.2214781045913696, "learning_rate": 3.4349705136401125e-08, "loss": 0.5064, "step": 14931 }, { "epoch": 0.96, "grad_norm": 1.2072802782058716, "learning_rate": 3.422749522053459e-08, "loss": 0.5339, "step": 14932 }, { "epoch": 0.96, "grad_norm": 1.2305420637130737, "learning_rate": 3.410550234577203e-08, "loss": 0.4996, "step": 14933 }, { "epoch": 0.96, "grad_norm": 1.1763554811477661, "learning_rate": 3.398372651744641e-08, "loss": 0.5391, "step": 14934 }, { "epoch": 0.96, "grad_norm": 1.1641706228256226, "learning_rate": 3.3862167740879026e-08, "loss": 0.4892, "step": 14935 }, { "epoch": 0.96, "grad_norm": 1.1851751804351807, "learning_rate": 3.37408260213834e-08, "loss": 0.5188, "step": 14936 }, { "epoch": 0.96, "grad_norm": 1.2660703659057617, "learning_rate": 3.361970136426363e-08, "loss": 0.4934, "step": 14937 }, { "epoch": 0.96, "grad_norm": 1.0497238636016846, "learning_rate": 3.349879377481324e-08, "loss": 0.4831, "step": 14938 }, { "epoch": 0.96, "grad_norm": 1.2079814672470093, "learning_rate": 3.337810325831692e-08, "loss": 0.5132, "step": 14939 }, { "epoch": 0.96, "grad_norm": 1.2562288045883179, "learning_rate": 3.325762982004932e-08, "loss": 0.4948, "step": 14940 }, { "epoch": 0.96, "grad_norm": 1.2186254262924194, "learning_rate": 3.3137373465276234e-08, "loss": 0.495, "step": 14941 }, { "epoch": 0.96, "grad_norm": 1.235815167427063, "learning_rate": 3.3017334199254017e-08, "loss": 0.5356, "step": 14942 }, { "epoch": 0.96, "grad_norm": 1.3241130113601685, "learning_rate": 3.289751202722902e-08, "loss": 0.5108, "step": 14943 }, { "epoch": 0.96, "grad_norm": 1.0667914152145386, "learning_rate": 3.277790695443927e-08, "loss": 0.4524, "step": 14944 }, { "epoch": 0.96, "grad_norm": 1.1972743272781372, "learning_rate": 3.265851898611061e-08, "loss": 0.4748, "step": 14945 }, { "epoch": 0.96, "grad_norm": 1.1437491178512573, "learning_rate": 3.253934812746273e-08, "loss": 0.5006, "step": 14946 }, { "epoch": 0.96, "grad_norm": 1.1558927297592163, "learning_rate": 3.24203943837037e-08, "loss": 0.4757, "step": 14947 }, { "epoch": 0.96, "grad_norm": 1.1553597450256348, "learning_rate": 3.2301657760032135e-08, "loss": 0.4994, "step": 14948 }, { "epoch": 0.96, "grad_norm": 1.2686935663223267, "learning_rate": 3.218313826163888e-08, "loss": 0.4933, "step": 14949 }, { "epoch": 0.96, "grad_norm": 1.1079078912734985, "learning_rate": 3.206483589370368e-08, "loss": 0.4689, "step": 14950 }, { "epoch": 0.97, "grad_norm": 1.2411531209945679, "learning_rate": 3.1946750661396294e-08, "loss": 0.5166, "step": 14951 }, { "epoch": 0.97, "grad_norm": 1.262291669845581, "learning_rate": 3.182888256987926e-08, "loss": 0.4688, "step": 14952 }, { "epoch": 0.97, "grad_norm": 1.074204921722412, "learning_rate": 3.171123162430345e-08, "loss": 0.5292, "step": 14953 }, { "epoch": 0.97, "grad_norm": 1.0802767276763916, "learning_rate": 3.159379782981142e-08, "loss": 0.4774, "step": 14954 }, { "epoch": 0.97, "grad_norm": 1.1246596574783325, "learning_rate": 3.1476581191535735e-08, "loss": 0.4983, "step": 14955 }, { "epoch": 0.97, "grad_norm": 1.1537503004074097, "learning_rate": 3.1359581714600074e-08, "loss": 0.5173, "step": 14956 }, { "epoch": 0.97, "grad_norm": 1.2021849155426025, "learning_rate": 3.124279940411756e-08, "loss": 0.4663, "step": 14957 }, { "epoch": 0.97, "grad_norm": 1.2176275253295898, "learning_rate": 3.112623426519301e-08, "loss": 0.5133, "step": 14958 }, { "epoch": 0.97, "grad_norm": 1.2534279823303223, "learning_rate": 3.1009886302920675e-08, "loss": 0.5196, "step": 14959 }, { "epoch": 0.97, "grad_norm": 1.1715246438980103, "learning_rate": 3.0893755522385934e-08, "loss": 0.5888, "step": 14960 }, { "epoch": 0.97, "grad_norm": 1.2202740907669067, "learning_rate": 3.0777841928664733e-08, "loss": 0.5575, "step": 14961 }, { "epoch": 0.97, "grad_norm": 1.1583642959594727, "learning_rate": 3.066214552682356e-08, "loss": 0.4889, "step": 14962 }, { "epoch": 0.97, "grad_norm": 1.184499740600586, "learning_rate": 3.054666632191838e-08, "loss": 0.5205, "step": 14963 }, { "epoch": 0.97, "grad_norm": 1.1143724918365479, "learning_rate": 3.043140431899738e-08, "loss": 0.5084, "step": 14964 }, { "epoch": 0.97, "grad_norm": 1.0871516466140747, "learning_rate": 3.031635952309819e-08, "loss": 0.4441, "step": 14965 }, { "epoch": 0.97, "grad_norm": 1.3021392822265625, "learning_rate": 3.020153193924902e-08, "loss": 0.5523, "step": 14966 }, { "epoch": 0.97, "grad_norm": 1.1060649156570435, "learning_rate": 3.0086921572468066e-08, "loss": 0.4973, "step": 14967 }, { "epoch": 0.97, "grad_norm": 1.1855324506759644, "learning_rate": 2.997252842776577e-08, "loss": 0.5686, "step": 14968 }, { "epoch": 0.97, "grad_norm": 1.108225703239441, "learning_rate": 2.985835251014146e-08, "loss": 0.4937, "step": 14969 }, { "epoch": 0.97, "grad_norm": 1.2733218669891357, "learning_rate": 2.974439382458505e-08, "loss": 0.5589, "step": 14970 }, { "epoch": 0.97, "grad_norm": 1.196496605873108, "learning_rate": 2.9630652376078096e-08, "loss": 0.5007, "step": 14971 }, { "epoch": 0.97, "grad_norm": 1.2511928081512451, "learning_rate": 2.951712816959107e-08, "loss": 0.5255, "step": 14972 }, { "epoch": 0.97, "grad_norm": 1.2047666311264038, "learning_rate": 2.9403821210087225e-08, "loss": 0.4932, "step": 14973 }, { "epoch": 0.97, "grad_norm": 1.0623525381088257, "learning_rate": 2.92907315025176e-08, "loss": 0.5091, "step": 14974 }, { "epoch": 0.97, "grad_norm": 1.1704005002975464, "learning_rate": 2.9177859051825462e-08, "loss": 0.4887, "step": 14975 }, { "epoch": 0.97, "grad_norm": 1.1555194854736328, "learning_rate": 2.906520386294409e-08, "loss": 0.5251, "step": 14976 }, { "epoch": 0.97, "grad_norm": 1.210545539855957, "learning_rate": 2.8952765940797323e-08, "loss": 0.505, "step": 14977 }, { "epoch": 0.97, "grad_norm": 1.2875785827636719, "learning_rate": 2.8840545290300115e-08, "loss": 0.5555, "step": 14978 }, { "epoch": 0.97, "grad_norm": 1.2393466234207153, "learning_rate": 2.8728541916356878e-08, "loss": 0.5294, "step": 14979 }, { "epoch": 0.97, "grad_norm": 1.3561604022979736, "learning_rate": 2.8616755823862585e-08, "loss": 0.5247, "step": 14980 }, { "epoch": 0.97, "grad_norm": 1.2021760940551758, "learning_rate": 2.8505187017703885e-08, "loss": 0.4812, "step": 14981 }, { "epoch": 0.97, "grad_norm": 1.2811745405197144, "learning_rate": 2.8393835502756872e-08, "loss": 0.5098, "step": 14982 }, { "epoch": 0.97, "grad_norm": 1.2256476879119873, "learning_rate": 2.8282701283888214e-08, "loss": 0.5137, "step": 14983 }, { "epoch": 0.97, "grad_norm": 1.3151421546936035, "learning_rate": 2.817178436595569e-08, "loss": 0.5136, "step": 14984 }, { "epoch": 0.97, "grad_norm": 1.2258714437484741, "learning_rate": 2.8061084753807088e-08, "loss": 0.5257, "step": 14985 }, { "epoch": 0.97, "grad_norm": 1.2975351810455322, "learning_rate": 2.7950602452280206e-08, "loss": 0.5385, "step": 14986 }, { "epoch": 0.97, "grad_norm": 1.2176194190979004, "learning_rate": 2.784033746620507e-08, "loss": 0.5053, "step": 14987 }, { "epoch": 0.97, "grad_norm": 1.099320650100708, "learning_rate": 2.7730289800400045e-08, "loss": 0.4498, "step": 14988 }, { "epoch": 0.97, "grad_norm": 1.110685110092163, "learning_rate": 2.7620459459675174e-08, "loss": 0.4938, "step": 14989 }, { "epoch": 0.97, "grad_norm": 1.205443263053894, "learning_rate": 2.751084644883162e-08, "loss": 0.5183, "step": 14990 }, { "epoch": 0.97, "grad_norm": 1.1976906061172485, "learning_rate": 2.7401450772659434e-08, "loss": 0.506, "step": 14991 }, { "epoch": 0.97, "grad_norm": 1.362831711769104, "learning_rate": 2.7292272435940903e-08, "loss": 0.5434, "step": 14992 }, { "epoch": 0.97, "grad_norm": 1.174243688583374, "learning_rate": 2.718331144344666e-08, "loss": 0.5042, "step": 14993 }, { "epoch": 0.97, "grad_norm": 1.140324354171753, "learning_rate": 2.707456779994011e-08, "loss": 0.495, "step": 14994 }, { "epoch": 0.97, "grad_norm": 1.2874071598052979, "learning_rate": 2.6966041510174123e-08, "loss": 0.4886, "step": 14995 }, { "epoch": 0.97, "grad_norm": 1.1726845502853394, "learning_rate": 2.685773257889157e-08, "loss": 0.4927, "step": 14996 }, { "epoch": 0.97, "grad_norm": 1.2836207151412964, "learning_rate": 2.6749641010827e-08, "loss": 0.5492, "step": 14997 }, { "epoch": 0.97, "grad_norm": 1.1579481363296509, "learning_rate": 2.664176681070385e-08, "loss": 0.4964, "step": 14998 }, { "epoch": 0.97, "grad_norm": 1.206662654876709, "learning_rate": 2.65341099832378e-08, "loss": 0.5278, "step": 14999 }, { "epoch": 0.97, "grad_norm": 1.1895560026168823, "learning_rate": 2.6426670533134524e-08, "loss": 0.5447, "step": 15000 }, { "epoch": 0.97, "grad_norm": 1.10121750831604, "learning_rate": 2.6319448465089158e-08, "loss": 0.4727, "step": 15001 }, { "epoch": 0.97, "grad_norm": 1.2255287170410156, "learning_rate": 2.621244378378851e-08, "loss": 0.5343, "step": 15002 }, { "epoch": 0.97, "grad_norm": 1.3090928792953491, "learning_rate": 2.6105656493909393e-08, "loss": 0.5307, "step": 15003 }, { "epoch": 0.97, "grad_norm": 1.1554408073425293, "learning_rate": 2.5999086600119185e-08, "loss": 0.4906, "step": 15004 }, { "epoch": 0.97, "grad_norm": 1.225753903388977, "learning_rate": 2.5892734107075824e-08, "loss": 0.4922, "step": 15005 }, { "epoch": 0.97, "grad_norm": 1.1763533353805542, "learning_rate": 2.5786599019427815e-08, "loss": 0.5021, "step": 15006 }, { "epoch": 0.97, "grad_norm": 1.1809207201004028, "learning_rate": 2.5680681341813675e-08, "loss": 0.4481, "step": 15007 }, { "epoch": 0.97, "grad_norm": 1.135831356048584, "learning_rate": 2.5574981078863025e-08, "loss": 0.518, "step": 15008 }, { "epoch": 0.97, "grad_norm": 1.3197829723358154, "learning_rate": 2.5469498235196067e-08, "loss": 0.521, "step": 15009 }, { "epoch": 0.97, "grad_norm": 1.1201404333114624, "learning_rate": 2.5364232815422997e-08, "loss": 0.5175, "step": 15010 }, { "epoch": 0.97, "grad_norm": 1.157760500907898, "learning_rate": 2.5259184824144023e-08, "loss": 0.4868, "step": 15011 }, { "epoch": 0.97, "grad_norm": 1.1661570072174072, "learning_rate": 2.5154354265951585e-08, "loss": 0.5018, "step": 15012 }, { "epoch": 0.97, "grad_norm": 1.1147730350494385, "learning_rate": 2.504974114542702e-08, "loss": 0.5137, "step": 15013 }, { "epoch": 0.97, "grad_norm": 1.2899924516677856, "learning_rate": 2.4945345467142224e-08, "loss": 0.5748, "step": 15014 }, { "epoch": 0.97, "grad_norm": 1.1092323064804077, "learning_rate": 2.4841167235661322e-08, "loss": 0.4776, "step": 15015 }, { "epoch": 0.97, "grad_norm": 1.161109447479248, "learning_rate": 2.4737206455536233e-08, "loss": 0.4802, "step": 15016 }, { "epoch": 0.97, "grad_norm": 1.197806477546692, "learning_rate": 2.463346313131165e-08, "loss": 0.5441, "step": 15017 }, { "epoch": 0.97, "grad_norm": 1.188094973564148, "learning_rate": 2.452993726752173e-08, "loss": 0.5005, "step": 15018 }, { "epoch": 0.97, "grad_norm": 1.2929085493087769, "learning_rate": 2.4426628868691737e-08, "loss": 0.5601, "step": 15019 }, { "epoch": 0.97, "grad_norm": 1.1432745456695557, "learning_rate": 2.4323537939336395e-08, "loss": 0.4943, "step": 15020 }, { "epoch": 0.97, "grad_norm": 1.157729148864746, "learning_rate": 2.42206644839621e-08, "loss": 0.52, "step": 15021 }, { "epoch": 0.97, "grad_norm": 1.1631650924682617, "learning_rate": 2.4118008507064138e-08, "loss": 0.4751, "step": 15022 }, { "epoch": 0.97, "grad_norm": 1.1105518341064453, "learning_rate": 2.4015570013130596e-08, "loss": 0.5013, "step": 15023 }, { "epoch": 0.97, "grad_norm": 1.3052914142608643, "learning_rate": 2.391334900663844e-08, "loss": 0.5308, "step": 15024 }, { "epoch": 0.97, "grad_norm": 1.183295488357544, "learning_rate": 2.3811345492054662e-08, "loss": 0.5003, "step": 15025 }, { "epoch": 0.97, "grad_norm": 1.3176313638687134, "learning_rate": 2.3709559473839016e-08, "loss": 0.5052, "step": 15026 }, { "epoch": 0.97, "grad_norm": 1.2066487073898315, "learning_rate": 2.3607990956439063e-08, "loss": 0.5174, "step": 15027 }, { "epoch": 0.97, "grad_norm": 1.2095059156417847, "learning_rate": 2.3506639944294585e-08, "loss": 0.5254, "step": 15028 }, { "epoch": 0.97, "grad_norm": 1.169624924659729, "learning_rate": 2.3405506441835923e-08, "loss": 0.5189, "step": 15029 }, { "epoch": 0.97, "grad_norm": 1.169694423675537, "learning_rate": 2.3304590453481767e-08, "loss": 0.5314, "step": 15030 }, { "epoch": 0.97, "grad_norm": 1.1635074615478516, "learning_rate": 2.32038919836447e-08, "loss": 0.5421, "step": 15031 }, { "epoch": 0.97, "grad_norm": 1.232282280921936, "learning_rate": 2.3103411036725086e-08, "loss": 0.5434, "step": 15032 }, { "epoch": 0.97, "grad_norm": 1.2072293758392334, "learning_rate": 2.3003147617114974e-08, "loss": 0.4977, "step": 15033 }, { "epoch": 0.97, "grad_norm": 1.3105119466781616, "learning_rate": 2.2903101729195854e-08, "loss": 0.5294, "step": 15034 }, { "epoch": 0.97, "grad_norm": 1.1868678331375122, "learning_rate": 2.2803273377341452e-08, "loss": 0.4925, "step": 15035 }, { "epoch": 0.97, "grad_norm": 1.1997357606887817, "learning_rate": 2.2703662565914386e-08, "loss": 0.5721, "step": 15036 }, { "epoch": 0.97, "grad_norm": 1.171639084815979, "learning_rate": 2.2604269299268954e-08, "loss": 0.5157, "step": 15037 }, { "epoch": 0.97, "grad_norm": 1.337285041809082, "learning_rate": 2.2505093581748904e-08, "loss": 0.4944, "step": 15038 }, { "epoch": 0.97, "grad_norm": 1.2419377565383911, "learning_rate": 2.24061354176891e-08, "loss": 0.4993, "step": 15039 }, { "epoch": 0.97, "grad_norm": 1.1623603105545044, "learning_rate": 2.2307394811414973e-08, "loss": 0.5013, "step": 15040 }, { "epoch": 0.97, "grad_norm": 1.1506119966506958, "learning_rate": 2.2208871767241956e-08, "loss": 0.5041, "step": 15041 }, { "epoch": 0.97, "grad_norm": 1.1793593168258667, "learning_rate": 2.2110566289476053e-08, "loss": 0.5391, "step": 15042 }, { "epoch": 0.97, "grad_norm": 1.2035505771636963, "learning_rate": 2.201247838241438e-08, "loss": 0.5383, "step": 15043 }, { "epoch": 0.97, "grad_norm": 1.190019965171814, "learning_rate": 2.191460805034351e-08, "loss": 0.509, "step": 15044 }, { "epoch": 0.97, "grad_norm": 1.0873477458953857, "learning_rate": 2.1816955297541685e-08, "loss": 0.4878, "step": 15045 }, { "epoch": 0.97, "grad_norm": 1.1723634004592896, "learning_rate": 2.1719520128277162e-08, "loss": 0.5241, "step": 15046 }, { "epoch": 0.97, "grad_norm": 1.1133701801300049, "learning_rate": 2.1622302546808194e-08, "loss": 0.5111, "step": 15047 }, { "epoch": 0.97, "grad_norm": 1.226988673210144, "learning_rate": 2.1525302557383608e-08, "loss": 0.5086, "step": 15048 }, { "epoch": 0.97, "grad_norm": 1.164925217628479, "learning_rate": 2.1428520164243906e-08, "loss": 0.4814, "step": 15049 }, { "epoch": 0.97, "grad_norm": 1.187403917312622, "learning_rate": 2.1331955371618475e-08, "loss": 0.5081, "step": 15050 }, { "epoch": 0.97, "grad_norm": 1.1482805013656616, "learning_rate": 2.123560818372783e-08, "loss": 0.4997, "step": 15051 }, { "epoch": 0.97, "grad_norm": 1.135157823562622, "learning_rate": 2.1139478604783604e-08, "loss": 0.4608, "step": 15052 }, { "epoch": 0.97, "grad_norm": 1.2606357336044312, "learning_rate": 2.1043566638987435e-08, "loss": 0.5014, "step": 15053 }, { "epoch": 0.97, "grad_norm": 1.1874604225158691, "learning_rate": 2.094787229053097e-08, "loss": 0.4862, "step": 15054 }, { "epoch": 0.97, "grad_norm": 1.2978721857070923, "learning_rate": 2.0852395563596417e-08, "loss": 0.5287, "step": 15055 }, { "epoch": 0.97, "grad_norm": 1.1846736669540405, "learning_rate": 2.0757136462357665e-08, "loss": 0.522, "step": 15056 }, { "epoch": 0.97, "grad_norm": 1.219864010810852, "learning_rate": 2.0662094990978044e-08, "loss": 0.4843, "step": 15057 }, { "epoch": 0.97, "grad_norm": 1.2162641286849976, "learning_rate": 2.0567271153610902e-08, "loss": 0.5197, "step": 15058 }, { "epoch": 0.97, "grad_norm": 1.1617047786712646, "learning_rate": 2.047266495440181e-08, "loss": 0.5063, "step": 15059 }, { "epoch": 0.97, "grad_norm": 1.309358835220337, "learning_rate": 2.0378276397484687e-08, "loss": 0.5312, "step": 15060 }, { "epoch": 0.97, "grad_norm": 1.1552201509475708, "learning_rate": 2.0284105486985673e-08, "loss": 0.5286, "step": 15061 }, { "epoch": 0.97, "grad_norm": 1.3265583515167236, "learning_rate": 2.019015222702092e-08, "loss": 0.5633, "step": 15062 }, { "epoch": 0.97, "grad_norm": 1.2468888759613037, "learning_rate": 2.0096416621696035e-08, "loss": 0.4876, "step": 15063 }, { "epoch": 0.97, "grad_norm": 1.185043215751648, "learning_rate": 2.0002898675108295e-08, "loss": 0.5265, "step": 15064 }, { "epoch": 0.97, "grad_norm": 1.2324796915054321, "learning_rate": 1.9909598391345542e-08, "loss": 0.5298, "step": 15065 }, { "epoch": 0.97, "grad_norm": 1.198855996131897, "learning_rate": 1.9816515774485623e-08, "loss": 0.5147, "step": 15066 }, { "epoch": 0.97, "grad_norm": 1.1514657735824585, "learning_rate": 1.972365082859695e-08, "loss": 0.5249, "step": 15067 }, { "epoch": 0.97, "grad_norm": 1.2110607624053955, "learning_rate": 1.963100355773795e-08, "loss": 0.5185, "step": 15068 }, { "epoch": 0.97, "grad_norm": 1.1751567125320435, "learning_rate": 1.9538573965958153e-08, "loss": 0.491, "step": 15069 }, { "epoch": 0.97, "grad_norm": 1.1841522455215454, "learning_rate": 1.9446362057297663e-08, "loss": 0.4828, "step": 15070 }, { "epoch": 0.97, "grad_norm": 1.161779761314392, "learning_rate": 1.935436783578659e-08, "loss": 0.4693, "step": 15071 }, { "epoch": 0.97, "grad_norm": 1.148856520652771, "learning_rate": 1.926259130544561e-08, "loss": 0.5066, "step": 15072 }, { "epoch": 0.97, "grad_norm": 1.3136723041534424, "learning_rate": 1.917103247028651e-08, "loss": 0.4968, "step": 15073 }, { "epoch": 0.97, "grad_norm": 1.1567538976669312, "learning_rate": 1.907969133431109e-08, "loss": 0.4903, "step": 15074 }, { "epoch": 0.97, "grad_norm": 1.233634114265442, "learning_rate": 1.8988567901511158e-08, "loss": 0.5086, "step": 15075 }, { "epoch": 0.97, "grad_norm": 1.2399593591690063, "learning_rate": 1.8897662175870192e-08, "loss": 0.5409, "step": 15076 }, { "epoch": 0.97, "grad_norm": 1.206822156906128, "learning_rate": 1.880697416136057e-08, "loss": 0.5265, "step": 15077 }, { "epoch": 0.97, "grad_norm": 1.1748331785202026, "learning_rate": 1.8716503861946344e-08, "loss": 0.5005, "step": 15078 }, { "epoch": 0.97, "grad_norm": 1.0705220699310303, "learning_rate": 1.8626251281582132e-08, "loss": 0.5184, "step": 15079 }, { "epoch": 0.97, "grad_norm": 1.1905895471572876, "learning_rate": 1.853621642421255e-08, "loss": 0.5194, "step": 15080 }, { "epoch": 0.97, "grad_norm": 1.179009199142456, "learning_rate": 1.8446399293772232e-08, "loss": 0.4971, "step": 15081 }, { "epoch": 0.97, "grad_norm": 1.1495084762573242, "learning_rate": 1.8356799894186926e-08, "loss": 0.5146, "step": 15082 }, { "epoch": 0.97, "grad_norm": 1.0840176343917847, "learning_rate": 1.8267418229373503e-08, "loss": 0.4765, "step": 15083 }, { "epoch": 0.97, "grad_norm": 1.1262271404266357, "learning_rate": 1.8178254303238275e-08, "loss": 0.5292, "step": 15084 }, { "epoch": 0.97, "grad_norm": 1.2672374248504639, "learning_rate": 1.808930811967813e-08, "loss": 0.4916, "step": 15085 }, { "epoch": 0.97, "grad_norm": 1.2987443208694458, "learning_rate": 1.8000579682581065e-08, "loss": 0.5679, "step": 15086 }, { "epoch": 0.97, "grad_norm": 1.1460644006729126, "learning_rate": 1.791206899582454e-08, "loss": 0.4495, "step": 15087 }, { "epoch": 0.97, "grad_norm": 1.3232765197753906, "learning_rate": 1.7823776063277675e-08, "loss": 0.5558, "step": 15088 }, { "epoch": 0.97, "grad_norm": 1.1752814054489136, "learning_rate": 1.773570088879961e-08, "loss": 0.5095, "step": 15089 }, { "epoch": 0.97, "grad_norm": 1.1781377792358398, "learning_rate": 1.7647843476238934e-08, "loss": 0.5107, "step": 15090 }, { "epoch": 0.97, "grad_norm": 1.1913448572158813, "learning_rate": 1.7560203829437573e-08, "loss": 0.5156, "step": 15091 }, { "epoch": 0.97, "grad_norm": 1.1633689403533936, "learning_rate": 1.7472781952223573e-08, "loss": 0.4662, "step": 15092 }, { "epoch": 0.97, "grad_norm": 1.1736042499542236, "learning_rate": 1.738557784841999e-08, "loss": 0.5383, "step": 15093 }, { "epoch": 0.97, "grad_norm": 1.1336872577667236, "learning_rate": 1.7298591521837104e-08, "loss": 0.5157, "step": 15094 }, { "epoch": 0.97, "grad_norm": 1.1494265794754028, "learning_rate": 1.7211822976277438e-08, "loss": 0.5211, "step": 15095 }, { "epoch": 0.97, "grad_norm": 1.1418308019638062, "learning_rate": 1.712527221553295e-08, "loss": 0.4881, "step": 15096 }, { "epoch": 0.97, "grad_norm": 1.2386449575424194, "learning_rate": 1.7038939243387286e-08, "loss": 0.5413, "step": 15097 }, { "epoch": 0.97, "grad_norm": 1.2431055307388306, "learning_rate": 1.695282406361298e-08, "loss": 0.5238, "step": 15098 }, { "epoch": 0.97, "grad_norm": 1.1209076642990112, "learning_rate": 1.6866926679974804e-08, "loss": 0.474, "step": 15099 }, { "epoch": 0.97, "grad_norm": 1.0714449882507324, "learning_rate": 1.678124709622586e-08, "loss": 0.4895, "step": 15100 }, { "epoch": 0.97, "grad_norm": 1.2220125198364258, "learning_rate": 1.6695785316112044e-08, "loss": 0.5173, "step": 15101 }, { "epoch": 0.97, "grad_norm": 1.1698373556137085, "learning_rate": 1.6610541343368702e-08, "loss": 0.4877, "step": 15102 }, { "epoch": 0.97, "grad_norm": 1.1334006786346436, "learning_rate": 1.6525515181721184e-08, "loss": 0.4854, "step": 15103 }, { "epoch": 0.97, "grad_norm": 1.186505675315857, "learning_rate": 1.644070683488541e-08, "loss": 0.4879, "step": 15104 }, { "epoch": 0.98, "grad_norm": 1.148195743560791, "learning_rate": 1.635611630656897e-08, "loss": 0.4808, "step": 15105 }, { "epoch": 0.98, "grad_norm": 1.1541355848312378, "learning_rate": 1.6271743600468905e-08, "loss": 0.5536, "step": 15106 }, { "epoch": 0.98, "grad_norm": 1.1616055965423584, "learning_rate": 1.6187588720272263e-08, "loss": 0.4828, "step": 15107 }, { "epoch": 0.98, "grad_norm": 1.2458285093307495, "learning_rate": 1.610365166965777e-08, "loss": 0.4899, "step": 15108 }, { "epoch": 0.98, "grad_norm": 1.171108603477478, "learning_rate": 1.6019932452294162e-08, "loss": 0.4912, "step": 15109 }, { "epoch": 0.98, "grad_norm": 1.106881022453308, "learning_rate": 1.593643107184073e-08, "loss": 0.4968, "step": 15110 }, { "epoch": 0.98, "grad_norm": 1.1385366916656494, "learning_rate": 1.5853147531946224e-08, "loss": 0.4804, "step": 15111 }, { "epoch": 0.98, "grad_norm": 1.1601837873458862, "learning_rate": 1.577008183625217e-08, "loss": 0.5065, "step": 15112 }, { "epoch": 0.98, "grad_norm": 1.200774908065796, "learning_rate": 1.568723398838734e-08, "loss": 0.5246, "step": 15113 }, { "epoch": 0.98, "grad_norm": 1.2914323806762695, "learning_rate": 1.560460399197494e-08, "loss": 0.5357, "step": 15114 }, { "epoch": 0.98, "grad_norm": 1.2169820070266724, "learning_rate": 1.552219185062487e-08, "loss": 0.5422, "step": 15115 }, { "epoch": 0.98, "grad_norm": 1.2564622163772583, "learning_rate": 1.5439997567939234e-08, "loss": 0.4862, "step": 15116 }, { "epoch": 0.98, "grad_norm": 1.1013227701187134, "learning_rate": 1.5358021147511282e-08, "loss": 0.4791, "step": 15117 }, { "epoch": 0.98, "grad_norm": 1.2316993474960327, "learning_rate": 1.5276262592923696e-08, "loss": 0.4849, "step": 15118 }, { "epoch": 0.98, "grad_norm": 1.2907510995864868, "learning_rate": 1.5194721907750286e-08, "loss": 0.5413, "step": 15119 }, { "epoch": 0.98, "grad_norm": 1.203862190246582, "learning_rate": 1.5113399095554315e-08, "loss": 0.5499, "step": 15120 }, { "epoch": 0.98, "grad_norm": 1.2425730228424072, "learning_rate": 1.5032294159890713e-08, "loss": 0.5522, "step": 15121 }, { "epoch": 0.98, "grad_norm": 1.1765023469924927, "learning_rate": 1.4951407104303872e-08, "loss": 0.4906, "step": 15122 }, { "epoch": 0.98, "grad_norm": 1.2514134645462036, "learning_rate": 1.4870737932329293e-08, "loss": 0.5454, "step": 15123 }, { "epoch": 0.98, "grad_norm": 1.2216506004333496, "learning_rate": 1.4790286647493045e-08, "loss": 0.504, "step": 15124 }, { "epoch": 0.98, "grad_norm": 1.268223762512207, "learning_rate": 1.4710053253311208e-08, "loss": 0.5183, "step": 15125 }, { "epoch": 0.98, "grad_norm": 1.1705067157745361, "learning_rate": 1.4630037753291526e-08, "loss": 0.4546, "step": 15126 }, { "epoch": 0.98, "grad_norm": 1.200352430343628, "learning_rate": 1.455024015092954e-08, "loss": 0.5267, "step": 15127 }, { "epoch": 0.98, "grad_norm": 1.2024470567703247, "learning_rate": 1.4470660449714124e-08, "loss": 0.506, "step": 15128 }, { "epoch": 0.98, "grad_norm": 1.165738582611084, "learning_rate": 1.4391298653123609e-08, "loss": 0.5116, "step": 15129 }, { "epoch": 0.98, "grad_norm": 1.2152595520019531, "learning_rate": 1.431215476462633e-08, "loss": 0.5279, "step": 15130 }, { "epoch": 0.98, "grad_norm": 1.0789331197738647, "learning_rate": 1.4233228787681186e-08, "loss": 0.5043, "step": 15131 }, { "epoch": 0.98, "grad_norm": 1.2309006452560425, "learning_rate": 1.4154520725738752e-08, "loss": 0.5295, "step": 15132 }, { "epoch": 0.98, "grad_norm": 1.1026180982589722, "learning_rate": 1.4076030582237942e-08, "loss": 0.4713, "step": 15133 }, { "epoch": 0.98, "grad_norm": 1.2832707166671753, "learning_rate": 1.3997758360610458e-08, "loss": 0.4859, "step": 15134 }, { "epoch": 0.98, "grad_norm": 1.1333370208740234, "learning_rate": 1.3919704064276895e-08, "loss": 0.4794, "step": 15135 }, { "epoch": 0.98, "grad_norm": 1.248172640800476, "learning_rate": 1.3841867696648415e-08, "loss": 0.5136, "step": 15136 }, { "epoch": 0.98, "grad_norm": 1.2022864818572998, "learning_rate": 1.3764249261127294e-08, "loss": 0.5245, "step": 15137 }, { "epoch": 0.98, "grad_norm": 1.1962569952011108, "learning_rate": 1.368684876110693e-08, "loss": 0.5392, "step": 15138 }, { "epoch": 0.98, "grad_norm": 1.1195663213729858, "learning_rate": 1.3609666199969062e-08, "loss": 0.462, "step": 15139 }, { "epoch": 0.98, "grad_norm": 1.2176120281219482, "learning_rate": 1.3532701581087659e-08, "loss": 0.5241, "step": 15140 }, { "epoch": 0.98, "grad_norm": 1.1319342851638794, "learning_rate": 1.3455954907826696e-08, "loss": 0.4848, "step": 15141 }, { "epoch": 0.98, "grad_norm": 1.1224498748779297, "learning_rate": 1.3379426183540157e-08, "loss": 0.4984, "step": 15142 }, { "epoch": 0.98, "grad_norm": 1.229055643081665, "learning_rate": 1.3303115411573697e-08, "loss": 0.4866, "step": 15143 }, { "epoch": 0.98, "grad_norm": 1.1583236455917358, "learning_rate": 1.322702259526243e-08, "loss": 0.521, "step": 15144 }, { "epoch": 0.98, "grad_norm": 1.2767314910888672, "learning_rate": 1.315114773793147e-08, "loss": 0.521, "step": 15145 }, { "epoch": 0.98, "grad_norm": 1.2244027853012085, "learning_rate": 1.3075490842897609e-08, "loss": 0.5147, "step": 15146 }, { "epoch": 0.98, "grad_norm": 1.1960138082504272, "learning_rate": 1.3000051913467649e-08, "loss": 0.5687, "step": 15147 }, { "epoch": 0.98, "grad_norm": 1.0684787034988403, "learning_rate": 1.292483095293895e-08, "loss": 0.447, "step": 15148 }, { "epoch": 0.98, "grad_norm": 1.1927103996276855, "learning_rate": 1.2849827964598882e-08, "loss": 0.5316, "step": 15149 }, { "epoch": 0.98, "grad_norm": 1.182728886604309, "learning_rate": 1.2775042951725935e-08, "loss": 0.5074, "step": 15150 }, { "epoch": 0.98, "grad_norm": 1.2320985794067383, "learning_rate": 1.2700475917588607e-08, "loss": 0.5578, "step": 15151 }, { "epoch": 0.98, "grad_norm": 1.3612496852874756, "learning_rate": 1.2626126865445953e-08, "loss": 0.5412, "step": 15152 }, { "epoch": 0.98, "grad_norm": 1.1754502058029175, "learning_rate": 1.25519957985476e-08, "loss": 0.4908, "step": 15153 }, { "epoch": 0.98, "grad_norm": 1.1346802711486816, "learning_rate": 1.2478082720134288e-08, "loss": 0.4531, "step": 15154 }, { "epoch": 0.98, "grad_norm": 1.1964555978775024, "learning_rate": 1.2404387633435655e-08, "loss": 0.5082, "step": 15155 }, { "epoch": 0.98, "grad_norm": 1.1351405382156372, "learning_rate": 1.2330910541673013e-08, "loss": 0.4627, "step": 15156 }, { "epoch": 0.98, "grad_norm": 1.1879987716674805, "learning_rate": 1.2257651448058238e-08, "loss": 0.5042, "step": 15157 }, { "epoch": 0.98, "grad_norm": 1.1532833576202393, "learning_rate": 1.2184610355792658e-08, "loss": 0.5023, "step": 15158 }, { "epoch": 0.98, "grad_norm": 1.1566897630691528, "learning_rate": 1.211178726806872e-08, "loss": 0.5177, "step": 15159 }, { "epoch": 0.98, "grad_norm": 1.242815613746643, "learning_rate": 1.2039182188069987e-08, "loss": 0.5213, "step": 15160 }, { "epoch": 0.98, "grad_norm": 1.202143669128418, "learning_rate": 1.1966795118969476e-08, "loss": 0.5692, "step": 15161 }, { "epoch": 0.98, "grad_norm": 1.2020305395126343, "learning_rate": 1.1894626063931325e-08, "loss": 0.4934, "step": 15162 }, { "epoch": 0.98, "grad_norm": 1.1300020217895508, "learning_rate": 1.182267502610912e-08, "loss": 0.5001, "step": 15163 }, { "epoch": 0.98, "grad_norm": 1.1166085004806519, "learning_rate": 1.175094200864868e-08, "loss": 0.4803, "step": 15164 }, { "epoch": 0.98, "grad_norm": 1.1991798877716064, "learning_rate": 1.1679427014684164e-08, "loss": 0.5067, "step": 15165 }, { "epoch": 0.98, "grad_norm": 1.1980458498001099, "learning_rate": 1.1608130047341959e-08, "loss": 0.5293, "step": 15166 }, { "epoch": 0.98, "grad_norm": 1.2950328588485718, "learning_rate": 1.1537051109738463e-08, "loss": 0.5039, "step": 15167 }, { "epoch": 0.98, "grad_norm": 1.1216144561767578, "learning_rate": 1.146619020497952e-08, "loss": 0.4967, "step": 15168 }, { "epoch": 0.98, "grad_norm": 1.1831899881362915, "learning_rate": 1.1395547336163216e-08, "loss": 0.5162, "step": 15169 }, { "epoch": 0.98, "grad_norm": 1.1092820167541504, "learning_rate": 1.1325122506377073e-08, "loss": 0.4825, "step": 15170 }, { "epoch": 0.98, "grad_norm": 1.2452542781829834, "learning_rate": 1.1254915718698633e-08, "loss": 0.5131, "step": 15171 }, { "epoch": 0.98, "grad_norm": 1.1964495182037354, "learning_rate": 1.1184926976195997e-08, "loss": 0.4582, "step": 15172 }, { "epoch": 0.98, "grad_norm": 1.2279126644134521, "learning_rate": 1.1115156281929496e-08, "loss": 0.5496, "step": 15173 }, { "epoch": 0.98, "grad_norm": 1.2611316442489624, "learning_rate": 1.1045603638948354e-08, "loss": 0.5422, "step": 15174 }, { "epoch": 0.98, "grad_norm": 1.0768790245056152, "learning_rate": 1.097626905029181e-08, "loss": 0.5226, "step": 15175 }, { "epoch": 0.98, "grad_norm": 1.3118809461593628, "learning_rate": 1.0907152518990772e-08, "loss": 0.5167, "step": 15176 }, { "epoch": 0.98, "grad_norm": 1.1801137924194336, "learning_rate": 1.08382540480656e-08, "loss": 0.5281, "step": 15177 }, { "epoch": 0.98, "grad_norm": 1.2319684028625488, "learning_rate": 1.0769573640528885e-08, "loss": 0.5099, "step": 15178 }, { "epoch": 0.98, "grad_norm": 1.142307996749878, "learning_rate": 1.0701111299381562e-08, "loss": 0.5124, "step": 15179 }, { "epoch": 0.98, "grad_norm": 1.1458821296691895, "learning_rate": 1.0632867027615678e-08, "loss": 0.4633, "step": 15180 }, { "epoch": 0.98, "grad_norm": 1.204832673072815, "learning_rate": 1.0564840828214407e-08, "loss": 0.5076, "step": 15181 }, { "epoch": 0.98, "grad_norm": 1.240506649017334, "learning_rate": 1.0497032704151477e-08, "loss": 0.5353, "step": 15182 }, { "epoch": 0.98, "grad_norm": 1.174654245376587, "learning_rate": 1.0429442658390076e-08, "loss": 0.5312, "step": 15183 }, { "epoch": 0.98, "grad_norm": 1.221555233001709, "learning_rate": 1.0362070693883952e-08, "loss": 0.5314, "step": 15184 }, { "epoch": 0.98, "grad_norm": 1.2072117328643799, "learning_rate": 1.0294916813579082e-08, "loss": 0.5404, "step": 15185 }, { "epoch": 0.98, "grad_norm": 1.322736382484436, "learning_rate": 1.0227981020409227e-08, "loss": 0.5496, "step": 15186 }, { "epoch": 0.98, "grad_norm": 1.1331040859222412, "learning_rate": 1.016126331730094e-08, "loss": 0.5366, "step": 15187 }, { "epoch": 0.98, "grad_norm": 1.1275488138198853, "learning_rate": 1.0094763707169664e-08, "loss": 0.4711, "step": 15188 }, { "epoch": 0.98, "grad_norm": 1.1903188228607178, "learning_rate": 1.0028482192921961e-08, "loss": 0.4886, "step": 15189 }, { "epoch": 0.98, "grad_norm": 1.1941920518875122, "learning_rate": 9.962418777454964e-09, "loss": 0.5318, "step": 15190 }, { "epoch": 0.98, "grad_norm": 1.1818960905075073, "learning_rate": 9.89657346365691e-09, "loss": 0.5287, "step": 15191 }, { "epoch": 0.98, "grad_norm": 1.1326848268508911, "learning_rate": 9.830946254404395e-09, "loss": 0.5147, "step": 15192 }, { "epoch": 0.98, "grad_norm": 1.1933335065841675, "learning_rate": 9.765537152566229e-09, "loss": 0.4744, "step": 15193 }, { "epoch": 0.98, "grad_norm": 1.3064894676208496, "learning_rate": 9.700346161001794e-09, "loss": 0.5329, "step": 15194 }, { "epoch": 0.98, "grad_norm": 1.1068165302276611, "learning_rate": 9.635373282559924e-09, "loss": 0.5325, "step": 15195 }, { "epoch": 0.98, "grad_norm": 1.134652853012085, "learning_rate": 9.570618520080566e-09, "loss": 0.4886, "step": 15196 }, { "epoch": 0.98, "grad_norm": 1.2142083644866943, "learning_rate": 9.506081876394235e-09, "loss": 0.5403, "step": 15197 }, { "epoch": 0.98, "grad_norm": 1.2199015617370605, "learning_rate": 9.441763354321454e-09, "loss": 0.5146, "step": 15198 }, { "epoch": 0.98, "grad_norm": 1.4296470880508423, "learning_rate": 9.37766295667275e-09, "loss": 0.4761, "step": 15199 }, { "epoch": 0.98, "grad_norm": 1.1504334211349487, "learning_rate": 9.313780686250885e-09, "loss": 0.503, "step": 15200 }, { "epoch": 0.98, "grad_norm": 1.4249212741851807, "learning_rate": 9.250116545847509e-09, "loss": 0.542, "step": 15201 }, { "epoch": 0.98, "grad_norm": 1.282551884651184, "learning_rate": 9.186670538245401e-09, "loss": 0.5482, "step": 15202 }, { "epoch": 0.98, "grad_norm": 1.1712647676467896, "learning_rate": 9.123442666217342e-09, "loss": 0.5096, "step": 15203 }, { "epoch": 0.98, "grad_norm": 1.141299843788147, "learning_rate": 9.060432932526674e-09, "loss": 0.4663, "step": 15204 }, { "epoch": 0.98, "grad_norm": 1.2358568906784058, "learning_rate": 8.997641339927865e-09, "loss": 0.493, "step": 15205 }, { "epoch": 0.98, "grad_norm": 1.1119565963745117, "learning_rate": 8.935067891165384e-09, "loss": 0.5035, "step": 15206 }, { "epoch": 0.98, "grad_norm": 1.2918511629104614, "learning_rate": 8.87271258897371e-09, "loss": 0.52, "step": 15207 }, { "epoch": 0.98, "grad_norm": 1.242776870727539, "learning_rate": 8.810575436078993e-09, "loss": 0.5216, "step": 15208 }, { "epoch": 0.98, "grad_norm": 1.1887555122375488, "learning_rate": 8.748656435195735e-09, "loss": 0.503, "step": 15209 }, { "epoch": 0.98, "grad_norm": 1.2085336446762085, "learning_rate": 8.686955589031765e-09, "loss": 0.552, "step": 15210 }, { "epoch": 0.98, "grad_norm": 1.1478239297866821, "learning_rate": 8.625472900283261e-09, "loss": 0.5261, "step": 15211 }, { "epoch": 0.98, "grad_norm": 1.2317160367965698, "learning_rate": 8.564208371636961e-09, "loss": 0.5205, "step": 15212 }, { "epoch": 0.98, "grad_norm": 1.1494989395141602, "learning_rate": 8.503162005771281e-09, "loss": 0.5098, "step": 15213 }, { "epoch": 0.98, "grad_norm": 1.203560709953308, "learning_rate": 8.442333805354086e-09, "loss": 0.5204, "step": 15214 }, { "epoch": 0.98, "grad_norm": 1.2185148000717163, "learning_rate": 8.381723773044359e-09, "loss": 0.4258, "step": 15215 }, { "epoch": 0.98, "grad_norm": 1.2684478759765625, "learning_rate": 8.321331911490538e-09, "loss": 0.5073, "step": 15216 }, { "epoch": 0.98, "grad_norm": 1.1937239170074463, "learning_rate": 8.261158223332733e-09, "loss": 0.4981, "step": 15217 }, { "epoch": 0.98, "grad_norm": 1.0847039222717285, "learning_rate": 8.201202711200507e-09, "loss": 0.5101, "step": 15218 }, { "epoch": 0.98, "grad_norm": 1.0658799409866333, "learning_rate": 8.141465377715097e-09, "loss": 0.5135, "step": 15219 }, { "epoch": 0.98, "grad_norm": 1.189876914024353, "learning_rate": 8.081946225487192e-09, "loss": 0.5177, "step": 15220 }, { "epoch": 0.98, "grad_norm": 1.2270143032073975, "learning_rate": 8.022645257118045e-09, "loss": 0.4955, "step": 15221 }, { "epoch": 0.98, "grad_norm": 1.2208114862442017, "learning_rate": 7.963562475199471e-09, "loss": 0.538, "step": 15222 }, { "epoch": 0.98, "grad_norm": 1.0986218452453613, "learning_rate": 7.90469788231385e-09, "loss": 0.509, "step": 15223 }, { "epoch": 0.98, "grad_norm": 1.1612281799316406, "learning_rate": 7.846051481034123e-09, "loss": 0.4844, "step": 15224 }, { "epoch": 0.98, "grad_norm": 1.27986741065979, "learning_rate": 7.787623273923795e-09, "loss": 0.5159, "step": 15225 }, { "epoch": 0.98, "grad_norm": 1.1514968872070312, "learning_rate": 7.729413263536378e-09, "loss": 0.5489, "step": 15226 }, { "epoch": 0.98, "grad_norm": 1.222596287727356, "learning_rate": 7.67142145241595e-09, "loss": 0.4996, "step": 15227 }, { "epoch": 0.98, "grad_norm": 1.1729531288146973, "learning_rate": 7.613647843097149e-09, "loss": 0.52, "step": 15228 }, { "epoch": 0.98, "grad_norm": 1.123727560043335, "learning_rate": 7.556092438105734e-09, "loss": 0.496, "step": 15229 }, { "epoch": 0.98, "grad_norm": 1.1618825197219849, "learning_rate": 7.498755239956357e-09, "loss": 0.5105, "step": 15230 }, { "epoch": 0.98, "grad_norm": 1.1105483770370483, "learning_rate": 7.4416362511559034e-09, "loss": 0.5075, "step": 15231 }, { "epoch": 0.98, "grad_norm": 1.0635685920715332, "learning_rate": 7.384735474200155e-09, "loss": 0.5112, "step": 15232 }, { "epoch": 0.98, "grad_norm": 1.1197102069854736, "learning_rate": 7.328052911577121e-09, "loss": 0.4986, "step": 15233 }, { "epoch": 0.98, "grad_norm": 1.2370308637619019, "learning_rate": 7.271588565763155e-09, "loss": 0.4956, "step": 15234 }, { "epoch": 0.98, "grad_norm": 1.2171833515167236, "learning_rate": 7.215342439226281e-09, "loss": 0.5289, "step": 15235 }, { "epoch": 0.98, "grad_norm": 1.1207772493362427, "learning_rate": 7.159314534425643e-09, "loss": 0.5118, "step": 15236 }, { "epoch": 0.98, "grad_norm": 1.1426446437835693, "learning_rate": 7.10350485380984e-09, "loss": 0.5557, "step": 15237 }, { "epoch": 0.98, "grad_norm": 1.1852961778640747, "learning_rate": 7.0479133998180295e-09, "loss": 0.5176, "step": 15238 }, { "epoch": 0.98, "grad_norm": 1.2528903484344482, "learning_rate": 6.992540174879936e-09, "loss": 0.5565, "step": 15239 }, { "epoch": 0.98, "grad_norm": 1.1731483936309814, "learning_rate": 6.9373851814152905e-09, "loss": 0.5029, "step": 15240 }, { "epoch": 0.98, "grad_norm": 1.1676026582717896, "learning_rate": 6.882448421835497e-09, "loss": 0.5211, "step": 15241 }, { "epoch": 0.98, "grad_norm": 1.2535409927368164, "learning_rate": 6.827729898541968e-09, "loss": 0.4873, "step": 15242 }, { "epoch": 0.98, "grad_norm": 1.1422544717788696, "learning_rate": 6.773229613925569e-09, "loss": 0.5173, "step": 15243 }, { "epoch": 0.98, "grad_norm": 1.1701364517211914, "learning_rate": 6.718947570368284e-09, "loss": 0.4791, "step": 15244 }, { "epoch": 0.98, "grad_norm": 1.1359518766403198, "learning_rate": 6.664883770243214e-09, "loss": 0.5416, "step": 15245 }, { "epoch": 0.98, "grad_norm": 1.1643340587615967, "learning_rate": 6.611038215912913e-09, "loss": 0.5232, "step": 15246 }, { "epoch": 0.98, "grad_norm": 1.379845380783081, "learning_rate": 6.557410909731054e-09, "loss": 0.5511, "step": 15247 }, { "epoch": 0.98, "grad_norm": 1.2172046899795532, "learning_rate": 6.5040018540413196e-09, "loss": 0.4741, "step": 15248 }, { "epoch": 0.98, "grad_norm": 1.2435462474822998, "learning_rate": 6.450811051178507e-09, "loss": 0.5152, "step": 15249 }, { "epoch": 0.98, "grad_norm": 1.1607176065444946, "learning_rate": 6.397838503467424e-09, "loss": 0.5314, "step": 15250 }, { "epoch": 0.98, "grad_norm": 1.1685949563980103, "learning_rate": 6.345084213222885e-09, "loss": 0.4522, "step": 15251 }, { "epoch": 0.98, "grad_norm": 1.213413119316101, "learning_rate": 6.292548182750824e-09, "loss": 0.5484, "step": 15252 }, { "epoch": 0.98, "grad_norm": 1.010756015777588, "learning_rate": 6.240230414347736e-09, "loss": 0.4884, "step": 15253 }, { "epoch": 0.98, "grad_norm": 1.1997580528259277, "learning_rate": 6.1881309103001275e-09, "loss": 0.4898, "step": 15254 }, { "epoch": 0.98, "grad_norm": 1.1904743909835815, "learning_rate": 6.136249672885064e-09, "loss": 0.4841, "step": 15255 }, { "epoch": 0.98, "grad_norm": 1.2124195098876953, "learning_rate": 6.0845867043701765e-09, "loss": 0.546, "step": 15256 }, { "epoch": 0.98, "grad_norm": 1.2577210664749146, "learning_rate": 6.033142007013659e-09, "loss": 0.5349, "step": 15257 }, { "epoch": 0.98, "grad_norm": 1.1349419355392456, "learning_rate": 5.981915583063713e-09, "loss": 0.4793, "step": 15258 }, { "epoch": 0.98, "grad_norm": 1.1393040418624878, "learning_rate": 5.9309074347596585e-09, "loss": 0.4653, "step": 15259 }, { "epoch": 0.99, "grad_norm": 1.206824779510498, "learning_rate": 5.880117564330823e-09, "loss": 0.5443, "step": 15260 }, { "epoch": 0.99, "grad_norm": 1.205322027206421, "learning_rate": 5.829545973996542e-09, "loss": 0.4805, "step": 15261 }, { "epoch": 0.99, "grad_norm": 1.1792171001434326, "learning_rate": 5.77919266596838e-09, "loss": 0.4996, "step": 15262 }, { "epoch": 0.99, "grad_norm": 1.2312887907028198, "learning_rate": 5.729057642446245e-09, "loss": 0.517, "step": 15263 }, { "epoch": 0.99, "grad_norm": 1.1087017059326172, "learning_rate": 5.6791409056211615e-09, "loss": 0.5192, "step": 15264 }, { "epoch": 0.99, "grad_norm": 1.272347092628479, "learning_rate": 5.6294424576758265e-09, "loss": 0.5641, "step": 15265 }, { "epoch": 0.99, "grad_norm": 1.1208263635635376, "learning_rate": 5.5799623007818384e-09, "loss": 0.4896, "step": 15266 }, { "epoch": 0.99, "grad_norm": 1.2261085510253906, "learning_rate": 5.5307004371013555e-09, "loss": 0.5077, "step": 15267 }, { "epoch": 0.99, "grad_norm": 1.1030197143554688, "learning_rate": 5.481656868788765e-09, "loss": 0.5114, "step": 15268 }, { "epoch": 0.99, "grad_norm": 1.1806139945983887, "learning_rate": 5.4328315979867986e-09, "loss": 0.512, "step": 15269 }, { "epoch": 0.99, "grad_norm": 1.202369213104248, "learning_rate": 5.384224626829304e-09, "loss": 0.523, "step": 15270 }, { "epoch": 0.99, "grad_norm": 1.1995413303375244, "learning_rate": 5.3358359574412486e-09, "loss": 0.538, "step": 15271 }, { "epoch": 0.99, "grad_norm": 1.1532663106918335, "learning_rate": 5.287665591937052e-09, "loss": 0.5186, "step": 15272 }, { "epoch": 0.99, "grad_norm": 1.2230119705200195, "learning_rate": 5.239713532422808e-09, "loss": 0.5236, "step": 15273 }, { "epoch": 0.99, "grad_norm": 1.1904993057250977, "learning_rate": 5.191979780994061e-09, "loss": 0.4756, "step": 15274 }, { "epoch": 0.99, "grad_norm": 1.2968512773513794, "learning_rate": 5.144464339736921e-09, "loss": 0.5444, "step": 15275 }, { "epoch": 0.99, "grad_norm": 1.137061357498169, "learning_rate": 5.097167210728615e-09, "loss": 0.5032, "step": 15276 }, { "epoch": 0.99, "grad_norm": 1.0849545001983643, "learning_rate": 5.0500883960358235e-09, "loss": 0.494, "step": 15277 }, { "epoch": 0.99, "grad_norm": 1.2131258249282837, "learning_rate": 5.0032278977169005e-09, "loss": 0.5274, "step": 15278 }, { "epoch": 0.99, "grad_norm": 1.2874728441238403, "learning_rate": 4.956585717819095e-09, "loss": 0.4915, "step": 15279 }, { "epoch": 0.99, "grad_norm": 1.26573646068573, "learning_rate": 4.91016185838189e-09, "loss": 0.5121, "step": 15280 }, { "epoch": 0.99, "grad_norm": 1.2217168807983398, "learning_rate": 4.863956321434216e-09, "loss": 0.5527, "step": 15281 }, { "epoch": 0.99, "grad_norm": 1.1849908828735352, "learning_rate": 4.817969108995013e-09, "loss": 0.4908, "step": 15282 }, { "epoch": 0.99, "grad_norm": 1.320189356803894, "learning_rate": 4.772200223074896e-09, "loss": 0.501, "step": 15283 }, { "epoch": 0.99, "grad_norm": 1.1383267641067505, "learning_rate": 4.726649665673933e-09, "loss": 0.4611, "step": 15284 }, { "epoch": 0.99, "grad_norm": 1.237778902053833, "learning_rate": 4.681317438782751e-09, "loss": 0.5453, "step": 15285 }, { "epoch": 0.99, "grad_norm": 1.2235872745513916, "learning_rate": 4.636203544383655e-09, "loss": 0.5327, "step": 15286 }, { "epoch": 0.99, "grad_norm": 1.177011251449585, "learning_rate": 4.591307984447846e-09, "loss": 0.5152, "step": 15287 }, { "epoch": 0.99, "grad_norm": 1.1895400285720825, "learning_rate": 4.546630760937088e-09, "loss": 0.4971, "step": 15288 }, { "epoch": 0.99, "grad_norm": 1.1420849561691284, "learning_rate": 4.502171875805372e-09, "loss": 0.5043, "step": 15289 }, { "epoch": 0.99, "grad_norm": 1.2057279348373413, "learning_rate": 4.4579313309944804e-09, "loss": 0.4968, "step": 15290 }, { "epoch": 0.99, "grad_norm": 1.1392245292663574, "learning_rate": 4.4139091284395306e-09, "loss": 0.5426, "step": 15291 }, { "epoch": 0.99, "grad_norm": 1.2025556564331055, "learning_rate": 4.3701052700628746e-09, "loss": 0.4996, "step": 15292 }, { "epoch": 0.99, "grad_norm": 1.1109156608581543, "learning_rate": 4.326519757780756e-09, "loss": 0.5486, "step": 15293 }, { "epoch": 0.99, "grad_norm": 1.1252119541168213, "learning_rate": 4.283152593497208e-09, "loss": 0.4905, "step": 15294 }, { "epoch": 0.99, "grad_norm": 1.0219659805297852, "learning_rate": 4.240003779107937e-09, "loss": 0.4554, "step": 15295 }, { "epoch": 0.99, "grad_norm": 1.427799105644226, "learning_rate": 4.197073316499211e-09, "loss": 0.4889, "step": 15296 }, { "epoch": 0.99, "grad_norm": 1.2178232669830322, "learning_rate": 4.154361207546753e-09, "loss": 0.5433, "step": 15297 }, { "epoch": 0.99, "grad_norm": 1.1583001613616943, "learning_rate": 4.111867454117402e-09, "loss": 0.4741, "step": 15298 }, { "epoch": 0.99, "grad_norm": 1.129035234451294, "learning_rate": 4.069592058069116e-09, "loss": 0.5587, "step": 15299 }, { "epoch": 0.99, "grad_norm": 1.064842939376831, "learning_rate": 4.027535021249307e-09, "loss": 0.5082, "step": 15300 }, { "epoch": 0.99, "grad_norm": 1.140982747077942, "learning_rate": 3.985696345495949e-09, "loss": 0.4783, "step": 15301 }, { "epoch": 0.99, "grad_norm": 1.2100050449371338, "learning_rate": 3.944076032638133e-09, "loss": 0.4828, "step": 15302 }, { "epoch": 0.99, "grad_norm": 1.025804042816162, "learning_rate": 3.902674084494962e-09, "loss": 0.481, "step": 15303 }, { "epoch": 0.99, "grad_norm": 1.2224360704421997, "learning_rate": 3.861490502874987e-09, "loss": 0.5089, "step": 15304 }, { "epoch": 0.99, "grad_norm": 1.268990397453308, "learning_rate": 3.820525289580102e-09, "loss": 0.5341, "step": 15305 }, { "epoch": 0.99, "grad_norm": 1.2342314720153809, "learning_rate": 3.7797784463988744e-09, "loss": 0.5379, "step": 15306 }, { "epoch": 0.99, "grad_norm": 1.1928139925003052, "learning_rate": 3.739249975113768e-09, "loss": 0.4815, "step": 15307 }, { "epoch": 0.99, "grad_norm": 1.339611291885376, "learning_rate": 3.6989398774950336e-09, "loss": 0.538, "step": 15308 }, { "epoch": 0.99, "grad_norm": 1.1371667385101318, "learning_rate": 3.6588481553051504e-09, "loss": 0.4747, "step": 15309 }, { "epoch": 0.99, "grad_norm": 1.1960089206695557, "learning_rate": 3.6189748102966047e-09, "loss": 0.5076, "step": 15310 }, { "epoch": 0.99, "grad_norm": 1.1434566974639893, "learning_rate": 3.5793198442113375e-09, "loss": 0.5326, "step": 15311 }, { "epoch": 0.99, "grad_norm": 1.18614661693573, "learning_rate": 3.5398832587829613e-09, "loss": 0.4872, "step": 15312 }, { "epoch": 0.99, "grad_norm": 1.1762263774871826, "learning_rate": 3.5006650557356523e-09, "loss": 0.527, "step": 15313 }, { "epoch": 0.99, "grad_norm": 1.1559873819351196, "learning_rate": 3.4616652367830404e-09, "loss": 0.5081, "step": 15314 }, { "epoch": 0.99, "grad_norm": 1.2558159828186035, "learning_rate": 3.422883803629873e-09, "loss": 0.4732, "step": 15315 }, { "epoch": 0.99, "grad_norm": 1.1470293998718262, "learning_rate": 3.3843207579714597e-09, "loss": 0.5135, "step": 15316 }, { "epoch": 0.99, "grad_norm": 1.1916886568069458, "learning_rate": 3.34597610149201e-09, "loss": 0.5506, "step": 15317 }, { "epoch": 0.99, "grad_norm": 1.133342981338501, "learning_rate": 3.3078498358690704e-09, "loss": 0.5039, "step": 15318 }, { "epoch": 0.99, "grad_norm": 1.2125877141952515, "learning_rate": 3.26994196276853e-09, "loss": 0.5494, "step": 15319 }, { "epoch": 0.99, "grad_norm": 1.282402753829956, "learning_rate": 3.232252483846843e-09, "loss": 0.5204, "step": 15320 }, { "epoch": 0.99, "grad_norm": 1.0888350009918213, "learning_rate": 3.194781400751579e-09, "loss": 0.4973, "step": 15321 }, { "epoch": 0.99, "grad_norm": 1.2978554964065552, "learning_rate": 3.1575287151203173e-09, "loss": 0.5417, "step": 15322 }, { "epoch": 0.99, "grad_norm": 1.2211722135543823, "learning_rate": 3.1204944285812e-09, "loss": 0.4906, "step": 15323 }, { "epoch": 0.99, "grad_norm": 1.0965451002120972, "learning_rate": 3.0836785427534878e-09, "loss": 0.4764, "step": 15324 }, { "epoch": 0.99, "grad_norm": 1.2400392293930054, "learning_rate": 3.0470810592464483e-09, "loss": 0.529, "step": 15325 }, { "epoch": 0.99, "grad_norm": 1.3555347919464111, "learning_rate": 3.0107019796588034e-09, "loss": 0.5261, "step": 15326 }, { "epoch": 0.99, "grad_norm": 1.2256979942321777, "learning_rate": 2.974541305580947e-09, "loss": 0.5236, "step": 15327 }, { "epoch": 0.99, "grad_norm": 1.2779439687728882, "learning_rate": 2.9385990385932818e-09, "loss": 0.5359, "step": 15328 }, { "epoch": 0.99, "grad_norm": 1.1994119882583618, "learning_rate": 2.902875180266773e-09, "loss": 0.4854, "step": 15329 }, { "epoch": 0.99, "grad_norm": 1.1901136636734009, "learning_rate": 2.86736973216295e-09, "loss": 0.5007, "step": 15330 }, { "epoch": 0.99, "grad_norm": 1.2427699565887451, "learning_rate": 2.8320826958339045e-09, "loss": 0.5226, "step": 15331 }, { "epoch": 0.99, "grad_norm": 1.0882881879806519, "learning_rate": 2.7970140728211803e-09, "loss": 0.5037, "step": 15332 }, { "epoch": 0.99, "grad_norm": 1.1141417026519775, "learning_rate": 2.7621638646585515e-09, "loss": 0.4892, "step": 15333 }, { "epoch": 0.99, "grad_norm": 1.189963936805725, "learning_rate": 2.7275320728686883e-09, "loss": 0.4563, "step": 15334 }, { "epoch": 0.99, "grad_norm": 1.0636438131332397, "learning_rate": 2.693118698964825e-09, "loss": 0.5158, "step": 15335 }, { "epoch": 0.99, "grad_norm": 1.1639596223831177, "learning_rate": 2.658923744451869e-09, "loss": 0.5247, "step": 15336 }, { "epoch": 0.99, "grad_norm": 1.225212574005127, "learning_rate": 2.6249472108236253e-09, "loss": 0.5141, "step": 15337 }, { "epoch": 0.99, "grad_norm": 1.1680669784545898, "learning_rate": 2.591189099566127e-09, "loss": 0.4516, "step": 15338 }, { "epoch": 0.99, "grad_norm": 1.0969218015670776, "learning_rate": 2.557649412153751e-09, "loss": 0.5024, "step": 15339 }, { "epoch": 0.99, "grad_norm": 1.1798778772354126, "learning_rate": 2.5243281500531012e-09, "loss": 0.5689, "step": 15340 }, { "epoch": 0.99, "grad_norm": 1.290961742401123, "learning_rate": 2.491225314720791e-09, "loss": 0.4824, "step": 15341 }, { "epoch": 0.99, "grad_norm": 1.3100990056991577, "learning_rate": 2.4583409076028853e-09, "loss": 0.5496, "step": 15342 }, { "epoch": 0.99, "grad_norm": 1.2410355806350708, "learning_rate": 2.4256749301371228e-09, "loss": 0.5857, "step": 15343 }, { "epoch": 0.99, "grad_norm": 1.2030309438705444, "learning_rate": 2.3932273837512508e-09, "loss": 0.5686, "step": 15344 }, { "epoch": 0.99, "grad_norm": 1.1945968866348267, "learning_rate": 2.3609982698635793e-09, "loss": 0.502, "step": 15345 }, { "epoch": 0.99, "grad_norm": 1.1827826499938965, "learning_rate": 2.3289875898818704e-09, "loss": 0.5167, "step": 15346 }, { "epoch": 0.99, "grad_norm": 1.1820363998413086, "learning_rate": 2.297195345206671e-09, "loss": 0.5486, "step": 15347 }, { "epoch": 0.99, "grad_norm": 1.216280221939087, "learning_rate": 2.2656215372268697e-09, "loss": 0.5557, "step": 15348 }, { "epoch": 0.99, "grad_norm": 1.2850418090820312, "learning_rate": 2.234266167321919e-09, "loss": 0.4759, "step": 15349 }, { "epoch": 0.99, "grad_norm": 1.2374340295791626, "learning_rate": 2.2031292368629444e-09, "loss": 0.5395, "step": 15350 }, { "epoch": 0.99, "grad_norm": 1.2087739706039429, "learning_rate": 2.172210747211079e-09, "loss": 0.5316, "step": 15351 }, { "epoch": 0.99, "grad_norm": 1.2527015209197998, "learning_rate": 2.141510699716909e-09, "loss": 0.5367, "step": 15352 }, { "epoch": 0.99, "grad_norm": 1.1088793277740479, "learning_rate": 2.111029095722694e-09, "loss": 0.494, "step": 15353 }, { "epoch": 0.99, "grad_norm": 1.209019422531128, "learning_rate": 2.0807659365607023e-09, "loss": 0.5043, "step": 15354 }, { "epoch": 0.99, "grad_norm": 1.080001711845398, "learning_rate": 2.0507212235537646e-09, "loss": 0.489, "step": 15355 }, { "epoch": 0.99, "grad_norm": 1.0560390949249268, "learning_rate": 2.0208949580147188e-09, "loss": 0.4791, "step": 15356 }, { "epoch": 0.99, "grad_norm": 1.1919262409210205, "learning_rate": 1.991287141247522e-09, "loss": 0.4958, "step": 15357 }, { "epoch": 0.99, "grad_norm": 1.3264633417129517, "learning_rate": 1.9618977745461398e-09, "loss": 0.5472, "step": 15358 }, { "epoch": 0.99, "grad_norm": 1.2563579082489014, "learning_rate": 1.9327268591950998e-09, "loss": 0.5491, "step": 15359 }, { "epoch": 0.99, "grad_norm": 1.2041876316070557, "learning_rate": 1.903774396469493e-09, "loss": 0.525, "step": 15360 }, { "epoch": 0.99, "grad_norm": 1.2004989385604858, "learning_rate": 1.875040387634419e-09, "loss": 0.5163, "step": 15361 }, { "epoch": 0.99, "grad_norm": 1.2227646112442017, "learning_rate": 1.846524833946095e-09, "loss": 0.5045, "step": 15362 }, { "epoch": 0.99, "grad_norm": 1.1410517692565918, "learning_rate": 1.8182277366507462e-09, "loss": 0.4894, "step": 15363 }, { "epoch": 0.99, "grad_norm": 1.2059009075164795, "learning_rate": 1.790149096985716e-09, "loss": 0.519, "step": 15364 }, { "epoch": 0.99, "grad_norm": 1.1694546937942505, "learning_rate": 1.762288916176691e-09, "loss": 0.5096, "step": 15365 }, { "epoch": 0.99, "grad_norm": 1.1803431510925293, "learning_rate": 1.734647195443251e-09, "loss": 0.4903, "step": 15366 }, { "epoch": 0.99, "grad_norm": 1.22588050365448, "learning_rate": 1.7072239359922082e-09, "loss": 0.5258, "step": 15367 }, { "epoch": 0.99, "grad_norm": 1.1128655672073364, "learning_rate": 1.6800191390226039e-09, "loss": 0.4894, "step": 15368 }, { "epoch": 0.99, "grad_norm": 1.2142536640167236, "learning_rate": 1.653032805724042e-09, "loss": 0.4773, "step": 15369 }, { "epoch": 0.99, "grad_norm": 1.2093613147735596, "learning_rate": 1.6262649372750239e-09, "loss": 0.5198, "step": 15370 }, { "epoch": 0.99, "grad_norm": 1.225654125213623, "learning_rate": 1.59971553484628e-09, "loss": 0.5331, "step": 15371 }, { "epoch": 0.99, "grad_norm": 1.2461081743240356, "learning_rate": 1.5733845995974383e-09, "loss": 0.4922, "step": 15372 }, { "epoch": 0.99, "grad_norm": 1.0884677171707153, "learning_rate": 1.5472721326803553e-09, "loss": 0.5211, "step": 15373 }, { "epoch": 0.99, "grad_norm": 1.200620412826538, "learning_rate": 1.5213781352357848e-09, "loss": 0.5238, "step": 15374 }, { "epoch": 0.99, "grad_norm": 1.1824616193771362, "learning_rate": 1.4957026083950444e-09, "loss": 0.537, "step": 15375 }, { "epoch": 0.99, "grad_norm": 1.098745584487915, "learning_rate": 1.4702455532811244e-09, "loss": 0.4716, "step": 15376 }, { "epoch": 0.99, "grad_norm": 1.1485462188720703, "learning_rate": 1.4450069710064684e-09, "loss": 0.5297, "step": 15377 }, { "epoch": 0.99, "grad_norm": 1.1843724250793457, "learning_rate": 1.4199868626746383e-09, "loss": 0.5101, "step": 15378 }, { "epoch": 0.99, "grad_norm": 1.1582921743392944, "learning_rate": 1.3951852293780932e-09, "loss": 0.4645, "step": 15379 }, { "epoch": 0.99, "grad_norm": 1.2559106349945068, "learning_rate": 1.370602072200966e-09, "loss": 0.4686, "step": 15380 }, { "epoch": 0.99, "grad_norm": 1.1880043745040894, "learning_rate": 1.346237392219063e-09, "loss": 0.4845, "step": 15381 }, { "epoch": 0.99, "grad_norm": 1.2035040855407715, "learning_rate": 1.3220911904959778e-09, "loss": 0.5615, "step": 15382 }, { "epoch": 0.99, "grad_norm": 1.2269902229309082, "learning_rate": 1.298163468087532e-09, "loss": 0.4887, "step": 15383 }, { "epoch": 0.99, "grad_norm": 1.1741505861282349, "learning_rate": 1.2744542260395565e-09, "loss": 0.4977, "step": 15384 }, { "epoch": 0.99, "grad_norm": 1.4862234592437744, "learning_rate": 1.250963465388444e-09, "loss": 0.5517, "step": 15385 }, { "epoch": 0.99, "grad_norm": 1.0378122329711914, "learning_rate": 1.2276911871605957e-09, "loss": 0.4739, "step": 15386 }, { "epoch": 0.99, "grad_norm": 1.1536750793457031, "learning_rate": 1.2046373923735311e-09, "loss": 0.5079, "step": 15387 }, { "epoch": 0.99, "grad_norm": 1.1875213384628296, "learning_rate": 1.1818020820347776e-09, "loss": 0.4591, "step": 15388 }, { "epoch": 0.99, "grad_norm": 1.0763715505599976, "learning_rate": 1.1591852571418705e-09, "loss": 0.4759, "step": 15389 }, { "epoch": 0.99, "grad_norm": 1.300498604774475, "learning_rate": 1.1367869186840186e-09, "loss": 0.5174, "step": 15390 }, { "epoch": 0.99, "grad_norm": 1.1628752946853638, "learning_rate": 1.1146070676404387e-09, "loss": 0.5341, "step": 15391 }, { "epoch": 0.99, "grad_norm": 1.1622953414916992, "learning_rate": 1.092645704979245e-09, "loss": 0.4669, "step": 15392 }, { "epoch": 0.99, "grad_norm": 1.2273212671279907, "learning_rate": 1.0709028316618907e-09, "loss": 0.5451, "step": 15393 }, { "epoch": 0.99, "grad_norm": 1.1869558095932007, "learning_rate": 1.0493784486376169e-09, "loss": 0.5131, "step": 15394 }, { "epoch": 0.99, "grad_norm": 1.2147923707962036, "learning_rate": 1.0280725568473371e-09, "loss": 0.5664, "step": 15395 }, { "epoch": 0.99, "grad_norm": 1.1584852933883667, "learning_rate": 1.0069851572230838e-09, "loss": 0.4892, "step": 15396 }, { "epoch": 0.99, "grad_norm": 1.2476189136505127, "learning_rate": 9.861162506857869e-10, "loss": 0.5352, "step": 15397 }, { "epoch": 0.99, "grad_norm": 1.3035545349121094, "learning_rate": 9.654658381474945e-10, "loss": 0.5, "step": 15398 }, { "epoch": 0.99, "grad_norm": 1.0578114986419678, "learning_rate": 9.450339205108182e-10, "loss": 0.5142, "step": 15399 }, { "epoch": 0.99, "grad_norm": 1.2178465127944946, "learning_rate": 9.248204986694875e-10, "loss": 0.52, "step": 15400 }, { "epoch": 0.99, "grad_norm": 1.1897598505020142, "learning_rate": 9.048255735061295e-10, "loss": 0.5077, "step": 15401 }, { "epoch": 0.99, "grad_norm": 1.1552702188491821, "learning_rate": 8.85049145895045e-10, "loss": 0.5075, "step": 15402 }, { "epoch": 0.99, "grad_norm": 1.3556132316589355, "learning_rate": 8.654912167005425e-10, "loss": 0.5262, "step": 15403 }, { "epoch": 0.99, "grad_norm": 1.1331121921539307, "learning_rate": 8.461517867774938e-10, "loss": 0.5116, "step": 15404 }, { "epoch": 0.99, "grad_norm": 1.176263451576233, "learning_rate": 8.270308569713337e-10, "loss": 0.5152, "step": 15405 }, { "epoch": 0.99, "grad_norm": 1.161116600036621, "learning_rate": 8.081284281175051e-10, "loss": 0.5052, "step": 15406 }, { "epoch": 0.99, "grad_norm": 1.2462878227233887, "learning_rate": 7.894445010420138e-10, "loss": 0.4861, "step": 15407 }, { "epoch": 0.99, "grad_norm": 1.1384425163269043, "learning_rate": 7.70979076561984e-10, "loss": 0.5038, "step": 15408 }, { "epoch": 0.99, "grad_norm": 1.1002528667449951, "learning_rate": 7.52732155484548e-10, "loss": 0.4742, "step": 15409 }, { "epoch": 0.99, "grad_norm": 1.1081593036651611, "learning_rate": 7.347037386068456e-10, "loss": 0.5299, "step": 15410 }, { "epoch": 0.99, "grad_norm": 1.1878931522369385, "learning_rate": 7.168938267165804e-10, "loss": 0.5405, "step": 15411 }, { "epoch": 0.99, "grad_norm": 1.08554208278656, "learning_rate": 6.993024205931287e-10, "loss": 0.5011, "step": 15412 }, { "epoch": 0.99, "grad_norm": 1.147843599319458, "learning_rate": 6.819295210042099e-10, "loss": 0.5047, "step": 15413 }, { "epoch": 0.99, "grad_norm": 1.154065728187561, "learning_rate": 6.647751287103265e-10, "loss": 0.5106, "step": 15414 }, { "epoch": 1.0, "grad_norm": 1.0992848873138428, "learning_rate": 6.478392444603243e-10, "loss": 0.5333, "step": 15415 }, { "epoch": 1.0, "grad_norm": 1.3517467975616455, "learning_rate": 6.311218689947219e-10, "loss": 0.5355, "step": 15416 }, { "epoch": 1.0, "grad_norm": 1.15823495388031, "learning_rate": 6.146230030440459e-10, "loss": 0.5173, "step": 15417 }, { "epoch": 1.0, "grad_norm": 1.2862781286239624, "learning_rate": 5.983426473299414e-10, "loss": 0.56, "step": 15418 }, { "epoch": 1.0, "grad_norm": 1.1587839126586914, "learning_rate": 5.822808025640614e-10, "loss": 0.5046, "step": 15419 }, { "epoch": 1.0, "grad_norm": 1.287578821182251, "learning_rate": 5.664374694475117e-10, "loss": 0.5045, "step": 15420 }, { "epoch": 1.0, "grad_norm": 1.0993181467056274, "learning_rate": 5.508126486730714e-10, "loss": 0.4934, "step": 15421 }, { "epoch": 1.0, "grad_norm": 1.1323792934417725, "learning_rate": 5.354063409240828e-10, "loss": 0.4975, "step": 15422 }, { "epoch": 1.0, "grad_norm": 1.2100722789764404, "learning_rate": 5.202185468738963e-10, "loss": 0.5484, "step": 15423 }, { "epoch": 1.0, "grad_norm": 1.0976401567459106, "learning_rate": 5.0524926718587e-10, "loss": 0.5232, "step": 15424 }, { "epoch": 1.0, "grad_norm": 1.188473105430603, "learning_rate": 4.904985025144804e-10, "loss": 0.5404, "step": 15425 }, { "epoch": 1.0, "grad_norm": 1.281209945678711, "learning_rate": 4.759662535047672e-10, "loss": 0.4575, "step": 15426 }, { "epoch": 1.0, "grad_norm": 1.140872836112976, "learning_rate": 4.616525207917777e-10, "loss": 0.5021, "step": 15427 }, { "epoch": 1.0, "grad_norm": 1.205586552619934, "learning_rate": 4.475573050005677e-10, "loss": 0.4925, "step": 15428 }, { "epoch": 1.0, "grad_norm": 1.1515452861785889, "learning_rate": 4.3368060674786603e-10, "loss": 0.4653, "step": 15429 }, { "epoch": 1.0, "grad_norm": 1.1923469305038452, "learning_rate": 4.2002242663929936e-10, "loss": 0.4968, "step": 15430 }, { "epoch": 1.0, "grad_norm": 1.147118091583252, "learning_rate": 4.065827652732779e-10, "loss": 0.4862, "step": 15431 }, { "epoch": 1.0, "grad_norm": 1.1984654664993286, "learning_rate": 3.9336162323599937e-10, "loss": 0.5352, "step": 15432 }, { "epoch": 1.0, "grad_norm": 1.2800229787826538, "learning_rate": 3.8035900110589e-10, "loss": 0.5311, "step": 15433 }, { "epoch": 1.0, "grad_norm": 1.2344611883163452, "learning_rate": 3.675748994508288e-10, "loss": 0.511, "step": 15434 }, { "epoch": 1.0, "grad_norm": 1.2149574756622314, "learning_rate": 3.550093188303683e-10, "loss": 0.5793, "step": 15435 }, { "epoch": 1.0, "grad_norm": 1.2604038715362549, "learning_rate": 3.426622597929585e-10, "loss": 0.5153, "step": 15436 }, { "epoch": 1.0, "grad_norm": 1.152356743812561, "learning_rate": 3.30533722878168e-10, "loss": 0.4964, "step": 15437 }, { "epoch": 1.0, "grad_norm": 1.2258975505828857, "learning_rate": 3.1862370861668323e-10, "loss": 0.4915, "step": 15438 }, { "epoch": 1.0, "grad_norm": 1.2394837141036987, "learning_rate": 3.0693221752864376e-10, "loss": 0.5206, "step": 15439 }, { "epoch": 1.0, "grad_norm": 1.1785598993301392, "learning_rate": 2.954592501253073e-10, "loss": 0.519, "step": 15440 }, { "epoch": 1.0, "grad_norm": 1.1599842309951782, "learning_rate": 2.842048069084946e-10, "loss": 0.4651, "step": 15441 }, { "epoch": 1.0, "grad_norm": 1.4051045179367065, "learning_rate": 2.731688883689243e-10, "loss": 0.5279, "step": 15442 }, { "epoch": 1.0, "grad_norm": 1.147156834602356, "learning_rate": 2.623514949900985e-10, "loss": 0.5054, "step": 15443 }, { "epoch": 1.0, "grad_norm": 1.1785119771957397, "learning_rate": 2.5175262724441707e-10, "loss": 0.5, "step": 15444 }, { "epoch": 1.0, "grad_norm": 1.2639628648757935, "learning_rate": 2.4137228559484306e-10, "loss": 0.5505, "step": 15445 }, { "epoch": 1.0, "grad_norm": 1.188988447189331, "learning_rate": 2.3121047049545763e-10, "loss": 0.5658, "step": 15446 }, { "epoch": 1.0, "grad_norm": 1.1310573816299438, "learning_rate": 2.2126718239035006e-10, "loss": 0.5102, "step": 15447 }, { "epoch": 1.0, "grad_norm": 1.2099578380584717, "learning_rate": 2.1154242171417261e-10, "loss": 0.5171, "step": 15448 }, { "epoch": 1.0, "grad_norm": 1.2646437883377075, "learning_rate": 2.020361888915856e-10, "loss": 0.5492, "step": 15449 }, { "epoch": 1.0, "grad_norm": 1.2314977645874023, "learning_rate": 1.9274848433836757e-10, "loss": 0.4609, "step": 15450 }, { "epoch": 1.0, "grad_norm": 1.1361430883407593, "learning_rate": 1.8367930846030502e-10, "loss": 0.4897, "step": 15451 }, { "epoch": 1.0, "grad_norm": 1.2584667205810547, "learning_rate": 1.7482866165430268e-10, "loss": 0.5235, "step": 15452 }, { "epoch": 1.0, "grad_norm": 1.268349528312683, "learning_rate": 1.6619654430671816e-10, "loss": 0.5502, "step": 15453 }, { "epoch": 1.0, "grad_norm": 1.1793711185455322, "learning_rate": 1.577829567950273e-10, "loss": 0.4868, "step": 15454 }, { "epoch": 1.0, "grad_norm": 1.248931884765625, "learning_rate": 1.4958789948671393e-10, "loss": 0.558, "step": 15455 }, { "epoch": 1.0, "grad_norm": 1.1953860521316528, "learning_rate": 1.4161137273982494e-10, "loss": 0.5371, "step": 15456 }, { "epoch": 1.0, "grad_norm": 1.1861151456832886, "learning_rate": 1.3385337690352552e-10, "loss": 0.5014, "step": 15457 }, { "epoch": 1.0, "grad_norm": 1.172990322113037, "learning_rate": 1.2631391231698875e-10, "loss": 0.5062, "step": 15458 }, { "epoch": 1.0, "grad_norm": 1.2443205118179321, "learning_rate": 1.189929793093958e-10, "loss": 0.5346, "step": 15459 }, { "epoch": 1.0, "grad_norm": 1.2586332559585571, "learning_rate": 1.1189057820049087e-10, "loss": 0.539, "step": 15460 }, { "epoch": 1.0, "grad_norm": 1.2038331031799316, "learning_rate": 1.0500670930058132e-10, "loss": 0.502, "step": 15461 }, { "epoch": 1.0, "grad_norm": 1.3022058010101318, "learning_rate": 9.834137291164781e-11, "loss": 0.5856, "step": 15462 }, { "epoch": 1.0, "grad_norm": 1.1601903438568115, "learning_rate": 9.189456932401364e-11, "loss": 0.5329, "step": 15463 }, { "epoch": 1.0, "grad_norm": 1.3967041969299316, "learning_rate": 8.566629881967547e-11, "loss": 0.533, "step": 15464 }, { "epoch": 1.0, "grad_norm": 1.296358346939087, "learning_rate": 7.965656167119306e-11, "loss": 0.5161, "step": 15465 }, { "epoch": 1.0, "grad_norm": 1.172501564025879, "learning_rate": 7.386535814057904e-11, "loss": 0.4564, "step": 15466 }, { "epoch": 1.0, "grad_norm": 1.226940393447876, "learning_rate": 6.829268848151938e-11, "loss": 0.4973, "step": 15467 }, { "epoch": 1.0, "grad_norm": 1.1256674528121948, "learning_rate": 6.29385529371529e-11, "loss": 0.4926, "step": 15468 }, { "epoch": 1.0, "grad_norm": 1.318701982498169, "learning_rate": 5.780295174173667e-11, "loss": 0.5282, "step": 15469 }, { "epoch": 1.0, "grad_norm": 1.1602165699005127, "learning_rate": 5.288588512009085e-11, "loss": 0.504, "step": 15470 }, { "epoch": 1.0, "grad_norm": 1.196083664894104, "learning_rate": 4.8187353286488494e-11, "loss": 0.5556, "step": 15471 }, { "epoch": 1.0, "grad_norm": 1.1597474813461304, "learning_rate": 4.370735644687596e-11, "loss": 0.5282, "step": 15472 }, { "epoch": 1.0, "grad_norm": 1.3324589729309082, "learning_rate": 3.944589479665251e-11, "loss": 0.5571, "step": 15473 }, { "epoch": 1.0, "grad_norm": 1.1844801902770996, "learning_rate": 3.540296852178049e-11, "loss": 0.5335, "step": 15474 }, { "epoch": 1.0, "grad_norm": 1.1483122110366821, "learning_rate": 3.1578577799895596e-11, "loss": 0.5441, "step": 15475 }, { "epoch": 1.0, "grad_norm": 1.2455034255981445, "learning_rate": 2.797272279753127e-11, "loss": 0.5292, "step": 15476 }, { "epoch": 1.0, "grad_norm": 1.1847378015518188, "learning_rate": 2.4585403672339192e-11, "loss": 0.5451, "step": 15477 }, { "epoch": 1.0, "grad_norm": 1.3203248977661133, "learning_rate": 2.1416620571979017e-11, "loss": 0.5067, "step": 15478 }, { "epoch": 1.0, "grad_norm": 1.1695729494094849, "learning_rate": 1.8466373635783742e-11, "loss": 0.502, "step": 15479 }, { "epoch": 1.0, "grad_norm": 1.1748936176300049, "learning_rate": 1.573466299253923e-11, "loss": 0.4877, "step": 15480 }, { "epoch": 1.0, "grad_norm": 1.0852153301239014, "learning_rate": 1.3221488761039348e-11, "loss": 0.4814, "step": 15481 }, { "epoch": 1.0, "grad_norm": 1.2782458066940308, "learning_rate": 1.0926851051196175e-11, "loss": 0.5201, "step": 15482 }, { "epoch": 1.0, "grad_norm": 1.162278175354004, "learning_rate": 8.850749964040007e-12, "loss": 0.4905, "step": 15483 }, { "epoch": 1.0, "grad_norm": 1.1204663515090942, "learning_rate": 6.993185590054019e-12, "loss": 0.5219, "step": 15484 }, { "epoch": 1.0, "grad_norm": 1.1248512268066406, "learning_rate": 5.354158009729382e-12, "loss": 0.4834, "step": 15485 }, { "epoch": 1.0, "grad_norm": 1.1403969526290894, "learning_rate": 3.933667295230592e-12, "loss": 0.4903, "step": 15486 }, { "epoch": 1.0, "grad_norm": 1.1924396753311157, "learning_rate": 2.7317135092852497e-12, "loss": 0.5108, "step": 15487 }, { "epoch": 1.0, "grad_norm": 1.1448320150375366, "learning_rate": 1.7482967029636145e-12, "loss": 0.5164, "step": 15488 }, { "epoch": 1.0, "grad_norm": 1.2038486003875732, "learning_rate": 9.834169206746068e-13, "loss": 0.5429, "step": 15489 }, { "epoch": 1.0, "grad_norm": 1.251604437828064, "learning_rate": 4.37074194614695e-13, "loss": 0.5633, "step": 15490 }, { "epoch": 1.0, "grad_norm": 1.2270528078079224, "learning_rate": 1.0926854976389678e-13, "loss": 0.525, "step": 15491 }, { "epoch": 1.0, "grad_norm": 1.1700235605239868, "learning_rate": 0.0, "loss": 0.4984, "step": 15492 }, { "epoch": 1.0, "step": 15492, "total_flos": 3.776754736458786e+19, "train_loss": 0.2507274884518006, "train_runtime": 68196.825, "train_samples_per_second": 58.156, "train_steps_per_second": 0.227 } ], "logging_steps": 1.0, "max_steps": 15492, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8000, "total_flos": 3.776754736458786e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }