{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 7345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006807351940095302, "grad_norm": 14.770733833312988, "learning_rate": 2.7173913043478262e-08, "loss": 1.6604, "step": 1 }, { "epoch": 0.0013614703880190605, "grad_norm": 14.61154556274414, "learning_rate": 5.4347826086956524e-08, "loss": 1.6699, "step": 2 }, { "epoch": 0.0020422055820285907, "grad_norm": 13.788142204284668, "learning_rate": 8.152173913043479e-08, "loss": 1.6971, "step": 3 }, { "epoch": 0.002722940776038121, "grad_norm": 14.730708122253418, "learning_rate": 1.0869565217391305e-07, "loss": 1.7281, "step": 4 }, { "epoch": 0.0034036759700476512, "grad_norm": 12.242794036865234, "learning_rate": 1.3586956521739132e-07, "loss": 1.6763, "step": 5 }, { "epoch": 0.0040844111640571815, "grad_norm": 13.795373916625977, "learning_rate": 1.6304347826086958e-07, "loss": 1.6873, "step": 6 }, { "epoch": 0.004765146358066712, "grad_norm": 14.370015144348145, "learning_rate": 1.9021739130434786e-07, "loss": 1.7161, "step": 7 }, { "epoch": 0.005445881552076242, "grad_norm": 14.788175582885742, "learning_rate": 2.173913043478261e-07, "loss": 1.6586, "step": 8 }, { "epoch": 0.006126616746085773, "grad_norm": 16.191638946533203, "learning_rate": 2.445652173913044e-07, "loss": 1.8414, "step": 9 }, { "epoch": 0.0068073519400953025, "grad_norm": 13.916003227233887, "learning_rate": 2.7173913043478264e-07, "loss": 1.7193, "step": 10 }, { "epoch": 0.007488087134104833, "grad_norm": 13.228225708007812, "learning_rate": 2.989130434782609e-07, "loss": 1.7311, "step": 11 }, { "epoch": 0.008168822328114363, "grad_norm": 14.395231246948242, "learning_rate": 3.2608695652173915e-07, "loss": 1.6987, "step": 12 }, { "epoch": 0.008849557522123894, "grad_norm": 14.064480781555176, "learning_rate": 3.532608695652174e-07, "loss": 1.632, "step": 13 }, { "epoch": 0.009530292716133424, "grad_norm": 13.814208030700684, "learning_rate": 3.804347826086957e-07, "loss": 1.6677, "step": 14 }, { "epoch": 0.010211027910142955, "grad_norm": 12.626769065856934, "learning_rate": 4.0760869565217393e-07, "loss": 1.6354, "step": 15 }, { "epoch": 0.010891763104152484, "grad_norm": 12.938849449157715, "learning_rate": 4.347826086956522e-07, "loss": 1.67, "step": 16 }, { "epoch": 0.011572498298162015, "grad_norm": 12.799755096435547, "learning_rate": 4.6195652173913045e-07, "loss": 1.7009, "step": 17 }, { "epoch": 0.012253233492171545, "grad_norm": 13.502803802490234, "learning_rate": 4.891304347826088e-07, "loss": 1.6197, "step": 18 }, { "epoch": 0.012933968686181076, "grad_norm": 10.538741111755371, "learning_rate": 5.16304347826087e-07, "loss": 1.6279, "step": 19 }, { "epoch": 0.013614703880190605, "grad_norm": 10.575577735900879, "learning_rate": 5.434782608695653e-07, "loss": 1.5524, "step": 20 }, { "epoch": 0.014295439074200136, "grad_norm": 12.144651412963867, "learning_rate": 5.706521739130435e-07, "loss": 1.6111, "step": 21 }, { "epoch": 0.014976174268209666, "grad_norm": 10.429139137268066, "learning_rate": 5.978260869565218e-07, "loss": 1.4808, "step": 22 }, { "epoch": 0.015656909462219197, "grad_norm": 10.706865310668945, "learning_rate": 6.25e-07, "loss": 1.4879, "step": 23 }, { "epoch": 0.016337644656228726, "grad_norm": 9.774752616882324, "learning_rate": 6.521739130434783e-07, "loss": 1.5669, "step": 24 }, { "epoch": 0.01701837985023826, "grad_norm": 9.323801040649414, "learning_rate": 6.793478260869566e-07, "loss": 1.5093, "step": 25 }, { "epoch": 0.017699115044247787, "grad_norm": 7.78405237197876, "learning_rate": 7.065217391304348e-07, "loss": 1.4643, "step": 26 }, { "epoch": 0.018379850238257316, "grad_norm": 7.156020164489746, "learning_rate": 7.336956521739132e-07, "loss": 1.5117, "step": 27 }, { "epoch": 0.01906058543226685, "grad_norm": 7.993779182434082, "learning_rate": 7.608695652173914e-07, "loss": 1.467, "step": 28 }, { "epoch": 0.019741320626276378, "grad_norm": 6.048841953277588, "learning_rate": 7.880434782608697e-07, "loss": 1.4081, "step": 29 }, { "epoch": 0.02042205582028591, "grad_norm": 6.887612342834473, "learning_rate": 8.152173913043479e-07, "loss": 1.4612, "step": 30 }, { "epoch": 0.02110279101429544, "grad_norm": 6.5488996505737305, "learning_rate": 8.423913043478261e-07, "loss": 1.4873, "step": 31 }, { "epoch": 0.021783526208304968, "grad_norm": 5.53971529006958, "learning_rate": 8.695652173913044e-07, "loss": 1.3842, "step": 32 }, { "epoch": 0.0224642614023145, "grad_norm": 6.1894683837890625, "learning_rate": 8.967391304347826e-07, "loss": 1.3336, "step": 33 }, { "epoch": 0.02314499659632403, "grad_norm": 5.144716262817383, "learning_rate": 9.239130434782609e-07, "loss": 1.3911, "step": 34 }, { "epoch": 0.023825731790333562, "grad_norm": 5.3912858963012695, "learning_rate": 9.510869565217393e-07, "loss": 1.3513, "step": 35 }, { "epoch": 0.02450646698434309, "grad_norm": 4.655354022979736, "learning_rate": 9.782608695652175e-07, "loss": 1.3312, "step": 36 }, { "epoch": 0.02518720217835262, "grad_norm": 5.730432987213135, "learning_rate": 1.0054347826086958e-06, "loss": 1.4134, "step": 37 }, { "epoch": 0.025867937372362152, "grad_norm": 4.930758953094482, "learning_rate": 1.032608695652174e-06, "loss": 1.2671, "step": 38 }, { "epoch": 0.02654867256637168, "grad_norm": 4.815064430236816, "learning_rate": 1.0597826086956523e-06, "loss": 1.3759, "step": 39 }, { "epoch": 0.02722940776038121, "grad_norm": 4.756772041320801, "learning_rate": 1.0869565217391306e-06, "loss": 1.2907, "step": 40 }, { "epoch": 0.027910142954390742, "grad_norm": 4.482181072235107, "learning_rate": 1.1141304347826088e-06, "loss": 1.2787, "step": 41 }, { "epoch": 0.02859087814840027, "grad_norm": 3.7565348148345947, "learning_rate": 1.141304347826087e-06, "loss": 1.2471, "step": 42 }, { "epoch": 0.029271613342409804, "grad_norm": 5.9350905418396, "learning_rate": 1.1684782608695653e-06, "loss": 1.2537, "step": 43 }, { "epoch": 0.029952348536419333, "grad_norm": 3.453871488571167, "learning_rate": 1.1956521739130436e-06, "loss": 1.3285, "step": 44 }, { "epoch": 0.03063308373042886, "grad_norm": 3.8828611373901367, "learning_rate": 1.2228260869565218e-06, "loss": 1.2951, "step": 45 }, { "epoch": 0.031313818924438394, "grad_norm": 3.5132393836975098, "learning_rate": 1.25e-06, "loss": 1.3351, "step": 46 }, { "epoch": 0.031994554118447927, "grad_norm": 3.5083281993865967, "learning_rate": 1.2771739130434786e-06, "loss": 1.2406, "step": 47 }, { "epoch": 0.03267528931245745, "grad_norm": 2.97625470161438, "learning_rate": 1.3043478260869566e-06, "loss": 1.1964, "step": 48 }, { "epoch": 0.033356024506466984, "grad_norm": 2.8885841369628906, "learning_rate": 1.3315217391304349e-06, "loss": 1.2787, "step": 49 }, { "epoch": 0.03403675970047652, "grad_norm": 2.861670732498169, "learning_rate": 1.3586956521739131e-06, "loss": 1.2223, "step": 50 }, { "epoch": 0.03471749489448604, "grad_norm": 3.088991403579712, "learning_rate": 1.3858695652173914e-06, "loss": 1.2651, "step": 51 }, { "epoch": 0.035398230088495575, "grad_norm": 2.7578678131103516, "learning_rate": 1.4130434782608697e-06, "loss": 1.2438, "step": 52 }, { "epoch": 0.03607896528250511, "grad_norm": 3.0454001426696777, "learning_rate": 1.440217391304348e-06, "loss": 1.0882, "step": 53 }, { "epoch": 0.03675970047651463, "grad_norm": 3.150799036026001, "learning_rate": 1.4673913043478264e-06, "loss": 1.2837, "step": 54 }, { "epoch": 0.037440435670524165, "grad_norm": 2.8189377784729004, "learning_rate": 1.4945652173913044e-06, "loss": 1.2326, "step": 55 }, { "epoch": 0.0381211708645337, "grad_norm": 2.9741382598876953, "learning_rate": 1.521739130434783e-06, "loss": 1.2358, "step": 56 }, { "epoch": 0.03880190605854323, "grad_norm": 2.9895858764648438, "learning_rate": 1.548913043478261e-06, "loss": 1.1583, "step": 57 }, { "epoch": 0.039482641252552755, "grad_norm": 2.9600908756256104, "learning_rate": 1.5760869565217394e-06, "loss": 1.2251, "step": 58 }, { "epoch": 0.04016337644656229, "grad_norm": 2.5965685844421387, "learning_rate": 1.6032608695652175e-06, "loss": 1.2123, "step": 59 }, { "epoch": 0.04084411164057182, "grad_norm": 2.625870704650879, "learning_rate": 1.6304347826086957e-06, "loss": 1.0647, "step": 60 }, { "epoch": 0.041524846834581346, "grad_norm": 2.636648654937744, "learning_rate": 1.657608695652174e-06, "loss": 1.1812, "step": 61 }, { "epoch": 0.04220558202859088, "grad_norm": 2.630424976348877, "learning_rate": 1.6847826086956522e-06, "loss": 1.2723, "step": 62 }, { "epoch": 0.04288631722260041, "grad_norm": 2.674956798553467, "learning_rate": 1.7119565217391307e-06, "loss": 1.1235, "step": 63 }, { "epoch": 0.043567052416609936, "grad_norm": 2.5320019721984863, "learning_rate": 1.7391304347826088e-06, "loss": 1.17, "step": 64 }, { "epoch": 0.04424778761061947, "grad_norm": 2.523031711578369, "learning_rate": 1.7663043478260872e-06, "loss": 1.1327, "step": 65 }, { "epoch": 0.044928522804629, "grad_norm": 2.382991313934326, "learning_rate": 1.7934782608695653e-06, "loss": 1.2053, "step": 66 }, { "epoch": 0.045609257998638526, "grad_norm": 2.2783236503601074, "learning_rate": 1.8206521739130437e-06, "loss": 1.2482, "step": 67 }, { "epoch": 0.04628999319264806, "grad_norm": 2.2704291343688965, "learning_rate": 1.8478260869565218e-06, "loss": 1.1244, "step": 68 }, { "epoch": 0.04697072838665759, "grad_norm": 2.587001085281372, "learning_rate": 1.8750000000000003e-06, "loss": 1.1549, "step": 69 }, { "epoch": 0.047651463580667124, "grad_norm": 2.34722900390625, "learning_rate": 1.9021739130434785e-06, "loss": 1.07, "step": 70 }, { "epoch": 0.04833219877467665, "grad_norm": 2.323404550552368, "learning_rate": 1.9293478260869568e-06, "loss": 1.2153, "step": 71 }, { "epoch": 0.04901293396868618, "grad_norm": 2.327934741973877, "learning_rate": 1.956521739130435e-06, "loss": 1.108, "step": 72 }, { "epoch": 0.049693669162695714, "grad_norm": 2.2196128368377686, "learning_rate": 1.9836956521739133e-06, "loss": 1.1551, "step": 73 }, { "epoch": 0.05037440435670524, "grad_norm": 2.3591485023498535, "learning_rate": 2.0108695652173916e-06, "loss": 1.1316, "step": 74 }, { "epoch": 0.05105513955071477, "grad_norm": 2.4810094833374023, "learning_rate": 2.03804347826087e-06, "loss": 1.1036, "step": 75 }, { "epoch": 0.051735874744724304, "grad_norm": 2.1599066257476807, "learning_rate": 2.065217391304348e-06, "loss": 1.1998, "step": 76 }, { "epoch": 0.05241660993873383, "grad_norm": 2.1836955547332764, "learning_rate": 2.0923913043478263e-06, "loss": 1.0745, "step": 77 }, { "epoch": 0.05309734513274336, "grad_norm": 2.286623239517212, "learning_rate": 2.1195652173913046e-06, "loss": 1.1183, "step": 78 }, { "epoch": 0.053778080326752895, "grad_norm": 2.384411334991455, "learning_rate": 2.146739130434783e-06, "loss": 1.1309, "step": 79 }, { "epoch": 0.05445881552076242, "grad_norm": 2.3381471633911133, "learning_rate": 2.173913043478261e-06, "loss": 1.2009, "step": 80 }, { "epoch": 0.05513955071477195, "grad_norm": 2.595384359359741, "learning_rate": 2.2010869565217394e-06, "loss": 1.1112, "step": 81 }, { "epoch": 0.055820285908781485, "grad_norm": 2.3774971961975098, "learning_rate": 2.2282608695652176e-06, "loss": 1.1727, "step": 82 }, { "epoch": 0.05650102110279102, "grad_norm": 2.420893907546997, "learning_rate": 2.255434782608696e-06, "loss": 1.1214, "step": 83 }, { "epoch": 0.05718175629680054, "grad_norm": 2.2953290939331055, "learning_rate": 2.282608695652174e-06, "loss": 1.0563, "step": 84 }, { "epoch": 0.057862491490810075, "grad_norm": 2.2939884662628174, "learning_rate": 2.3097826086956524e-06, "loss": 1.042, "step": 85 }, { "epoch": 0.05854322668481961, "grad_norm": 2.4668335914611816, "learning_rate": 2.3369565217391307e-06, "loss": 1.0501, "step": 86 }, { "epoch": 0.05922396187882913, "grad_norm": 2.469686508178711, "learning_rate": 2.364130434782609e-06, "loss": 1.0917, "step": 87 }, { "epoch": 0.059904697072838665, "grad_norm": 2.3787453174591064, "learning_rate": 2.391304347826087e-06, "loss": 1.0547, "step": 88 }, { "epoch": 0.0605854322668482, "grad_norm": 2.587209701538086, "learning_rate": 2.4184782608695654e-06, "loss": 1.0818, "step": 89 }, { "epoch": 0.06126616746085772, "grad_norm": 2.2854652404785156, "learning_rate": 2.4456521739130437e-06, "loss": 1.1739, "step": 90 }, { "epoch": 0.061946902654867256, "grad_norm": 2.8589603900909424, "learning_rate": 2.472826086956522e-06, "loss": 0.9723, "step": 91 }, { "epoch": 0.06262763784887679, "grad_norm": 2.122974157333374, "learning_rate": 2.5e-06, "loss": 1.2104, "step": 92 }, { "epoch": 0.06330837304288632, "grad_norm": 2.4499661922454834, "learning_rate": 2.5271739130434785e-06, "loss": 1.1321, "step": 93 }, { "epoch": 0.06398910823689585, "grad_norm": 2.4468698501586914, "learning_rate": 2.554347826086957e-06, "loss": 1.0609, "step": 94 }, { "epoch": 0.06466984343090537, "grad_norm": 2.3068535327911377, "learning_rate": 2.581521739130435e-06, "loss": 1.1105, "step": 95 }, { "epoch": 0.0653505786249149, "grad_norm": 2.2638657093048096, "learning_rate": 2.6086956521739132e-06, "loss": 1.0836, "step": 96 }, { "epoch": 0.06603131381892444, "grad_norm": 2.21193528175354, "learning_rate": 2.6358695652173915e-06, "loss": 1.1028, "step": 97 }, { "epoch": 0.06671204901293397, "grad_norm": 2.303368330001831, "learning_rate": 2.6630434782608698e-06, "loss": 1.0809, "step": 98 }, { "epoch": 0.0673927842069435, "grad_norm": 2.4292569160461426, "learning_rate": 2.6902173913043476e-06, "loss": 1.0396, "step": 99 }, { "epoch": 0.06807351940095303, "grad_norm": 2.445131778717041, "learning_rate": 2.7173913043478263e-06, "loss": 1.1032, "step": 100 }, { "epoch": 0.06875425459496257, "grad_norm": 2.4465198516845703, "learning_rate": 2.7445652173913045e-06, "loss": 1.0869, "step": 101 }, { "epoch": 0.06943498978897208, "grad_norm": 2.2061312198638916, "learning_rate": 2.771739130434783e-06, "loss": 1.1264, "step": 102 }, { "epoch": 0.07011572498298162, "grad_norm": 2.698472023010254, "learning_rate": 2.7989130434782615e-06, "loss": 1.0833, "step": 103 }, { "epoch": 0.07079646017699115, "grad_norm": 2.5737645626068115, "learning_rate": 2.8260869565217393e-06, "loss": 1.0335, "step": 104 }, { "epoch": 0.07147719537100068, "grad_norm": 2.3545587062835693, "learning_rate": 2.8532608695652176e-06, "loss": 1.0854, "step": 105 }, { "epoch": 0.07215793056501021, "grad_norm": 2.2886199951171875, "learning_rate": 2.880434782608696e-06, "loss": 1.0691, "step": 106 }, { "epoch": 0.07283866575901975, "grad_norm": 2.2058234214782715, "learning_rate": 2.9076086956521745e-06, "loss": 1.0899, "step": 107 }, { "epoch": 0.07351940095302927, "grad_norm": 2.3350367546081543, "learning_rate": 2.9347826086956528e-06, "loss": 1.1321, "step": 108 }, { "epoch": 0.0742001361470388, "grad_norm": 2.3059897422790527, "learning_rate": 2.9619565217391306e-06, "loss": 1.0836, "step": 109 }, { "epoch": 0.07488087134104833, "grad_norm": 2.134575366973877, "learning_rate": 2.989130434782609e-06, "loss": 1.0796, "step": 110 }, { "epoch": 0.07556160653505786, "grad_norm": 2.5286600589752197, "learning_rate": 3.016304347826087e-06, "loss": 0.9628, "step": 111 }, { "epoch": 0.0762423417290674, "grad_norm": 2.2128405570983887, "learning_rate": 3.043478260869566e-06, "loss": 1.1327, "step": 112 }, { "epoch": 0.07692307692307693, "grad_norm": 2.4016366004943848, "learning_rate": 3.0706521739130436e-06, "loss": 1.1076, "step": 113 }, { "epoch": 0.07760381211708646, "grad_norm": 2.5070478916168213, "learning_rate": 3.097826086956522e-06, "loss": 1.1239, "step": 114 }, { "epoch": 0.07828454731109598, "grad_norm": 2.6835837364196777, "learning_rate": 3.125e-06, "loss": 1.057, "step": 115 }, { "epoch": 0.07896528250510551, "grad_norm": 2.57804799079895, "learning_rate": 3.152173913043479e-06, "loss": 1.1033, "step": 116 }, { "epoch": 0.07964601769911504, "grad_norm": 2.1273112297058105, "learning_rate": 3.179347826086957e-06, "loss": 1.0804, "step": 117 }, { "epoch": 0.08032675289312458, "grad_norm": 2.4979422092437744, "learning_rate": 3.206521739130435e-06, "loss": 1.0249, "step": 118 }, { "epoch": 0.08100748808713411, "grad_norm": 2.4129951000213623, "learning_rate": 3.233695652173913e-06, "loss": 1.1411, "step": 119 }, { "epoch": 0.08168822328114364, "grad_norm": 2.3184964656829834, "learning_rate": 3.2608695652173914e-06, "loss": 1.0029, "step": 120 }, { "epoch": 0.08236895847515316, "grad_norm": 2.450953722000122, "learning_rate": 3.28804347826087e-06, "loss": 1.0642, "step": 121 }, { "epoch": 0.08304969366916269, "grad_norm": 2.343376874923706, "learning_rate": 3.315217391304348e-06, "loss": 1.0658, "step": 122 }, { "epoch": 0.08373042886317222, "grad_norm": 2.330251455307007, "learning_rate": 3.3423913043478262e-06, "loss": 1.1008, "step": 123 }, { "epoch": 0.08441116405718176, "grad_norm": 2.586716651916504, "learning_rate": 3.3695652173913045e-06, "loss": 1.0367, "step": 124 }, { "epoch": 0.08509189925119129, "grad_norm": 2.373642921447754, "learning_rate": 3.396739130434783e-06, "loss": 1.0397, "step": 125 }, { "epoch": 0.08577263444520082, "grad_norm": 2.1154682636260986, "learning_rate": 3.4239130434782614e-06, "loss": 1.1518, "step": 126 }, { "epoch": 0.08645336963921035, "grad_norm": 2.3378279209136963, "learning_rate": 3.4510869565217393e-06, "loss": 1.0468, "step": 127 }, { "epoch": 0.08713410483321987, "grad_norm": 2.3721272945404053, "learning_rate": 3.4782608695652175e-06, "loss": 1.1278, "step": 128 }, { "epoch": 0.0878148400272294, "grad_norm": 2.2576138973236084, "learning_rate": 3.5054347826086958e-06, "loss": 1.143, "step": 129 }, { "epoch": 0.08849557522123894, "grad_norm": 2.4028680324554443, "learning_rate": 3.5326086956521745e-06, "loss": 1.039, "step": 130 }, { "epoch": 0.08917631041524847, "grad_norm": 2.199537992477417, "learning_rate": 3.5597826086956527e-06, "loss": 1.1198, "step": 131 }, { "epoch": 0.089857045609258, "grad_norm": 2.422365665435791, "learning_rate": 3.5869565217391305e-06, "loss": 1.048, "step": 132 }, { "epoch": 0.09053778080326753, "grad_norm": 2.3881185054779053, "learning_rate": 3.614130434782609e-06, "loss": 1.1525, "step": 133 }, { "epoch": 0.09121851599727705, "grad_norm": 2.498486042022705, "learning_rate": 3.6413043478260875e-06, "loss": 1.0613, "step": 134 }, { "epoch": 0.09189925119128659, "grad_norm": 2.526881217956543, "learning_rate": 3.6684782608695657e-06, "loss": 1.009, "step": 135 }, { "epoch": 0.09257998638529612, "grad_norm": 2.4080402851104736, "learning_rate": 3.6956521739130436e-06, "loss": 1.0785, "step": 136 }, { "epoch": 0.09326072157930565, "grad_norm": 2.166621685028076, "learning_rate": 3.722826086956522e-06, "loss": 1.1058, "step": 137 }, { "epoch": 0.09394145677331518, "grad_norm": 2.3951592445373535, "learning_rate": 3.7500000000000005e-06, "loss": 1.0802, "step": 138 }, { "epoch": 0.09462219196732471, "grad_norm": 2.5259339809417725, "learning_rate": 3.7771739130434788e-06, "loss": 1.0378, "step": 139 }, { "epoch": 0.09530292716133425, "grad_norm": 2.3359711170196533, "learning_rate": 3.804347826086957e-06, "loss": 1.0538, "step": 140 }, { "epoch": 0.09598366235534377, "grad_norm": 2.2705836296081543, "learning_rate": 3.831521739130435e-06, "loss": 1.1674, "step": 141 }, { "epoch": 0.0966643975493533, "grad_norm": 2.239401340484619, "learning_rate": 3.8586956521739136e-06, "loss": 1.0999, "step": 142 }, { "epoch": 0.09734513274336283, "grad_norm": 2.392457962036133, "learning_rate": 3.885869565217392e-06, "loss": 1.0438, "step": 143 }, { "epoch": 0.09802586793737236, "grad_norm": 2.715057611465454, "learning_rate": 3.91304347826087e-06, "loss": 1.0576, "step": 144 }, { "epoch": 0.0987066031313819, "grad_norm": 2.645658254623413, "learning_rate": 3.9402173913043475e-06, "loss": 0.9874, "step": 145 }, { "epoch": 0.09938733832539143, "grad_norm": 2.6830410957336426, "learning_rate": 3.967391304347827e-06, "loss": 1.0376, "step": 146 }, { "epoch": 0.10006807351940095, "grad_norm": 2.3416192531585693, "learning_rate": 3.994565217391305e-06, "loss": 1.0579, "step": 147 }, { "epoch": 0.10074880871341048, "grad_norm": 2.3623571395874023, "learning_rate": 4.021739130434783e-06, "loss": 1.0584, "step": 148 }, { "epoch": 0.10142954390742001, "grad_norm": 2.3031437397003174, "learning_rate": 4.048913043478261e-06, "loss": 1.0078, "step": 149 }, { "epoch": 0.10211027910142954, "grad_norm": 2.5134778022766113, "learning_rate": 4.07608695652174e-06, "loss": 1.0161, "step": 150 }, { "epoch": 0.10279101429543908, "grad_norm": 2.6593782901763916, "learning_rate": 4.103260869565218e-06, "loss": 0.9906, "step": 151 }, { "epoch": 0.10347174948944861, "grad_norm": 2.5228047370910645, "learning_rate": 4.130434782608696e-06, "loss": 1.0308, "step": 152 }, { "epoch": 0.10415248468345814, "grad_norm": 2.686753034591675, "learning_rate": 4.157608695652174e-06, "loss": 1.0539, "step": 153 }, { "epoch": 0.10483321987746766, "grad_norm": 2.2955172061920166, "learning_rate": 4.184782608695653e-06, "loss": 1.0324, "step": 154 }, { "epoch": 0.10551395507147719, "grad_norm": 2.3064095973968506, "learning_rate": 4.211956521739131e-06, "loss": 1.0536, "step": 155 }, { "epoch": 0.10619469026548672, "grad_norm": 2.422003984451294, "learning_rate": 4.239130434782609e-06, "loss": 1.0353, "step": 156 }, { "epoch": 0.10687542545949626, "grad_norm": 2.490906000137329, "learning_rate": 4.2663043478260874e-06, "loss": 1.0875, "step": 157 }, { "epoch": 0.10755616065350579, "grad_norm": 2.420973300933838, "learning_rate": 4.293478260869566e-06, "loss": 1.083, "step": 158 }, { "epoch": 0.10823689584751532, "grad_norm": 2.4357595443725586, "learning_rate": 4.320652173913044e-06, "loss": 1.062, "step": 159 }, { "epoch": 0.10891763104152484, "grad_norm": 2.4009718894958496, "learning_rate": 4.347826086956522e-06, "loss": 1.0733, "step": 160 }, { "epoch": 0.10959836623553437, "grad_norm": 2.2068700790405273, "learning_rate": 4.3750000000000005e-06, "loss": 1.1833, "step": 161 }, { "epoch": 0.1102791014295439, "grad_norm": 2.5307655334472656, "learning_rate": 4.402173913043479e-06, "loss": 1.0329, "step": 162 }, { "epoch": 0.11095983662355344, "grad_norm": 2.566969394683838, "learning_rate": 4.429347826086957e-06, "loss": 1.0002, "step": 163 }, { "epoch": 0.11164057181756297, "grad_norm": 2.18622088432312, "learning_rate": 4.456521739130435e-06, "loss": 1.1368, "step": 164 }, { "epoch": 0.1123213070115725, "grad_norm": 2.2748265266418457, "learning_rate": 4.4836956521739135e-06, "loss": 1.0939, "step": 165 }, { "epoch": 0.11300204220558203, "grad_norm": 2.2752645015716553, "learning_rate": 4.510869565217392e-06, "loss": 0.9737, "step": 166 }, { "epoch": 0.11368277739959155, "grad_norm": 2.36954927444458, "learning_rate": 4.53804347826087e-06, "loss": 1.0542, "step": 167 }, { "epoch": 0.11436351259360109, "grad_norm": 2.3275344371795654, "learning_rate": 4.565217391304348e-06, "loss": 1.0915, "step": 168 }, { "epoch": 0.11504424778761062, "grad_norm": 2.4562489986419678, "learning_rate": 4.5923913043478265e-06, "loss": 1.0187, "step": 169 }, { "epoch": 0.11572498298162015, "grad_norm": 2.7282819747924805, "learning_rate": 4.619565217391305e-06, "loss": 0.9729, "step": 170 }, { "epoch": 0.11640571817562968, "grad_norm": 2.417023181915283, "learning_rate": 4.646739130434783e-06, "loss": 1.034, "step": 171 }, { "epoch": 0.11708645336963922, "grad_norm": 2.2387776374816895, "learning_rate": 4.673913043478261e-06, "loss": 1.0122, "step": 172 }, { "epoch": 0.11776718856364875, "grad_norm": 2.427194595336914, "learning_rate": 4.7010869565217396e-06, "loss": 0.9789, "step": 173 }, { "epoch": 0.11844792375765827, "grad_norm": 2.3564376831054688, "learning_rate": 4.728260869565218e-06, "loss": 1.0661, "step": 174 }, { "epoch": 0.1191286589516678, "grad_norm": 2.7351183891296387, "learning_rate": 4.755434782608696e-06, "loss": 0.9994, "step": 175 }, { "epoch": 0.11980939414567733, "grad_norm": 2.517888307571411, "learning_rate": 4.782608695652174e-06, "loss": 1.1254, "step": 176 }, { "epoch": 0.12049012933968686, "grad_norm": 2.440819501876831, "learning_rate": 4.809782608695653e-06, "loss": 1.0972, "step": 177 }, { "epoch": 0.1211708645336964, "grad_norm": 2.22603178024292, "learning_rate": 4.836956521739131e-06, "loss": 1.0747, "step": 178 }, { "epoch": 0.12185159972770593, "grad_norm": 2.369175672531128, "learning_rate": 4.864130434782609e-06, "loss": 1.1028, "step": 179 }, { "epoch": 0.12253233492171545, "grad_norm": 2.6357884407043457, "learning_rate": 4.891304347826087e-06, "loss": 1.0381, "step": 180 }, { "epoch": 0.12321307011572498, "grad_norm": 2.2001607418060303, "learning_rate": 4.918478260869566e-06, "loss": 1.0773, "step": 181 }, { "epoch": 0.12389380530973451, "grad_norm": 2.1721177101135254, "learning_rate": 4.945652173913044e-06, "loss": 1.1096, "step": 182 }, { "epoch": 0.12457454050374404, "grad_norm": 2.4069325923919678, "learning_rate": 4.972826086956522e-06, "loss": 1.0206, "step": 183 }, { "epoch": 0.12525527569775358, "grad_norm": 2.3059194087982178, "learning_rate": 5e-06, "loss": 1.0229, "step": 184 }, { "epoch": 0.1259360108917631, "grad_norm": 2.3728904724121094, "learning_rate": 5.027173913043478e-06, "loss": 1.0852, "step": 185 }, { "epoch": 0.12661674608577264, "grad_norm": 2.204096794128418, "learning_rate": 5.054347826086957e-06, "loss": 1.052, "step": 186 }, { "epoch": 0.12729748127978216, "grad_norm": 2.471513032913208, "learning_rate": 5.081521739130435e-06, "loss": 0.9951, "step": 187 }, { "epoch": 0.1279782164737917, "grad_norm": 2.1701419353485107, "learning_rate": 5.108695652173914e-06, "loss": 1.1385, "step": 188 }, { "epoch": 0.12865895166780122, "grad_norm": 2.603036880493164, "learning_rate": 5.135869565217392e-06, "loss": 1.0173, "step": 189 }, { "epoch": 0.12933968686181074, "grad_norm": 2.3770251274108887, "learning_rate": 5.16304347826087e-06, "loss": 1.0414, "step": 190 }, { "epoch": 0.1300204220558203, "grad_norm": 2.414656639099121, "learning_rate": 5.190217391304348e-06, "loss": 1.058, "step": 191 }, { "epoch": 0.1307011572498298, "grad_norm": 2.1379714012145996, "learning_rate": 5.2173913043478265e-06, "loss": 1.0922, "step": 192 }, { "epoch": 0.13138189244383935, "grad_norm": 2.2891619205474854, "learning_rate": 5.244565217391306e-06, "loss": 1.1478, "step": 193 }, { "epoch": 0.13206262763784887, "grad_norm": 2.4182350635528564, "learning_rate": 5.271739130434783e-06, "loss": 1.0677, "step": 194 }, { "epoch": 0.13274336283185842, "grad_norm": 2.55892276763916, "learning_rate": 5.298913043478261e-06, "loss": 1.0257, "step": 195 }, { "epoch": 0.13342409802586794, "grad_norm": 2.3760218620300293, "learning_rate": 5.3260869565217395e-06, "loss": 1.0796, "step": 196 }, { "epoch": 0.13410483321987746, "grad_norm": 2.4523468017578125, "learning_rate": 5.353260869565218e-06, "loss": 1.052, "step": 197 }, { "epoch": 0.134785568413887, "grad_norm": 2.3198702335357666, "learning_rate": 5.380434782608695e-06, "loss": 1.0235, "step": 198 }, { "epoch": 0.13546630360789652, "grad_norm": 2.424988031387329, "learning_rate": 5.407608695652174e-06, "loss": 1.0731, "step": 199 }, { "epoch": 0.13614703880190607, "grad_norm": 2.549807548522949, "learning_rate": 5.4347826086956525e-06, "loss": 1.13, "step": 200 }, { "epoch": 0.13682777399591559, "grad_norm": 2.333083391189575, "learning_rate": 5.461956521739132e-06, "loss": 1.1224, "step": 201 }, { "epoch": 0.13750850918992513, "grad_norm": 2.577021837234497, "learning_rate": 5.489130434782609e-06, "loss": 1.0058, "step": 202 }, { "epoch": 0.13818924438393465, "grad_norm": 2.5863170623779297, "learning_rate": 5.516304347826087e-06, "loss": 1.0956, "step": 203 }, { "epoch": 0.13886997957794417, "grad_norm": 2.5165910720825195, "learning_rate": 5.543478260869566e-06, "loss": 0.961, "step": 204 }, { "epoch": 0.13955071477195372, "grad_norm": 2.399223804473877, "learning_rate": 5.570652173913044e-06, "loss": 1.0371, "step": 205 }, { "epoch": 0.14023144996596323, "grad_norm": 2.313145875930786, "learning_rate": 5.597826086956523e-06, "loss": 1.0568, "step": 206 }, { "epoch": 0.14091218515997278, "grad_norm": 2.6645545959472656, "learning_rate": 5.625e-06, "loss": 1.0576, "step": 207 }, { "epoch": 0.1415929203539823, "grad_norm": 2.4592907428741455, "learning_rate": 5.652173913043479e-06, "loss": 1.0579, "step": 208 }, { "epoch": 0.14227365554799182, "grad_norm": 2.268498182296753, "learning_rate": 5.679347826086957e-06, "loss": 0.9424, "step": 209 }, { "epoch": 0.14295439074200136, "grad_norm": 2.548046112060547, "learning_rate": 5.706521739130435e-06, "loss": 1.0495, "step": 210 }, { "epoch": 0.14363512593601088, "grad_norm": 2.396383047103882, "learning_rate": 5.733695652173914e-06, "loss": 1.1334, "step": 211 }, { "epoch": 0.14431586113002043, "grad_norm": 2.5217361450195312, "learning_rate": 5.760869565217392e-06, "loss": 1.1308, "step": 212 }, { "epoch": 0.14499659632402995, "grad_norm": 2.2625479698181152, "learning_rate": 5.78804347826087e-06, "loss": 1.0917, "step": 213 }, { "epoch": 0.1456773315180395, "grad_norm": 2.5831873416900635, "learning_rate": 5.815217391304349e-06, "loss": 1.0577, "step": 214 }, { "epoch": 0.146358066712049, "grad_norm": 2.4478628635406494, "learning_rate": 5.842391304347826e-06, "loss": 0.9645, "step": 215 }, { "epoch": 0.14703880190605853, "grad_norm": 2.3501839637756348, "learning_rate": 5.8695652173913055e-06, "loss": 1.1043, "step": 216 }, { "epoch": 0.14771953710006808, "grad_norm": 2.377361297607422, "learning_rate": 5.896739130434783e-06, "loss": 1.0465, "step": 217 }, { "epoch": 0.1484002722940776, "grad_norm": 2.3686490058898926, "learning_rate": 5.923913043478261e-06, "loss": 0.9984, "step": 218 }, { "epoch": 0.14908100748808714, "grad_norm": 2.748157501220703, "learning_rate": 5.95108695652174e-06, "loss": 1.0025, "step": 219 }, { "epoch": 0.14976174268209666, "grad_norm": 2.3950247764587402, "learning_rate": 5.978260869565218e-06, "loss": 1.0299, "step": 220 }, { "epoch": 0.1504424778761062, "grad_norm": 2.2241857051849365, "learning_rate": 6.005434782608696e-06, "loss": 0.9978, "step": 221 }, { "epoch": 0.15112321307011573, "grad_norm": 2.304382801055908, "learning_rate": 6.032608695652174e-06, "loss": 1.0194, "step": 222 }, { "epoch": 0.15180394826412524, "grad_norm": 2.655785083770752, "learning_rate": 6.0597826086956525e-06, "loss": 1.0516, "step": 223 }, { "epoch": 0.1524846834581348, "grad_norm": 2.2875590324401855, "learning_rate": 6.086956521739132e-06, "loss": 1.0026, "step": 224 }, { "epoch": 0.1531654186521443, "grad_norm": 2.6040761470794678, "learning_rate": 6.114130434782609e-06, "loss": 1.0223, "step": 225 }, { "epoch": 0.15384615384615385, "grad_norm": 2.088196277618408, "learning_rate": 6.141304347826087e-06, "loss": 1.1325, "step": 226 }, { "epoch": 0.15452688904016337, "grad_norm": 2.1001176834106445, "learning_rate": 6.1684782608695655e-06, "loss": 1.0686, "step": 227 }, { "epoch": 0.15520762423417292, "grad_norm": 2.237783432006836, "learning_rate": 6.195652173913044e-06, "loss": 1.0105, "step": 228 }, { "epoch": 0.15588835942818244, "grad_norm": 2.2608025074005127, "learning_rate": 6.222826086956523e-06, "loss": 1.122, "step": 229 }, { "epoch": 0.15656909462219196, "grad_norm": 2.596219539642334, "learning_rate": 6.25e-06, "loss": 1.0051, "step": 230 }, { "epoch": 0.1572498298162015, "grad_norm": 2.2671215534210205, "learning_rate": 6.2771739130434786e-06, "loss": 1.0701, "step": 231 }, { "epoch": 0.15793056501021102, "grad_norm": 2.1990771293640137, "learning_rate": 6.304347826086958e-06, "loss": 1.102, "step": 232 }, { "epoch": 0.15861130020422057, "grad_norm": 2.514075994491577, "learning_rate": 6.331521739130435e-06, "loss": 0.9899, "step": 233 }, { "epoch": 0.1592920353982301, "grad_norm": 2.332288980484009, "learning_rate": 6.358695652173914e-06, "loss": 0.9979, "step": 234 }, { "epoch": 0.15997277059223963, "grad_norm": 2.714240550994873, "learning_rate": 6.385869565217392e-06, "loss": 1.1158, "step": 235 }, { "epoch": 0.16065350578624915, "grad_norm": 2.7420296669006348, "learning_rate": 6.41304347826087e-06, "loss": 1.0198, "step": 236 }, { "epoch": 0.16133424098025867, "grad_norm": 2.5333967208862305, "learning_rate": 6.440217391304349e-06, "loss": 1.0508, "step": 237 }, { "epoch": 0.16201497617426822, "grad_norm": 2.2713186740875244, "learning_rate": 6.467391304347826e-06, "loss": 1.0897, "step": 238 }, { "epoch": 0.16269571136827773, "grad_norm": 2.5154035091400146, "learning_rate": 6.4945652173913055e-06, "loss": 1.1154, "step": 239 }, { "epoch": 0.16337644656228728, "grad_norm": 2.5951755046844482, "learning_rate": 6.521739130434783e-06, "loss": 0.9581, "step": 240 }, { "epoch": 0.1640571817562968, "grad_norm": 2.244513750076294, "learning_rate": 6.548913043478261e-06, "loss": 1.1295, "step": 241 }, { "epoch": 0.16473791695030632, "grad_norm": 2.6814210414886475, "learning_rate": 6.57608695652174e-06, "loss": 0.9459, "step": 242 }, { "epoch": 0.16541865214431586, "grad_norm": 2.475835084915161, "learning_rate": 6.603260869565218e-06, "loss": 0.9141, "step": 243 }, { "epoch": 0.16609938733832538, "grad_norm": 2.8717780113220215, "learning_rate": 6.630434782608696e-06, "loss": 1.0297, "step": 244 }, { "epoch": 0.16678012253233493, "grad_norm": 2.6751208305358887, "learning_rate": 6.657608695652175e-06, "loss": 1.0773, "step": 245 }, { "epoch": 0.16746085772634445, "grad_norm": 2.6523423194885254, "learning_rate": 6.6847826086956524e-06, "loss": 1.0095, "step": 246 }, { "epoch": 0.168141592920354, "grad_norm": 2.2531895637512207, "learning_rate": 6.7119565217391315e-06, "loss": 1.07, "step": 247 }, { "epoch": 0.1688223281143635, "grad_norm": 2.4043781757354736, "learning_rate": 6.739130434782609e-06, "loss": 1.0121, "step": 248 }, { "epoch": 0.16950306330837303, "grad_norm": 2.3339803218841553, "learning_rate": 6.766304347826087e-06, "loss": 0.9964, "step": 249 }, { "epoch": 0.17018379850238258, "grad_norm": 2.681110143661499, "learning_rate": 6.793478260869566e-06, "loss": 0.9879, "step": 250 }, { "epoch": 0.1708645336963921, "grad_norm": 2.3274896144866943, "learning_rate": 6.820652173913044e-06, "loss": 0.9744, "step": 251 }, { "epoch": 0.17154526889040164, "grad_norm": 2.4216065406799316, "learning_rate": 6.847826086956523e-06, "loss": 1.0678, "step": 252 }, { "epoch": 0.17222600408441116, "grad_norm": 2.269141674041748, "learning_rate": 6.875e-06, "loss": 1.0901, "step": 253 }, { "epoch": 0.1729067392784207, "grad_norm": 2.443155288696289, "learning_rate": 6.9021739130434785e-06, "loss": 1.0218, "step": 254 }, { "epoch": 0.17358747447243023, "grad_norm": 2.501671314239502, "learning_rate": 6.929347826086958e-06, "loss": 0.9766, "step": 255 }, { "epoch": 0.17426820966643974, "grad_norm": 2.2378690242767334, "learning_rate": 6.956521739130435e-06, "loss": 1.1733, "step": 256 }, { "epoch": 0.1749489448604493, "grad_norm": 2.3927643299102783, "learning_rate": 6.983695652173914e-06, "loss": 0.9664, "step": 257 }, { "epoch": 0.1756296800544588, "grad_norm": 2.4229676723480225, "learning_rate": 7.0108695652173915e-06, "loss": 1.0387, "step": 258 }, { "epoch": 0.17631041524846836, "grad_norm": 2.6655898094177246, "learning_rate": 7.03804347826087e-06, "loss": 1.0524, "step": 259 }, { "epoch": 0.17699115044247787, "grad_norm": 2.40675687789917, "learning_rate": 7.065217391304349e-06, "loss": 1.0596, "step": 260 }, { "epoch": 0.17767188563648742, "grad_norm": 2.3983304500579834, "learning_rate": 7.092391304347826e-06, "loss": 1.0892, "step": 261 }, { "epoch": 0.17835262083049694, "grad_norm": 2.6269028186798096, "learning_rate": 7.119565217391305e-06, "loss": 0.9519, "step": 262 }, { "epoch": 0.17903335602450646, "grad_norm": 2.4628236293792725, "learning_rate": 7.146739130434784e-06, "loss": 1.0363, "step": 263 }, { "epoch": 0.179714091218516, "grad_norm": 2.551271438598633, "learning_rate": 7.173913043478261e-06, "loss": 0.9942, "step": 264 }, { "epoch": 0.18039482641252552, "grad_norm": 2.2620327472686768, "learning_rate": 7.20108695652174e-06, "loss": 1.0616, "step": 265 }, { "epoch": 0.18107556160653507, "grad_norm": 2.3769941329956055, "learning_rate": 7.228260869565218e-06, "loss": 1.0149, "step": 266 }, { "epoch": 0.1817562968005446, "grad_norm": 2.340247392654419, "learning_rate": 7.255434782608696e-06, "loss": 0.977, "step": 267 }, { "epoch": 0.1824370319945541, "grad_norm": 2.2794764041900635, "learning_rate": 7.282608695652175e-06, "loss": 1.0535, "step": 268 }, { "epoch": 0.18311776718856365, "grad_norm": 2.3733646869659424, "learning_rate": 7.309782608695652e-06, "loss": 1.1305, "step": 269 }, { "epoch": 0.18379850238257317, "grad_norm": 2.587172746658325, "learning_rate": 7.3369565217391315e-06, "loss": 1.0141, "step": 270 }, { "epoch": 0.18447923757658272, "grad_norm": 2.3849103450775146, "learning_rate": 7.364130434782609e-06, "loss": 1.0663, "step": 271 }, { "epoch": 0.18515997277059223, "grad_norm": 2.4138333797454834, "learning_rate": 7.391304347826087e-06, "loss": 0.9908, "step": 272 }, { "epoch": 0.18584070796460178, "grad_norm": 2.324575185775757, "learning_rate": 7.418478260869566e-06, "loss": 1.0967, "step": 273 }, { "epoch": 0.1865214431586113, "grad_norm": 2.590644359588623, "learning_rate": 7.445652173913044e-06, "loss": 1.0675, "step": 274 }, { "epoch": 0.18720217835262082, "grad_norm": 2.3226735591888428, "learning_rate": 7.472826086956523e-06, "loss": 1.0557, "step": 275 }, { "epoch": 0.18788291354663036, "grad_norm": 2.1490490436553955, "learning_rate": 7.500000000000001e-06, "loss": 1.0418, "step": 276 }, { "epoch": 0.18856364874063988, "grad_norm": 2.5482070446014404, "learning_rate": 7.5271739130434784e-06, "loss": 1.0896, "step": 277 }, { "epoch": 0.18924438393464943, "grad_norm": 2.212069034576416, "learning_rate": 7.5543478260869576e-06, "loss": 1.0235, "step": 278 }, { "epoch": 0.18992511912865895, "grad_norm": 2.19982647895813, "learning_rate": 7.581521739130435e-06, "loss": 1.1141, "step": 279 }, { "epoch": 0.1906058543226685, "grad_norm": 2.011047840118408, "learning_rate": 7.608695652173914e-06, "loss": 1.1013, "step": 280 }, { "epoch": 0.191286589516678, "grad_norm": 2.2568576335906982, "learning_rate": 7.635869565217392e-06, "loss": 1.0402, "step": 281 }, { "epoch": 0.19196732471068753, "grad_norm": 2.1944785118103027, "learning_rate": 7.66304347826087e-06, "loss": 1.0903, "step": 282 }, { "epoch": 0.19264805990469708, "grad_norm": 2.489993095397949, "learning_rate": 7.690217391304349e-06, "loss": 0.9553, "step": 283 }, { "epoch": 0.1933287950987066, "grad_norm": 2.615896224975586, "learning_rate": 7.717391304347827e-06, "loss": 1.0571, "step": 284 }, { "epoch": 0.19400953029271614, "grad_norm": 2.410660743713379, "learning_rate": 7.744565217391305e-06, "loss": 1.1298, "step": 285 }, { "epoch": 0.19469026548672566, "grad_norm": 2.3664093017578125, "learning_rate": 7.771739130434784e-06, "loss": 1.0136, "step": 286 }, { "epoch": 0.1953710006807352, "grad_norm": 2.5400993824005127, "learning_rate": 7.798913043478262e-06, "loss": 1.0384, "step": 287 }, { "epoch": 0.19605173587474473, "grad_norm": 2.5852315425872803, "learning_rate": 7.82608695652174e-06, "loss": 1.0167, "step": 288 }, { "epoch": 0.19673247106875424, "grad_norm": 2.4094398021698, "learning_rate": 7.853260869565218e-06, "loss": 0.9932, "step": 289 }, { "epoch": 0.1974132062627638, "grad_norm": 2.824359655380249, "learning_rate": 7.880434782608695e-06, "loss": 1.0288, "step": 290 }, { "epoch": 0.1980939414567733, "grad_norm": 2.4488449096679688, "learning_rate": 7.907608695652175e-06, "loss": 1.129, "step": 291 }, { "epoch": 0.19877467665078286, "grad_norm": 2.584688663482666, "learning_rate": 7.934782608695653e-06, "loss": 1.0475, "step": 292 }, { "epoch": 0.19945541184479237, "grad_norm": 2.4023141860961914, "learning_rate": 7.961956521739131e-06, "loss": 1.0623, "step": 293 }, { "epoch": 0.2001361470388019, "grad_norm": 2.42437744140625, "learning_rate": 7.98913043478261e-06, "loss": 0.9623, "step": 294 }, { "epoch": 0.20081688223281144, "grad_norm": 2.6083831787109375, "learning_rate": 8.016304347826088e-06, "loss": 1.0018, "step": 295 }, { "epoch": 0.20149761742682096, "grad_norm": 2.62176513671875, "learning_rate": 8.043478260869566e-06, "loss": 1.0035, "step": 296 }, { "epoch": 0.2021783526208305, "grad_norm": 2.2258503437042236, "learning_rate": 8.070652173913044e-06, "loss": 1.0509, "step": 297 }, { "epoch": 0.20285908781484002, "grad_norm": 2.623220205307007, "learning_rate": 8.097826086956523e-06, "loss": 1.0801, "step": 298 }, { "epoch": 0.20353982300884957, "grad_norm": 2.485987424850464, "learning_rate": 8.125000000000001e-06, "loss": 1.0253, "step": 299 }, { "epoch": 0.2042205582028591, "grad_norm": 2.6101903915405273, "learning_rate": 8.15217391304348e-06, "loss": 1.0076, "step": 300 }, { "epoch": 0.2049012933968686, "grad_norm": 2.281885862350464, "learning_rate": 8.179347826086957e-06, "loss": 1.0733, "step": 301 }, { "epoch": 0.20558202859087815, "grad_norm": 2.19132661819458, "learning_rate": 8.206521739130436e-06, "loss": 1.1109, "step": 302 }, { "epoch": 0.20626276378488767, "grad_norm": 2.412275791168213, "learning_rate": 8.233695652173914e-06, "loss": 1.0347, "step": 303 }, { "epoch": 0.20694349897889722, "grad_norm": 2.2232069969177246, "learning_rate": 8.260869565217392e-06, "loss": 1.024, "step": 304 }, { "epoch": 0.20762423417290674, "grad_norm": 2.5764968395233154, "learning_rate": 8.28804347826087e-06, "loss": 1.0004, "step": 305 }, { "epoch": 0.20830496936691628, "grad_norm": 2.301159381866455, "learning_rate": 8.315217391304349e-06, "loss": 1.0348, "step": 306 }, { "epoch": 0.2089857045609258, "grad_norm": 2.2480227947235107, "learning_rate": 8.342391304347827e-06, "loss": 1.1306, "step": 307 }, { "epoch": 0.20966643975493532, "grad_norm": 2.372671365737915, "learning_rate": 8.369565217391305e-06, "loss": 0.9631, "step": 308 }, { "epoch": 0.21034717494894487, "grad_norm": 2.6320180892944336, "learning_rate": 8.396739130434784e-06, "loss": 1.0775, "step": 309 }, { "epoch": 0.21102791014295438, "grad_norm": 2.3166544437408447, "learning_rate": 8.423913043478262e-06, "loss": 1.0288, "step": 310 }, { "epoch": 0.21170864533696393, "grad_norm": 2.5941920280456543, "learning_rate": 8.45108695652174e-06, "loss": 1.0933, "step": 311 }, { "epoch": 0.21238938053097345, "grad_norm": 2.4642257690429688, "learning_rate": 8.478260869565218e-06, "loss": 0.9358, "step": 312 }, { "epoch": 0.213070115724983, "grad_norm": 2.38065505027771, "learning_rate": 8.505434782608697e-06, "loss": 0.9913, "step": 313 }, { "epoch": 0.2137508509189925, "grad_norm": 2.3824048042297363, "learning_rate": 8.532608695652175e-06, "loss": 1.0414, "step": 314 }, { "epoch": 0.21443158611300203, "grad_norm": 2.3917298316955566, "learning_rate": 8.559782608695653e-06, "loss": 1.0177, "step": 315 }, { "epoch": 0.21511232130701158, "grad_norm": 2.8381402492523193, "learning_rate": 8.586956521739131e-06, "loss": 1.0902, "step": 316 }, { "epoch": 0.2157930565010211, "grad_norm": 2.685175895690918, "learning_rate": 8.61413043478261e-06, "loss": 0.9894, "step": 317 }, { "epoch": 0.21647379169503064, "grad_norm": 2.4474854469299316, "learning_rate": 8.641304347826088e-06, "loss": 1.011, "step": 318 }, { "epoch": 0.21715452688904016, "grad_norm": 2.538790464401245, "learning_rate": 8.668478260869566e-06, "loss": 1.0703, "step": 319 }, { "epoch": 0.21783526208304968, "grad_norm": 2.2185704708099365, "learning_rate": 8.695652173913044e-06, "loss": 1.0804, "step": 320 }, { "epoch": 0.21851599727705923, "grad_norm": 2.628652572631836, "learning_rate": 8.722826086956523e-06, "loss": 1.0419, "step": 321 }, { "epoch": 0.21919673247106874, "grad_norm": 2.8277769088745117, "learning_rate": 8.750000000000001e-06, "loss": 0.988, "step": 322 }, { "epoch": 0.2198774676650783, "grad_norm": 2.282291889190674, "learning_rate": 8.77717391304348e-06, "loss": 1.063, "step": 323 }, { "epoch": 0.2205582028590878, "grad_norm": 2.4630186557769775, "learning_rate": 8.804347826086957e-06, "loss": 1.0541, "step": 324 }, { "epoch": 0.22123893805309736, "grad_norm": 2.669428825378418, "learning_rate": 8.831521739130436e-06, "loss": 1.0328, "step": 325 }, { "epoch": 0.22191967324710687, "grad_norm": 2.56377911567688, "learning_rate": 8.858695652173914e-06, "loss": 1.0111, "step": 326 }, { "epoch": 0.2226004084411164, "grad_norm": 2.2539968490600586, "learning_rate": 8.885869565217392e-06, "loss": 1.0469, "step": 327 }, { "epoch": 0.22328114363512594, "grad_norm": 2.708238363265991, "learning_rate": 8.91304347826087e-06, "loss": 0.9347, "step": 328 }, { "epoch": 0.22396187882913546, "grad_norm": 2.1013259887695312, "learning_rate": 8.940217391304349e-06, "loss": 1.0804, "step": 329 }, { "epoch": 0.224642614023145, "grad_norm": 2.3452155590057373, "learning_rate": 8.967391304347827e-06, "loss": 1.0549, "step": 330 }, { "epoch": 0.22532334921715452, "grad_norm": 2.259608030319214, "learning_rate": 8.994565217391305e-06, "loss": 1.0703, "step": 331 }, { "epoch": 0.22600408441116407, "grad_norm": 2.4531896114349365, "learning_rate": 9.021739130434784e-06, "loss": 1.0676, "step": 332 }, { "epoch": 0.2266848196051736, "grad_norm": 2.3896090984344482, "learning_rate": 9.048913043478262e-06, "loss": 1.0483, "step": 333 }, { "epoch": 0.2273655547991831, "grad_norm": 2.256312370300293, "learning_rate": 9.07608695652174e-06, "loss": 1.1066, "step": 334 }, { "epoch": 0.22804628999319265, "grad_norm": 2.203258752822876, "learning_rate": 9.103260869565218e-06, "loss": 0.9917, "step": 335 }, { "epoch": 0.22872702518720217, "grad_norm": 2.227479934692383, "learning_rate": 9.130434782608697e-06, "loss": 1.0127, "step": 336 }, { "epoch": 0.22940776038121172, "grad_norm": 2.6632421016693115, "learning_rate": 9.157608695652175e-06, "loss": 1.1111, "step": 337 }, { "epoch": 0.23008849557522124, "grad_norm": 2.375009298324585, "learning_rate": 9.184782608695653e-06, "loss": 0.9996, "step": 338 }, { "epoch": 0.23076923076923078, "grad_norm": 2.8797881603240967, "learning_rate": 9.211956521739131e-06, "loss": 1.024, "step": 339 }, { "epoch": 0.2314499659632403, "grad_norm": 2.620755434036255, "learning_rate": 9.23913043478261e-06, "loss": 0.9929, "step": 340 }, { "epoch": 0.23213070115724982, "grad_norm": 2.6765706539154053, "learning_rate": 9.266304347826088e-06, "loss": 1.092, "step": 341 }, { "epoch": 0.23281143635125937, "grad_norm": 2.296424627304077, "learning_rate": 9.293478260869566e-06, "loss": 1.0924, "step": 342 }, { "epoch": 0.23349217154526888, "grad_norm": 2.2380383014678955, "learning_rate": 9.320652173913044e-06, "loss": 1.0518, "step": 343 }, { "epoch": 0.23417290673927843, "grad_norm": 2.8272461891174316, "learning_rate": 9.347826086956523e-06, "loss": 0.9892, "step": 344 }, { "epoch": 0.23485364193328795, "grad_norm": 2.496649980545044, "learning_rate": 9.375000000000001e-06, "loss": 0.9624, "step": 345 }, { "epoch": 0.2355343771272975, "grad_norm": 2.3250906467437744, "learning_rate": 9.402173913043479e-06, "loss": 1.1158, "step": 346 }, { "epoch": 0.236215112321307, "grad_norm": 2.348468780517578, "learning_rate": 9.429347826086957e-06, "loss": 1.0393, "step": 347 }, { "epoch": 0.23689584751531653, "grad_norm": 2.421215772628784, "learning_rate": 9.456521739130436e-06, "loss": 1.0394, "step": 348 }, { "epoch": 0.23757658270932608, "grad_norm": 2.442814588546753, "learning_rate": 9.483695652173914e-06, "loss": 1.0346, "step": 349 }, { "epoch": 0.2382573179033356, "grad_norm": 2.1602420806884766, "learning_rate": 9.510869565217392e-06, "loss": 1.0265, "step": 350 }, { "epoch": 0.23893805309734514, "grad_norm": 2.350445032119751, "learning_rate": 9.53804347826087e-06, "loss": 1.0837, "step": 351 }, { "epoch": 0.23961878829135466, "grad_norm": 2.1436750888824463, "learning_rate": 9.565217391304349e-06, "loss": 0.9841, "step": 352 }, { "epoch": 0.24029952348536418, "grad_norm": 2.2978482246398926, "learning_rate": 9.592391304347827e-06, "loss": 1.0652, "step": 353 }, { "epoch": 0.24098025867937373, "grad_norm": 2.407144546508789, "learning_rate": 9.619565217391305e-06, "loss": 0.9848, "step": 354 }, { "epoch": 0.24166099387338325, "grad_norm": 2.457070827484131, "learning_rate": 9.646739130434783e-06, "loss": 1.0346, "step": 355 }, { "epoch": 0.2423417290673928, "grad_norm": 2.3382568359375, "learning_rate": 9.673913043478262e-06, "loss": 1.0038, "step": 356 }, { "epoch": 0.2430224642614023, "grad_norm": 2.326432466506958, "learning_rate": 9.70108695652174e-06, "loss": 1.0237, "step": 357 }, { "epoch": 0.24370319945541186, "grad_norm": 2.4134767055511475, "learning_rate": 9.728260869565218e-06, "loss": 1.0229, "step": 358 }, { "epoch": 0.24438393464942137, "grad_norm": 2.4790549278259277, "learning_rate": 9.755434782608696e-06, "loss": 1.1242, "step": 359 }, { "epoch": 0.2450646698434309, "grad_norm": 2.08585786819458, "learning_rate": 9.782608695652175e-06, "loss": 1.1403, "step": 360 }, { "epoch": 0.24574540503744044, "grad_norm": 2.34435772895813, "learning_rate": 9.809782608695653e-06, "loss": 1.0712, "step": 361 }, { "epoch": 0.24642614023144996, "grad_norm": 2.4203248023986816, "learning_rate": 9.836956521739131e-06, "loss": 0.9823, "step": 362 }, { "epoch": 0.2471068754254595, "grad_norm": 2.526815176010132, "learning_rate": 9.86413043478261e-06, "loss": 1.0508, "step": 363 }, { "epoch": 0.24778761061946902, "grad_norm": 2.542679786682129, "learning_rate": 9.891304347826088e-06, "loss": 1.0068, "step": 364 }, { "epoch": 0.24846834581347857, "grad_norm": 2.596121072769165, "learning_rate": 9.918478260869566e-06, "loss": 1.0397, "step": 365 }, { "epoch": 0.2491490810074881, "grad_norm": 2.5471556186676025, "learning_rate": 9.945652173913044e-06, "loss": 0.9416, "step": 366 }, { "epoch": 0.2498298162014976, "grad_norm": 2.6176846027374268, "learning_rate": 9.972826086956523e-06, "loss": 1.0662, "step": 367 }, { "epoch": 0.25051055139550715, "grad_norm": 2.0725624561309814, "learning_rate": 1e-05, "loss": 1.1185, "step": 368 }, { "epoch": 0.25119128658951667, "grad_norm": 3.010388135910034, "learning_rate": 9.999999493123334e-06, "loss": 1.1017, "step": 369 }, { "epoch": 0.2518720217835262, "grad_norm": 2.764875888824463, "learning_rate": 9.999997972493432e-06, "loss": 0.9559, "step": 370 }, { "epoch": 0.25255275697753576, "grad_norm": 2.390565872192383, "learning_rate": 9.999995438110604e-06, "loss": 1.1077, "step": 371 }, { "epoch": 0.2532334921715453, "grad_norm": 2.6435811519622803, "learning_rate": 9.999991889975367e-06, "loss": 1.0483, "step": 372 }, { "epoch": 0.2539142273655548, "grad_norm": 2.156831741333008, "learning_rate": 9.999987328088438e-06, "loss": 0.9714, "step": 373 }, { "epoch": 0.2545949625595643, "grad_norm": 2.3864943981170654, "learning_rate": 9.999981752450742e-06, "loss": 0.9467, "step": 374 }, { "epoch": 0.25527569775357384, "grad_norm": 2.5396370887756348, "learning_rate": 9.99997516306341e-06, "loss": 1.0741, "step": 375 }, { "epoch": 0.2559564329475834, "grad_norm": 2.4179553985595703, "learning_rate": 9.999967559927774e-06, "loss": 1.0802, "step": 376 }, { "epoch": 0.25663716814159293, "grad_norm": 2.5180976390838623, "learning_rate": 9.999958943045383e-06, "loss": 1.0222, "step": 377 }, { "epoch": 0.25731790333560245, "grad_norm": 2.397437572479248, "learning_rate": 9.999949312417978e-06, "loss": 1.0728, "step": 378 }, { "epoch": 0.25799863852961197, "grad_norm": 2.2503881454467773, "learning_rate": 9.999938668047516e-06, "loss": 0.9805, "step": 379 }, { "epoch": 0.2586793737236215, "grad_norm": 2.449476957321167, "learning_rate": 9.999927009936152e-06, "loss": 1.0105, "step": 380 }, { "epoch": 0.25936010891763106, "grad_norm": 2.6828219890594482, "learning_rate": 9.999914338086249e-06, "loss": 1.131, "step": 381 }, { "epoch": 0.2600408441116406, "grad_norm": 2.619758367538452, "learning_rate": 9.999900652500381e-06, "loss": 1.0525, "step": 382 }, { "epoch": 0.2607215793056501, "grad_norm": 2.470205068588257, "learning_rate": 9.999885953181317e-06, "loss": 1.0282, "step": 383 }, { "epoch": 0.2614023144996596, "grad_norm": 2.378730535507202, "learning_rate": 9.999870240132042e-06, "loss": 1.0297, "step": 384 }, { "epoch": 0.2620830496936692, "grad_norm": 2.381194829940796, "learning_rate": 9.999853513355738e-06, "loss": 0.9857, "step": 385 }, { "epoch": 0.2627637848876787, "grad_norm": 2.2506682872772217, "learning_rate": 9.9998357728558e-06, "loss": 0.9917, "step": 386 }, { "epoch": 0.2634445200816882, "grad_norm": 2.283416748046875, "learning_rate": 9.999817018635821e-06, "loss": 1.042, "step": 387 }, { "epoch": 0.26412525527569775, "grad_norm": 2.3830740451812744, "learning_rate": 9.999797250699606e-06, "loss": 1.0434, "step": 388 }, { "epoch": 0.26480599046970726, "grad_norm": 2.3913588523864746, "learning_rate": 9.999776469051164e-06, "loss": 0.973, "step": 389 }, { "epoch": 0.26548672566371684, "grad_norm": 2.098625898361206, "learning_rate": 9.999754673694705e-06, "loss": 1.1431, "step": 390 }, { "epoch": 0.26616746085772636, "grad_norm": 2.3761239051818848, "learning_rate": 9.999731864634651e-06, "loss": 1.0096, "step": 391 }, { "epoch": 0.2668481960517359, "grad_norm": 2.32773756980896, "learning_rate": 9.999708041875624e-06, "loss": 1.0293, "step": 392 }, { "epoch": 0.2675289312457454, "grad_norm": 2.5408854484558105, "learning_rate": 9.999683205422456e-06, "loss": 0.9757, "step": 393 }, { "epoch": 0.2682096664397549, "grad_norm": 2.4831409454345703, "learning_rate": 9.999657355280185e-06, "loss": 1.0574, "step": 394 }, { "epoch": 0.2688904016337645, "grad_norm": 2.0340447425842285, "learning_rate": 9.999630491454046e-06, "loss": 1.0975, "step": 395 }, { "epoch": 0.269571136827774, "grad_norm": 2.505852460861206, "learning_rate": 9.999602613949491e-06, "loss": 1.0737, "step": 396 }, { "epoch": 0.2702518720217835, "grad_norm": 2.6955692768096924, "learning_rate": 9.99957372277217e-06, "loss": 1.0718, "step": 397 }, { "epoch": 0.27093260721579304, "grad_norm": 2.3776602745056152, "learning_rate": 9.999543817927939e-06, "loss": 0.9606, "step": 398 }, { "epoch": 0.27161334240980256, "grad_norm": 2.7608606815338135, "learning_rate": 9.999512899422866e-06, "loss": 1.0036, "step": 399 }, { "epoch": 0.27229407760381213, "grad_norm": 2.523548126220703, "learning_rate": 9.999480967263213e-06, "loss": 1.0017, "step": 400 }, { "epoch": 0.27297481279782165, "grad_norm": 2.501403331756592, "learning_rate": 9.99944802145546e-06, "loss": 0.9944, "step": 401 }, { "epoch": 0.27365554799183117, "grad_norm": 2.374725103378296, "learning_rate": 9.999414062006286e-06, "loss": 1.0238, "step": 402 }, { "epoch": 0.2743362831858407, "grad_norm": 2.541238307952881, "learning_rate": 9.999379088922575e-06, "loss": 1.0847, "step": 403 }, { "epoch": 0.27501701837985026, "grad_norm": 2.449004888534546, "learning_rate": 9.999343102211417e-06, "loss": 1.0858, "step": 404 }, { "epoch": 0.2756977535738598, "grad_norm": 2.399129867553711, "learning_rate": 9.999306101880109e-06, "loss": 1.0727, "step": 405 }, { "epoch": 0.2763784887678693, "grad_norm": 2.581270456314087, "learning_rate": 9.999268087936154e-06, "loss": 1.0297, "step": 406 }, { "epoch": 0.2770592239618788, "grad_norm": 2.2927660942077637, "learning_rate": 9.999229060387259e-06, "loss": 1.0241, "step": 407 }, { "epoch": 0.27773995915588834, "grad_norm": 2.7774739265441895, "learning_rate": 9.999189019241335e-06, "loss": 0.9627, "step": 408 }, { "epoch": 0.2784206943498979, "grad_norm": 2.3202285766601562, "learning_rate": 9.999147964506502e-06, "loss": 1.0618, "step": 409 }, { "epoch": 0.27910142954390743, "grad_norm": 2.361233711242676, "learning_rate": 9.999105896191085e-06, "loss": 0.9759, "step": 410 }, { "epoch": 0.27978216473791695, "grad_norm": 2.3030874729156494, "learning_rate": 9.999062814303611e-06, "loss": 1.0841, "step": 411 }, { "epoch": 0.28046289993192647, "grad_norm": 2.557695150375366, "learning_rate": 9.999018718852815e-06, "loss": 1.0437, "step": 412 }, { "epoch": 0.281143635125936, "grad_norm": 2.4319918155670166, "learning_rate": 9.99897360984764e-06, "loss": 0.9674, "step": 413 }, { "epoch": 0.28182437031994556, "grad_norm": 2.3730781078338623, "learning_rate": 9.99892748729723e-06, "loss": 1.0186, "step": 414 }, { "epoch": 0.2825051055139551, "grad_norm": 2.149017810821533, "learning_rate": 9.998880351210936e-06, "loss": 1.1176, "step": 415 }, { "epoch": 0.2831858407079646, "grad_norm": 2.5130064487457275, "learning_rate": 9.998832201598315e-06, "loss": 1.0278, "step": 416 }, { "epoch": 0.2838665759019741, "grad_norm": 2.3143599033355713, "learning_rate": 9.998783038469131e-06, "loss": 1.0321, "step": 417 }, { "epoch": 0.28454731109598363, "grad_norm": 2.4106266498565674, "learning_rate": 9.99873286183335e-06, "loss": 1.0793, "step": 418 }, { "epoch": 0.2852280462899932, "grad_norm": 2.373736619949341, "learning_rate": 9.998681671701145e-06, "loss": 0.9717, "step": 419 }, { "epoch": 0.2859087814840027, "grad_norm": 2.346564292907715, "learning_rate": 9.998629468082896e-06, "loss": 1.0694, "step": 420 }, { "epoch": 0.28658951667801225, "grad_norm": 2.46859073638916, "learning_rate": 9.99857625098919e-06, "loss": 1.0201, "step": 421 }, { "epoch": 0.28727025187202176, "grad_norm": 2.613831043243408, "learning_rate": 9.99852202043081e-06, "loss": 0.9875, "step": 422 }, { "epoch": 0.28795098706603134, "grad_norm": 2.4987637996673584, "learning_rate": 9.998466776418758e-06, "loss": 0.9911, "step": 423 }, { "epoch": 0.28863172226004086, "grad_norm": 2.3048605918884277, "learning_rate": 9.99841051896423e-06, "loss": 1.0095, "step": 424 }, { "epoch": 0.2893124574540504, "grad_norm": 2.2739601135253906, "learning_rate": 9.998353248078636e-06, "loss": 0.9885, "step": 425 }, { "epoch": 0.2899931926480599, "grad_norm": 2.6454122066497803, "learning_rate": 9.998294963773583e-06, "loss": 1.0054, "step": 426 }, { "epoch": 0.2906739278420694, "grad_norm": 2.4938273429870605, "learning_rate": 9.998235666060893e-06, "loss": 1.0423, "step": 427 }, { "epoch": 0.291354663036079, "grad_norm": 2.2877814769744873, "learning_rate": 9.998175354952589e-06, "loss": 1.0707, "step": 428 }, { "epoch": 0.2920353982300885, "grad_norm": 2.388251781463623, "learning_rate": 9.998114030460894e-06, "loss": 1.0376, "step": 429 }, { "epoch": 0.292716133424098, "grad_norm": 2.5014209747314453, "learning_rate": 9.998051692598246e-06, "loss": 1.1414, "step": 430 }, { "epoch": 0.29339686861810754, "grad_norm": 2.465886116027832, "learning_rate": 9.99798834137728e-06, "loss": 1.0026, "step": 431 }, { "epoch": 0.29407760381211706, "grad_norm": 2.2141902446746826, "learning_rate": 9.997923976810845e-06, "loss": 1.0636, "step": 432 }, { "epoch": 0.29475833900612664, "grad_norm": 2.413776397705078, "learning_rate": 9.997858598911989e-06, "loss": 1.048, "step": 433 }, { "epoch": 0.29543907420013615, "grad_norm": 2.4169111251831055, "learning_rate": 9.997792207693965e-06, "loss": 1.0325, "step": 434 }, { "epoch": 0.29611980939414567, "grad_norm": 2.332242012023926, "learning_rate": 9.99772480317024e-06, "loss": 1.0394, "step": 435 }, { "epoch": 0.2968005445881552, "grad_norm": 2.126521348953247, "learning_rate": 9.997656385354475e-06, "loss": 1.0103, "step": 436 }, { "epoch": 0.29748127978216476, "grad_norm": 2.395387887954712, "learning_rate": 9.997586954260543e-06, "loss": 1.0017, "step": 437 }, { "epoch": 0.2981620149761743, "grad_norm": 2.2873833179473877, "learning_rate": 9.99751650990252e-06, "loss": 0.9778, "step": 438 }, { "epoch": 0.2988427501701838, "grad_norm": 2.1572721004486084, "learning_rate": 9.997445052294693e-06, "loss": 1.0988, "step": 439 }, { "epoch": 0.2995234853641933, "grad_norm": 2.6992719173431396, "learning_rate": 9.997372581451545e-06, "loss": 1.0885, "step": 440 }, { "epoch": 0.30020422055820284, "grad_norm": 2.3019678592681885, "learning_rate": 9.997299097387773e-06, "loss": 1.0167, "step": 441 }, { "epoch": 0.3008849557522124, "grad_norm": 2.207509756088257, "learning_rate": 9.997224600118275e-06, "loss": 1.0429, "step": 442 }, { "epoch": 0.30156569094622193, "grad_norm": 2.573612928390503, "learning_rate": 9.997149089658155e-06, "loss": 1.031, "step": 443 }, { "epoch": 0.30224642614023145, "grad_norm": 2.235095977783203, "learning_rate": 9.997072566022723e-06, "loss": 1.0345, "step": 444 }, { "epoch": 0.30292716133424097, "grad_norm": 2.36238169670105, "learning_rate": 9.996995029227493e-06, "loss": 0.9811, "step": 445 }, { "epoch": 0.3036078965282505, "grad_norm": 2.4491286277770996, "learning_rate": 9.996916479288185e-06, "loss": 1.1241, "step": 446 }, { "epoch": 0.30428863172226006, "grad_norm": 2.4202709197998047, "learning_rate": 9.996836916220729e-06, "loss": 0.9952, "step": 447 }, { "epoch": 0.3049693669162696, "grad_norm": 2.419241428375244, "learning_rate": 9.996756340041255e-06, "loss": 1.0264, "step": 448 }, { "epoch": 0.3056501021102791, "grad_norm": 2.646857976913452, "learning_rate": 9.996674750766099e-06, "loss": 0.9863, "step": 449 }, { "epoch": 0.3063308373042886, "grad_norm": 2.165775775909424, "learning_rate": 9.9965921484118e-06, "loss": 1.0272, "step": 450 }, { "epoch": 0.30701157249829814, "grad_norm": 2.3942887783050537, "learning_rate": 9.996508532995113e-06, "loss": 1.0119, "step": 451 }, { "epoch": 0.3076923076923077, "grad_norm": 2.133295774459839, "learning_rate": 9.996423904532985e-06, "loss": 1.1081, "step": 452 }, { "epoch": 0.30837304288631723, "grad_norm": 2.5016231536865234, "learning_rate": 9.996338263042577e-06, "loss": 0.9571, "step": 453 }, { "epoch": 0.30905377808032675, "grad_norm": 2.5205271244049072, "learning_rate": 9.996251608541253e-06, "loss": 0.9939, "step": 454 }, { "epoch": 0.30973451327433627, "grad_norm": 2.301798105239868, "learning_rate": 9.996163941046582e-06, "loss": 1.0321, "step": 455 }, { "epoch": 0.31041524846834584, "grad_norm": 2.3188488483428955, "learning_rate": 9.996075260576338e-06, "loss": 0.9833, "step": 456 }, { "epoch": 0.31109598366235536, "grad_norm": 2.3669357299804688, "learning_rate": 9.9959855671485e-06, "loss": 1.0653, "step": 457 }, { "epoch": 0.3117767188563649, "grad_norm": 2.1264538764953613, "learning_rate": 9.995894860781256e-06, "loss": 1.0979, "step": 458 }, { "epoch": 0.3124574540503744, "grad_norm": 2.2337534427642822, "learning_rate": 9.995803141492996e-06, "loss": 1.0724, "step": 459 }, { "epoch": 0.3131381892443839, "grad_norm": 2.1086370944976807, "learning_rate": 9.995710409302316e-06, "loss": 0.9873, "step": 460 }, { "epoch": 0.3138189244383935, "grad_norm": 2.531106472015381, "learning_rate": 9.995616664228016e-06, "loss": 1.0862, "step": 461 }, { "epoch": 0.314499659632403, "grad_norm": 2.6131207942962646, "learning_rate": 9.995521906289106e-06, "loss": 0.9888, "step": 462 }, { "epoch": 0.3151803948264125, "grad_norm": 2.2551958560943604, "learning_rate": 9.995426135504794e-06, "loss": 1.0951, "step": 463 }, { "epoch": 0.31586113002042204, "grad_norm": 2.382486343383789, "learning_rate": 9.995329351894502e-06, "loss": 1.0133, "step": 464 }, { "epoch": 0.31654186521443156, "grad_norm": 2.3164634704589844, "learning_rate": 9.99523155547785e-06, "loss": 0.9892, "step": 465 }, { "epoch": 0.31722260040844114, "grad_norm": 2.473841428756714, "learning_rate": 9.995132746274668e-06, "loss": 1.0567, "step": 466 }, { "epoch": 0.31790333560245065, "grad_norm": 2.1158294677734375, "learning_rate": 9.995032924304986e-06, "loss": 1.04, "step": 467 }, { "epoch": 0.3185840707964602, "grad_norm": 2.1685357093811035, "learning_rate": 9.994932089589048e-06, "loss": 0.9555, "step": 468 }, { "epoch": 0.3192648059904697, "grad_norm": 2.3535573482513428, "learning_rate": 9.994830242147295e-06, "loss": 1.1303, "step": 469 }, { "epoch": 0.31994554118447927, "grad_norm": 2.2496843338012695, "learning_rate": 9.99472738200038e-06, "loss": 1.006, "step": 470 }, { "epoch": 0.3206262763784888, "grad_norm": 2.169665575027466, "learning_rate": 9.994623509169154e-06, "loss": 1.0309, "step": 471 }, { "epoch": 0.3213070115724983, "grad_norm": 2.3261265754699707, "learning_rate": 9.994518623674679e-06, "loss": 1.0606, "step": 472 }, { "epoch": 0.3219877467665078, "grad_norm": 2.5296521186828613, "learning_rate": 9.99441272553822e-06, "loss": 1.0065, "step": 473 }, { "epoch": 0.32266848196051734, "grad_norm": 2.305798292160034, "learning_rate": 9.99430581478125e-06, "loss": 1.0607, "step": 474 }, { "epoch": 0.3233492171545269, "grad_norm": 2.2874155044555664, "learning_rate": 9.994197891425443e-06, "loss": 0.9554, "step": 475 }, { "epoch": 0.32402995234853643, "grad_norm": 2.2076282501220703, "learning_rate": 9.99408895549268e-06, "loss": 1.001, "step": 476 }, { "epoch": 0.32471068754254595, "grad_norm": 2.1300289630889893, "learning_rate": 9.993979007005051e-06, "loss": 1.039, "step": 477 }, { "epoch": 0.32539142273655547, "grad_norm": 2.227430582046509, "learning_rate": 9.993868045984846e-06, "loss": 0.9976, "step": 478 }, { "epoch": 0.326072157930565, "grad_norm": 2.3049068450927734, "learning_rate": 9.993756072454562e-06, "loss": 1.0176, "step": 479 }, { "epoch": 0.32675289312457456, "grad_norm": 2.3240349292755127, "learning_rate": 9.993643086436901e-06, "loss": 1.0833, "step": 480 }, { "epoch": 0.3274336283185841, "grad_norm": 2.5473272800445557, "learning_rate": 9.993529087954776e-06, "loss": 1.0432, "step": 481 }, { "epoch": 0.3281143635125936, "grad_norm": 2.317716598510742, "learning_rate": 9.993414077031294e-06, "loss": 1.0619, "step": 482 }, { "epoch": 0.3287950987066031, "grad_norm": 2.4824061393737793, "learning_rate": 9.993298053689775e-06, "loss": 1.0295, "step": 483 }, { "epoch": 0.32947583390061264, "grad_norm": 2.4003026485443115, "learning_rate": 9.993181017953747e-06, "loss": 1.0523, "step": 484 }, { "epoch": 0.3301565690946222, "grad_norm": 2.3149049282073975, "learning_rate": 9.993062969846935e-06, "loss": 1.0125, "step": 485 }, { "epoch": 0.33083730428863173, "grad_norm": 2.494936943054199, "learning_rate": 9.992943909393273e-06, "loss": 1.0658, "step": 486 }, { "epoch": 0.33151803948264125, "grad_norm": 2.4694604873657227, "learning_rate": 9.992823836616903e-06, "loss": 1.0299, "step": 487 }, { "epoch": 0.33219877467665077, "grad_norm": 2.3582074642181396, "learning_rate": 9.99270275154217e-06, "loss": 1.1283, "step": 488 }, { "epoch": 0.33287950987066034, "grad_norm": 2.496190309524536, "learning_rate": 9.99258065419362e-06, "loss": 1.0515, "step": 489 }, { "epoch": 0.33356024506466986, "grad_norm": 2.4160237312316895, "learning_rate": 9.992457544596012e-06, "loss": 0.994, "step": 490 }, { "epoch": 0.3342409802586794, "grad_norm": 2.2844502925872803, "learning_rate": 9.992333422774308e-06, "loss": 1.0252, "step": 491 }, { "epoch": 0.3349217154526889, "grad_norm": 2.387355089187622, "learning_rate": 9.99220828875367e-06, "loss": 0.9915, "step": 492 }, { "epoch": 0.3356024506466984, "grad_norm": 1.9775962829589844, "learning_rate": 9.99208214255947e-06, "loss": 1.0753, "step": 493 }, { "epoch": 0.336283185840708, "grad_norm": 2.081005573272705, "learning_rate": 9.991954984217285e-06, "loss": 1.0086, "step": 494 }, { "epoch": 0.3369639210347175, "grad_norm": 2.0945208072662354, "learning_rate": 9.991826813752897e-06, "loss": 1.0503, "step": 495 }, { "epoch": 0.337644656228727, "grad_norm": 2.2974061965942383, "learning_rate": 9.99169763119229e-06, "loss": 0.989, "step": 496 }, { "epoch": 0.33832539142273654, "grad_norm": 2.2870137691497803, "learning_rate": 9.99156743656166e-06, "loss": 0.9882, "step": 497 }, { "epoch": 0.33900612661674606, "grad_norm": 2.149015426635742, "learning_rate": 9.9914362298874e-06, "loss": 0.9865, "step": 498 }, { "epoch": 0.33968686181075564, "grad_norm": 2.4018490314483643, "learning_rate": 9.991304011196115e-06, "loss": 1.0512, "step": 499 }, { "epoch": 0.34036759700476515, "grad_norm": 2.284541130065918, "learning_rate": 9.991170780514611e-06, "loss": 1.0772, "step": 500 }, { "epoch": 0.3410483321987747, "grad_norm": 2.129209280014038, "learning_rate": 9.9910365378699e-06, "loss": 1.0475, "step": 501 }, { "epoch": 0.3417290673927842, "grad_norm": 2.1880950927734375, "learning_rate": 9.990901283289202e-06, "loss": 1.0201, "step": 502 }, { "epoch": 0.3424098025867937, "grad_norm": 2.0944719314575195, "learning_rate": 9.99076501679994e-06, "loss": 0.9927, "step": 503 }, { "epoch": 0.3430905377808033, "grad_norm": 2.1870265007019043, "learning_rate": 9.99062773842974e-06, "loss": 1.0457, "step": 504 }, { "epoch": 0.3437712729748128, "grad_norm": 2.220947504043579, "learning_rate": 9.990489448206436e-06, "loss": 1.0486, "step": 505 }, { "epoch": 0.3444520081688223, "grad_norm": 2.524402379989624, "learning_rate": 9.990350146158066e-06, "loss": 1.0331, "step": 506 }, { "epoch": 0.34513274336283184, "grad_norm": 2.325007200241089, "learning_rate": 9.990209832312874e-06, "loss": 1.0061, "step": 507 }, { "epoch": 0.3458134785568414, "grad_norm": 2.4523890018463135, "learning_rate": 9.990068506699309e-06, "loss": 1.0515, "step": 508 }, { "epoch": 0.34649421375085093, "grad_norm": 2.2753114700317383, "learning_rate": 9.989926169346025e-06, "loss": 0.9929, "step": 509 }, { "epoch": 0.34717494894486045, "grad_norm": 2.1817166805267334, "learning_rate": 9.989782820281881e-06, "loss": 1.0853, "step": 510 }, { "epoch": 0.34785568413886997, "grad_norm": 2.423363208770752, "learning_rate": 9.98963845953594e-06, "loss": 0.9823, "step": 511 }, { "epoch": 0.3485364193328795, "grad_norm": 2.2913856506347656, "learning_rate": 9.989493087137473e-06, "loss": 1.008, "step": 512 }, { "epoch": 0.34921715452688906, "grad_norm": 2.124427080154419, "learning_rate": 9.989346703115954e-06, "loss": 0.9805, "step": 513 }, { "epoch": 0.3498978897208986, "grad_norm": 1.982823371887207, "learning_rate": 9.98919930750106e-06, "loss": 1.0194, "step": 514 }, { "epoch": 0.3505786249149081, "grad_norm": 2.252472400665283, "learning_rate": 9.98905090032268e-06, "loss": 1.0499, "step": 515 }, { "epoch": 0.3512593601089176, "grad_norm": 2.1255998611450195, "learning_rate": 9.9889014816109e-06, "loss": 1.044, "step": 516 }, { "epoch": 0.35194009530292714, "grad_norm": 2.4269280433654785, "learning_rate": 9.988751051396015e-06, "loss": 1.0703, "step": 517 }, { "epoch": 0.3526208304969367, "grad_norm": 2.1001908779144287, "learning_rate": 9.988599609708527e-06, "loss": 1.0583, "step": 518 }, { "epoch": 0.35330156569094623, "grad_norm": 2.2770180702209473, "learning_rate": 9.988447156579139e-06, "loss": 1.0525, "step": 519 }, { "epoch": 0.35398230088495575, "grad_norm": 2.3234312534332275, "learning_rate": 9.988293692038762e-06, "loss": 0.98, "step": 520 }, { "epoch": 0.35466303607896527, "grad_norm": 2.1030709743499756, "learning_rate": 9.988139216118508e-06, "loss": 0.9799, "step": 521 }, { "epoch": 0.35534377127297484, "grad_norm": 1.994309902191162, "learning_rate": 9.987983728849705e-06, "loss": 1.0934, "step": 522 }, { "epoch": 0.35602450646698436, "grad_norm": 2.3500373363494873, "learning_rate": 9.987827230263867e-06, "loss": 0.9596, "step": 523 }, { "epoch": 0.3567052416609939, "grad_norm": 2.2766199111938477, "learning_rate": 9.987669720392734e-06, "loss": 1.0054, "step": 524 }, { "epoch": 0.3573859768550034, "grad_norm": 2.234240770339966, "learning_rate": 9.987511199268237e-06, "loss": 1.0506, "step": 525 }, { "epoch": 0.3580667120490129, "grad_norm": 2.132190465927124, "learning_rate": 9.987351666922514e-06, "loss": 1.0719, "step": 526 }, { "epoch": 0.3587474472430225, "grad_norm": 2.098172664642334, "learning_rate": 9.987191123387916e-06, "loss": 1.1109, "step": 527 }, { "epoch": 0.359428182437032, "grad_norm": 2.216951608657837, "learning_rate": 9.98702956869699e-06, "loss": 1.0865, "step": 528 }, { "epoch": 0.3601089176310415, "grad_norm": 2.052443504333496, "learning_rate": 9.986867002882488e-06, "loss": 1.1543, "step": 529 }, { "epoch": 0.36078965282505104, "grad_norm": 1.9781889915466309, "learning_rate": 9.986703425977376e-06, "loss": 1.0193, "step": 530 }, { "epoch": 0.36147038801906056, "grad_norm": 1.9716296195983887, "learning_rate": 9.986538838014817e-06, "loss": 1.036, "step": 531 }, { "epoch": 0.36215112321307014, "grad_norm": 2.2070844173431396, "learning_rate": 9.986373239028182e-06, "loss": 1.0715, "step": 532 }, { "epoch": 0.36283185840707965, "grad_norm": 2.074355363845825, "learning_rate": 9.986206629051045e-06, "loss": 1.0367, "step": 533 }, { "epoch": 0.3635125936010892, "grad_norm": 2.4412999153137207, "learning_rate": 9.986039008117187e-06, "loss": 0.9952, "step": 534 }, { "epoch": 0.3641933287950987, "grad_norm": 2.2586417198181152, "learning_rate": 9.985870376260594e-06, "loss": 0.9787, "step": 535 }, { "epoch": 0.3648740639891082, "grad_norm": 2.1293888092041016, "learning_rate": 9.985700733515455e-06, "loss": 1.0945, "step": 536 }, { "epoch": 0.3655547991831178, "grad_norm": 2.4148900508880615, "learning_rate": 9.985530079916166e-06, "loss": 1.0178, "step": 537 }, { "epoch": 0.3662355343771273, "grad_norm": 2.0015156269073486, "learning_rate": 9.985358415497326e-06, "loss": 1.1048, "step": 538 }, { "epoch": 0.3669162695711368, "grad_norm": 2.3732075691223145, "learning_rate": 9.985185740293741e-06, "loss": 0.986, "step": 539 }, { "epoch": 0.36759700476514634, "grad_norm": 2.106384754180908, "learning_rate": 9.985012054340421e-06, "loss": 0.9948, "step": 540 }, { "epoch": 0.3682777399591559, "grad_norm": 2.552187442779541, "learning_rate": 9.984837357672584e-06, "loss": 0.9837, "step": 541 }, { "epoch": 0.36895847515316543, "grad_norm": 2.2588284015655518, "learning_rate": 9.984661650325642e-06, "loss": 1.0485, "step": 542 }, { "epoch": 0.36963921034717495, "grad_norm": 2.4820425510406494, "learning_rate": 9.984484932335228e-06, "loss": 1.0273, "step": 543 }, { "epoch": 0.37031994554118447, "grad_norm": 2.102745532989502, "learning_rate": 9.984307203737166e-06, "loss": 1.0855, "step": 544 }, { "epoch": 0.371000680735194, "grad_norm": 1.9784592390060425, "learning_rate": 9.984128464567493e-06, "loss": 1.0768, "step": 545 }, { "epoch": 0.37168141592920356, "grad_norm": 2.149081230163574, "learning_rate": 9.983948714862449e-06, "loss": 0.9973, "step": 546 }, { "epoch": 0.3723621511232131, "grad_norm": 1.9896153211593628, "learning_rate": 9.983767954658477e-06, "loss": 1.038, "step": 547 }, { "epoch": 0.3730428863172226, "grad_norm": 2.108525514602661, "learning_rate": 9.983586183992228e-06, "loss": 0.9759, "step": 548 }, { "epoch": 0.3737236215112321, "grad_norm": 2.2003297805786133, "learning_rate": 9.983403402900554e-06, "loss": 0.9721, "step": 549 }, { "epoch": 0.37440435670524164, "grad_norm": 2.2074573040008545, "learning_rate": 9.983219611420514e-06, "loss": 1.03, "step": 550 }, { "epoch": 0.3750850918992512, "grad_norm": 2.172774076461792, "learning_rate": 9.983034809589375e-06, "loss": 0.9818, "step": 551 }, { "epoch": 0.37576582709326073, "grad_norm": 2.2997896671295166, "learning_rate": 9.982848997444603e-06, "loss": 0.9744, "step": 552 }, { "epoch": 0.37644656228727025, "grad_norm": 1.8503903150558472, "learning_rate": 9.982662175023871e-06, "loss": 1.1223, "step": 553 }, { "epoch": 0.37712729748127977, "grad_norm": 2.296679735183716, "learning_rate": 9.982474342365061e-06, "loss": 0.9587, "step": 554 }, { "epoch": 0.3778080326752893, "grad_norm": 2.279360055923462, "learning_rate": 9.98228549950625e-06, "loss": 0.983, "step": 555 }, { "epoch": 0.37848876786929886, "grad_norm": 2.423574447631836, "learning_rate": 9.982095646485732e-06, "loss": 1.0461, "step": 556 }, { "epoch": 0.3791695030633084, "grad_norm": 2.017990827560425, "learning_rate": 9.981904783341998e-06, "loss": 1.045, "step": 557 }, { "epoch": 0.3798502382573179, "grad_norm": 2.4109930992126465, "learning_rate": 9.981712910113743e-06, "loss": 1.0762, "step": 558 }, { "epoch": 0.3805309734513274, "grad_norm": 2.064772367477417, "learning_rate": 9.981520026839873e-06, "loss": 0.996, "step": 559 }, { "epoch": 0.381211708645337, "grad_norm": 2.0749459266662598, "learning_rate": 9.981326133559494e-06, "loss": 0.9891, "step": 560 }, { "epoch": 0.3818924438393465, "grad_norm": 2.545250415802002, "learning_rate": 9.981131230311917e-06, "loss": 1.0241, "step": 561 }, { "epoch": 0.382573179033356, "grad_norm": 2.1630187034606934, "learning_rate": 9.980935317136659e-06, "loss": 1.0025, "step": 562 }, { "epoch": 0.38325391422736554, "grad_norm": 2.281973123550415, "learning_rate": 9.980738394073444e-06, "loss": 0.9618, "step": 563 }, { "epoch": 0.38393464942137506, "grad_norm": 2.3384907245635986, "learning_rate": 9.980540461162194e-06, "loss": 1.042, "step": 564 }, { "epoch": 0.38461538461538464, "grad_norm": 2.377974510192871, "learning_rate": 9.980341518443045e-06, "loss": 0.9363, "step": 565 }, { "epoch": 0.38529611980939416, "grad_norm": 2.6853628158569336, "learning_rate": 9.980141565956328e-06, "loss": 1.0128, "step": 566 }, { "epoch": 0.3859768550034037, "grad_norm": 2.121795177459717, "learning_rate": 9.979940603742584e-06, "loss": 1.0512, "step": 567 }, { "epoch": 0.3866575901974132, "grad_norm": 2.0181031227111816, "learning_rate": 9.979738631842564e-06, "loss": 0.9445, "step": 568 }, { "epoch": 0.3873383253914227, "grad_norm": 1.9385688304901123, "learning_rate": 9.979535650297211e-06, "loss": 1.0873, "step": 569 }, { "epoch": 0.3880190605854323, "grad_norm": 2.0759599208831787, "learning_rate": 9.979331659147685e-06, "loss": 1.0044, "step": 570 }, { "epoch": 0.3886997957794418, "grad_norm": 2.234694480895996, "learning_rate": 9.97912665843534e-06, "loss": 0.9707, "step": 571 }, { "epoch": 0.3893805309734513, "grad_norm": 2.6630678176879883, "learning_rate": 9.978920648201743e-06, "loss": 0.9772, "step": 572 }, { "epoch": 0.39006126616746084, "grad_norm": 2.4115383625030518, "learning_rate": 9.978713628488662e-06, "loss": 1.0825, "step": 573 }, { "epoch": 0.3907420013614704, "grad_norm": 2.3958728313446045, "learning_rate": 9.978505599338074e-06, "loss": 0.9447, "step": 574 }, { "epoch": 0.39142273655547993, "grad_norm": 2.309798240661621, "learning_rate": 9.97829656079215e-06, "loss": 0.9601, "step": 575 }, { "epoch": 0.39210347174948945, "grad_norm": 2.054898738861084, "learning_rate": 9.97808651289328e-06, "loss": 1.0894, "step": 576 }, { "epoch": 0.39278420694349897, "grad_norm": 2.156574010848999, "learning_rate": 9.977875455684047e-06, "loss": 1.015, "step": 577 }, { "epoch": 0.3934649421375085, "grad_norm": 2.355095148086548, "learning_rate": 9.977663389207245e-06, "loss": 1.1773, "step": 578 }, { "epoch": 0.39414567733151806, "grad_norm": 2.356771469116211, "learning_rate": 9.977450313505868e-06, "loss": 1.0332, "step": 579 }, { "epoch": 0.3948264125255276, "grad_norm": 2.3600096702575684, "learning_rate": 9.97723622862312e-06, "loss": 1.0357, "step": 580 }, { "epoch": 0.3955071477195371, "grad_norm": 2.375229835510254, "learning_rate": 9.977021134602404e-06, "loss": 1.012, "step": 581 }, { "epoch": 0.3961878829135466, "grad_norm": 2.2554311752319336, "learning_rate": 9.976805031487333e-06, "loss": 1.0626, "step": 582 }, { "epoch": 0.39686861810755614, "grad_norm": 2.360065460205078, "learning_rate": 9.97658791932172e-06, "loss": 0.9872, "step": 583 }, { "epoch": 0.3975493533015657, "grad_norm": 1.9536573886871338, "learning_rate": 9.976369798149588e-06, "loss": 1.1111, "step": 584 }, { "epoch": 0.39823008849557523, "grad_norm": 2.0497865676879883, "learning_rate": 9.976150668015159e-06, "loss": 0.9558, "step": 585 }, { "epoch": 0.39891082368958475, "grad_norm": 2.2464561462402344, "learning_rate": 9.97593052896286e-06, "loss": 0.9565, "step": 586 }, { "epoch": 0.39959155888359427, "grad_norm": 2.0778818130493164, "learning_rate": 9.975709381037329e-06, "loss": 0.9502, "step": 587 }, { "epoch": 0.4002722940776038, "grad_norm": 2.0046918392181396, "learning_rate": 9.975487224283398e-06, "loss": 1.1018, "step": 588 }, { "epoch": 0.40095302927161336, "grad_norm": 2.437671422958374, "learning_rate": 9.975264058746115e-06, "loss": 1.034, "step": 589 }, { "epoch": 0.4016337644656229, "grad_norm": 2.1721107959747314, "learning_rate": 9.975039884470723e-06, "loss": 1.0557, "step": 590 }, { "epoch": 0.4023144996596324, "grad_norm": 2.287762403488159, "learning_rate": 9.974814701502675e-06, "loss": 1.0065, "step": 591 }, { "epoch": 0.4029952348536419, "grad_norm": 2.383241891860962, "learning_rate": 9.974588509887627e-06, "loss": 1.0778, "step": 592 }, { "epoch": 0.4036759700476515, "grad_norm": 2.5038061141967773, "learning_rate": 9.97436130967144e-06, "loss": 0.9969, "step": 593 }, { "epoch": 0.404356705241661, "grad_norm": 2.0532329082489014, "learning_rate": 9.974133100900178e-06, "loss": 1.0416, "step": 594 }, { "epoch": 0.4050374404356705, "grad_norm": 2.331378698348999, "learning_rate": 9.973903883620111e-06, "loss": 0.9764, "step": 595 }, { "epoch": 0.40571817562968004, "grad_norm": 2.264805555343628, "learning_rate": 9.973673657877712e-06, "loss": 1.0532, "step": 596 }, { "epoch": 0.40639891082368956, "grad_norm": 2.056396007537842, "learning_rate": 9.973442423719662e-06, "loss": 0.9829, "step": 597 }, { "epoch": 0.40707964601769914, "grad_norm": 2.2083394527435303, "learning_rate": 9.97321018119284e-06, "loss": 1.0047, "step": 598 }, { "epoch": 0.40776038121170866, "grad_norm": 2.2118868827819824, "learning_rate": 9.972976930344338e-06, "loss": 1.0662, "step": 599 }, { "epoch": 0.4084411164057182, "grad_norm": 2.379075765609741, "learning_rate": 9.972742671221444e-06, "loss": 1.0858, "step": 600 }, { "epoch": 0.4091218515997277, "grad_norm": 1.9494893550872803, "learning_rate": 9.972507403871656e-06, "loss": 1.0311, "step": 601 }, { "epoch": 0.4098025867937372, "grad_norm": 2.271019458770752, "learning_rate": 9.972271128342673e-06, "loss": 0.9918, "step": 602 }, { "epoch": 0.4104833219877468, "grad_norm": 2.0621514320373535, "learning_rate": 9.972033844682401e-06, "loss": 0.9419, "step": 603 }, { "epoch": 0.4111640571817563, "grad_norm": 2.2681937217712402, "learning_rate": 9.97179555293895e-06, "loss": 0.9903, "step": 604 }, { "epoch": 0.4118447923757658, "grad_norm": 2.2979941368103027, "learning_rate": 9.971556253160634e-06, "loss": 1.0053, "step": 605 }, { "epoch": 0.41252552756977534, "grad_norm": 2.116744041442871, "learning_rate": 9.971315945395971e-06, "loss": 0.9111, "step": 606 }, { "epoch": 0.4132062627637849, "grad_norm": 2.1530344486236572, "learning_rate": 9.971074629693682e-06, "loss": 1.0645, "step": 607 }, { "epoch": 0.41388699795779443, "grad_norm": 2.1403679847717285, "learning_rate": 9.970832306102696e-06, "loss": 1.0434, "step": 608 }, { "epoch": 0.41456773315180395, "grad_norm": 2.314173460006714, "learning_rate": 9.970588974672142e-06, "loss": 1.1401, "step": 609 }, { "epoch": 0.41524846834581347, "grad_norm": 2.4207961559295654, "learning_rate": 9.970344635451356e-06, "loss": 0.98, "step": 610 }, { "epoch": 0.415929203539823, "grad_norm": 2.107900381088257, "learning_rate": 9.970099288489881e-06, "loss": 0.9915, "step": 611 }, { "epoch": 0.41660993873383256, "grad_norm": 2.1517984867095947, "learning_rate": 9.969852933837458e-06, "loss": 1.0966, "step": 612 }, { "epoch": 0.4172906739278421, "grad_norm": 1.9369992017745972, "learning_rate": 9.969605571544038e-06, "loss": 1.0264, "step": 613 }, { "epoch": 0.4179714091218516, "grad_norm": 2.4180517196655273, "learning_rate": 9.96935720165977e-06, "loss": 0.9816, "step": 614 }, { "epoch": 0.4186521443158611, "grad_norm": 2.3475139141082764, "learning_rate": 9.969107824235016e-06, "loss": 1.0151, "step": 615 }, { "epoch": 0.41933287950987064, "grad_norm": 2.3105008602142334, "learning_rate": 9.968857439320334e-06, "loss": 1.1232, "step": 616 }, { "epoch": 0.4200136147038802, "grad_norm": 2.8746228218078613, "learning_rate": 9.96860604696649e-06, "loss": 1.0287, "step": 617 }, { "epoch": 0.42069434989788973, "grad_norm": 2.1955411434173584, "learning_rate": 9.968353647224457e-06, "loss": 1.0442, "step": 618 }, { "epoch": 0.42137508509189925, "grad_norm": 2.172842264175415, "learning_rate": 9.968100240145406e-06, "loss": 0.8987, "step": 619 }, { "epoch": 0.42205582028590877, "grad_norm": 2.3157501220703125, "learning_rate": 9.967845825780717e-06, "loss": 1.0434, "step": 620 }, { "epoch": 0.4227365554799183, "grad_norm": 2.357668399810791, "learning_rate": 9.967590404181971e-06, "loss": 0.9579, "step": 621 }, { "epoch": 0.42341729067392786, "grad_norm": 2.295579195022583, "learning_rate": 9.967333975400957e-06, "loss": 1.0442, "step": 622 }, { "epoch": 0.4240980258679374, "grad_norm": 2.4925854206085205, "learning_rate": 9.967076539489664e-06, "loss": 1.0103, "step": 623 }, { "epoch": 0.4247787610619469, "grad_norm": 2.242765188217163, "learning_rate": 9.966818096500289e-06, "loss": 0.9394, "step": 624 }, { "epoch": 0.4254594962559564, "grad_norm": 2.2165873050689697, "learning_rate": 9.966558646485231e-06, "loss": 1.0613, "step": 625 }, { "epoch": 0.426140231449966, "grad_norm": 2.272343873977661, "learning_rate": 9.966298189497094e-06, "loss": 0.8863, "step": 626 }, { "epoch": 0.4268209666439755, "grad_norm": 2.630770683288574, "learning_rate": 9.966036725588686e-06, "loss": 1.081, "step": 627 }, { "epoch": 0.427501701837985, "grad_norm": 2.4183578491210938, "learning_rate": 9.965774254813017e-06, "loss": 0.9472, "step": 628 }, { "epoch": 0.42818243703199454, "grad_norm": 2.1254074573516846, "learning_rate": 9.965510777223304e-06, "loss": 1.1362, "step": 629 }, { "epoch": 0.42886317222600406, "grad_norm": 2.0783681869506836, "learning_rate": 9.96524629287297e-06, "loss": 0.9714, "step": 630 }, { "epoch": 0.42954390742001364, "grad_norm": 2.169232130050659, "learning_rate": 9.964980801815636e-06, "loss": 0.9639, "step": 631 }, { "epoch": 0.43022464261402316, "grad_norm": 2.1896884441375732, "learning_rate": 9.964714304105133e-06, "loss": 1.0791, "step": 632 }, { "epoch": 0.4309053778080327, "grad_norm": 2.1295297145843506, "learning_rate": 9.964446799795488e-06, "loss": 1.0477, "step": 633 }, { "epoch": 0.4315861130020422, "grad_norm": 2.140238046646118, "learning_rate": 9.964178288940946e-06, "loss": 0.9589, "step": 634 }, { "epoch": 0.4322668481960517, "grad_norm": 2.226646661758423, "learning_rate": 9.963908771595942e-06, "loss": 1.0208, "step": 635 }, { "epoch": 0.4329475833900613, "grad_norm": 2.1151907444000244, "learning_rate": 9.963638247815125e-06, "loss": 0.9603, "step": 636 }, { "epoch": 0.4336283185840708, "grad_norm": 2.1821706295013428, "learning_rate": 9.963366717653337e-06, "loss": 1.0152, "step": 637 }, { "epoch": 0.4343090537780803, "grad_norm": 2.114786148071289, "learning_rate": 9.96309418116564e-06, "loss": 1.0073, "step": 638 }, { "epoch": 0.43498978897208984, "grad_norm": 2.019909620285034, "learning_rate": 9.962820638407284e-06, "loss": 0.966, "step": 639 }, { "epoch": 0.43567052416609936, "grad_norm": 1.9241317510604858, "learning_rate": 9.96254608943373e-06, "loss": 1.1282, "step": 640 }, { "epoch": 0.43635125936010893, "grad_norm": 2.3013651371002197, "learning_rate": 9.962270534300649e-06, "loss": 0.9367, "step": 641 }, { "epoch": 0.43703199455411845, "grad_norm": 2.2372002601623535, "learning_rate": 9.961993973063904e-06, "loss": 1.0032, "step": 642 }, { "epoch": 0.43771272974812797, "grad_norm": 2.136929512023926, "learning_rate": 9.961716405779572e-06, "loss": 0.9977, "step": 643 }, { "epoch": 0.4383934649421375, "grad_norm": 2.0203137397766113, "learning_rate": 9.961437832503927e-06, "loss": 1.0944, "step": 644 }, { "epoch": 0.43907420013614706, "grad_norm": 2.2437000274658203, "learning_rate": 9.96115825329345e-06, "loss": 0.969, "step": 645 }, { "epoch": 0.4397549353301566, "grad_norm": 2.28483247756958, "learning_rate": 9.96087766820483e-06, "loss": 1.1233, "step": 646 }, { "epoch": 0.4404356705241661, "grad_norm": 2.370783567428589, "learning_rate": 9.960596077294951e-06, "loss": 1.0227, "step": 647 }, { "epoch": 0.4411164057181756, "grad_norm": 2.0798165798187256, "learning_rate": 9.960313480620906e-06, "loss": 1.0453, "step": 648 }, { "epoch": 0.44179714091218514, "grad_norm": 1.9845945835113525, "learning_rate": 9.960029878239995e-06, "loss": 1.0372, "step": 649 }, { "epoch": 0.4424778761061947, "grad_norm": 2.2128567695617676, "learning_rate": 9.959745270209716e-06, "loss": 0.9582, "step": 650 }, { "epoch": 0.44315861130020423, "grad_norm": 2.1529641151428223, "learning_rate": 9.959459656587775e-06, "loss": 1.0465, "step": 651 }, { "epoch": 0.44383934649421375, "grad_norm": 2.163851261138916, "learning_rate": 9.959173037432079e-06, "loss": 1.1042, "step": 652 }, { "epoch": 0.44452008168822327, "grad_norm": 2.125809907913208, "learning_rate": 9.95888541280074e-06, "loss": 1.0064, "step": 653 }, { "epoch": 0.4452008168822328, "grad_norm": 1.992212176322937, "learning_rate": 9.958596782752075e-06, "loss": 0.9908, "step": 654 }, { "epoch": 0.44588155207624236, "grad_norm": 2.101740598678589, "learning_rate": 9.958307147344605e-06, "loss": 1.0514, "step": 655 }, { "epoch": 0.4465622872702519, "grad_norm": 2.0912704467773438, "learning_rate": 9.958016506637052e-06, "loss": 0.9813, "step": 656 }, { "epoch": 0.4472430224642614, "grad_norm": 2.0760116577148438, "learning_rate": 9.957724860688343e-06, "loss": 1.1025, "step": 657 }, { "epoch": 0.4479237576582709, "grad_norm": 2.217310667037964, "learning_rate": 9.957432209557613e-06, "loss": 1.0189, "step": 658 }, { "epoch": 0.4486044928522805, "grad_norm": 2.2074899673461914, "learning_rate": 9.957138553304194e-06, "loss": 0.9458, "step": 659 }, { "epoch": 0.44928522804629, "grad_norm": 2.091895818710327, "learning_rate": 9.956843891987624e-06, "loss": 1.076, "step": 660 }, { "epoch": 0.4499659632402995, "grad_norm": 2.0911099910736084, "learning_rate": 9.956548225667648e-06, "loss": 1.0173, "step": 661 }, { "epoch": 0.45064669843430905, "grad_norm": 2.3913888931274414, "learning_rate": 9.956251554404214e-06, "loss": 1.082, "step": 662 }, { "epoch": 0.45132743362831856, "grad_norm": 2.1402413845062256, "learning_rate": 9.955953878257469e-06, "loss": 1.0235, "step": 663 }, { "epoch": 0.45200816882232814, "grad_norm": 2.14678692817688, "learning_rate": 9.955655197287769e-06, "loss": 0.9102, "step": 664 }, { "epoch": 0.45268890401633766, "grad_norm": 2.355276584625244, "learning_rate": 9.95535551155567e-06, "loss": 1.0082, "step": 665 }, { "epoch": 0.4533696392103472, "grad_norm": 2.1523900032043457, "learning_rate": 9.955054821121936e-06, "loss": 1.0335, "step": 666 }, { "epoch": 0.4540503744043567, "grad_norm": 1.9026039838790894, "learning_rate": 9.95475312604753e-06, "loss": 1.0551, "step": 667 }, { "epoch": 0.4547311095983662, "grad_norm": 2.1432881355285645, "learning_rate": 9.954450426393622e-06, "loss": 1.0167, "step": 668 }, { "epoch": 0.4554118447923758, "grad_norm": 2.3295490741729736, "learning_rate": 9.954146722221585e-06, "loss": 0.9811, "step": 669 }, { "epoch": 0.4560925799863853, "grad_norm": 2.0887794494628906, "learning_rate": 9.953842013592994e-06, "loss": 0.9925, "step": 670 }, { "epoch": 0.4567733151803948, "grad_norm": 2.209843873977661, "learning_rate": 9.95353630056963e-06, "loss": 0.971, "step": 671 }, { "epoch": 0.45745405037440434, "grad_norm": 2.156886100769043, "learning_rate": 9.953229583213474e-06, "loss": 0.9556, "step": 672 }, { "epoch": 0.45813478556841386, "grad_norm": 1.9889105558395386, "learning_rate": 9.952921861586718e-06, "loss": 0.9673, "step": 673 }, { "epoch": 0.45881552076242343, "grad_norm": 2.0865468978881836, "learning_rate": 9.952613135751749e-06, "loss": 0.984, "step": 674 }, { "epoch": 0.45949625595643295, "grad_norm": 2.1853573322296143, "learning_rate": 9.952303405771162e-06, "loss": 1.0652, "step": 675 }, { "epoch": 0.46017699115044247, "grad_norm": 2.247410535812378, "learning_rate": 9.951992671707753e-06, "loss": 1.0056, "step": 676 }, { "epoch": 0.460857726344452, "grad_norm": 2.3069210052490234, "learning_rate": 9.951680933624528e-06, "loss": 1.0135, "step": 677 }, { "epoch": 0.46153846153846156, "grad_norm": 2.2917978763580322, "learning_rate": 9.951368191584689e-06, "loss": 1.0371, "step": 678 }, { "epoch": 0.4622191967324711, "grad_norm": 2.139084815979004, "learning_rate": 9.951054445651646e-06, "loss": 1.0866, "step": 679 }, { "epoch": 0.4628999319264806, "grad_norm": 2.1601362228393555, "learning_rate": 9.950739695889011e-06, "loss": 1.097, "step": 680 }, { "epoch": 0.4635806671204901, "grad_norm": 1.9987519979476929, "learning_rate": 9.950423942360598e-06, "loss": 1.1028, "step": 681 }, { "epoch": 0.46426140231449964, "grad_norm": 2.2551381587982178, "learning_rate": 9.950107185130429e-06, "loss": 1.0951, "step": 682 }, { "epoch": 0.4649421375085092, "grad_norm": 2.2095906734466553, "learning_rate": 9.949789424262726e-06, "loss": 1.0034, "step": 683 }, { "epoch": 0.46562287270251873, "grad_norm": 2.2221004962921143, "learning_rate": 9.949470659821913e-06, "loss": 1.0007, "step": 684 }, { "epoch": 0.46630360789652825, "grad_norm": 2.178969144821167, "learning_rate": 9.949150891872621e-06, "loss": 1.0004, "step": 685 }, { "epoch": 0.46698434309053777, "grad_norm": 1.9273219108581543, "learning_rate": 9.948830120479685e-06, "loss": 1.0575, "step": 686 }, { "epoch": 0.4676650782845473, "grad_norm": 2.218118906021118, "learning_rate": 9.948508345708139e-06, "loss": 0.9937, "step": 687 }, { "epoch": 0.46834581347855686, "grad_norm": 2.284283399581909, "learning_rate": 9.948185567623224e-06, "loss": 1.1043, "step": 688 }, { "epoch": 0.4690265486725664, "grad_norm": 2.23660945892334, "learning_rate": 9.947861786290385e-06, "loss": 0.9515, "step": 689 }, { "epoch": 0.4697072838665759, "grad_norm": 1.9338237047195435, "learning_rate": 9.947537001775265e-06, "loss": 0.9568, "step": 690 }, { "epoch": 0.4703880190605854, "grad_norm": 2.2623801231384277, "learning_rate": 9.947211214143719e-06, "loss": 0.98, "step": 691 }, { "epoch": 0.471068754254595, "grad_norm": 2.084049940109253, "learning_rate": 9.946884423461798e-06, "loss": 0.9692, "step": 692 }, { "epoch": 0.4717494894486045, "grad_norm": 2.0684614181518555, "learning_rate": 9.946556629795758e-06, "loss": 1.063, "step": 693 }, { "epoch": 0.472430224642614, "grad_norm": 2.23626971244812, "learning_rate": 9.946227833212063e-06, "loss": 0.9914, "step": 694 }, { "epoch": 0.47311095983662355, "grad_norm": 1.991605520248413, "learning_rate": 9.945898033777372e-06, "loss": 1.0378, "step": 695 }, { "epoch": 0.47379169503063306, "grad_norm": 2.3884434700012207, "learning_rate": 9.945567231558557e-06, "loss": 1.0002, "step": 696 }, { "epoch": 0.47447243022464264, "grad_norm": 2.16017746925354, "learning_rate": 9.945235426622686e-06, "loss": 1.0608, "step": 697 }, { "epoch": 0.47515316541865216, "grad_norm": 2.0519323348999023, "learning_rate": 9.944902619037032e-06, "loss": 1.0134, "step": 698 }, { "epoch": 0.4758339006126617, "grad_norm": 2.0493483543395996, "learning_rate": 9.944568808869072e-06, "loss": 1.0742, "step": 699 }, { "epoch": 0.4765146358066712, "grad_norm": 2.0154902935028076, "learning_rate": 9.944233996186488e-06, "loss": 1.0466, "step": 700 }, { "epoch": 0.4771953710006807, "grad_norm": 2.044024705886841, "learning_rate": 9.943898181057161e-06, "loss": 1.0344, "step": 701 }, { "epoch": 0.4778761061946903, "grad_norm": 2.124537467956543, "learning_rate": 9.94356136354918e-06, "loss": 1.0126, "step": 702 }, { "epoch": 0.4785568413886998, "grad_norm": 2.2967023849487305, "learning_rate": 9.943223543730833e-06, "loss": 1.052, "step": 703 }, { "epoch": 0.4792375765827093, "grad_norm": 1.965723991394043, "learning_rate": 9.942884721670616e-06, "loss": 0.9628, "step": 704 }, { "epoch": 0.47991831177671884, "grad_norm": 2.0059001445770264, "learning_rate": 9.942544897437222e-06, "loss": 1.0178, "step": 705 }, { "epoch": 0.48059904697072836, "grad_norm": 2.0911006927490234, "learning_rate": 9.942204071099553e-06, "loss": 1.0114, "step": 706 }, { "epoch": 0.48127978216473793, "grad_norm": 2.281522750854492, "learning_rate": 9.941862242726712e-06, "loss": 0.9767, "step": 707 }, { "epoch": 0.48196051735874745, "grad_norm": 2.098243474960327, "learning_rate": 9.941519412388002e-06, "loss": 1.0048, "step": 708 }, { "epoch": 0.48264125255275697, "grad_norm": 1.9177372455596924, "learning_rate": 9.941175580152936e-06, "loss": 1.0765, "step": 709 }, { "epoch": 0.4833219877467665, "grad_norm": 2.0915207862854004, "learning_rate": 9.940830746091223e-06, "loss": 1.0937, "step": 710 }, { "epoch": 0.48400272294077606, "grad_norm": 2.1659369468688965, "learning_rate": 9.94048491027278e-06, "loss": 0.9344, "step": 711 }, { "epoch": 0.4846834581347856, "grad_norm": 1.8910703659057617, "learning_rate": 9.940138072767724e-06, "loss": 1.0704, "step": 712 }, { "epoch": 0.4853641933287951, "grad_norm": 2.3714263439178467, "learning_rate": 9.939790233646378e-06, "loss": 0.9963, "step": 713 }, { "epoch": 0.4860449285228046, "grad_norm": 2.1825780868530273, "learning_rate": 9.939441392979268e-06, "loss": 1.0074, "step": 714 }, { "epoch": 0.48672566371681414, "grad_norm": 2.209138870239258, "learning_rate": 9.939091550837118e-06, "loss": 0.9932, "step": 715 }, { "epoch": 0.4874063989108237, "grad_norm": 2.470627546310425, "learning_rate": 9.93874070729086e-06, "loss": 0.9372, "step": 716 }, { "epoch": 0.48808713410483323, "grad_norm": 2.1208560466766357, "learning_rate": 9.93838886241163e-06, "loss": 0.9964, "step": 717 }, { "epoch": 0.48876786929884275, "grad_norm": 2.108321189880371, "learning_rate": 9.938036016270763e-06, "loss": 0.9434, "step": 718 }, { "epoch": 0.48944860449285227, "grad_norm": 2.1022746562957764, "learning_rate": 9.937682168939797e-06, "loss": 1.0074, "step": 719 }, { "epoch": 0.4901293396868618, "grad_norm": 2.2204394340515137, "learning_rate": 9.937327320490478e-06, "loss": 0.999, "step": 720 }, { "epoch": 0.49081007488087136, "grad_norm": 1.9774335622787476, "learning_rate": 9.936971470994751e-06, "loss": 0.9862, "step": 721 }, { "epoch": 0.4914908100748809, "grad_norm": 2.4150078296661377, "learning_rate": 9.936614620524764e-06, "loss": 0.9339, "step": 722 }, { "epoch": 0.4921715452688904, "grad_norm": 2.022026300430298, "learning_rate": 9.936256769152869e-06, "loss": 1.046, "step": 723 }, { "epoch": 0.4928522804628999, "grad_norm": 2.10003924369812, "learning_rate": 9.93589791695162e-06, "loss": 0.9462, "step": 724 }, { "epoch": 0.49353301565690944, "grad_norm": 2.0601696968078613, "learning_rate": 9.935538063993775e-06, "loss": 0.9637, "step": 725 }, { "epoch": 0.494213750850919, "grad_norm": 1.8991214036941528, "learning_rate": 9.935177210352294e-06, "loss": 1.0001, "step": 726 }, { "epoch": 0.4948944860449285, "grad_norm": 2.2516989707946777, "learning_rate": 9.934815356100342e-06, "loss": 0.9517, "step": 727 }, { "epoch": 0.49557522123893805, "grad_norm": 2.0969841480255127, "learning_rate": 9.934452501311283e-06, "loss": 1.0652, "step": 728 }, { "epoch": 0.49625595643294756, "grad_norm": 1.8424376249313354, "learning_rate": 9.934088646058686e-06, "loss": 1.0221, "step": 729 }, { "epoch": 0.49693669162695714, "grad_norm": 2.2182631492614746, "learning_rate": 9.933723790416326e-06, "loss": 1.0713, "step": 730 }, { "epoch": 0.49761742682096666, "grad_norm": 1.969964861869812, "learning_rate": 9.933357934458173e-06, "loss": 1.0053, "step": 731 }, { "epoch": 0.4982981620149762, "grad_norm": 1.9586201906204224, "learning_rate": 9.932991078258409e-06, "loss": 1.0532, "step": 732 }, { "epoch": 0.4989788972089857, "grad_norm": 2.0291876792907715, "learning_rate": 9.932623221891412e-06, "loss": 1.0144, "step": 733 }, { "epoch": 0.4996596324029952, "grad_norm": 2.1616761684417725, "learning_rate": 9.932254365431766e-06, "loss": 1.0099, "step": 734 }, { "epoch": 0.5003403675970047, "grad_norm": 1.9439470767974854, "learning_rate": 9.931884508954254e-06, "loss": 1.062, "step": 735 }, { "epoch": 0.5010211027910143, "grad_norm": 2.092010736465454, "learning_rate": 9.931513652533871e-06, "loss": 1.0137, "step": 736 }, { "epoch": 0.5017018379850239, "grad_norm": 2.1703498363494873, "learning_rate": 9.931141796245803e-06, "loss": 1.05, "step": 737 }, { "epoch": 0.5023825731790333, "grad_norm": 1.9449833631515503, "learning_rate": 9.930768940165445e-06, "loss": 1.0186, "step": 738 }, { "epoch": 0.5030633083730429, "grad_norm": 2.195995330810547, "learning_rate": 9.930395084368395e-06, "loss": 1.0698, "step": 739 }, { "epoch": 0.5037440435670524, "grad_norm": 2.1040611267089844, "learning_rate": 9.930020228930452e-06, "loss": 1.0403, "step": 740 }, { "epoch": 0.504424778761062, "grad_norm": 2.056734085083008, "learning_rate": 9.929644373927617e-06, "loss": 0.9953, "step": 741 }, { "epoch": 0.5051055139550715, "grad_norm": 1.9095091819763184, "learning_rate": 9.929267519436097e-06, "loss": 0.8965, "step": 742 }, { "epoch": 0.505786249149081, "grad_norm": 2.4425923824310303, "learning_rate": 9.928889665532298e-06, "loss": 1.0105, "step": 743 }, { "epoch": 0.5064669843430906, "grad_norm": 2.2205615043640137, "learning_rate": 9.928510812292832e-06, "loss": 0.9765, "step": 744 }, { "epoch": 0.5071477195371, "grad_norm": 2.1632914543151855, "learning_rate": 9.92813095979451e-06, "loss": 1.1146, "step": 745 }, { "epoch": 0.5078284547311096, "grad_norm": 2.3374507427215576, "learning_rate": 9.927750108114347e-06, "loss": 0.9564, "step": 746 }, { "epoch": 0.5085091899251192, "grad_norm": 2.081418752670288, "learning_rate": 9.927368257329561e-06, "loss": 0.9638, "step": 747 }, { "epoch": 0.5091899251191286, "grad_norm": 2.2042195796966553, "learning_rate": 9.926985407517574e-06, "loss": 0.9744, "step": 748 }, { "epoch": 0.5098706603131382, "grad_norm": 1.8470373153686523, "learning_rate": 9.926601558756009e-06, "loss": 1.0049, "step": 749 }, { "epoch": 0.5105513955071477, "grad_norm": 2.1753313541412354, "learning_rate": 9.92621671112269e-06, "loss": 1.0059, "step": 750 }, { "epoch": 0.5112321307011573, "grad_norm": 2.2256672382354736, "learning_rate": 9.925830864695645e-06, "loss": 1.0095, "step": 751 }, { "epoch": 0.5119128658951668, "grad_norm": 2.185298442840576, "learning_rate": 9.925444019553107e-06, "loss": 1.0522, "step": 752 }, { "epoch": 0.5125936010891763, "grad_norm": 2.1719937324523926, "learning_rate": 9.925056175773506e-06, "loss": 1.0257, "step": 753 }, { "epoch": 0.5132743362831859, "grad_norm": 2.097170114517212, "learning_rate": 9.92466733343548e-06, "loss": 1.0088, "step": 754 }, { "epoch": 0.5139550714771953, "grad_norm": 2.039109468460083, "learning_rate": 9.924277492617868e-06, "loss": 1.0464, "step": 755 }, { "epoch": 0.5146358066712049, "grad_norm": 2.135955333709717, "learning_rate": 9.923886653399706e-06, "loss": 0.9872, "step": 756 }, { "epoch": 0.5153165418652145, "grad_norm": 1.932708740234375, "learning_rate": 9.923494815860242e-06, "loss": 0.9766, "step": 757 }, { "epoch": 0.5159972770592239, "grad_norm": 2.183417797088623, "learning_rate": 9.923101980078917e-06, "loss": 1.0003, "step": 758 }, { "epoch": 0.5166780122532335, "grad_norm": 2.1959333419799805, "learning_rate": 9.922708146135382e-06, "loss": 1.0606, "step": 759 }, { "epoch": 0.517358747447243, "grad_norm": 2.097686529159546, "learning_rate": 9.922313314109485e-06, "loss": 0.9125, "step": 760 }, { "epoch": 0.5180394826412525, "grad_norm": 1.9695998430252075, "learning_rate": 9.92191748408128e-06, "loss": 1.111, "step": 761 }, { "epoch": 0.5187202178352621, "grad_norm": 2.281296491622925, "learning_rate": 9.921520656131021e-06, "loss": 1.0417, "step": 762 }, { "epoch": 0.5194009530292716, "grad_norm": 2.0339159965515137, "learning_rate": 9.921122830339165e-06, "loss": 0.9846, "step": 763 }, { "epoch": 0.5200816882232812, "grad_norm": 2.1752119064331055, "learning_rate": 9.920724006786372e-06, "loss": 1.1112, "step": 764 }, { "epoch": 0.5207624234172906, "grad_norm": 2.0858325958251953, "learning_rate": 9.920324185553504e-06, "loss": 1.0034, "step": 765 }, { "epoch": 0.5214431586113002, "grad_norm": 2.1243622303009033, "learning_rate": 9.919923366721623e-06, "loss": 1.0047, "step": 766 }, { "epoch": 0.5221238938053098, "grad_norm": 1.8855478763580322, "learning_rate": 9.919521550371998e-06, "loss": 1.0307, "step": 767 }, { "epoch": 0.5228046289993192, "grad_norm": 2.1192171573638916, "learning_rate": 9.919118736586096e-06, "loss": 1.0424, "step": 768 }, { "epoch": 0.5234853641933288, "grad_norm": 2.208878993988037, "learning_rate": 9.918714925445588e-06, "loss": 0.9946, "step": 769 }, { "epoch": 0.5241660993873384, "grad_norm": 1.8173844814300537, "learning_rate": 9.918310117032345e-06, "loss": 1.0605, "step": 770 }, { "epoch": 0.5248468345813478, "grad_norm": 1.8859155178070068, "learning_rate": 9.917904311428446e-06, "loss": 1.0248, "step": 771 }, { "epoch": 0.5255275697753574, "grad_norm": 1.8432461023330688, "learning_rate": 9.917497508716165e-06, "loss": 1.0355, "step": 772 }, { "epoch": 0.5262083049693669, "grad_norm": 2.088691473007202, "learning_rate": 9.917089708977985e-06, "loss": 1.0001, "step": 773 }, { "epoch": 0.5268890401633765, "grad_norm": 2.151447296142578, "learning_rate": 9.916680912296585e-06, "loss": 1.0069, "step": 774 }, { "epoch": 0.527569775357386, "grad_norm": 1.9200519323349, "learning_rate": 9.916271118754849e-06, "loss": 1.0225, "step": 775 }, { "epoch": 0.5282505105513955, "grad_norm": 1.9136662483215332, "learning_rate": 9.915860328435863e-06, "loss": 1.1394, "step": 776 }, { "epoch": 0.5289312457454051, "grad_norm": 2.1392388343811035, "learning_rate": 9.915448541422916e-06, "loss": 0.9898, "step": 777 }, { "epoch": 0.5296119809394145, "grad_norm": 2.236759662628174, "learning_rate": 9.915035757799498e-06, "loss": 1.1034, "step": 778 }, { "epoch": 0.5302927161334241, "grad_norm": 2.445589303970337, "learning_rate": 9.914621977649301e-06, "loss": 0.9537, "step": 779 }, { "epoch": 0.5309734513274337, "grad_norm": 2.1899478435516357, "learning_rate": 9.914207201056218e-06, "loss": 0.9901, "step": 780 }, { "epoch": 0.5316541865214431, "grad_norm": 1.9291949272155762, "learning_rate": 9.913791428104347e-06, "loss": 1.0199, "step": 781 }, { "epoch": 0.5323349217154527, "grad_norm": 2.0287134647369385, "learning_rate": 9.913374658877983e-06, "loss": 0.9611, "step": 782 }, { "epoch": 0.5330156569094622, "grad_norm": 2.2155582904815674, "learning_rate": 9.912956893461631e-06, "loss": 1.0107, "step": 783 }, { "epoch": 0.5336963921034718, "grad_norm": 1.7858301401138306, "learning_rate": 9.91253813193999e-06, "loss": 1.0357, "step": 784 }, { "epoch": 0.5343771272974813, "grad_norm": 1.8565458059310913, "learning_rate": 9.912118374397965e-06, "loss": 1.1297, "step": 785 }, { "epoch": 0.5350578624914908, "grad_norm": 2.1250271797180176, "learning_rate": 9.911697620920662e-06, "loss": 1.0026, "step": 786 }, { "epoch": 0.5357385976855004, "grad_norm": 2.074768304824829, "learning_rate": 9.911275871593389e-06, "loss": 0.9598, "step": 787 }, { "epoch": 0.5364193328795098, "grad_norm": 2.1049795150756836, "learning_rate": 9.910853126501657e-06, "loss": 0.9802, "step": 788 }, { "epoch": 0.5371000680735194, "grad_norm": 1.946449875831604, "learning_rate": 9.910429385731174e-06, "loss": 1.0274, "step": 789 }, { "epoch": 0.537780803267529, "grad_norm": 2.343855857849121, "learning_rate": 9.91000464936786e-06, "loss": 0.9164, "step": 790 }, { "epoch": 0.5384615384615384, "grad_norm": 2.2239623069763184, "learning_rate": 9.909578917497827e-06, "loss": 0.9448, "step": 791 }, { "epoch": 0.539142273655548, "grad_norm": 2.188370704650879, "learning_rate": 9.909152190207391e-06, "loss": 0.9587, "step": 792 }, { "epoch": 0.5398230088495575, "grad_norm": 2.1053695678710938, "learning_rate": 9.908724467583076e-06, "loss": 0.9827, "step": 793 }, { "epoch": 0.540503744043567, "grad_norm": 2.4236180782318115, "learning_rate": 9.908295749711597e-06, "loss": 0.9352, "step": 794 }, { "epoch": 0.5411844792375766, "grad_norm": 2.147263526916504, "learning_rate": 9.90786603667988e-06, "loss": 1.0287, "step": 795 }, { "epoch": 0.5418652144315861, "grad_norm": 2.025390863418579, "learning_rate": 9.90743532857505e-06, "loss": 0.9635, "step": 796 }, { "epoch": 0.5425459496255957, "grad_norm": 2.1258325576782227, "learning_rate": 9.907003625484433e-06, "loss": 1.0345, "step": 797 }, { "epoch": 0.5432266848196051, "grad_norm": 2.009967803955078, "learning_rate": 9.906570927495555e-06, "loss": 1.0111, "step": 798 }, { "epoch": 0.5439074200136147, "grad_norm": 2.2707183361053467, "learning_rate": 9.906137234696151e-06, "loss": 1.0387, "step": 799 }, { "epoch": 0.5445881552076243, "grad_norm": 2.140623092651367, "learning_rate": 9.905702547174147e-06, "loss": 1.033, "step": 800 }, { "epoch": 0.5452688904016337, "grad_norm": 2.0303051471710205, "learning_rate": 9.905266865017678e-06, "loss": 1.0133, "step": 801 }, { "epoch": 0.5459496255956433, "grad_norm": 1.9455711841583252, "learning_rate": 9.90483018831508e-06, "loss": 1.0591, "step": 802 }, { "epoch": 0.5466303607896529, "grad_norm": 1.855560541152954, "learning_rate": 9.90439251715489e-06, "loss": 0.9528, "step": 803 }, { "epoch": 0.5473110959836623, "grad_norm": 2.3779516220092773, "learning_rate": 9.903953851625843e-06, "loss": 0.94, "step": 804 }, { "epoch": 0.5479918311776719, "grad_norm": 2.4215002059936523, "learning_rate": 9.903514191816882e-06, "loss": 1.0401, "step": 805 }, { "epoch": 0.5486725663716814, "grad_norm": 2.1087074279785156, "learning_rate": 9.903073537817146e-06, "loss": 1.0648, "step": 806 }, { "epoch": 0.549353301565691, "grad_norm": 2.245439052581787, "learning_rate": 9.902631889715976e-06, "loss": 1.0493, "step": 807 }, { "epoch": 0.5500340367597005, "grad_norm": 2.2325809001922607, "learning_rate": 9.902189247602923e-06, "loss": 1.043, "step": 808 }, { "epoch": 0.55071477195371, "grad_norm": 2.200836420059204, "learning_rate": 9.901745611567727e-06, "loss": 1.0175, "step": 809 }, { "epoch": 0.5513955071477196, "grad_norm": 2.2036516666412354, "learning_rate": 9.901300981700338e-06, "loss": 1.0203, "step": 810 }, { "epoch": 0.552076242341729, "grad_norm": 1.8634361028671265, "learning_rate": 9.900855358090906e-06, "loss": 1.012, "step": 811 }, { "epoch": 0.5527569775357386, "grad_norm": 2.1547598838806152, "learning_rate": 9.90040874082978e-06, "loss": 0.9773, "step": 812 }, { "epoch": 0.5534377127297482, "grad_norm": 2.0819661617279053, "learning_rate": 9.899961130007511e-06, "loss": 0.9976, "step": 813 }, { "epoch": 0.5541184479237576, "grad_norm": 2.1049740314483643, "learning_rate": 9.899512525714855e-06, "loss": 0.9691, "step": 814 }, { "epoch": 0.5547991831177672, "grad_norm": 2.3320095539093018, "learning_rate": 9.899062928042764e-06, "loss": 0.9904, "step": 815 }, { "epoch": 0.5554799183117767, "grad_norm": 2.404789686203003, "learning_rate": 9.898612337082396e-06, "loss": 0.9621, "step": 816 }, { "epoch": 0.5561606535057863, "grad_norm": 1.9673351049423218, "learning_rate": 9.898160752925109e-06, "loss": 0.9428, "step": 817 }, { "epoch": 0.5568413886997958, "grad_norm": 2.2687606811523438, "learning_rate": 9.89770817566246e-06, "loss": 0.9989, "step": 818 }, { "epoch": 0.5575221238938053, "grad_norm": 2.2767789363861084, "learning_rate": 9.897254605386211e-06, "loss": 0.9839, "step": 819 }, { "epoch": 0.5582028590878149, "grad_norm": 2.163177490234375, "learning_rate": 9.896800042188324e-06, "loss": 1.045, "step": 820 }, { "epoch": 0.5588835942818243, "grad_norm": 2.3543949127197266, "learning_rate": 9.89634448616096e-06, "loss": 0.9944, "step": 821 }, { "epoch": 0.5595643294758339, "grad_norm": 2.0946552753448486, "learning_rate": 9.895887937396484e-06, "loss": 0.9519, "step": 822 }, { "epoch": 0.5602450646698435, "grad_norm": 2.014224052429199, "learning_rate": 9.895430395987461e-06, "loss": 1.0192, "step": 823 }, { "epoch": 0.5609257998638529, "grad_norm": 2.3315207958221436, "learning_rate": 9.894971862026662e-06, "loss": 1.0577, "step": 824 }, { "epoch": 0.5616065350578625, "grad_norm": 2.0830237865448, "learning_rate": 9.894512335607052e-06, "loss": 1.053, "step": 825 }, { "epoch": 0.562287270251872, "grad_norm": 2.0801024436950684, "learning_rate": 9.894051816821798e-06, "loss": 1.0347, "step": 826 }, { "epoch": 0.5629680054458815, "grad_norm": 2.046891689300537, "learning_rate": 9.893590305764273e-06, "loss": 0.9737, "step": 827 }, { "epoch": 0.5636487406398911, "grad_norm": 1.9009387493133545, "learning_rate": 9.893127802528048e-06, "loss": 1.021, "step": 828 }, { "epoch": 0.5643294758339006, "grad_norm": 2.176443338394165, "learning_rate": 9.8926643072069e-06, "loss": 1.0633, "step": 829 }, { "epoch": 0.5650102110279102, "grad_norm": 1.863447666168213, "learning_rate": 9.892199819894795e-06, "loss": 1.0426, "step": 830 }, { "epoch": 0.5656909462219196, "grad_norm": 2.024886131286621, "learning_rate": 9.891734340685913e-06, "loss": 1.0812, "step": 831 }, { "epoch": 0.5663716814159292, "grad_norm": 1.9269508123397827, "learning_rate": 9.89126786967463e-06, "loss": 1.0408, "step": 832 }, { "epoch": 0.5670524166099388, "grad_norm": 2.0297322273254395, "learning_rate": 9.890800406955523e-06, "loss": 0.9975, "step": 833 }, { "epoch": 0.5677331518039482, "grad_norm": 2.0565907955169678, "learning_rate": 9.890331952623369e-06, "loss": 0.9725, "step": 834 }, { "epoch": 0.5684138869979578, "grad_norm": 2.0387747287750244, "learning_rate": 9.88986250677315e-06, "loss": 0.9975, "step": 835 }, { "epoch": 0.5690946221919673, "grad_norm": 2.0997819900512695, "learning_rate": 9.889392069500046e-06, "loss": 0.9986, "step": 836 }, { "epoch": 0.5697753573859768, "grad_norm": 2.0372061729431152, "learning_rate": 9.888920640899435e-06, "loss": 0.9585, "step": 837 }, { "epoch": 0.5704560925799864, "grad_norm": 1.88876473903656, "learning_rate": 9.888448221066905e-06, "loss": 1.014, "step": 838 }, { "epoch": 0.5711368277739959, "grad_norm": 2.247715711593628, "learning_rate": 9.887974810098233e-06, "loss": 0.991, "step": 839 }, { "epoch": 0.5718175629680055, "grad_norm": 2.631568193435669, "learning_rate": 9.88750040808941e-06, "loss": 1.0159, "step": 840 }, { "epoch": 0.572498298162015, "grad_norm": 2.0849649906158447, "learning_rate": 9.887025015136616e-06, "loss": 1.0374, "step": 841 }, { "epoch": 0.5731790333560245, "grad_norm": 1.9696688652038574, "learning_rate": 9.886548631336242e-06, "loss": 0.9767, "step": 842 }, { "epoch": 0.5738597685500341, "grad_norm": 2.011814832687378, "learning_rate": 9.886071256784871e-06, "loss": 0.9255, "step": 843 }, { "epoch": 0.5745405037440435, "grad_norm": 2.0289926528930664, "learning_rate": 9.885592891579291e-06, "loss": 1.0694, "step": 844 }, { "epoch": 0.5752212389380531, "grad_norm": 2.0369811058044434, "learning_rate": 9.885113535816495e-06, "loss": 1.0325, "step": 845 }, { "epoch": 0.5759019741320627, "grad_norm": 1.7820242643356323, "learning_rate": 9.884633189593669e-06, "loss": 1.0857, "step": 846 }, { "epoch": 0.5765827093260721, "grad_norm": 2.105360507965088, "learning_rate": 9.884151853008204e-06, "loss": 1.0098, "step": 847 }, { "epoch": 0.5772634445200817, "grad_norm": 2.164134979248047, "learning_rate": 9.883669526157694e-06, "loss": 1.0202, "step": 848 }, { "epoch": 0.5779441797140912, "grad_norm": 2.0617918968200684, "learning_rate": 9.883186209139928e-06, "loss": 1.1001, "step": 849 }, { "epoch": 0.5786249149081008, "grad_norm": 2.314378023147583, "learning_rate": 9.8827019020529e-06, "loss": 1.0281, "step": 850 }, { "epoch": 0.5793056501021103, "grad_norm": 2.0787973403930664, "learning_rate": 9.882216604994803e-06, "loss": 1.057, "step": 851 }, { "epoch": 0.5799863852961198, "grad_norm": 2.113546371459961, "learning_rate": 9.88173031806403e-06, "loss": 0.9995, "step": 852 }, { "epoch": 0.5806671204901294, "grad_norm": 1.909450650215149, "learning_rate": 9.881243041359181e-06, "loss": 1.0456, "step": 853 }, { "epoch": 0.5813478556841388, "grad_norm": 1.9040272235870361, "learning_rate": 9.880754774979047e-06, "loss": 1.0154, "step": 854 }, { "epoch": 0.5820285908781484, "grad_norm": 2.1600277423858643, "learning_rate": 9.880265519022627e-06, "loss": 0.9181, "step": 855 }, { "epoch": 0.582709326072158, "grad_norm": 2.2992641925811768, "learning_rate": 9.879775273589116e-06, "loss": 1.0462, "step": 856 }, { "epoch": 0.5833900612661674, "grad_norm": 2.2328498363494873, "learning_rate": 9.879284038777912e-06, "loss": 1.0802, "step": 857 }, { "epoch": 0.584070796460177, "grad_norm": 2.3833749294281006, "learning_rate": 9.878791814688613e-06, "loss": 1.0307, "step": 858 }, { "epoch": 0.5847515316541865, "grad_norm": 2.110724449157715, "learning_rate": 9.878298601421019e-06, "loss": 1.0619, "step": 859 }, { "epoch": 0.585432266848196, "grad_norm": 2.041015386581421, "learning_rate": 9.87780439907513e-06, "loss": 1.0113, "step": 860 }, { "epoch": 0.5861130020422056, "grad_norm": 1.973246455192566, "learning_rate": 9.877309207751142e-06, "loss": 1.0111, "step": 861 }, { "epoch": 0.5867937372362151, "grad_norm": 2.1879873275756836, "learning_rate": 9.87681302754946e-06, "loss": 1.0598, "step": 862 }, { "epoch": 0.5874744724302247, "grad_norm": 2.0003929138183594, "learning_rate": 9.876315858570681e-06, "loss": 1.0225, "step": 863 }, { "epoch": 0.5881552076242341, "grad_norm": 1.9444280862808228, "learning_rate": 9.87581770091561e-06, "loss": 1.0835, "step": 864 }, { "epoch": 0.5888359428182437, "grad_norm": 2.126615524291992, "learning_rate": 9.875318554685244e-06, "loss": 0.9805, "step": 865 }, { "epoch": 0.5895166780122533, "grad_norm": 1.9044896364212036, "learning_rate": 9.874818419980792e-06, "loss": 1.0259, "step": 866 }, { "epoch": 0.5901974132062627, "grad_norm": 2.212880849838257, "learning_rate": 9.87431729690365e-06, "loss": 0.9866, "step": 867 }, { "epoch": 0.5908781484002723, "grad_norm": 2.1117568016052246, "learning_rate": 9.873815185555425e-06, "loss": 0.997, "step": 868 }, { "epoch": 0.5915588835942818, "grad_norm": 2.1203904151916504, "learning_rate": 9.873312086037919e-06, "loss": 1.0103, "step": 869 }, { "epoch": 0.5922396187882913, "grad_norm": 1.8935147523880005, "learning_rate": 9.872807998453135e-06, "loss": 1.0173, "step": 870 }, { "epoch": 0.5929203539823009, "grad_norm": 2.0899088382720947, "learning_rate": 9.872302922903278e-06, "loss": 1.0107, "step": 871 }, { "epoch": 0.5936010891763104, "grad_norm": 2.0902256965637207, "learning_rate": 9.871796859490752e-06, "loss": 1.044, "step": 872 }, { "epoch": 0.59428182437032, "grad_norm": 2.042379140853882, "learning_rate": 9.871289808318164e-06, "loss": 1.0174, "step": 873 }, { "epoch": 0.5949625595643295, "grad_norm": 2.124852418899536, "learning_rate": 9.870781769488316e-06, "loss": 0.9653, "step": 874 }, { "epoch": 0.595643294758339, "grad_norm": 1.9659476280212402, "learning_rate": 9.870272743104215e-06, "loss": 1.0111, "step": 875 }, { "epoch": 0.5963240299523486, "grad_norm": 2.4136805534362793, "learning_rate": 9.869762729269065e-06, "loss": 0.957, "step": 876 }, { "epoch": 0.597004765146358, "grad_norm": 1.8963017463684082, "learning_rate": 9.869251728086274e-06, "loss": 1.0256, "step": 877 }, { "epoch": 0.5976855003403676, "grad_norm": 2.2273483276367188, "learning_rate": 9.868739739659443e-06, "loss": 0.975, "step": 878 }, { "epoch": 0.5983662355343772, "grad_norm": 2.02884840965271, "learning_rate": 9.868226764092385e-06, "loss": 1.0014, "step": 879 }, { "epoch": 0.5990469707283866, "grad_norm": 2.2161176204681396, "learning_rate": 9.8677128014891e-06, "loss": 1.0309, "step": 880 }, { "epoch": 0.5997277059223962, "grad_norm": 2.2122037410736084, "learning_rate": 9.867197851953798e-06, "loss": 1.0299, "step": 881 }, { "epoch": 0.6004084411164057, "grad_norm": 2.1859867572784424, "learning_rate": 9.866681915590884e-06, "loss": 0.9892, "step": 882 }, { "epoch": 0.6010891763104153, "grad_norm": 1.9332598447799683, "learning_rate": 9.866164992504963e-06, "loss": 1.0159, "step": 883 }, { "epoch": 0.6017699115044248, "grad_norm": 2.0825541019439697, "learning_rate": 9.865647082800843e-06, "loss": 1.1033, "step": 884 }, { "epoch": 0.6024506466984343, "grad_norm": 2.116215229034424, "learning_rate": 9.865128186583531e-06, "loss": 0.9703, "step": 885 }, { "epoch": 0.6031313818924439, "grad_norm": 2.053077459335327, "learning_rate": 9.864608303958235e-06, "loss": 0.9358, "step": 886 }, { "epoch": 0.6038121170864533, "grad_norm": 2.1073946952819824, "learning_rate": 9.864087435030356e-06, "loss": 0.9811, "step": 887 }, { "epoch": 0.6044928522804629, "grad_norm": 2.2537200450897217, "learning_rate": 9.863565579905506e-06, "loss": 1.0113, "step": 888 }, { "epoch": 0.6051735874744725, "grad_norm": 1.9974656105041504, "learning_rate": 9.86304273868949e-06, "loss": 0.9891, "step": 889 }, { "epoch": 0.6058543226684819, "grad_norm": 2.070492744445801, "learning_rate": 9.862518911488312e-06, "loss": 1.0053, "step": 890 }, { "epoch": 0.6065350578624915, "grad_norm": 2.2489025592803955, "learning_rate": 9.861994098408183e-06, "loss": 0.9845, "step": 891 }, { "epoch": 0.607215793056501, "grad_norm": 1.9066351652145386, "learning_rate": 9.861468299555505e-06, "loss": 1.0154, "step": 892 }, { "epoch": 0.6078965282505105, "grad_norm": 1.7672909498214722, "learning_rate": 9.860941515036886e-06, "loss": 1.0188, "step": 893 }, { "epoch": 0.6085772634445201, "grad_norm": 2.0062382221221924, "learning_rate": 9.86041374495913e-06, "loss": 0.9568, "step": 894 }, { "epoch": 0.6092579986385296, "grad_norm": 1.9574869871139526, "learning_rate": 9.859884989429247e-06, "loss": 1.1065, "step": 895 }, { "epoch": 0.6099387338325392, "grad_norm": 2.1054179668426514, "learning_rate": 9.859355248554437e-06, "loss": 1.1107, "step": 896 }, { "epoch": 0.6106194690265486, "grad_norm": 2.1551449298858643, "learning_rate": 9.858824522442109e-06, "loss": 0.9417, "step": 897 }, { "epoch": 0.6113002042205582, "grad_norm": 2.2311851978302, "learning_rate": 9.858292811199867e-06, "loss": 0.9682, "step": 898 }, { "epoch": 0.6119809394145678, "grad_norm": 2.35722017288208, "learning_rate": 9.857760114935518e-06, "loss": 0.9678, "step": 899 }, { "epoch": 0.6126616746085772, "grad_norm": 2.0305426120758057, "learning_rate": 9.857226433757062e-06, "loss": 1.0077, "step": 900 }, { "epoch": 0.6133424098025868, "grad_norm": 2.104762554168701, "learning_rate": 9.856691767772705e-06, "loss": 1.0491, "step": 901 }, { "epoch": 0.6140231449965963, "grad_norm": 1.7781306505203247, "learning_rate": 9.856156117090855e-06, "loss": 0.9736, "step": 902 }, { "epoch": 0.6147038801906058, "grad_norm": 2.158886194229126, "learning_rate": 9.855619481820109e-06, "loss": 0.9443, "step": 903 }, { "epoch": 0.6153846153846154, "grad_norm": 2.4453773498535156, "learning_rate": 9.855081862069276e-06, "loss": 1.029, "step": 904 }, { "epoch": 0.6160653505786249, "grad_norm": 2.1982290744781494, "learning_rate": 9.854543257947353e-06, "loss": 1.0135, "step": 905 }, { "epoch": 0.6167460857726345, "grad_norm": 1.8882032632827759, "learning_rate": 9.854003669563548e-06, "loss": 0.999, "step": 906 }, { "epoch": 0.617426820966644, "grad_norm": 1.9557271003723145, "learning_rate": 9.853463097027258e-06, "loss": 0.9361, "step": 907 }, { "epoch": 0.6181075561606535, "grad_norm": 1.916608214378357, "learning_rate": 9.852921540448089e-06, "loss": 0.9885, "step": 908 }, { "epoch": 0.6187882913546631, "grad_norm": 2.1028027534484863, "learning_rate": 9.852378999935839e-06, "loss": 0.9805, "step": 909 }, { "epoch": 0.6194690265486725, "grad_norm": 2.2402353286743164, "learning_rate": 9.85183547560051e-06, "loss": 0.9431, "step": 910 }, { "epoch": 0.6201497617426821, "grad_norm": 2.0959224700927734, "learning_rate": 9.851290967552298e-06, "loss": 0.9921, "step": 911 }, { "epoch": 0.6208304969366917, "grad_norm": 1.9594484567642212, "learning_rate": 9.850745475901606e-06, "loss": 0.971, "step": 912 }, { "epoch": 0.6215112321307011, "grad_norm": 2.0870275497436523, "learning_rate": 9.850199000759033e-06, "loss": 0.9235, "step": 913 }, { "epoch": 0.6221919673247107, "grad_norm": 1.8302019834518433, "learning_rate": 9.849651542235377e-06, "loss": 0.9893, "step": 914 }, { "epoch": 0.6228727025187202, "grad_norm": 2.0264673233032227, "learning_rate": 9.849103100441635e-06, "loss": 1.0546, "step": 915 }, { "epoch": 0.6235534377127298, "grad_norm": 1.8171327114105225, "learning_rate": 9.848553675489005e-06, "loss": 1.0117, "step": 916 }, { "epoch": 0.6242341729067393, "grad_norm": 2.0011043548583984, "learning_rate": 9.84800326748888e-06, "loss": 1.0006, "step": 917 }, { "epoch": 0.6249149081007488, "grad_norm": 1.9801586866378784, "learning_rate": 9.847451876552858e-06, "loss": 0.9446, "step": 918 }, { "epoch": 0.6255956432947584, "grad_norm": 2.1103732585906982, "learning_rate": 9.846899502792735e-06, "loss": 0.9247, "step": 919 }, { "epoch": 0.6262763784887678, "grad_norm": 2.022899866104126, "learning_rate": 9.846346146320503e-06, "loss": 1.0584, "step": 920 }, { "epoch": 0.6269571136827774, "grad_norm": 2.223039150238037, "learning_rate": 9.845791807248357e-06, "loss": 0.9483, "step": 921 }, { "epoch": 0.627637848876787, "grad_norm": 1.933185338973999, "learning_rate": 9.845236485688689e-06, "loss": 0.9497, "step": 922 }, { "epoch": 0.6283185840707964, "grad_norm": 1.8697402477264404, "learning_rate": 9.84468018175409e-06, "loss": 1.0126, "step": 923 }, { "epoch": 0.628999319264806, "grad_norm": 2.056222438812256, "learning_rate": 9.84412289555735e-06, "loss": 1.039, "step": 924 }, { "epoch": 0.6296800544588155, "grad_norm": 1.9688512086868286, "learning_rate": 9.843564627211463e-06, "loss": 1.1314, "step": 925 }, { "epoch": 0.630360789652825, "grad_norm": 1.9337358474731445, "learning_rate": 9.843005376829616e-06, "loss": 1.0794, "step": 926 }, { "epoch": 0.6310415248468346, "grad_norm": 1.904363989830017, "learning_rate": 9.842445144525197e-06, "loss": 0.9561, "step": 927 }, { "epoch": 0.6317222600408441, "grad_norm": 1.9757875204086304, "learning_rate": 9.841883930411792e-06, "loss": 0.9323, "step": 928 }, { "epoch": 0.6324029952348537, "grad_norm": 2.1980464458465576, "learning_rate": 9.841321734603194e-06, "loss": 1.0458, "step": 929 }, { "epoch": 0.6330837304288631, "grad_norm": 2.3132193088531494, "learning_rate": 9.84075855721338e-06, "loss": 0.9472, "step": 930 }, { "epoch": 0.6337644656228727, "grad_norm": 1.8223919868469238, "learning_rate": 9.84019439835654e-06, "loss": 0.9629, "step": 931 }, { "epoch": 0.6344452008168823, "grad_norm": 1.878920316696167, "learning_rate": 9.839629258147056e-06, "loss": 0.9593, "step": 932 }, { "epoch": 0.6351259360108917, "grad_norm": 1.9294874668121338, "learning_rate": 9.83906313669951e-06, "loss": 0.9863, "step": 933 }, { "epoch": 0.6358066712049013, "grad_norm": 2.097735643386841, "learning_rate": 9.838496034128687e-06, "loss": 1.0516, "step": 934 }, { "epoch": 0.6364874063989108, "grad_norm": 1.9689089059829712, "learning_rate": 9.837927950549564e-06, "loss": 1.0217, "step": 935 }, { "epoch": 0.6371681415929203, "grad_norm": 1.903524398803711, "learning_rate": 9.837358886077319e-06, "loss": 0.9844, "step": 936 }, { "epoch": 0.6378488767869299, "grad_norm": 2.1407346725463867, "learning_rate": 9.836788840827334e-06, "loss": 0.9393, "step": 937 }, { "epoch": 0.6385296119809394, "grad_norm": 1.8094035387039185, "learning_rate": 9.836217814915183e-06, "loss": 0.9643, "step": 938 }, { "epoch": 0.639210347174949, "grad_norm": 2.196821451187134, "learning_rate": 9.835645808456644e-06, "loss": 0.999, "step": 939 }, { "epoch": 0.6398910823689585, "grad_norm": 2.0297114849090576, "learning_rate": 9.835072821567691e-06, "loss": 1.0107, "step": 940 }, { "epoch": 0.640571817562968, "grad_norm": 2.19671630859375, "learning_rate": 9.834498854364497e-06, "loss": 0.9624, "step": 941 }, { "epoch": 0.6412525527569776, "grad_norm": 2.0576066970825195, "learning_rate": 9.833923906963434e-06, "loss": 0.9659, "step": 942 }, { "epoch": 0.641933287950987, "grad_norm": 2.2586987018585205, "learning_rate": 9.833347979481075e-06, "loss": 0.9002, "step": 943 }, { "epoch": 0.6426140231449966, "grad_norm": 1.8855748176574707, "learning_rate": 9.832771072034188e-06, "loss": 1.0096, "step": 944 }, { "epoch": 0.6432947583390062, "grad_norm": 2.0172250270843506, "learning_rate": 9.83219318473974e-06, "loss": 1.0188, "step": 945 }, { "epoch": 0.6439754935330156, "grad_norm": 1.982285737991333, "learning_rate": 9.831614317714902e-06, "loss": 1.02, "step": 946 }, { "epoch": 0.6446562287270252, "grad_norm": 1.8745343685150146, "learning_rate": 9.831034471077034e-06, "loss": 1.0659, "step": 947 }, { "epoch": 0.6453369639210347, "grad_norm": 2.2572829723358154, "learning_rate": 9.830453644943707e-06, "loss": 0.9401, "step": 948 }, { "epoch": 0.6460176991150443, "grad_norm": 1.8179370164871216, "learning_rate": 9.82987183943268e-06, "loss": 0.9865, "step": 949 }, { "epoch": 0.6466984343090538, "grad_norm": 1.8629419803619385, "learning_rate": 9.829289054661912e-06, "loss": 1.0646, "step": 950 }, { "epoch": 0.6473791695030633, "grad_norm": 2.0831334590911865, "learning_rate": 9.828705290749566e-06, "loss": 0.958, "step": 951 }, { "epoch": 0.6480599046970729, "grad_norm": 1.969333529472351, "learning_rate": 9.828120547814005e-06, "loss": 0.9802, "step": 952 }, { "epoch": 0.6487406398910823, "grad_norm": 2.1967544555664062, "learning_rate": 9.827534825973779e-06, "loss": 0.932, "step": 953 }, { "epoch": 0.6494213750850919, "grad_norm": 2.0201406478881836, "learning_rate": 9.826948125347644e-06, "loss": 1.0604, "step": 954 }, { "epoch": 0.6501021102791015, "grad_norm": 2.0390334129333496, "learning_rate": 9.826360446054559e-06, "loss": 0.9527, "step": 955 }, { "epoch": 0.6507828454731109, "grad_norm": 2.0419867038726807, "learning_rate": 9.825771788213672e-06, "loss": 1.1028, "step": 956 }, { "epoch": 0.6514635806671205, "grad_norm": 1.8261662721633911, "learning_rate": 9.825182151944334e-06, "loss": 1.0249, "step": 957 }, { "epoch": 0.65214431586113, "grad_norm": 1.87009859085083, "learning_rate": 9.824591537366097e-06, "loss": 1.0028, "step": 958 }, { "epoch": 0.6528250510551395, "grad_norm": 2.019423723220825, "learning_rate": 9.823999944598705e-06, "loss": 0.9997, "step": 959 }, { "epoch": 0.6535057862491491, "grad_norm": 1.9268293380737305, "learning_rate": 9.823407373762108e-06, "loss": 1.0695, "step": 960 }, { "epoch": 0.6541865214431586, "grad_norm": 1.975738525390625, "learning_rate": 9.822813824976446e-06, "loss": 1.0571, "step": 961 }, { "epoch": 0.6548672566371682, "grad_norm": 1.907774806022644, "learning_rate": 9.822219298362063e-06, "loss": 0.9488, "step": 962 }, { "epoch": 0.6555479918311776, "grad_norm": 2.21335506439209, "learning_rate": 9.8216237940395e-06, "loss": 1.0092, "step": 963 }, { "epoch": 0.6562287270251872, "grad_norm": 2.0519251823425293, "learning_rate": 9.821027312129495e-06, "loss": 0.9605, "step": 964 }, { "epoch": 0.6569094622191968, "grad_norm": 1.9179717302322388, "learning_rate": 9.820429852752986e-06, "loss": 0.971, "step": 965 }, { "epoch": 0.6575901974132062, "grad_norm": 2.39516282081604, "learning_rate": 9.81983141603111e-06, "loss": 0.9058, "step": 966 }, { "epoch": 0.6582709326072158, "grad_norm": 2.239435911178589, "learning_rate": 9.819232002085197e-06, "loss": 0.9645, "step": 967 }, { "epoch": 0.6589516678012253, "grad_norm": 1.7742937803268433, "learning_rate": 9.818631611036779e-06, "loss": 1.0369, "step": 968 }, { "epoch": 0.6596324029952348, "grad_norm": 2.0485687255859375, "learning_rate": 9.818030243007588e-06, "loss": 0.9574, "step": 969 }, { "epoch": 0.6603131381892444, "grad_norm": 1.8793060779571533, "learning_rate": 9.81742789811955e-06, "loss": 1.0175, "step": 970 }, { "epoch": 0.6609938733832539, "grad_norm": 2.092682361602783, "learning_rate": 9.816824576494791e-06, "loss": 1.0079, "step": 971 }, { "epoch": 0.6616746085772635, "grad_norm": 2.008579969406128, "learning_rate": 9.816220278255636e-06, "loss": 0.9577, "step": 972 }, { "epoch": 0.6623553437712729, "grad_norm": 2.226522445678711, "learning_rate": 9.815615003524606e-06, "loss": 0.9735, "step": 973 }, { "epoch": 0.6630360789652825, "grad_norm": 2.1677777767181396, "learning_rate": 9.81500875242442e-06, "loss": 0.9994, "step": 974 }, { "epoch": 0.6637168141592921, "grad_norm": 1.9020992517471313, "learning_rate": 9.814401525077999e-06, "loss": 0.994, "step": 975 }, { "epoch": 0.6643975493533015, "grad_norm": 2.058400869369507, "learning_rate": 9.813793321608454e-06, "loss": 0.9928, "step": 976 }, { "epoch": 0.6650782845473111, "grad_norm": 2.0310609340667725, "learning_rate": 9.813184142139102e-06, "loss": 0.9992, "step": 977 }, { "epoch": 0.6657590197413207, "grad_norm": 1.8896145820617676, "learning_rate": 9.812573986793454e-06, "loss": 1.0258, "step": 978 }, { "epoch": 0.6664397549353301, "grad_norm": 2.0021724700927734, "learning_rate": 9.811962855695219e-06, "loss": 0.9499, "step": 979 }, { "epoch": 0.6671204901293397, "grad_norm": 1.8172354698181152, "learning_rate": 9.811350748968305e-06, "loss": 0.9871, "step": 980 }, { "epoch": 0.6678012253233492, "grad_norm": 1.9719759225845337, "learning_rate": 9.810737666736816e-06, "loss": 1.0452, "step": 981 }, { "epoch": 0.6684819605173588, "grad_norm": 2.1356942653656006, "learning_rate": 9.810123609125054e-06, "loss": 1.0195, "step": 982 }, { "epoch": 0.6691626957113683, "grad_norm": 2.3134918212890625, "learning_rate": 9.809508576257522e-06, "loss": 0.9989, "step": 983 }, { "epoch": 0.6698434309053778, "grad_norm": 2.005204916000366, "learning_rate": 9.808892568258917e-06, "loss": 1.006, "step": 984 }, { "epoch": 0.6705241660993874, "grad_norm": 1.7025779485702515, "learning_rate": 9.808275585254134e-06, "loss": 1.033, "step": 985 }, { "epoch": 0.6712049012933968, "grad_norm": 1.9830129146575928, "learning_rate": 9.80765762736827e-06, "loss": 0.973, "step": 986 }, { "epoch": 0.6718856364874064, "grad_norm": 2.0268917083740234, "learning_rate": 9.807038694726613e-06, "loss": 1.07, "step": 987 }, { "epoch": 0.672566371681416, "grad_norm": 1.9642794132232666, "learning_rate": 9.806418787454653e-06, "loss": 0.9963, "step": 988 }, { "epoch": 0.6732471068754254, "grad_norm": 2.023327112197876, "learning_rate": 9.805797905678078e-06, "loss": 1.0198, "step": 989 }, { "epoch": 0.673927842069435, "grad_norm": 2.0306105613708496, "learning_rate": 9.80517604952277e-06, "loss": 0.9789, "step": 990 }, { "epoch": 0.6746085772634445, "grad_norm": 2.1501636505126953, "learning_rate": 9.804553219114812e-06, "loss": 0.997, "step": 991 }, { "epoch": 0.675289312457454, "grad_norm": 1.8645073175430298, "learning_rate": 9.803929414580485e-06, "loss": 1.0677, "step": 992 }, { "epoch": 0.6759700476514636, "grad_norm": 1.9965192079544067, "learning_rate": 9.80330463604626e-06, "loss": 0.8879, "step": 993 }, { "epoch": 0.6766507828454731, "grad_norm": 1.7422516345977783, "learning_rate": 9.80267888363882e-06, "loss": 0.9505, "step": 994 }, { "epoch": 0.6773315180394827, "grad_norm": 1.97728431224823, "learning_rate": 9.802052157485027e-06, "loss": 1.0067, "step": 995 }, { "epoch": 0.6780122532334921, "grad_norm": 2.085315704345703, "learning_rate": 9.801424457711957e-06, "loss": 0.9553, "step": 996 }, { "epoch": 0.6786929884275017, "grad_norm": 2.076984405517578, "learning_rate": 9.800795784446874e-06, "loss": 1.0102, "step": 997 }, { "epoch": 0.6793737236215113, "grad_norm": 2.1054790019989014, "learning_rate": 9.800166137817245e-06, "loss": 1.0084, "step": 998 }, { "epoch": 0.6800544588155207, "grad_norm": 2.2606310844421387, "learning_rate": 9.799535517950727e-06, "loss": 0.9697, "step": 999 }, { "epoch": 0.6807351940095303, "grad_norm": 2.4961092472076416, "learning_rate": 9.798903924975181e-06, "loss": 0.9224, "step": 1000 }, { "epoch": 0.6814159292035398, "grad_norm": 2.174959182739258, "learning_rate": 9.79827135901866e-06, "loss": 0.9873, "step": 1001 }, { "epoch": 0.6820966643975493, "grad_norm": 2.1144044399261475, "learning_rate": 9.797637820209422e-06, "loss": 0.9883, "step": 1002 }, { "epoch": 0.6827773995915589, "grad_norm": 2.104097843170166, "learning_rate": 9.797003308675914e-06, "loss": 0.9971, "step": 1003 }, { "epoch": 0.6834581347855684, "grad_norm": 2.045323371887207, "learning_rate": 9.796367824546784e-06, "loss": 0.8991, "step": 1004 }, { "epoch": 0.684138869979578, "grad_norm": 1.9488970041275024, "learning_rate": 9.795731367950878e-06, "loss": 1.0233, "step": 1005 }, { "epoch": 0.6848196051735874, "grad_norm": 1.8884408473968506, "learning_rate": 9.795093939017237e-06, "loss": 1.0642, "step": 1006 }, { "epoch": 0.685500340367597, "grad_norm": 1.88993501663208, "learning_rate": 9.794455537875101e-06, "loss": 1.0605, "step": 1007 }, { "epoch": 0.6861810755616066, "grad_norm": 2.3836774826049805, "learning_rate": 9.793816164653904e-06, "loss": 1.052, "step": 1008 }, { "epoch": 0.686861810755616, "grad_norm": 2.0053882598876953, "learning_rate": 9.793175819483283e-06, "loss": 0.8683, "step": 1009 }, { "epoch": 0.6875425459496256, "grad_norm": 1.7216533422470093, "learning_rate": 9.792534502493065e-06, "loss": 1.0352, "step": 1010 }, { "epoch": 0.6882232811436352, "grad_norm": 1.961105227470398, "learning_rate": 9.791892213813281e-06, "loss": 1.0609, "step": 1011 }, { "epoch": 0.6889040163376446, "grad_norm": 1.9839102029800415, "learning_rate": 9.79124895357415e-06, "loss": 0.9902, "step": 1012 }, { "epoch": 0.6895847515316542, "grad_norm": 1.8408719301223755, "learning_rate": 9.790604721906101e-06, "loss": 0.9944, "step": 1013 }, { "epoch": 0.6902654867256637, "grad_norm": 2.043607711791992, "learning_rate": 9.789959518939745e-06, "loss": 0.918, "step": 1014 }, { "epoch": 0.6909462219196733, "grad_norm": 2.1144766807556152, "learning_rate": 9.789313344805902e-06, "loss": 1.0674, "step": 1015 }, { "epoch": 0.6916269571136828, "grad_norm": 1.9654816389083862, "learning_rate": 9.788666199635581e-06, "loss": 1.075, "step": 1016 }, { "epoch": 0.6923076923076923, "grad_norm": 2.059591054916382, "learning_rate": 9.788018083559996e-06, "loss": 0.9206, "step": 1017 }, { "epoch": 0.6929884275017019, "grad_norm": 1.983723759651184, "learning_rate": 9.787368996710548e-06, "loss": 0.996, "step": 1018 }, { "epoch": 0.6936691626957113, "grad_norm": 2.072120189666748, "learning_rate": 9.786718939218843e-06, "loss": 0.9717, "step": 1019 }, { "epoch": 0.6943498978897209, "grad_norm": 2.1689586639404297, "learning_rate": 9.786067911216676e-06, "loss": 1.0578, "step": 1020 }, { "epoch": 0.6950306330837305, "grad_norm": 1.9417396783828735, "learning_rate": 9.785415912836049e-06, "loss": 0.9267, "step": 1021 }, { "epoch": 0.6957113682777399, "grad_norm": 1.9197709560394287, "learning_rate": 9.784762944209152e-06, "loss": 0.9475, "step": 1022 }, { "epoch": 0.6963921034717495, "grad_norm": 2.06866192817688, "learning_rate": 9.784109005468377e-06, "loss": 1.0813, "step": 1023 }, { "epoch": 0.697072838665759, "grad_norm": 2.0668625831604004, "learning_rate": 9.783454096746307e-06, "loss": 1.0173, "step": 1024 }, { "epoch": 0.6977535738597685, "grad_norm": 2.3613834381103516, "learning_rate": 9.782798218175728e-06, "loss": 0.96, "step": 1025 }, { "epoch": 0.6984343090537781, "grad_norm": 2.1265361309051514, "learning_rate": 9.78214136988962e-06, "loss": 0.9515, "step": 1026 }, { "epoch": 0.6991150442477876, "grad_norm": 2.280014991760254, "learning_rate": 9.781483552021158e-06, "loss": 1.0281, "step": 1027 }, { "epoch": 0.6997957794417972, "grad_norm": 2.109417676925659, "learning_rate": 9.780824764703715e-06, "loss": 0.9591, "step": 1028 }, { "epoch": 0.7004765146358066, "grad_norm": 2.013640880584717, "learning_rate": 9.780165008070861e-06, "loss": 0.9985, "step": 1029 }, { "epoch": 0.7011572498298162, "grad_norm": 2.184074640274048, "learning_rate": 9.779504282256363e-06, "loss": 1.0326, "step": 1030 }, { "epoch": 0.7018379850238258, "grad_norm": 2.1678836345672607, "learning_rate": 9.778842587394181e-06, "loss": 0.9832, "step": 1031 }, { "epoch": 0.7025187202178352, "grad_norm": 1.8427095413208008, "learning_rate": 9.778179923618478e-06, "loss": 1.0474, "step": 1032 }, { "epoch": 0.7031994554118448, "grad_norm": 2.1277859210968018, "learning_rate": 9.777516291063607e-06, "loss": 0.9248, "step": 1033 }, { "epoch": 0.7038801906058543, "grad_norm": 1.9393547773361206, "learning_rate": 9.77685168986412e-06, "loss": 0.9387, "step": 1034 }, { "epoch": 0.7045609257998638, "grad_norm": 2.1679580211639404, "learning_rate": 9.776186120154764e-06, "loss": 0.9571, "step": 1035 }, { "epoch": 0.7052416609938734, "grad_norm": 2.2139763832092285, "learning_rate": 9.775519582070487e-06, "loss": 1.0561, "step": 1036 }, { "epoch": 0.7059223961878829, "grad_norm": 1.936196208000183, "learning_rate": 9.774852075746428e-06, "loss": 1.0445, "step": 1037 }, { "epoch": 0.7066031313818925, "grad_norm": 2.2275445461273193, "learning_rate": 9.774183601317926e-06, "loss": 1.0049, "step": 1038 }, { "epoch": 0.7072838665759019, "grad_norm": 1.8572945594787598, "learning_rate": 9.773514158920512e-06, "loss": 0.9966, "step": 1039 }, { "epoch": 0.7079646017699115, "grad_norm": 2.0048840045928955, "learning_rate": 9.772843748689918e-06, "loss": 1.0176, "step": 1040 }, { "epoch": 0.7086453369639211, "grad_norm": 2.0624358654022217, "learning_rate": 9.772172370762067e-06, "loss": 0.9833, "step": 1041 }, { "epoch": 0.7093260721579305, "grad_norm": 2.0701427459716797, "learning_rate": 9.771500025273086e-06, "loss": 1.0871, "step": 1042 }, { "epoch": 0.7100068073519401, "grad_norm": 2.216984748840332, "learning_rate": 9.770826712359292e-06, "loss": 0.9837, "step": 1043 }, { "epoch": 0.7106875425459497, "grad_norm": 2.317779541015625, "learning_rate": 9.770152432157198e-06, "loss": 0.9494, "step": 1044 }, { "epoch": 0.7113682777399591, "grad_norm": 1.9995105266571045, "learning_rate": 9.769477184803515e-06, "loss": 1.0306, "step": 1045 }, { "epoch": 0.7120490129339687, "grad_norm": 2.0498270988464355, "learning_rate": 9.76880097043515e-06, "loss": 0.9847, "step": 1046 }, { "epoch": 0.7127297481279782, "grad_norm": 1.800195336341858, "learning_rate": 9.768123789189208e-06, "loss": 1.0033, "step": 1047 }, { "epoch": 0.7134104833219878, "grad_norm": 1.863578200340271, "learning_rate": 9.767445641202984e-06, "loss": 0.9622, "step": 1048 }, { "epoch": 0.7140912185159973, "grad_norm": 2.1897354125976562, "learning_rate": 9.766766526613978e-06, "loss": 0.9964, "step": 1049 }, { "epoch": 0.7147719537100068, "grad_norm": 2.0867583751678467, "learning_rate": 9.766086445559877e-06, "loss": 1.0287, "step": 1050 }, { "epoch": 0.7154526889040164, "grad_norm": 2.224201202392578, "learning_rate": 9.765405398178569e-06, "loss": 1.0032, "step": 1051 }, { "epoch": 0.7161334240980258, "grad_norm": 2.3352434635162354, "learning_rate": 9.764723384608137e-06, "loss": 1.0198, "step": 1052 }, { "epoch": 0.7168141592920354, "grad_norm": 1.8714039325714111, "learning_rate": 9.764040404986858e-06, "loss": 1.0521, "step": 1053 }, { "epoch": 0.717494894486045, "grad_norm": 1.9678809642791748, "learning_rate": 9.76335645945321e-06, "loss": 0.9868, "step": 1054 }, { "epoch": 0.7181756296800544, "grad_norm": 2.0302913188934326, "learning_rate": 9.762671548145862e-06, "loss": 0.9823, "step": 1055 }, { "epoch": 0.718856364874064, "grad_norm": 2.114642381668091, "learning_rate": 9.76198567120368e-06, "loss": 0.9472, "step": 1056 }, { "epoch": 0.7195371000680735, "grad_norm": 2.0317435264587402, "learning_rate": 9.761298828765723e-06, "loss": 0.904, "step": 1057 }, { "epoch": 0.720217835262083, "grad_norm": 1.7394747734069824, "learning_rate": 9.760611020971256e-06, "loss": 1.0352, "step": 1058 }, { "epoch": 0.7208985704560926, "grad_norm": 1.8554726839065552, "learning_rate": 9.759922247959725e-06, "loss": 1.0386, "step": 1059 }, { "epoch": 0.7215793056501021, "grad_norm": 2.0274014472961426, "learning_rate": 9.759232509870784e-06, "loss": 1.0408, "step": 1060 }, { "epoch": 0.7222600408441117, "grad_norm": 2.1441540718078613, "learning_rate": 9.758541806844273e-06, "loss": 1.0072, "step": 1061 }, { "epoch": 0.7229407760381211, "grad_norm": 1.761690378189087, "learning_rate": 9.75785013902024e-06, "loss": 0.9886, "step": 1062 }, { "epoch": 0.7236215112321307, "grad_norm": 1.9760303497314453, "learning_rate": 9.757157506538914e-06, "loss": 1.0406, "step": 1063 }, { "epoch": 0.7243022464261403, "grad_norm": 2.101773977279663, "learning_rate": 9.75646390954073e-06, "loss": 0.9875, "step": 1064 }, { "epoch": 0.7249829816201497, "grad_norm": 2.121441602706909, "learning_rate": 9.755769348166314e-06, "loss": 0.9054, "step": 1065 }, { "epoch": 0.7256637168141593, "grad_norm": 1.9828540086746216, "learning_rate": 9.75507382255649e-06, "loss": 0.9092, "step": 1066 }, { "epoch": 0.7263444520081688, "grad_norm": 1.8164392709732056, "learning_rate": 9.754377332852276e-06, "loss": 0.9629, "step": 1067 }, { "epoch": 0.7270251872021783, "grad_norm": 1.9683822393417358, "learning_rate": 9.753679879194887e-06, "loss": 1.0066, "step": 1068 }, { "epoch": 0.7277059223961879, "grad_norm": 2.3392934799194336, "learning_rate": 9.752981461725728e-06, "loss": 0.9733, "step": 1069 }, { "epoch": 0.7283866575901974, "grad_norm": 1.8814078569412231, "learning_rate": 9.752282080586409e-06, "loss": 0.956, "step": 1070 }, { "epoch": 0.729067392784207, "grad_norm": 1.8135275840759277, "learning_rate": 9.751581735918724e-06, "loss": 1.0477, "step": 1071 }, { "epoch": 0.7297481279782164, "grad_norm": 1.775945782661438, "learning_rate": 9.750880427864674e-06, "loss": 1.0436, "step": 1072 }, { "epoch": 0.730428863172226, "grad_norm": 1.9942443370819092, "learning_rate": 9.750178156566448e-06, "loss": 1.0566, "step": 1073 }, { "epoch": 0.7311095983662356, "grad_norm": 2.2383272647857666, "learning_rate": 9.749474922166429e-06, "loss": 0.9651, "step": 1074 }, { "epoch": 0.731790333560245, "grad_norm": 1.8200080394744873, "learning_rate": 9.748770724807201e-06, "loss": 0.9647, "step": 1075 }, { "epoch": 0.7324710687542546, "grad_norm": 1.8579457998275757, "learning_rate": 9.748065564631541e-06, "loss": 0.9721, "step": 1076 }, { "epoch": 0.7331518039482642, "grad_norm": 1.8954161405563354, "learning_rate": 9.747359441782421e-06, "loss": 1.0235, "step": 1077 }, { "epoch": 0.7338325391422736, "grad_norm": 2.20928692817688, "learning_rate": 9.746652356403004e-06, "loss": 0.9303, "step": 1078 }, { "epoch": 0.7345132743362832, "grad_norm": 2.169642448425293, "learning_rate": 9.745944308636657e-06, "loss": 0.9922, "step": 1079 }, { "epoch": 0.7351940095302927, "grad_norm": 1.9325952529907227, "learning_rate": 9.745235298626933e-06, "loss": 1.0415, "step": 1080 }, { "epoch": 0.7358747447243023, "grad_norm": 2.0772805213928223, "learning_rate": 9.744525326517587e-06, "loss": 0.932, "step": 1081 }, { "epoch": 0.7365554799183118, "grad_norm": 2.0207505226135254, "learning_rate": 9.743814392452565e-06, "loss": 0.9849, "step": 1082 }, { "epoch": 0.7372362151123213, "grad_norm": 2.0342068672180176, "learning_rate": 9.743102496576008e-06, "loss": 0.9732, "step": 1083 }, { "epoch": 0.7379169503063309, "grad_norm": 2.503340721130371, "learning_rate": 9.742389639032257e-06, "loss": 0.9983, "step": 1084 }, { "epoch": 0.7385976855003403, "grad_norm": 2.2408313751220703, "learning_rate": 9.741675819965844e-06, "loss": 1.0639, "step": 1085 }, { "epoch": 0.7392784206943499, "grad_norm": 2.240088939666748, "learning_rate": 9.740961039521492e-06, "loss": 0.9814, "step": 1086 }, { "epoch": 0.7399591558883595, "grad_norm": 1.8510591983795166, "learning_rate": 9.740245297844127e-06, "loss": 0.9595, "step": 1087 }, { "epoch": 0.7406398910823689, "grad_norm": 1.803383708000183, "learning_rate": 9.739528595078864e-06, "loss": 0.9628, "step": 1088 }, { "epoch": 0.7413206262763785, "grad_norm": 1.923726201057434, "learning_rate": 9.73881093137102e-06, "loss": 0.9669, "step": 1089 }, { "epoch": 0.742001361470388, "grad_norm": 2.310262441635132, "learning_rate": 9.738092306866092e-06, "loss": 0.9398, "step": 1090 }, { "epoch": 0.7426820966643976, "grad_norm": 2.036689043045044, "learning_rate": 9.73737272170979e-06, "loss": 1.0183, "step": 1091 }, { "epoch": 0.7433628318584071, "grad_norm": 2.2552709579467773, "learning_rate": 9.736652176048009e-06, "loss": 1.0122, "step": 1092 }, { "epoch": 0.7440435670524166, "grad_norm": 2.0515875816345215, "learning_rate": 9.735930670026839e-06, "loss": 0.9927, "step": 1093 }, { "epoch": 0.7447243022464262, "grad_norm": 1.9370959997177124, "learning_rate": 9.735208203792563e-06, "loss": 1.0163, "step": 1094 }, { "epoch": 0.7454050374404356, "grad_norm": 2.0023200511932373, "learning_rate": 9.734484777491667e-06, "loss": 0.9649, "step": 1095 }, { "epoch": 0.7460857726344452, "grad_norm": 1.8865139484405518, "learning_rate": 9.733760391270821e-06, "loss": 0.95, "step": 1096 }, { "epoch": 0.7467665078284548, "grad_norm": 2.3284385204315186, "learning_rate": 9.7330350452769e-06, "loss": 0.9962, "step": 1097 }, { "epoch": 0.7474472430224642, "grad_norm": 2.056295871734619, "learning_rate": 9.732308739656964e-06, "loss": 1.0112, "step": 1098 }, { "epoch": 0.7481279782164738, "grad_norm": 1.8181774616241455, "learning_rate": 9.731581474558272e-06, "loss": 1.0824, "step": 1099 }, { "epoch": 0.7488087134104833, "grad_norm": 1.7704414129257202, "learning_rate": 9.73085325012828e-06, "loss": 0.9628, "step": 1100 }, { "epoch": 0.7494894486044928, "grad_norm": 1.9379035234451294, "learning_rate": 9.730124066514637e-06, "loss": 1.0085, "step": 1101 }, { "epoch": 0.7501701837985024, "grad_norm": 1.7957592010498047, "learning_rate": 9.729393923865181e-06, "loss": 1.0103, "step": 1102 }, { "epoch": 0.7508509189925119, "grad_norm": 1.7376385927200317, "learning_rate": 9.728662822327952e-06, "loss": 0.9508, "step": 1103 }, { "epoch": 0.7515316541865215, "grad_norm": 2.0302765369415283, "learning_rate": 9.727930762051181e-06, "loss": 0.9886, "step": 1104 }, { "epoch": 0.7522123893805309, "grad_norm": 1.7526317834854126, "learning_rate": 9.727197743183293e-06, "loss": 1.0715, "step": 1105 }, { "epoch": 0.7528931245745405, "grad_norm": 1.9895135164260864, "learning_rate": 9.72646376587291e-06, "loss": 0.9805, "step": 1106 }, { "epoch": 0.7535738597685501, "grad_norm": 1.9061970710754395, "learning_rate": 9.725728830268843e-06, "loss": 0.9916, "step": 1107 }, { "epoch": 0.7542545949625595, "grad_norm": 1.8115593194961548, "learning_rate": 9.724992936520103e-06, "loss": 1.0176, "step": 1108 }, { "epoch": 0.7549353301565691, "grad_norm": 2.034205198287964, "learning_rate": 9.724256084775895e-06, "loss": 0.8952, "step": 1109 }, { "epoch": 0.7556160653505786, "grad_norm": 1.8998271226882935, "learning_rate": 9.72351827518561e-06, "loss": 1.0023, "step": 1110 }, { "epoch": 0.7562968005445881, "grad_norm": 2.3513503074645996, "learning_rate": 9.722779507898845e-06, "loss": 1.0228, "step": 1111 }, { "epoch": 0.7569775357385977, "grad_norm": 1.7837958335876465, "learning_rate": 9.722039783065383e-06, "loss": 1.0934, "step": 1112 }, { "epoch": 0.7576582709326072, "grad_norm": 2.0146915912628174, "learning_rate": 9.721299100835203e-06, "loss": 1.0525, "step": 1113 }, { "epoch": 0.7583390061266168, "grad_norm": 2.0698671340942383, "learning_rate": 9.720557461358482e-06, "loss": 0.9126, "step": 1114 }, { "epoch": 0.7590197413206263, "grad_norm": 2.180112361907959, "learning_rate": 9.719814864785587e-06, "loss": 1.0183, "step": 1115 }, { "epoch": 0.7597004765146358, "grad_norm": 1.9082396030426025, "learning_rate": 9.719071311267077e-06, "loss": 0.9942, "step": 1116 }, { "epoch": 0.7603812117086454, "grad_norm": 1.8638674020767212, "learning_rate": 9.71832680095371e-06, "loss": 1.0738, "step": 1117 }, { "epoch": 0.7610619469026548, "grad_norm": 1.8701839447021484, "learning_rate": 9.717581333996436e-06, "loss": 1.0321, "step": 1118 }, { "epoch": 0.7617426820966644, "grad_norm": 1.9954543113708496, "learning_rate": 9.7168349105464e-06, "loss": 0.9575, "step": 1119 }, { "epoch": 0.762423417290674, "grad_norm": 1.8240680694580078, "learning_rate": 9.716087530754937e-06, "loss": 1.0256, "step": 1120 }, { "epoch": 0.7631041524846834, "grad_norm": 2.1907520294189453, "learning_rate": 9.715339194773581e-06, "loss": 1.0505, "step": 1121 }, { "epoch": 0.763784887678693, "grad_norm": 2.165240526199341, "learning_rate": 9.714589902754058e-06, "loss": 1.0927, "step": 1122 }, { "epoch": 0.7644656228727025, "grad_norm": 1.8037995100021362, "learning_rate": 9.713839654848285e-06, "loss": 0.975, "step": 1123 }, { "epoch": 0.765146358066712, "grad_norm": 1.873745083808899, "learning_rate": 9.713088451208379e-06, "loss": 1.052, "step": 1124 }, { "epoch": 0.7658270932607216, "grad_norm": 1.6440790891647339, "learning_rate": 9.712336291986643e-06, "loss": 0.9947, "step": 1125 }, { "epoch": 0.7665078284547311, "grad_norm": 2.3042380809783936, "learning_rate": 9.711583177335579e-06, "loss": 0.874, "step": 1126 }, { "epoch": 0.7671885636487407, "grad_norm": 2.2647974491119385, "learning_rate": 9.710829107407884e-06, "loss": 0.9859, "step": 1127 }, { "epoch": 0.7678692988427501, "grad_norm": 2.1860451698303223, "learning_rate": 9.710074082356444e-06, "loss": 1.0499, "step": 1128 }, { "epoch": 0.7685500340367597, "grad_norm": 2.2163326740264893, "learning_rate": 9.70931810233434e-06, "loss": 0.9479, "step": 1129 }, { "epoch": 0.7692307692307693, "grad_norm": 2.1586079597473145, "learning_rate": 9.70856116749485e-06, "loss": 0.9434, "step": 1130 }, { "epoch": 0.7699115044247787, "grad_norm": 2.1430070400238037, "learning_rate": 9.70780327799144e-06, "loss": 1.041, "step": 1131 }, { "epoch": 0.7705922396187883, "grad_norm": 2.0469791889190674, "learning_rate": 9.707044433977775e-06, "loss": 1.0584, "step": 1132 }, { "epoch": 0.7712729748127978, "grad_norm": 2.1207222938537598, "learning_rate": 9.70628463560771e-06, "loss": 0.9806, "step": 1133 }, { "epoch": 0.7719537100068073, "grad_norm": 1.9405550956726074, "learning_rate": 9.705523883035297e-06, "loss": 0.9963, "step": 1134 }, { "epoch": 0.7726344452008169, "grad_norm": 1.9299132823944092, "learning_rate": 9.704762176414774e-06, "loss": 0.9733, "step": 1135 }, { "epoch": 0.7733151803948264, "grad_norm": 1.866877555847168, "learning_rate": 9.703999515900581e-06, "loss": 0.9743, "step": 1136 }, { "epoch": 0.773995915588836, "grad_norm": 1.9750484228134155, "learning_rate": 9.70323590164735e-06, "loss": 0.9812, "step": 1137 }, { "epoch": 0.7746766507828454, "grad_norm": 1.9593526124954224, "learning_rate": 9.702471333809899e-06, "loss": 1.0051, "step": 1138 }, { "epoch": 0.775357385976855, "grad_norm": 2.1702849864959717, "learning_rate": 9.701705812543249e-06, "loss": 0.9769, "step": 1139 }, { "epoch": 0.7760381211708646, "grad_norm": 1.9560904502868652, "learning_rate": 9.700939338002608e-06, "loss": 0.9835, "step": 1140 }, { "epoch": 0.776718856364874, "grad_norm": 2.0337820053100586, "learning_rate": 9.700171910343381e-06, "loss": 0.8682, "step": 1141 }, { "epoch": 0.7773995915588836, "grad_norm": 2.200451135635376, "learning_rate": 9.699403529721162e-06, "loss": 1.0041, "step": 1142 }, { "epoch": 0.7780803267528931, "grad_norm": 2.142317771911621, "learning_rate": 9.698634196291742e-06, "loss": 1.0255, "step": 1143 }, { "epoch": 0.7787610619469026, "grad_norm": 1.9389939308166504, "learning_rate": 9.697863910211103e-06, "loss": 0.9444, "step": 1144 }, { "epoch": 0.7794417971409122, "grad_norm": 1.8195466995239258, "learning_rate": 9.697092671635421e-06, "loss": 1.0009, "step": 1145 }, { "epoch": 0.7801225323349217, "grad_norm": 1.8000355958938599, "learning_rate": 9.69632048072107e-06, "loss": 1.0075, "step": 1146 }, { "epoch": 0.7808032675289313, "grad_norm": 2.007988691329956, "learning_rate": 9.695547337624603e-06, "loss": 1.0149, "step": 1147 }, { "epoch": 0.7814840027229408, "grad_norm": 2.1690845489501953, "learning_rate": 9.694773242502782e-06, "loss": 0.9957, "step": 1148 }, { "epoch": 0.7821647379169503, "grad_norm": 2.179372787475586, "learning_rate": 9.693998195512553e-06, "loss": 0.9626, "step": 1149 }, { "epoch": 0.7828454731109599, "grad_norm": 2.2538018226623535, "learning_rate": 9.693222196811059e-06, "loss": 0.9093, "step": 1150 }, { "epoch": 0.7835262083049693, "grad_norm": 1.637227177619934, "learning_rate": 9.692445246555633e-06, "loss": 1.0575, "step": 1151 }, { "epoch": 0.7842069434989789, "grad_norm": 2.1023497581481934, "learning_rate": 9.691667344903802e-06, "loss": 1.0646, "step": 1152 }, { "epoch": 0.7848876786929885, "grad_norm": 2.0469958782196045, "learning_rate": 9.690888492013287e-06, "loss": 0.9765, "step": 1153 }, { "epoch": 0.7855684138869979, "grad_norm": 1.966726541519165, "learning_rate": 9.690108688042001e-06, "loss": 0.9991, "step": 1154 }, { "epoch": 0.7862491490810075, "grad_norm": 1.9538718461990356, "learning_rate": 9.689327933148047e-06, "loss": 0.9749, "step": 1155 }, { "epoch": 0.786929884275017, "grad_norm": 1.845763921737671, "learning_rate": 9.688546227489727e-06, "loss": 1.0262, "step": 1156 }, { "epoch": 0.7876106194690266, "grad_norm": 2.018038272857666, "learning_rate": 9.687763571225531e-06, "loss": 1.0, "step": 1157 }, { "epoch": 0.7882913546630361, "grad_norm": 1.9591482877731323, "learning_rate": 9.686979964514143e-06, "loss": 1.0213, "step": 1158 }, { "epoch": 0.7889720898570456, "grad_norm": 2.0700228214263916, "learning_rate": 9.686195407514441e-06, "loss": 0.9872, "step": 1159 }, { "epoch": 0.7896528250510552, "grad_norm": 2.313756227493286, "learning_rate": 9.685409900385491e-06, "loss": 0.9532, "step": 1160 }, { "epoch": 0.7903335602450646, "grad_norm": 1.7188661098480225, "learning_rate": 9.684623443286558e-06, "loss": 1.0543, "step": 1161 }, { "epoch": 0.7910142954390742, "grad_norm": 1.91647469997406, "learning_rate": 9.683836036377098e-06, "loss": 1.0047, "step": 1162 }, { "epoch": 0.7916950306330838, "grad_norm": 2.016676425933838, "learning_rate": 9.683047679816755e-06, "loss": 0.91, "step": 1163 }, { "epoch": 0.7923757658270932, "grad_norm": 2.4188413619995117, "learning_rate": 9.68225837376537e-06, "loss": 1.0405, "step": 1164 }, { "epoch": 0.7930565010211028, "grad_norm": 2.2385613918304443, "learning_rate": 9.681468118382978e-06, "loss": 0.904, "step": 1165 }, { "epoch": 0.7937372362151123, "grad_norm": 1.8813036680221558, "learning_rate": 9.680676913829799e-06, "loss": 0.8981, "step": 1166 }, { "epoch": 0.7944179714091218, "grad_norm": 1.7871785163879395, "learning_rate": 9.679884760266253e-06, "loss": 0.8557, "step": 1167 }, { "epoch": 0.7950987066031314, "grad_norm": 2.032212495803833, "learning_rate": 9.67909165785295e-06, "loss": 0.9951, "step": 1168 }, { "epoch": 0.7957794417971409, "grad_norm": 2.110983371734619, "learning_rate": 9.67829760675069e-06, "loss": 1.0611, "step": 1169 }, { "epoch": 0.7964601769911505, "grad_norm": 2.0880839824676514, "learning_rate": 9.677502607120471e-06, "loss": 0.9644, "step": 1170 }, { "epoch": 0.7971409121851599, "grad_norm": 2.181886672973633, "learning_rate": 9.676706659123475e-06, "loss": 0.9016, "step": 1171 }, { "epoch": 0.7978216473791695, "grad_norm": 1.8697818517684937, "learning_rate": 9.675909762921084e-06, "loss": 1.0457, "step": 1172 }, { "epoch": 0.7985023825731791, "grad_norm": 2.0684797763824463, "learning_rate": 9.67511191867487e-06, "loss": 1.031, "step": 1173 }, { "epoch": 0.7991831177671885, "grad_norm": 1.8423869609832764, "learning_rate": 9.674313126546596e-06, "loss": 1.0236, "step": 1174 }, { "epoch": 0.7998638529611981, "grad_norm": 2.0528109073638916, "learning_rate": 9.673513386698215e-06, "loss": 0.9981, "step": 1175 }, { "epoch": 0.8005445881552076, "grad_norm": 2.075442314147949, "learning_rate": 9.672712699291876e-06, "loss": 0.9852, "step": 1176 }, { "epoch": 0.8012253233492171, "grad_norm": 1.7277793884277344, "learning_rate": 9.67191106448992e-06, "loss": 1.0926, "step": 1177 }, { "epoch": 0.8019060585432267, "grad_norm": 1.9054745435714722, "learning_rate": 9.67110848245488e-06, "loss": 0.9111, "step": 1178 }, { "epoch": 0.8025867937372362, "grad_norm": 1.8388168811798096, "learning_rate": 9.670304953349476e-06, "loss": 0.9811, "step": 1179 }, { "epoch": 0.8032675289312458, "grad_norm": 1.6849392652511597, "learning_rate": 9.669500477336628e-06, "loss": 1.0166, "step": 1180 }, { "epoch": 0.8039482641252553, "grad_norm": 2.044428825378418, "learning_rate": 9.668695054579443e-06, "loss": 0.916, "step": 1181 }, { "epoch": 0.8046289993192648, "grad_norm": 1.973457932472229, "learning_rate": 9.66788868524122e-06, "loss": 0.9893, "step": 1182 }, { "epoch": 0.8053097345132744, "grad_norm": 2.060741901397705, "learning_rate": 9.66708136948545e-06, "loss": 0.9378, "step": 1183 }, { "epoch": 0.8059904697072838, "grad_norm": 1.8219025135040283, "learning_rate": 9.666273107475822e-06, "loss": 1.0639, "step": 1184 }, { "epoch": 0.8066712049012934, "grad_norm": 1.898919939994812, "learning_rate": 9.665463899376203e-06, "loss": 0.993, "step": 1185 }, { "epoch": 0.807351940095303, "grad_norm": 1.786133050918579, "learning_rate": 9.66465374535067e-06, "loss": 0.9985, "step": 1186 }, { "epoch": 0.8080326752893124, "grad_norm": 2.3456356525421143, "learning_rate": 9.663842645563474e-06, "loss": 0.9638, "step": 1187 }, { "epoch": 0.808713410483322, "grad_norm": 2.121288776397705, "learning_rate": 9.663030600179073e-06, "loss": 1.0083, "step": 1188 }, { "epoch": 0.8093941456773315, "grad_norm": 1.7468533515930176, "learning_rate": 9.662217609362106e-06, "loss": 0.9185, "step": 1189 }, { "epoch": 0.810074880871341, "grad_norm": 1.9983463287353516, "learning_rate": 9.661403673277408e-06, "loss": 0.987, "step": 1190 }, { "epoch": 0.8107556160653506, "grad_norm": 2.057610273361206, "learning_rate": 9.660588792090003e-06, "loss": 1.0156, "step": 1191 }, { "epoch": 0.8114363512593601, "grad_norm": 1.8458786010742188, "learning_rate": 9.659772965965113e-06, "loss": 0.9635, "step": 1192 }, { "epoch": 0.8121170864533697, "grad_norm": 2.0066146850585938, "learning_rate": 9.658956195068144e-06, "loss": 1.0556, "step": 1193 }, { "epoch": 0.8127978216473791, "grad_norm": 1.8319350481033325, "learning_rate": 9.658138479564697e-06, "loss": 1.0264, "step": 1194 }, { "epoch": 0.8134785568413887, "grad_norm": 1.7851649522781372, "learning_rate": 9.657319819620566e-06, "loss": 1.0154, "step": 1195 }, { "epoch": 0.8141592920353983, "grad_norm": 1.9888805150985718, "learning_rate": 9.656500215401734e-06, "loss": 1.0319, "step": 1196 }, { "epoch": 0.8148400272294077, "grad_norm": 2.1308255195617676, "learning_rate": 9.655679667074378e-06, "loss": 0.999, "step": 1197 }, { "epoch": 0.8155207624234173, "grad_norm": 1.8434817790985107, "learning_rate": 9.654858174804862e-06, "loss": 0.9789, "step": 1198 }, { "epoch": 0.8162014976174268, "grad_norm": 1.9482849836349487, "learning_rate": 9.654035738759745e-06, "loss": 0.9338, "step": 1199 }, { "epoch": 0.8168822328114363, "grad_norm": 1.9978809356689453, "learning_rate": 9.653212359105776e-06, "loss": 0.9705, "step": 1200 }, { "epoch": 0.8175629680054459, "grad_norm": 1.7859097719192505, "learning_rate": 9.652388036009896e-06, "loss": 0.9929, "step": 1201 }, { "epoch": 0.8182437031994554, "grad_norm": 1.980744481086731, "learning_rate": 9.65156276963924e-06, "loss": 0.966, "step": 1202 }, { "epoch": 0.818924438393465, "grad_norm": 1.7868183851242065, "learning_rate": 9.650736560161127e-06, "loss": 1.0228, "step": 1203 }, { "epoch": 0.8196051735874744, "grad_norm": 2.008242130279541, "learning_rate": 9.649909407743074e-06, "loss": 0.9722, "step": 1204 }, { "epoch": 0.820285908781484, "grad_norm": 2.0242483615875244, "learning_rate": 9.649081312552786e-06, "loss": 0.9995, "step": 1205 }, { "epoch": 0.8209666439754936, "grad_norm": 1.9556878805160522, "learning_rate": 9.648252274758158e-06, "loss": 1.0135, "step": 1206 }, { "epoch": 0.821647379169503, "grad_norm": 1.8694723844528198, "learning_rate": 9.647422294527283e-06, "loss": 0.9508, "step": 1207 }, { "epoch": 0.8223281143635126, "grad_norm": 1.923060417175293, "learning_rate": 9.646591372028435e-06, "loss": 0.9533, "step": 1208 }, { "epoch": 0.8230088495575221, "grad_norm": 2.128546714782715, "learning_rate": 9.645759507430088e-06, "loss": 0.8997, "step": 1209 }, { "epoch": 0.8236895847515316, "grad_norm": 2.0903728008270264, "learning_rate": 9.644926700900898e-06, "loss": 1.0456, "step": 1210 }, { "epoch": 0.8243703199455412, "grad_norm": 2.1653668880462646, "learning_rate": 9.644092952609724e-06, "loss": 0.969, "step": 1211 }, { "epoch": 0.8250510551395507, "grad_norm": 1.9795252084732056, "learning_rate": 9.643258262725602e-06, "loss": 1.009, "step": 1212 }, { "epoch": 0.8257317903335603, "grad_norm": 1.9295529127120972, "learning_rate": 9.64242263141777e-06, "loss": 1.042, "step": 1213 }, { "epoch": 0.8264125255275698, "grad_norm": 1.8612538576126099, "learning_rate": 9.641586058855652e-06, "loss": 1.0073, "step": 1214 }, { "epoch": 0.8270932607215793, "grad_norm": 1.8008559942245483, "learning_rate": 9.640748545208862e-06, "loss": 1.0196, "step": 1215 }, { "epoch": 0.8277739959155889, "grad_norm": 2.2710275650024414, "learning_rate": 9.639910090647211e-06, "loss": 0.9712, "step": 1216 }, { "epoch": 0.8284547311095983, "grad_norm": 1.9838342666625977, "learning_rate": 9.639070695340692e-06, "loss": 1.0904, "step": 1217 }, { "epoch": 0.8291354663036079, "grad_norm": 1.8577924966812134, "learning_rate": 9.638230359459493e-06, "loss": 0.9895, "step": 1218 }, { "epoch": 0.8298162014976175, "grad_norm": 1.8991799354553223, "learning_rate": 9.637389083173996e-06, "loss": 0.9895, "step": 1219 }, { "epoch": 0.8304969366916269, "grad_norm": 2.0174179077148438, "learning_rate": 9.636546866654766e-06, "loss": 1.002, "step": 1220 }, { "epoch": 0.8311776718856365, "grad_norm": 1.7698297500610352, "learning_rate": 9.635703710072566e-06, "loss": 1.0361, "step": 1221 }, { "epoch": 0.831858407079646, "grad_norm": 2.01356840133667, "learning_rate": 9.634859613598346e-06, "loss": 0.9376, "step": 1222 }, { "epoch": 0.8325391422736556, "grad_norm": 1.8312395811080933, "learning_rate": 9.634014577403247e-06, "loss": 1.0595, "step": 1223 }, { "epoch": 0.8332198774676651, "grad_norm": 1.9650626182556152, "learning_rate": 9.6331686016586e-06, "loss": 1.0047, "step": 1224 }, { "epoch": 0.8339006126616746, "grad_norm": 2.2749361991882324, "learning_rate": 9.632321686535928e-06, "loss": 0.9524, "step": 1225 }, { "epoch": 0.8345813478556842, "grad_norm": 1.8616303205490112, "learning_rate": 9.631473832206944e-06, "loss": 0.9743, "step": 1226 }, { "epoch": 0.8352620830496936, "grad_norm": 1.8674901723861694, "learning_rate": 9.63062503884355e-06, "loss": 0.9424, "step": 1227 }, { "epoch": 0.8359428182437032, "grad_norm": 2.008355140686035, "learning_rate": 9.629775306617838e-06, "loss": 0.9491, "step": 1228 }, { "epoch": 0.8366235534377128, "grad_norm": 1.9097756147384644, "learning_rate": 9.628924635702095e-06, "loss": 1.0086, "step": 1229 }, { "epoch": 0.8373042886317222, "grad_norm": 2.023232936859131, "learning_rate": 9.628073026268794e-06, "loss": 1.0432, "step": 1230 }, { "epoch": 0.8379850238257318, "grad_norm": 1.7646604776382446, "learning_rate": 9.627220478490597e-06, "loss": 1.0184, "step": 1231 }, { "epoch": 0.8386657590197413, "grad_norm": 1.96335768699646, "learning_rate": 9.626366992540363e-06, "loss": 1.0015, "step": 1232 }, { "epoch": 0.8393464942137508, "grad_norm": 1.688720941543579, "learning_rate": 9.625512568591133e-06, "loss": 1.0303, "step": 1233 }, { "epoch": 0.8400272294077604, "grad_norm": 2.0872151851654053, "learning_rate": 9.624657206816144e-06, "loss": 1.0064, "step": 1234 }, { "epoch": 0.8407079646017699, "grad_norm": 1.8344061374664307, "learning_rate": 9.623800907388821e-06, "loss": 0.9625, "step": 1235 }, { "epoch": 0.8413886997957795, "grad_norm": 2.076206684112549, "learning_rate": 9.622943670482777e-06, "loss": 0.9163, "step": 1236 }, { "epoch": 0.8420694349897889, "grad_norm": 1.8274973630905151, "learning_rate": 9.622085496271821e-06, "loss": 0.9795, "step": 1237 }, { "epoch": 0.8427501701837985, "grad_norm": 1.965605616569519, "learning_rate": 9.621226384929945e-06, "loss": 0.9913, "step": 1238 }, { "epoch": 0.8434309053778081, "grad_norm": 1.8534419536590576, "learning_rate": 9.620366336631338e-06, "loss": 1.0238, "step": 1239 }, { "epoch": 0.8441116405718175, "grad_norm": 2.141986846923828, "learning_rate": 9.619505351550373e-06, "loss": 0.9804, "step": 1240 }, { "epoch": 0.8447923757658271, "grad_norm": 1.8165063858032227, "learning_rate": 9.618643429861615e-06, "loss": 0.9257, "step": 1241 }, { "epoch": 0.8454731109598366, "grad_norm": 1.9480124711990356, "learning_rate": 9.617780571739821e-06, "loss": 1.0765, "step": 1242 }, { "epoch": 0.8461538461538461, "grad_norm": 1.9952715635299683, "learning_rate": 9.616916777359933e-06, "loss": 0.8992, "step": 1243 }, { "epoch": 0.8468345813478557, "grad_norm": 1.7492924928665161, "learning_rate": 9.61605204689709e-06, "loss": 1.0674, "step": 1244 }, { "epoch": 0.8475153165418652, "grad_norm": 1.656075358390808, "learning_rate": 9.615186380526613e-06, "loss": 1.0548, "step": 1245 }, { "epoch": 0.8481960517358748, "grad_norm": 2.086073398590088, "learning_rate": 9.61431977842402e-06, "loss": 0.962, "step": 1246 }, { "epoch": 0.8488767869298843, "grad_norm": 1.8386855125427246, "learning_rate": 9.613452240765011e-06, "loss": 0.8943, "step": 1247 }, { "epoch": 0.8495575221238938, "grad_norm": 1.6711719036102295, "learning_rate": 9.612583767725483e-06, "loss": 0.9428, "step": 1248 }, { "epoch": 0.8502382573179034, "grad_norm": 1.8944822549819946, "learning_rate": 9.611714359481518e-06, "loss": 0.9129, "step": 1249 }, { "epoch": 0.8509189925119128, "grad_norm": 1.8434607982635498, "learning_rate": 9.610844016209388e-06, "loss": 0.9716, "step": 1250 }, { "epoch": 0.8515997277059224, "grad_norm": 2.021721839904785, "learning_rate": 9.60997273808556e-06, "loss": 1.0317, "step": 1251 }, { "epoch": 0.852280462899932, "grad_norm": 1.9780349731445312, "learning_rate": 9.609100525286684e-06, "loss": 0.9591, "step": 1252 }, { "epoch": 0.8529611980939414, "grad_norm": 1.9885460138320923, "learning_rate": 9.608227377989599e-06, "loss": 0.9336, "step": 1253 }, { "epoch": 0.853641933287951, "grad_norm": 1.5049078464508057, "learning_rate": 9.607353296371338e-06, "loss": 1.0914, "step": 1254 }, { "epoch": 0.8543226684819605, "grad_norm": 1.6526128053665161, "learning_rate": 9.606478280609124e-06, "loss": 1.075, "step": 1255 }, { "epoch": 0.85500340367597, "grad_norm": 1.9795904159545898, "learning_rate": 9.605602330880363e-06, "loss": 0.9411, "step": 1256 }, { "epoch": 0.8556841388699796, "grad_norm": 2.0720596313476562, "learning_rate": 9.604725447362657e-06, "loss": 1.0149, "step": 1257 }, { "epoch": 0.8563648740639891, "grad_norm": 2.3009371757507324, "learning_rate": 9.603847630233795e-06, "loss": 0.9837, "step": 1258 }, { "epoch": 0.8570456092579987, "grad_norm": 2.1871142387390137, "learning_rate": 9.602968879671754e-06, "loss": 0.9564, "step": 1259 }, { "epoch": 0.8577263444520081, "grad_norm": 2.014641523361206, "learning_rate": 9.602089195854701e-06, "loss": 0.9464, "step": 1260 }, { "epoch": 0.8584070796460177, "grad_norm": 1.8033595085144043, "learning_rate": 9.601208578960995e-06, "loss": 0.9622, "step": 1261 }, { "epoch": 0.8590878148400273, "grad_norm": 1.7728910446166992, "learning_rate": 9.600327029169176e-06, "loss": 1.0542, "step": 1262 }, { "epoch": 0.8597685500340367, "grad_norm": 2.0163841247558594, "learning_rate": 9.599444546657985e-06, "loss": 1.0377, "step": 1263 }, { "epoch": 0.8604492852280463, "grad_norm": 2.191502094268799, "learning_rate": 9.598561131606341e-06, "loss": 1.0193, "step": 1264 }, { "epoch": 0.8611300204220558, "grad_norm": 1.9334722757339478, "learning_rate": 9.59767678419336e-06, "loss": 1.0068, "step": 1265 }, { "epoch": 0.8618107556160653, "grad_norm": 1.5662777423858643, "learning_rate": 9.596791504598345e-06, "loss": 1.0782, "step": 1266 }, { "epoch": 0.8624914908100749, "grad_norm": 1.8948779106140137, "learning_rate": 9.595905293000785e-06, "loss": 1.0315, "step": 1267 }, { "epoch": 0.8631722260040844, "grad_norm": 1.7471779584884644, "learning_rate": 9.59501814958036e-06, "loss": 1.0063, "step": 1268 }, { "epoch": 0.863852961198094, "grad_norm": 2.088643789291382, "learning_rate": 9.594130074516938e-06, "loss": 0.9694, "step": 1269 }, { "epoch": 0.8645336963921034, "grad_norm": 1.9209201335906982, "learning_rate": 9.59324106799058e-06, "loss": 0.9265, "step": 1270 }, { "epoch": 0.865214431586113, "grad_norm": 2.1277999877929688, "learning_rate": 9.592351130181528e-06, "loss": 1.0077, "step": 1271 }, { "epoch": 0.8658951667801226, "grad_norm": 1.7477760314941406, "learning_rate": 9.591460261270223e-06, "loss": 0.9385, "step": 1272 }, { "epoch": 0.866575901974132, "grad_norm": 1.8639060258865356, "learning_rate": 9.590568461437285e-06, "loss": 0.9888, "step": 1273 }, { "epoch": 0.8672566371681416, "grad_norm": 2.074686050415039, "learning_rate": 9.58967573086353e-06, "loss": 0.8696, "step": 1274 }, { "epoch": 0.8679373723621511, "grad_norm": 1.9664256572723389, "learning_rate": 9.588782069729956e-06, "loss": 0.9999, "step": 1275 }, { "epoch": 0.8686181075561606, "grad_norm": 1.9032307863235474, "learning_rate": 9.587887478217756e-06, "loss": 0.9476, "step": 1276 }, { "epoch": 0.8692988427501702, "grad_norm": 1.7639293670654297, "learning_rate": 9.58699195650831e-06, "loss": 0.9818, "step": 1277 }, { "epoch": 0.8699795779441797, "grad_norm": 2.089792251586914, "learning_rate": 9.586095504783184e-06, "loss": 0.9988, "step": 1278 }, { "epoch": 0.8706603131381893, "grad_norm": 1.9147918224334717, "learning_rate": 9.585198123224134e-06, "loss": 0.9895, "step": 1279 }, { "epoch": 0.8713410483321987, "grad_norm": 1.9342272281646729, "learning_rate": 9.584299812013105e-06, "loss": 0.917, "step": 1280 }, { "epoch": 0.8720217835262083, "grad_norm": 1.7299555540084839, "learning_rate": 9.583400571332231e-06, "loss": 1.0243, "step": 1281 }, { "epoch": 0.8727025187202179, "grad_norm": 1.8376952409744263, "learning_rate": 9.582500401363833e-06, "loss": 0.9599, "step": 1282 }, { "epoch": 0.8733832539142273, "grad_norm": 1.8179891109466553, "learning_rate": 9.581599302290421e-06, "loss": 0.9602, "step": 1283 }, { "epoch": 0.8740639891082369, "grad_norm": 2.002046585083008, "learning_rate": 9.580697274294693e-06, "loss": 1.0149, "step": 1284 }, { "epoch": 0.8747447243022465, "grad_norm": 1.7668848037719727, "learning_rate": 9.579794317559536e-06, "loss": 1.0163, "step": 1285 }, { "epoch": 0.8754254594962559, "grad_norm": 1.7509013414382935, "learning_rate": 9.578890432268025e-06, "loss": 1.1493, "step": 1286 }, { "epoch": 0.8761061946902655, "grad_norm": 1.9796061515808105, "learning_rate": 9.577985618603424e-06, "loss": 0.9555, "step": 1287 }, { "epoch": 0.876786929884275, "grad_norm": 1.9527016878128052, "learning_rate": 9.577079876749185e-06, "loss": 0.9755, "step": 1288 }, { "epoch": 0.8774676650782846, "grad_norm": 1.949352502822876, "learning_rate": 9.576173206888949e-06, "loss": 0.9793, "step": 1289 }, { "epoch": 0.8781484002722941, "grad_norm": 2.1195967197418213, "learning_rate": 9.575265609206539e-06, "loss": 0.9352, "step": 1290 }, { "epoch": 0.8788291354663036, "grad_norm": 1.7822142839431763, "learning_rate": 9.574357083885974e-06, "loss": 1.0216, "step": 1291 }, { "epoch": 0.8795098706603132, "grad_norm": 1.7333722114562988, "learning_rate": 9.57344763111146e-06, "loss": 1.0551, "step": 1292 }, { "epoch": 0.8801906058543226, "grad_norm": 1.7548189163208008, "learning_rate": 9.572537251067386e-06, "loss": 0.9858, "step": 1293 }, { "epoch": 0.8808713410483322, "grad_norm": 1.88992178440094, "learning_rate": 9.571625943938333e-06, "loss": 0.9593, "step": 1294 }, { "epoch": 0.8815520762423418, "grad_norm": 1.8583999872207642, "learning_rate": 9.570713709909069e-06, "loss": 0.9953, "step": 1295 }, { "epoch": 0.8822328114363512, "grad_norm": 2.0261588096618652, "learning_rate": 9.569800549164551e-06, "loss": 0.9369, "step": 1296 }, { "epoch": 0.8829135466303608, "grad_norm": 2.017665386199951, "learning_rate": 9.568886461889925e-06, "loss": 0.9857, "step": 1297 }, { "epoch": 0.8835942818243703, "grad_norm": 2.0280847549438477, "learning_rate": 9.567971448270518e-06, "loss": 1.0562, "step": 1298 }, { "epoch": 0.8842750170183798, "grad_norm": 2.142171859741211, "learning_rate": 9.567055508491854e-06, "loss": 0.9382, "step": 1299 }, { "epoch": 0.8849557522123894, "grad_norm": 2.084022283554077, "learning_rate": 9.566138642739636e-06, "loss": 0.9983, "step": 1300 }, { "epoch": 0.8856364874063989, "grad_norm": 1.7730895280838013, "learning_rate": 9.565220851199764e-06, "loss": 0.992, "step": 1301 }, { "epoch": 0.8863172226004085, "grad_norm": 2.003438711166382, "learning_rate": 9.564302134058317e-06, "loss": 1.0101, "step": 1302 }, { "epoch": 0.8869979577944179, "grad_norm": 1.7726845741271973, "learning_rate": 9.563382491501565e-06, "loss": 1.0292, "step": 1303 }, { "epoch": 0.8876786929884275, "grad_norm": 2.014549493789673, "learning_rate": 9.56246192371597e-06, "loss": 0.96, "step": 1304 }, { "epoch": 0.8883594281824371, "grad_norm": 2.0423355102539062, "learning_rate": 9.561540430888177e-06, "loss": 0.9714, "step": 1305 }, { "epoch": 0.8890401633764465, "grad_norm": 1.9095995426177979, "learning_rate": 9.560618013205017e-06, "loss": 0.9651, "step": 1306 }, { "epoch": 0.8897208985704561, "grad_norm": 1.887734055519104, "learning_rate": 9.55969467085351e-06, "loss": 1.0036, "step": 1307 }, { "epoch": 0.8904016337644656, "grad_norm": 1.6949198246002197, "learning_rate": 9.558770404020869e-06, "loss": 0.9926, "step": 1308 }, { "epoch": 0.8910823689584751, "grad_norm": 1.8744829893112183, "learning_rate": 9.557845212894486e-06, "loss": 0.9775, "step": 1309 }, { "epoch": 0.8917631041524847, "grad_norm": 1.997134804725647, "learning_rate": 9.556919097661944e-06, "loss": 0.9128, "step": 1310 }, { "epoch": 0.8924438393464942, "grad_norm": 2.0209410190582275, "learning_rate": 9.555992058511014e-06, "loss": 0.9243, "step": 1311 }, { "epoch": 0.8931245745405038, "grad_norm": 1.968667984008789, "learning_rate": 9.555064095629656e-06, "loss": 0.9605, "step": 1312 }, { "epoch": 0.8938053097345132, "grad_norm": 1.8892627954483032, "learning_rate": 9.554135209206013e-06, "loss": 1.0159, "step": 1313 }, { "epoch": 0.8944860449285228, "grad_norm": 1.959761619567871, "learning_rate": 9.553205399428417e-06, "loss": 0.979, "step": 1314 }, { "epoch": 0.8951667801225324, "grad_norm": 1.8431966304779053, "learning_rate": 9.552274666485388e-06, "loss": 1.0427, "step": 1315 }, { "epoch": 0.8958475153165418, "grad_norm": 1.8742425441741943, "learning_rate": 9.551343010565635e-06, "loss": 1.0179, "step": 1316 }, { "epoch": 0.8965282505105514, "grad_norm": 1.909454107284546, "learning_rate": 9.550410431858049e-06, "loss": 1.0077, "step": 1317 }, { "epoch": 0.897208985704561, "grad_norm": 2.06901478767395, "learning_rate": 9.54947693055171e-06, "loss": 1.0651, "step": 1318 }, { "epoch": 0.8978897208985704, "grad_norm": 1.9240137338638306, "learning_rate": 9.54854250683589e-06, "loss": 0.9874, "step": 1319 }, { "epoch": 0.89857045609258, "grad_norm": 1.9715245962142944, "learning_rate": 9.547607160900038e-06, "loss": 0.9554, "step": 1320 }, { "epoch": 0.8992511912865895, "grad_norm": 2.0688281059265137, "learning_rate": 9.546670892933804e-06, "loss": 0.9653, "step": 1321 }, { "epoch": 0.899931926480599, "grad_norm": 1.8868058919906616, "learning_rate": 9.54573370312701e-06, "loss": 1.0046, "step": 1322 }, { "epoch": 0.9006126616746086, "grad_norm": 1.8153016567230225, "learning_rate": 9.544795591669678e-06, "loss": 0.9828, "step": 1323 }, { "epoch": 0.9012933968686181, "grad_norm": 2.088829517364502, "learning_rate": 9.543856558752006e-06, "loss": 0.9936, "step": 1324 }, { "epoch": 0.9019741320626277, "grad_norm": 2.0255980491638184, "learning_rate": 9.542916604564384e-06, "loss": 0.9688, "step": 1325 }, { "epoch": 0.9026548672566371, "grad_norm": 1.9366118907928467, "learning_rate": 9.54197572929739e-06, "loss": 0.9703, "step": 1326 }, { "epoch": 0.9033356024506467, "grad_norm": 1.675809383392334, "learning_rate": 9.541033933141786e-06, "loss": 1.051, "step": 1327 }, { "epoch": 0.9040163376446563, "grad_norm": 2.086355686187744, "learning_rate": 9.540091216288522e-06, "loss": 0.9509, "step": 1328 }, { "epoch": 0.9046970728386657, "grad_norm": 2.0330121517181396, "learning_rate": 9.539147578928735e-06, "loss": 1.0236, "step": 1329 }, { "epoch": 0.9053778080326753, "grad_norm": 1.8229461908340454, "learning_rate": 9.538203021253747e-06, "loss": 1.0703, "step": 1330 }, { "epoch": 0.9060585432266848, "grad_norm": 2.0182998180389404, "learning_rate": 9.53725754345507e-06, "loss": 0.9879, "step": 1331 }, { "epoch": 0.9067392784206944, "grad_norm": 1.8699594736099243, "learning_rate": 9.536311145724398e-06, "loss": 1.0203, "step": 1332 }, { "epoch": 0.9074200136147039, "grad_norm": 1.8564950227737427, "learning_rate": 9.535363828253613e-06, "loss": 0.8957, "step": 1333 }, { "epoch": 0.9081007488087134, "grad_norm": 2.0923683643341064, "learning_rate": 9.534415591234787e-06, "loss": 1.0193, "step": 1334 }, { "epoch": 0.908781484002723, "grad_norm": 1.9121843576431274, "learning_rate": 9.533466434860174e-06, "loss": 0.9328, "step": 1335 }, { "epoch": 0.9094622191967324, "grad_norm": 1.9806609153747559, "learning_rate": 9.532516359322218e-06, "loss": 0.9987, "step": 1336 }, { "epoch": 0.910142954390742, "grad_norm": 1.8946186304092407, "learning_rate": 9.531565364813544e-06, "loss": 0.9507, "step": 1337 }, { "epoch": 0.9108236895847516, "grad_norm": 1.9196884632110596, "learning_rate": 9.53061345152697e-06, "loss": 0.9686, "step": 1338 }, { "epoch": 0.911504424778761, "grad_norm": 1.6656087636947632, "learning_rate": 9.529660619655494e-06, "loss": 1.0673, "step": 1339 }, { "epoch": 0.9121851599727706, "grad_norm": 1.8482599258422852, "learning_rate": 9.528706869392307e-06, "loss": 0.9677, "step": 1340 }, { "epoch": 0.9128658951667801, "grad_norm": 1.8738361597061157, "learning_rate": 9.52775220093078e-06, "loss": 0.9839, "step": 1341 }, { "epoch": 0.9135466303607896, "grad_norm": 2.0327680110931396, "learning_rate": 9.526796614464474e-06, "loss": 0.8954, "step": 1342 }, { "epoch": 0.9142273655547992, "grad_norm": 1.7315024137496948, "learning_rate": 9.525840110187132e-06, "loss": 0.9991, "step": 1343 }, { "epoch": 0.9149081007488087, "grad_norm": 1.9874696731567383, "learning_rate": 9.524882688292688e-06, "loss": 1.0555, "step": 1344 }, { "epoch": 0.9155888359428183, "grad_norm": 2.0333926677703857, "learning_rate": 9.523924348975261e-06, "loss": 1.0507, "step": 1345 }, { "epoch": 0.9162695711368277, "grad_norm": 2.089407205581665, "learning_rate": 9.522965092429154e-06, "loss": 1.079, "step": 1346 }, { "epoch": 0.9169503063308373, "grad_norm": 1.7879128456115723, "learning_rate": 9.522004918848857e-06, "loss": 1.0538, "step": 1347 }, { "epoch": 0.9176310415248469, "grad_norm": 1.936577558517456, "learning_rate": 9.521043828429044e-06, "loss": 0.9869, "step": 1348 }, { "epoch": 0.9183117767188563, "grad_norm": 1.831776738166809, "learning_rate": 9.520081821364578e-06, "loss": 0.9948, "step": 1349 }, { "epoch": 0.9189925119128659, "grad_norm": 1.9043395519256592, "learning_rate": 9.519118897850509e-06, "loss": 0.9497, "step": 1350 }, { "epoch": 0.9196732471068755, "grad_norm": 1.7280079126358032, "learning_rate": 9.518155058082066e-06, "loss": 1.0676, "step": 1351 }, { "epoch": 0.9203539823008849, "grad_norm": 2.008016586303711, "learning_rate": 9.517190302254671e-06, "loss": 0.9857, "step": 1352 }, { "epoch": 0.9210347174948945, "grad_norm": 1.9204630851745605, "learning_rate": 9.516224630563928e-06, "loss": 0.9607, "step": 1353 }, { "epoch": 0.921715452688904, "grad_norm": 1.9131485223770142, "learning_rate": 9.515258043205628e-06, "loss": 1.0578, "step": 1354 }, { "epoch": 0.9223961878829136, "grad_norm": 1.9188491106033325, "learning_rate": 9.514290540375749e-06, "loss": 0.9412, "step": 1355 }, { "epoch": 0.9230769230769231, "grad_norm": 1.9244946241378784, "learning_rate": 9.513322122270449e-06, "loss": 0.9304, "step": 1356 }, { "epoch": 0.9237576582709326, "grad_norm": 2.193641424179077, "learning_rate": 9.512352789086078e-06, "loss": 0.9518, "step": 1357 }, { "epoch": 0.9244383934649422, "grad_norm": 1.9119389057159424, "learning_rate": 9.511382541019167e-06, "loss": 0.9668, "step": 1358 }, { "epoch": 0.9251191286589516, "grad_norm": 1.7026275396347046, "learning_rate": 9.510411378266437e-06, "loss": 1.0718, "step": 1359 }, { "epoch": 0.9257998638529612, "grad_norm": 1.992437481880188, "learning_rate": 9.50943930102479e-06, "loss": 0.9482, "step": 1360 }, { "epoch": 0.9264805990469708, "grad_norm": 1.995895266532898, "learning_rate": 9.508466309491315e-06, "loss": 0.9077, "step": 1361 }, { "epoch": 0.9271613342409802, "grad_norm": 2.2898402214050293, "learning_rate": 9.50749240386329e-06, "loss": 0.9637, "step": 1362 }, { "epoch": 0.9278420694349898, "grad_norm": 2.0049383640289307, "learning_rate": 9.50651758433817e-06, "loss": 0.9198, "step": 1363 }, { "epoch": 0.9285228046289993, "grad_norm": 1.7825983762741089, "learning_rate": 9.505541851113604e-06, "loss": 1.0875, "step": 1364 }, { "epoch": 0.9292035398230089, "grad_norm": 1.4971048831939697, "learning_rate": 9.504565204387422e-06, "loss": 1.067, "step": 1365 }, { "epoch": 0.9298842750170184, "grad_norm": 1.8165844678878784, "learning_rate": 9.503587644357639e-06, "loss": 1.0026, "step": 1366 }, { "epoch": 0.9305650102110279, "grad_norm": 1.8021408319473267, "learning_rate": 9.502609171222456e-06, "loss": 0.9627, "step": 1367 }, { "epoch": 0.9312457454050375, "grad_norm": 1.9065154790878296, "learning_rate": 9.50162978518026e-06, "loss": 1.0661, "step": 1368 }, { "epoch": 0.9319264805990469, "grad_norm": 1.8527199029922485, "learning_rate": 9.500649486429621e-06, "loss": 1.0269, "step": 1369 }, { "epoch": 0.9326072157930565, "grad_norm": 1.9549951553344727, "learning_rate": 9.499668275169294e-06, "loss": 0.9913, "step": 1370 }, { "epoch": 0.9332879509870661, "grad_norm": 2.0424742698669434, "learning_rate": 9.498686151598224e-06, "loss": 1.0096, "step": 1371 }, { "epoch": 0.9339686861810755, "grad_norm": 1.7176570892333984, "learning_rate": 9.497703115915533e-06, "loss": 1.0045, "step": 1372 }, { "epoch": 0.9346494213750851, "grad_norm": 1.937803864479065, "learning_rate": 9.496719168320536e-06, "loss": 1.0238, "step": 1373 }, { "epoch": 0.9353301565690946, "grad_norm": 1.9379475116729736, "learning_rate": 9.495734309012728e-06, "loss": 0.9538, "step": 1374 }, { "epoch": 0.9360108917631041, "grad_norm": 1.9759173393249512, "learning_rate": 9.494748538191787e-06, "loss": 1.0243, "step": 1375 }, { "epoch": 0.9366916269571137, "grad_norm": 1.9117100238800049, "learning_rate": 9.493761856057582e-06, "loss": 1.101, "step": 1376 }, { "epoch": 0.9373723621511232, "grad_norm": 1.872820258140564, "learning_rate": 9.492774262810163e-06, "loss": 0.9608, "step": 1377 }, { "epoch": 0.9380530973451328, "grad_norm": 2.0114166736602783, "learning_rate": 9.491785758649764e-06, "loss": 0.8924, "step": 1378 }, { "epoch": 0.9387338325391422, "grad_norm": 1.8123794794082642, "learning_rate": 9.490796343776805e-06, "loss": 1.0639, "step": 1379 }, { "epoch": 0.9394145677331518, "grad_norm": 2.2506868839263916, "learning_rate": 9.489806018391892e-06, "loss": 1.0188, "step": 1380 }, { "epoch": 0.9400953029271614, "grad_norm": 2.191877603530884, "learning_rate": 9.488814782695812e-06, "loss": 0.9148, "step": 1381 }, { "epoch": 0.9407760381211708, "grad_norm": 1.9012198448181152, "learning_rate": 9.48782263688954e-06, "loss": 0.9148, "step": 1382 }, { "epoch": 0.9414567733151804, "grad_norm": 1.827172875404358, "learning_rate": 9.486829581174235e-06, "loss": 1.0461, "step": 1383 }, { "epoch": 0.94213750850919, "grad_norm": 1.8115383386611938, "learning_rate": 9.485835615751237e-06, "loss": 1.0808, "step": 1384 }, { "epoch": 0.9428182437031994, "grad_norm": 2.029202938079834, "learning_rate": 9.484840740822074e-06, "loss": 1.0217, "step": 1385 }, { "epoch": 0.943498978897209, "grad_norm": 2.1063175201416016, "learning_rate": 9.483844956588462e-06, "loss": 0.9623, "step": 1386 }, { "epoch": 0.9441797140912185, "grad_norm": 2.1057851314544678, "learning_rate": 9.48284826325229e-06, "loss": 0.9062, "step": 1387 }, { "epoch": 0.944860449285228, "grad_norm": 2.068176507949829, "learning_rate": 9.481850661015643e-06, "loss": 1.0279, "step": 1388 }, { "epoch": 0.9455411844792376, "grad_norm": 1.7961922883987427, "learning_rate": 9.480852150080784e-06, "loss": 1.0162, "step": 1389 }, { "epoch": 0.9462219196732471, "grad_norm": 1.804118275642395, "learning_rate": 9.47985273065016e-06, "loss": 1.0171, "step": 1390 }, { "epoch": 0.9469026548672567, "grad_norm": 1.8451051712036133, "learning_rate": 9.47885240292641e-06, "loss": 1.0192, "step": 1391 }, { "epoch": 0.9475833900612661, "grad_norm": 1.8615028858184814, "learning_rate": 9.477851167112344e-06, "loss": 1.0102, "step": 1392 }, { "epoch": 0.9482641252552757, "grad_norm": 1.8540565967559814, "learning_rate": 9.476849023410965e-06, "loss": 1.0157, "step": 1393 }, { "epoch": 0.9489448604492853, "grad_norm": 1.9447901248931885, "learning_rate": 9.475845972025462e-06, "loss": 1.0187, "step": 1394 }, { "epoch": 0.9496255956432947, "grad_norm": 1.9192029237747192, "learning_rate": 9.4748420131592e-06, "loss": 1.0461, "step": 1395 }, { "epoch": 0.9503063308373043, "grad_norm": 1.9552040100097656, "learning_rate": 9.473837147015734e-06, "loss": 0.9833, "step": 1396 }, { "epoch": 0.9509870660313138, "grad_norm": 1.9270353317260742, "learning_rate": 9.472831373798802e-06, "loss": 0.9041, "step": 1397 }, { "epoch": 0.9516678012253234, "grad_norm": 2.218173027038574, "learning_rate": 9.471824693712325e-06, "loss": 0.9901, "step": 1398 }, { "epoch": 0.9523485364193329, "grad_norm": 2.2088990211486816, "learning_rate": 9.470817106960407e-06, "loss": 0.97, "step": 1399 }, { "epoch": 0.9530292716133424, "grad_norm": 2.0546252727508545, "learning_rate": 9.469808613747338e-06, "loss": 1.0045, "step": 1400 }, { "epoch": 0.953710006807352, "grad_norm": 1.842024326324463, "learning_rate": 9.468799214277591e-06, "loss": 0.9821, "step": 1401 }, { "epoch": 0.9543907420013614, "grad_norm": 1.8485363721847534, "learning_rate": 9.467788908755821e-06, "loss": 0.9376, "step": 1402 }, { "epoch": 0.955071477195371, "grad_norm": 1.8719462156295776, "learning_rate": 9.466777697386868e-06, "loss": 0.9936, "step": 1403 }, { "epoch": 0.9557522123893806, "grad_norm": 1.9255026578903198, "learning_rate": 9.465765580375757e-06, "loss": 0.9628, "step": 1404 }, { "epoch": 0.95643294758339, "grad_norm": 1.9795597791671753, "learning_rate": 9.464752557927695e-06, "loss": 0.95, "step": 1405 }, { "epoch": 0.9571136827773996, "grad_norm": 2.1956446170806885, "learning_rate": 9.463738630248073e-06, "loss": 0.9768, "step": 1406 }, { "epoch": 0.9577944179714091, "grad_norm": 1.8489092588424683, "learning_rate": 9.462723797542465e-06, "loss": 1.0226, "step": 1407 }, { "epoch": 0.9584751531654186, "grad_norm": 1.9483685493469238, "learning_rate": 9.46170806001663e-06, "loss": 1.0333, "step": 1408 }, { "epoch": 0.9591558883594282, "grad_norm": 1.6403745412826538, "learning_rate": 9.46069141787651e-06, "loss": 1.0516, "step": 1409 }, { "epoch": 0.9598366235534377, "grad_norm": 1.9516277313232422, "learning_rate": 9.459673871328225e-06, "loss": 0.9545, "step": 1410 }, { "epoch": 0.9605173587474473, "grad_norm": 1.8281733989715576, "learning_rate": 9.45865542057809e-06, "loss": 0.973, "step": 1411 }, { "epoch": 0.9611980939414567, "grad_norm": 1.8623758554458618, "learning_rate": 9.457636065832592e-06, "loss": 0.8696, "step": 1412 }, { "epoch": 0.9618788291354663, "grad_norm": 1.7017723321914673, "learning_rate": 9.456615807298408e-06, "loss": 1.1049, "step": 1413 }, { "epoch": 0.9625595643294759, "grad_norm": 1.8792554140090942, "learning_rate": 9.455594645182396e-06, "loss": 0.9843, "step": 1414 }, { "epoch": 0.9632402995234853, "grad_norm": 1.8779371976852417, "learning_rate": 9.454572579691596e-06, "loss": 1.0754, "step": 1415 }, { "epoch": 0.9639210347174949, "grad_norm": 1.8597980737686157, "learning_rate": 9.453549611033233e-06, "loss": 0.9761, "step": 1416 }, { "epoch": 0.9646017699115044, "grad_norm": 1.9541290998458862, "learning_rate": 9.452525739414716e-06, "loss": 0.9424, "step": 1417 }, { "epoch": 0.9652825051055139, "grad_norm": 1.8907159566879272, "learning_rate": 9.451500965043633e-06, "loss": 0.9852, "step": 1418 }, { "epoch": 0.9659632402995235, "grad_norm": 1.9603592157363892, "learning_rate": 9.450475288127761e-06, "loss": 0.9323, "step": 1419 }, { "epoch": 0.966643975493533, "grad_norm": 1.8519670963287354, "learning_rate": 9.449448708875053e-06, "loss": 0.9506, "step": 1420 }, { "epoch": 0.9673247106875426, "grad_norm": 1.6846238374710083, "learning_rate": 9.448421227493651e-06, "loss": 0.944, "step": 1421 }, { "epoch": 0.9680054458815521, "grad_norm": 1.6625161170959473, "learning_rate": 9.447392844191879e-06, "loss": 1.0597, "step": 1422 }, { "epoch": 0.9686861810755616, "grad_norm": 1.672576904296875, "learning_rate": 9.446363559178238e-06, "loss": 1.0573, "step": 1423 }, { "epoch": 0.9693669162695712, "grad_norm": 1.837986946105957, "learning_rate": 9.445333372661421e-06, "loss": 0.9395, "step": 1424 }, { "epoch": 0.9700476514635806, "grad_norm": 1.7358434200286865, "learning_rate": 9.444302284850295e-06, "loss": 0.9944, "step": 1425 }, { "epoch": 0.9707283866575902, "grad_norm": 1.8636044263839722, "learning_rate": 9.443270295953914e-06, "loss": 1.1018, "step": 1426 }, { "epoch": 0.9714091218515998, "grad_norm": 1.9299615621566772, "learning_rate": 9.442237406181518e-06, "loss": 1.0141, "step": 1427 }, { "epoch": 0.9720898570456092, "grad_norm": 1.9350035190582275, "learning_rate": 9.441203615742524e-06, "loss": 0.9725, "step": 1428 }, { "epoch": 0.9727705922396188, "grad_norm": 1.8675600290298462, "learning_rate": 9.440168924846533e-06, "loss": 0.9133, "step": 1429 }, { "epoch": 0.9734513274336283, "grad_norm": 1.8599721193313599, "learning_rate": 9.43913333370333e-06, "loss": 1.0912, "step": 1430 }, { "epoch": 0.9741320626276379, "grad_norm": 2.0441317558288574, "learning_rate": 9.43809684252288e-06, "loss": 1.0442, "step": 1431 }, { "epoch": 0.9748127978216474, "grad_norm": 1.875463604927063, "learning_rate": 9.437059451515335e-06, "loss": 0.9522, "step": 1432 }, { "epoch": 0.9754935330156569, "grad_norm": 1.9532735347747803, "learning_rate": 9.436021160891027e-06, "loss": 0.9848, "step": 1433 }, { "epoch": 0.9761742682096665, "grad_norm": 1.8147196769714355, "learning_rate": 9.434981970860468e-06, "loss": 0.9713, "step": 1434 }, { "epoch": 0.9768550034036759, "grad_norm": 2.0051066875457764, "learning_rate": 9.433941881634357e-06, "loss": 1.0016, "step": 1435 }, { "epoch": 0.9775357385976855, "grad_norm": 2.0337605476379395, "learning_rate": 9.432900893423567e-06, "loss": 0.9802, "step": 1436 }, { "epoch": 0.9782164737916951, "grad_norm": 1.8962024450302124, "learning_rate": 9.431859006439166e-06, "loss": 1.0163, "step": 1437 }, { "epoch": 0.9788972089857045, "grad_norm": 2.084394931793213, "learning_rate": 9.430816220892394e-06, "loss": 1.0143, "step": 1438 }, { "epoch": 0.9795779441797141, "grad_norm": 1.946412444114685, "learning_rate": 9.429772536994676e-06, "loss": 0.9345, "step": 1439 }, { "epoch": 0.9802586793737236, "grad_norm": 1.8054437637329102, "learning_rate": 9.428727954957622e-06, "loss": 1.0513, "step": 1440 }, { "epoch": 0.9809394145677331, "grad_norm": 1.8219609260559082, "learning_rate": 9.42768247499302e-06, "loss": 0.9801, "step": 1441 }, { "epoch": 0.9816201497617427, "grad_norm": 1.7432136535644531, "learning_rate": 9.42663609731284e-06, "loss": 0.9455, "step": 1442 }, { "epoch": 0.9823008849557522, "grad_norm": 1.7787905931472778, "learning_rate": 9.425588822129239e-06, "loss": 0.9831, "step": 1443 }, { "epoch": 0.9829816201497618, "grad_norm": 1.7195236682891846, "learning_rate": 9.424540649654552e-06, "loss": 1.0289, "step": 1444 }, { "epoch": 0.9836623553437712, "grad_norm": 1.8911025524139404, "learning_rate": 9.423491580101296e-06, "loss": 0.9355, "step": 1445 }, { "epoch": 0.9843430905377808, "grad_norm": 1.703369379043579, "learning_rate": 9.422441613682169e-06, "loss": 1.0104, "step": 1446 }, { "epoch": 0.9850238257317904, "grad_norm": 1.5140862464904785, "learning_rate": 9.421390750610056e-06, "loss": 1.1164, "step": 1447 }, { "epoch": 0.9857045609257998, "grad_norm": 1.7296870946884155, "learning_rate": 9.420338991098018e-06, "loss": 1.0642, "step": 1448 }, { "epoch": 0.9863852961198094, "grad_norm": 1.7883292436599731, "learning_rate": 9.419286335359299e-06, "loss": 0.9738, "step": 1449 }, { "epoch": 0.9870660313138189, "grad_norm": 1.9914320707321167, "learning_rate": 9.418232783607327e-06, "loss": 1.038, "step": 1450 }, { "epoch": 0.9877467665078284, "grad_norm": 2.0041117668151855, "learning_rate": 9.41717833605571e-06, "loss": 0.9891, "step": 1451 }, { "epoch": 0.988427501701838, "grad_norm": 1.7326236963272095, "learning_rate": 9.41612299291824e-06, "loss": 1.0058, "step": 1452 }, { "epoch": 0.9891082368958475, "grad_norm": 1.8678719997406006, "learning_rate": 9.415066754408886e-06, "loss": 0.9832, "step": 1453 }, { "epoch": 0.989788972089857, "grad_norm": 1.8308335542678833, "learning_rate": 9.4140096207418e-06, "loss": 1.0045, "step": 1454 }, { "epoch": 0.9904697072838666, "grad_norm": 1.7028920650482178, "learning_rate": 9.41295159213132e-06, "loss": 0.9444, "step": 1455 }, { "epoch": 0.9911504424778761, "grad_norm": 1.755160927772522, "learning_rate": 9.41189266879196e-06, "loss": 0.9704, "step": 1456 }, { "epoch": 0.9918311776718857, "grad_norm": 1.769636869430542, "learning_rate": 9.410832850938417e-06, "loss": 0.986, "step": 1457 }, { "epoch": 0.9925119128658951, "grad_norm": 1.8588826656341553, "learning_rate": 9.40977213878557e-06, "loss": 0.9558, "step": 1458 }, { "epoch": 0.9931926480599047, "grad_norm": 2.196061372756958, "learning_rate": 9.40871053254848e-06, "loss": 0.9515, "step": 1459 }, { "epoch": 0.9938733832539143, "grad_norm": 2.0292675495147705, "learning_rate": 9.40764803244239e-06, "loss": 0.9258, "step": 1460 }, { "epoch": 0.9945541184479237, "grad_norm": 2.0396785736083984, "learning_rate": 9.406584638682717e-06, "loss": 1.0002, "step": 1461 }, { "epoch": 0.9952348536419333, "grad_norm": 1.9609512090682983, "learning_rate": 9.405520351485071e-06, "loss": 0.9515, "step": 1462 }, { "epoch": 0.9959155888359428, "grad_norm": 1.9784932136535645, "learning_rate": 9.404455171065232e-06, "loss": 0.9636, "step": 1463 }, { "epoch": 0.9965963240299524, "grad_norm": 2.1777522563934326, "learning_rate": 9.40338909763917e-06, "loss": 0.9454, "step": 1464 }, { "epoch": 0.9972770592239619, "grad_norm": 1.7829298973083496, "learning_rate": 9.40232213142303e-06, "loss": 0.9874, "step": 1465 }, { "epoch": 0.9979577944179714, "grad_norm": 1.9985429048538208, "learning_rate": 9.401254272633141e-06, "loss": 0.9673, "step": 1466 }, { "epoch": 0.998638529611981, "grad_norm": 2.0434322357177734, "learning_rate": 9.400185521486011e-06, "loss": 0.9323, "step": 1467 }, { "epoch": 0.9993192648059904, "grad_norm": 1.9057528972625732, "learning_rate": 9.39911587819833e-06, "loss": 0.948, "step": 1468 }, { "epoch": 1.0, "grad_norm": 1.9995355606079102, "learning_rate": 9.39804534298697e-06, "loss": 0.9801, "step": 1469 }, { "epoch": 1.0006807351940095, "grad_norm": 2.1018810272216797, "learning_rate": 9.396973916068983e-06, "loss": 0.7164, "step": 1470 }, { "epoch": 1.0013614703880191, "grad_norm": 1.5669177770614624, "learning_rate": 9.395901597661601e-06, "loss": 0.8685, "step": 1471 }, { "epoch": 1.0020422055820286, "grad_norm": 1.6616401672363281, "learning_rate": 9.394828387982235e-06, "loss": 0.7568, "step": 1472 }, { "epoch": 1.002722940776038, "grad_norm": 1.637068510055542, "learning_rate": 9.393754287248482e-06, "loss": 0.7662, "step": 1473 }, { "epoch": 1.0034036759700478, "grad_norm": 1.800912857055664, "learning_rate": 9.392679295678116e-06, "loss": 0.8111, "step": 1474 }, { "epoch": 1.0040844111640572, "grad_norm": 1.7395880222320557, "learning_rate": 9.391603413489092e-06, "loss": 0.7401, "step": 1475 }, { "epoch": 1.0047651463580667, "grad_norm": 1.6605087518692017, "learning_rate": 9.390526640899545e-06, "loss": 0.8678, "step": 1476 }, { "epoch": 1.0054458815520761, "grad_norm": 1.842107892036438, "learning_rate": 9.389448978127793e-06, "loss": 0.8628, "step": 1477 }, { "epoch": 1.0061266167460858, "grad_norm": 2.027766704559326, "learning_rate": 9.38837042539233e-06, "loss": 0.825, "step": 1478 }, { "epoch": 1.0068073519400953, "grad_norm": 1.8518561124801636, "learning_rate": 9.387290982911838e-06, "loss": 0.8935, "step": 1479 }, { "epoch": 1.0074880871341048, "grad_norm": 1.6379393339157104, "learning_rate": 9.38621065090517e-06, "loss": 0.8584, "step": 1480 }, { "epoch": 1.0081688223281144, "grad_norm": 1.7422528266906738, "learning_rate": 9.385129429591367e-06, "loss": 0.6897, "step": 1481 }, { "epoch": 1.008849557522124, "grad_norm": 1.9034839868545532, "learning_rate": 9.384047319189647e-06, "loss": 0.8513, "step": 1482 }, { "epoch": 1.0095302927161334, "grad_norm": 1.8792617321014404, "learning_rate": 9.382964319919406e-06, "loss": 0.6588, "step": 1483 }, { "epoch": 1.010211027910143, "grad_norm": 1.8037797212600708, "learning_rate": 9.381880432000226e-06, "loss": 0.7518, "step": 1484 }, { "epoch": 1.0108917631041525, "grad_norm": 2.0158700942993164, "learning_rate": 9.380795655651863e-06, "loss": 0.8044, "step": 1485 }, { "epoch": 1.011572498298162, "grad_norm": 1.9686881303787231, "learning_rate": 9.379709991094258e-06, "loss": 0.8424, "step": 1486 }, { "epoch": 1.0122532334921714, "grad_norm": 1.6813817024230957, "learning_rate": 9.378623438547532e-06, "loss": 0.7894, "step": 1487 }, { "epoch": 1.0129339686861811, "grad_norm": 1.8843743801116943, "learning_rate": 9.377535998231981e-06, "loss": 0.6984, "step": 1488 }, { "epoch": 1.0136147038801906, "grad_norm": 1.8219627141952515, "learning_rate": 9.376447670368086e-06, "loss": 0.8043, "step": 1489 }, { "epoch": 1.0142954390742, "grad_norm": 1.9801603555679321, "learning_rate": 9.375358455176505e-06, "loss": 0.7811, "step": 1490 }, { "epoch": 1.0149761742682097, "grad_norm": 1.844611644744873, "learning_rate": 9.374268352878078e-06, "loss": 0.8058, "step": 1491 }, { "epoch": 1.0156569094622192, "grad_norm": 1.6732650995254517, "learning_rate": 9.373177363693825e-06, "loss": 0.7368, "step": 1492 }, { "epoch": 1.0163376446562287, "grad_norm": 1.810802698135376, "learning_rate": 9.372085487844943e-06, "loss": 0.6986, "step": 1493 }, { "epoch": 1.0170183798502384, "grad_norm": 1.6670031547546387, "learning_rate": 9.37099272555281e-06, "loss": 0.7549, "step": 1494 }, { "epoch": 1.0176991150442478, "grad_norm": 1.8129396438598633, "learning_rate": 9.369899077038987e-06, "loss": 0.82, "step": 1495 }, { "epoch": 1.0183798502382573, "grad_norm": 1.87721848487854, "learning_rate": 9.368804542525211e-06, "loss": 0.8088, "step": 1496 }, { "epoch": 1.0190605854322667, "grad_norm": 1.9689056873321533, "learning_rate": 9.367709122233396e-06, "loss": 0.7109, "step": 1497 }, { "epoch": 1.0197413206262764, "grad_norm": 1.7855298519134521, "learning_rate": 9.366612816385646e-06, "loss": 0.8423, "step": 1498 }, { "epoch": 1.020422055820286, "grad_norm": 1.7172685861587524, "learning_rate": 9.36551562520423e-06, "loss": 0.8142, "step": 1499 }, { "epoch": 1.0211027910142954, "grad_norm": 1.8751262426376343, "learning_rate": 9.364417548911612e-06, "loss": 0.851, "step": 1500 }, { "epoch": 1.021783526208305, "grad_norm": 2.0040738582611084, "learning_rate": 9.36331858773042e-06, "loss": 0.7661, "step": 1501 }, { "epoch": 1.0224642614023145, "grad_norm": 1.6423708200454712, "learning_rate": 9.362218741883477e-06, "loss": 0.8279, "step": 1502 }, { "epoch": 1.023144996596324, "grad_norm": 1.749450922012329, "learning_rate": 9.361118011593772e-06, "loss": 0.8302, "step": 1503 }, { "epoch": 1.0238257317903336, "grad_norm": 1.8154640197753906, "learning_rate": 9.360016397084479e-06, "loss": 0.7685, "step": 1504 }, { "epoch": 1.0245064669843431, "grad_norm": 1.8985596895217896, "learning_rate": 9.358913898578954e-06, "loss": 0.8354, "step": 1505 }, { "epoch": 1.0251872021783526, "grad_norm": 1.99636971950531, "learning_rate": 9.357810516300726e-06, "loss": 0.7429, "step": 1506 }, { "epoch": 1.0258679373723623, "grad_norm": 1.731945514678955, "learning_rate": 9.35670625047351e-06, "loss": 0.7012, "step": 1507 }, { "epoch": 1.0265486725663717, "grad_norm": 1.8420974016189575, "learning_rate": 9.355601101321195e-06, "loss": 0.859, "step": 1508 }, { "epoch": 1.0272294077603812, "grad_norm": 1.9254237413406372, "learning_rate": 9.354495069067849e-06, "loss": 0.7969, "step": 1509 }, { "epoch": 1.0279101429543906, "grad_norm": 1.8611512184143066, "learning_rate": 9.353388153937723e-06, "loss": 0.786, "step": 1510 }, { "epoch": 1.0285908781484003, "grad_norm": 1.8616780042648315, "learning_rate": 9.352280356155244e-06, "loss": 0.8195, "step": 1511 }, { "epoch": 1.0292716133424098, "grad_norm": 1.6002469062805176, "learning_rate": 9.35117167594502e-06, "loss": 0.8199, "step": 1512 }, { "epoch": 1.0299523485364193, "grad_norm": 1.8964072465896606, "learning_rate": 9.350062113531832e-06, "loss": 0.798, "step": 1513 }, { "epoch": 1.030633083730429, "grad_norm": 1.9978306293487549, "learning_rate": 9.348951669140652e-06, "loss": 0.8081, "step": 1514 }, { "epoch": 1.0313138189244384, "grad_norm": 1.8240927457809448, "learning_rate": 9.347840342996617e-06, "loss": 0.7343, "step": 1515 }, { "epoch": 1.0319945541184479, "grad_norm": 1.7946373224258423, "learning_rate": 9.346728135325051e-06, "loss": 0.8361, "step": 1516 }, { "epoch": 1.0326752893124576, "grad_norm": 1.861647129058838, "learning_rate": 9.345615046351457e-06, "loss": 0.7917, "step": 1517 }, { "epoch": 1.033356024506467, "grad_norm": 2.008620500564575, "learning_rate": 9.344501076301513e-06, "loss": 0.7666, "step": 1518 }, { "epoch": 1.0340367597004765, "grad_norm": 1.9055758714675903, "learning_rate": 9.343386225401076e-06, "loss": 0.7112, "step": 1519 }, { "epoch": 1.034717494894486, "grad_norm": 1.7738134860992432, "learning_rate": 9.342270493876185e-06, "loss": 0.6682, "step": 1520 }, { "epoch": 1.0353982300884956, "grad_norm": 1.83958101272583, "learning_rate": 9.341153881953052e-06, "loss": 0.8114, "step": 1521 }, { "epoch": 1.036078965282505, "grad_norm": 1.9332841634750366, "learning_rate": 9.340036389858075e-06, "loss": 0.7363, "step": 1522 }, { "epoch": 1.0367597004765146, "grad_norm": 1.9778627157211304, "learning_rate": 9.338918017817821e-06, "loss": 0.7981, "step": 1523 }, { "epoch": 1.0374404356705242, "grad_norm": 1.8411414623260498, "learning_rate": 9.337798766059047e-06, "loss": 0.6897, "step": 1524 }, { "epoch": 1.0381211708645337, "grad_norm": 1.803958773612976, "learning_rate": 9.336678634808678e-06, "loss": 0.8715, "step": 1525 }, { "epoch": 1.0388019060585432, "grad_norm": 1.9736131429672241, "learning_rate": 9.335557624293823e-06, "loss": 0.7494, "step": 1526 }, { "epoch": 1.0394826412525529, "grad_norm": 1.7641857862472534, "learning_rate": 9.334435734741766e-06, "loss": 0.7673, "step": 1527 }, { "epoch": 1.0401633764465623, "grad_norm": 1.6451663970947266, "learning_rate": 9.333312966379973e-06, "loss": 0.7749, "step": 1528 }, { "epoch": 1.0408441116405718, "grad_norm": 1.8571653366088867, "learning_rate": 9.332189319436082e-06, "loss": 0.8041, "step": 1529 }, { "epoch": 1.0415248468345812, "grad_norm": 1.7600890398025513, "learning_rate": 9.331064794137918e-06, "loss": 0.7552, "step": 1530 }, { "epoch": 1.042205582028591, "grad_norm": 1.7581672668457031, "learning_rate": 9.329939390713476e-06, "loss": 0.8277, "step": 1531 }, { "epoch": 1.0428863172226004, "grad_norm": 1.860924243927002, "learning_rate": 9.328813109390936e-06, "loss": 0.7617, "step": 1532 }, { "epoch": 1.0435670524166099, "grad_norm": 1.8694303035736084, "learning_rate": 9.327685950398647e-06, "loss": 0.8305, "step": 1533 }, { "epoch": 1.0442477876106195, "grad_norm": 1.9284718036651611, "learning_rate": 9.326557913965146e-06, "loss": 0.7696, "step": 1534 }, { "epoch": 1.044928522804629, "grad_norm": 1.8752230405807495, "learning_rate": 9.32542900031914e-06, "loss": 0.7623, "step": 1535 }, { "epoch": 1.0456092579986385, "grad_norm": 1.7036082744598389, "learning_rate": 9.324299209689519e-06, "loss": 0.7959, "step": 1536 }, { "epoch": 1.0462899931926481, "grad_norm": 1.6040116548538208, "learning_rate": 9.323168542305345e-06, "loss": 0.838, "step": 1537 }, { "epoch": 1.0469707283866576, "grad_norm": 1.7264668941497803, "learning_rate": 9.322036998395866e-06, "loss": 0.856, "step": 1538 }, { "epoch": 1.047651463580667, "grad_norm": 2.044069528579712, "learning_rate": 9.320904578190504e-06, "loss": 0.7313, "step": 1539 }, { "epoch": 1.0483321987746765, "grad_norm": 1.8450818061828613, "learning_rate": 9.319771281918854e-06, "loss": 0.7757, "step": 1540 }, { "epoch": 1.0490129339686862, "grad_norm": 1.6915664672851562, "learning_rate": 9.318637109810695e-06, "loss": 0.8185, "step": 1541 }, { "epoch": 1.0496936691626957, "grad_norm": 1.6378705501556396, "learning_rate": 9.31750206209598e-06, "loss": 0.7754, "step": 1542 }, { "epoch": 1.0503744043567051, "grad_norm": 1.771519422531128, "learning_rate": 9.316366139004842e-06, "loss": 0.8886, "step": 1543 }, { "epoch": 1.0510551395507148, "grad_norm": 1.7351131439208984, "learning_rate": 9.315229340767588e-06, "loss": 0.8753, "step": 1544 }, { "epoch": 1.0517358747447243, "grad_norm": 1.7155870199203491, "learning_rate": 9.314091667614706e-06, "loss": 0.7314, "step": 1545 }, { "epoch": 1.0524166099387338, "grad_norm": 1.7759099006652832, "learning_rate": 9.312953119776863e-06, "loss": 0.746, "step": 1546 }, { "epoch": 1.0530973451327434, "grad_norm": 1.8277018070220947, "learning_rate": 9.311813697484893e-06, "loss": 0.6929, "step": 1547 }, { "epoch": 1.053778080326753, "grad_norm": 1.9130133390426636, "learning_rate": 9.31067340096982e-06, "loss": 0.7892, "step": 1548 }, { "epoch": 1.0544588155207624, "grad_norm": 1.9773248434066772, "learning_rate": 9.309532230462839e-06, "loss": 0.7974, "step": 1549 }, { "epoch": 1.055139550714772, "grad_norm": 1.8623472452163696, "learning_rate": 9.308390186195324e-06, "loss": 0.7466, "step": 1550 }, { "epoch": 1.0558202859087815, "grad_norm": 1.7985899448394775, "learning_rate": 9.307247268398822e-06, "loss": 0.8044, "step": 1551 }, { "epoch": 1.056501021102791, "grad_norm": 1.8342342376708984, "learning_rate": 9.306103477305065e-06, "loss": 0.808, "step": 1552 }, { "epoch": 1.0571817562968004, "grad_norm": 1.8344080448150635, "learning_rate": 9.304958813145954e-06, "loss": 0.7429, "step": 1553 }, { "epoch": 1.0578624914908101, "grad_norm": 1.810716152191162, "learning_rate": 9.30381327615357e-06, "loss": 0.732, "step": 1554 }, { "epoch": 1.0585432266848196, "grad_norm": 1.9701831340789795, "learning_rate": 9.302666866560172e-06, "loss": 0.8469, "step": 1555 }, { "epoch": 1.059223961878829, "grad_norm": 2.039745330810547, "learning_rate": 9.301519584598199e-06, "loss": 0.8037, "step": 1556 }, { "epoch": 1.0599046970728387, "grad_norm": 2.006089448928833, "learning_rate": 9.30037143050026e-06, "loss": 0.8428, "step": 1557 }, { "epoch": 1.0605854322668482, "grad_norm": 1.850959300994873, "learning_rate": 9.299222404499141e-06, "loss": 0.864, "step": 1558 }, { "epoch": 1.0612661674608577, "grad_norm": 1.8265290260314941, "learning_rate": 9.298072506827814e-06, "loss": 0.7755, "step": 1559 }, { "epoch": 1.0619469026548674, "grad_norm": 1.6753199100494385, "learning_rate": 9.296921737719415e-06, "loss": 0.8755, "step": 1560 }, { "epoch": 1.0626276378488768, "grad_norm": 1.8233299255371094, "learning_rate": 9.295770097407269e-06, "loss": 0.7578, "step": 1561 }, { "epoch": 1.0633083730428863, "grad_norm": 1.6985143423080444, "learning_rate": 9.29461758612487e-06, "loss": 0.8688, "step": 1562 }, { "epoch": 1.0639891082368957, "grad_norm": 1.7589019536972046, "learning_rate": 9.293464204105889e-06, "loss": 0.7678, "step": 1563 }, { "epoch": 1.0646698434309054, "grad_norm": 1.8447562456130981, "learning_rate": 9.292309951584176e-06, "loss": 0.7736, "step": 1564 }, { "epoch": 1.065350578624915, "grad_norm": 1.8782999515533447, "learning_rate": 9.291154828793756e-06, "loss": 0.8106, "step": 1565 }, { "epoch": 1.0660313138189244, "grad_norm": 1.8713566064834595, "learning_rate": 9.289998835968832e-06, "loss": 0.8237, "step": 1566 }, { "epoch": 1.066712049012934, "grad_norm": 1.8017008304595947, "learning_rate": 9.288841973343781e-06, "loss": 0.7629, "step": 1567 }, { "epoch": 1.0673927842069435, "grad_norm": 1.8423258066177368, "learning_rate": 9.287684241153159e-06, "loss": 0.7901, "step": 1568 }, { "epoch": 1.068073519400953, "grad_norm": 1.7129005193710327, "learning_rate": 9.286525639631695e-06, "loss": 0.8074, "step": 1569 }, { "epoch": 1.0687542545949626, "grad_norm": 1.5739344358444214, "learning_rate": 9.2853661690143e-06, "loss": 0.7925, "step": 1570 }, { "epoch": 1.0694349897889721, "grad_norm": 1.7243211269378662, "learning_rate": 9.284205829536052e-06, "loss": 0.7359, "step": 1571 }, { "epoch": 1.0701157249829816, "grad_norm": 1.9861772060394287, "learning_rate": 9.283044621432216e-06, "loss": 0.6946, "step": 1572 }, { "epoch": 1.0707964601769913, "grad_norm": 1.7402827739715576, "learning_rate": 9.281882544938224e-06, "loss": 0.8235, "step": 1573 }, { "epoch": 1.0714771953710007, "grad_norm": 1.7612621784210205, "learning_rate": 9.280719600289688e-06, "loss": 0.7891, "step": 1574 }, { "epoch": 1.0721579305650102, "grad_norm": 1.7194862365722656, "learning_rate": 9.279555787722398e-06, "loss": 0.7552, "step": 1575 }, { "epoch": 1.0728386657590196, "grad_norm": 1.7932472229003906, "learning_rate": 9.278391107472316e-06, "loss": 0.7386, "step": 1576 }, { "epoch": 1.0735194009530293, "grad_norm": 1.7257440090179443, "learning_rate": 9.277225559775584e-06, "loss": 0.8512, "step": 1577 }, { "epoch": 1.0742001361470388, "grad_norm": 1.858128309249878, "learning_rate": 9.276059144868515e-06, "loss": 0.8815, "step": 1578 }, { "epoch": 1.0748808713410483, "grad_norm": 1.6934176683425903, "learning_rate": 9.274891862987601e-06, "loss": 0.8063, "step": 1579 }, { "epoch": 1.075561606535058, "grad_norm": 1.7115936279296875, "learning_rate": 9.273723714369509e-06, "loss": 0.8149, "step": 1580 }, { "epoch": 1.0762423417290674, "grad_norm": 1.835433840751648, "learning_rate": 9.272554699251081e-06, "loss": 0.741, "step": 1581 }, { "epoch": 1.0769230769230769, "grad_norm": 1.735743761062622, "learning_rate": 9.27138481786934e-06, "loss": 0.7828, "step": 1582 }, { "epoch": 1.0776038121170866, "grad_norm": 1.932988166809082, "learning_rate": 9.270214070461476e-06, "loss": 0.8196, "step": 1583 }, { "epoch": 1.078284547311096, "grad_norm": 1.684312105178833, "learning_rate": 9.26904245726486e-06, "loss": 0.8494, "step": 1584 }, { "epoch": 1.0789652825051055, "grad_norm": 1.6603301763534546, "learning_rate": 9.267869978517036e-06, "loss": 0.8126, "step": 1585 }, { "epoch": 1.079646017699115, "grad_norm": 1.6825748682022095, "learning_rate": 9.266696634455729e-06, "loss": 0.7704, "step": 1586 }, { "epoch": 1.0803267528931246, "grad_norm": 1.969325304031372, "learning_rate": 9.26552242531883e-06, "loss": 0.8576, "step": 1587 }, { "epoch": 1.081007488087134, "grad_norm": 2.014302968978882, "learning_rate": 9.264347351344415e-06, "loss": 0.8138, "step": 1588 }, { "epoch": 1.0816882232811436, "grad_norm": 1.770189642906189, "learning_rate": 9.263171412770728e-06, "loss": 0.8328, "step": 1589 }, { "epoch": 1.0823689584751532, "grad_norm": 1.6413630247116089, "learning_rate": 9.261994609836194e-06, "loss": 0.9342, "step": 1590 }, { "epoch": 1.0830496936691627, "grad_norm": 1.6454356908798218, "learning_rate": 9.260816942779409e-06, "loss": 0.8575, "step": 1591 }, { "epoch": 1.0837304288631722, "grad_norm": 1.8591599464416504, "learning_rate": 9.259638411839144e-06, "loss": 0.7528, "step": 1592 }, { "epoch": 1.0844111640571819, "grad_norm": 1.710437536239624, "learning_rate": 9.258459017254352e-06, "loss": 0.8098, "step": 1593 }, { "epoch": 1.0850918992511913, "grad_norm": 2.009887456893921, "learning_rate": 9.257278759264149e-06, "loss": 0.9002, "step": 1594 }, { "epoch": 1.0857726344452008, "grad_norm": 1.9509892463684082, "learning_rate": 9.256097638107839e-06, "loss": 0.8765, "step": 1595 }, { "epoch": 1.0864533696392105, "grad_norm": 1.8031030893325806, "learning_rate": 9.254915654024894e-06, "loss": 0.8308, "step": 1596 }, { "epoch": 1.08713410483322, "grad_norm": 1.7527135610580444, "learning_rate": 9.253732807254959e-06, "loss": 0.7148, "step": 1597 }, { "epoch": 1.0878148400272294, "grad_norm": 1.7403998374938965, "learning_rate": 9.252549098037859e-06, "loss": 0.8319, "step": 1598 }, { "epoch": 1.0884955752212389, "grad_norm": 1.7698739767074585, "learning_rate": 9.251364526613594e-06, "loss": 0.7384, "step": 1599 }, { "epoch": 1.0891763104152485, "grad_norm": 1.8379913568496704, "learning_rate": 9.250179093222335e-06, "loss": 0.8302, "step": 1600 }, { "epoch": 1.089857045609258, "grad_norm": 1.8969520330429077, "learning_rate": 9.248992798104427e-06, "loss": 0.7728, "step": 1601 }, { "epoch": 1.0905377808032675, "grad_norm": 1.703058123588562, "learning_rate": 9.247805641500396e-06, "loss": 0.8221, "step": 1602 }, { "epoch": 1.0912185159972771, "grad_norm": 1.8214147090911865, "learning_rate": 9.246617623650935e-06, "loss": 0.7456, "step": 1603 }, { "epoch": 1.0918992511912866, "grad_norm": 1.8173205852508545, "learning_rate": 9.245428744796919e-06, "loss": 0.8377, "step": 1604 }, { "epoch": 1.092579986385296, "grad_norm": 1.968502402305603, "learning_rate": 9.24423900517939e-06, "loss": 0.7032, "step": 1605 }, { "epoch": 1.0932607215793055, "grad_norm": 1.6435940265655518, "learning_rate": 9.243048405039574e-06, "loss": 0.8494, "step": 1606 }, { "epoch": 1.0939414567733152, "grad_norm": 1.662204384803772, "learning_rate": 9.241856944618861e-06, "loss": 0.7015, "step": 1607 }, { "epoch": 1.0946221919673247, "grad_norm": 1.9373350143432617, "learning_rate": 9.240664624158822e-06, "loss": 0.707, "step": 1608 }, { "epoch": 1.0953029271613341, "grad_norm": 1.7623521089553833, "learning_rate": 9.239471443901202e-06, "loss": 0.8407, "step": 1609 }, { "epoch": 1.0959836623553438, "grad_norm": 1.7420331239700317, "learning_rate": 9.238277404087918e-06, "loss": 0.9192, "step": 1610 }, { "epoch": 1.0966643975493533, "grad_norm": 1.966062068939209, "learning_rate": 9.23708250496106e-06, "loss": 0.7865, "step": 1611 }, { "epoch": 1.0973451327433628, "grad_norm": 1.9323396682739258, "learning_rate": 9.2358867467629e-06, "loss": 0.7522, "step": 1612 }, { "epoch": 1.0980258679373724, "grad_norm": 1.6541727781295776, "learning_rate": 9.234690129735875e-06, "loss": 0.7564, "step": 1613 }, { "epoch": 1.098706603131382, "grad_norm": 1.770675778388977, "learning_rate": 9.2334926541226e-06, "loss": 0.7365, "step": 1614 }, { "epoch": 1.0993873383253914, "grad_norm": 1.9275522232055664, "learning_rate": 9.232294320165865e-06, "loss": 0.764, "step": 1615 }, { "epoch": 1.100068073519401, "grad_norm": 2.005216360092163, "learning_rate": 9.231095128108634e-06, "loss": 0.8192, "step": 1616 }, { "epoch": 1.1007488087134105, "grad_norm": 1.7534209489822388, "learning_rate": 9.22989507819404e-06, "loss": 0.7966, "step": 1617 }, { "epoch": 1.10142954390742, "grad_norm": 1.8749147653579712, "learning_rate": 9.2286941706654e-06, "loss": 0.8572, "step": 1618 }, { "epoch": 1.1021102791014294, "grad_norm": 1.793827772140503, "learning_rate": 9.227492405766193e-06, "loss": 0.7865, "step": 1619 }, { "epoch": 1.1027910142954391, "grad_norm": 1.6262098550796509, "learning_rate": 9.22628978374008e-06, "loss": 0.8529, "step": 1620 }, { "epoch": 1.1034717494894486, "grad_norm": 1.7273542881011963, "learning_rate": 9.225086304830896e-06, "loss": 0.9091, "step": 1621 }, { "epoch": 1.104152484683458, "grad_norm": 1.8729362487792969, "learning_rate": 9.22388196928264e-06, "loss": 0.9463, "step": 1622 }, { "epoch": 1.1048332198774677, "grad_norm": 1.9880154132843018, "learning_rate": 9.222676777339498e-06, "loss": 0.7462, "step": 1623 }, { "epoch": 1.1055139550714772, "grad_norm": 1.712626338005066, "learning_rate": 9.221470729245826e-06, "loss": 0.7024, "step": 1624 }, { "epoch": 1.1061946902654867, "grad_norm": 1.9441778659820557, "learning_rate": 9.220263825246143e-06, "loss": 0.781, "step": 1625 }, { "epoch": 1.1068754254594964, "grad_norm": 2.0140738487243652, "learning_rate": 9.219056065585154e-06, "loss": 0.8153, "step": 1626 }, { "epoch": 1.1075561606535058, "grad_norm": 2.159289836883545, "learning_rate": 9.217847450507733e-06, "loss": 0.8212, "step": 1627 }, { "epoch": 1.1082368958475153, "grad_norm": 1.8244514465332031, "learning_rate": 9.216637980258926e-06, "loss": 0.8452, "step": 1628 }, { "epoch": 1.1089176310415247, "grad_norm": 1.979771375656128, "learning_rate": 9.215427655083957e-06, "loss": 0.8513, "step": 1629 }, { "epoch": 1.1095983662355344, "grad_norm": 1.7599126100540161, "learning_rate": 9.214216475228217e-06, "loss": 0.7115, "step": 1630 }, { "epoch": 1.110279101429544, "grad_norm": 1.8526699542999268, "learning_rate": 9.213004440937277e-06, "loss": 0.8667, "step": 1631 }, { "epoch": 1.1109598366235534, "grad_norm": 2.063385009765625, "learning_rate": 9.211791552456873e-06, "loss": 0.8399, "step": 1632 }, { "epoch": 1.111640571817563, "grad_norm": 1.8728729486465454, "learning_rate": 9.210577810032923e-06, "loss": 0.7862, "step": 1633 }, { "epoch": 1.1123213070115725, "grad_norm": 1.743039608001709, "learning_rate": 9.209363213911512e-06, "loss": 0.756, "step": 1634 }, { "epoch": 1.113002042205582, "grad_norm": 1.9992308616638184, "learning_rate": 9.208147764338902e-06, "loss": 0.8562, "step": 1635 }, { "epoch": 1.1136827773995917, "grad_norm": 1.7953579425811768, "learning_rate": 9.206931461561524e-06, "loss": 0.7574, "step": 1636 }, { "epoch": 1.1143635125936011, "grad_norm": 1.7610301971435547, "learning_rate": 9.205714305825983e-06, "loss": 0.9068, "step": 1637 }, { "epoch": 1.1150442477876106, "grad_norm": 2.0419275760650635, "learning_rate": 9.204496297379063e-06, "loss": 0.7935, "step": 1638 }, { "epoch": 1.1157249829816203, "grad_norm": 1.9682350158691406, "learning_rate": 9.203277436467713e-06, "loss": 0.7921, "step": 1639 }, { "epoch": 1.1164057181756297, "grad_norm": 1.8553355932235718, "learning_rate": 9.20205772333906e-06, "loss": 0.7027, "step": 1640 }, { "epoch": 1.1170864533696392, "grad_norm": 1.8439000844955444, "learning_rate": 9.200837158240396e-06, "loss": 0.6823, "step": 1641 }, { "epoch": 1.1177671885636487, "grad_norm": 1.8477747440338135, "learning_rate": 9.199615741419196e-06, "loss": 0.8391, "step": 1642 }, { "epoch": 1.1184479237576583, "grad_norm": 1.9535002708435059, "learning_rate": 9.198393473123105e-06, "loss": 0.8101, "step": 1643 }, { "epoch": 1.1191286589516678, "grad_norm": 1.9212429523468018, "learning_rate": 9.197170353599935e-06, "loss": 0.7918, "step": 1644 }, { "epoch": 1.1198093941456773, "grad_norm": 1.6819672584533691, "learning_rate": 9.195946383097674e-06, "loss": 0.7911, "step": 1645 }, { "epoch": 1.120490129339687, "grad_norm": 1.784295916557312, "learning_rate": 9.194721561864485e-06, "loss": 0.7899, "step": 1646 }, { "epoch": 1.1211708645336964, "grad_norm": 1.6305112838745117, "learning_rate": 9.193495890148702e-06, "loss": 0.7982, "step": 1647 }, { "epoch": 1.1218515997277059, "grad_norm": 1.6946897506713867, "learning_rate": 9.192269368198827e-06, "loss": 0.9101, "step": 1648 }, { "epoch": 1.1225323349217153, "grad_norm": 1.7787859439849854, "learning_rate": 9.191041996263542e-06, "loss": 0.837, "step": 1649 }, { "epoch": 1.123213070115725, "grad_norm": 1.7407352924346924, "learning_rate": 9.189813774591693e-06, "loss": 0.8807, "step": 1650 }, { "epoch": 1.1238938053097345, "grad_norm": 2.095094680786133, "learning_rate": 9.18858470343231e-06, "loss": 0.6874, "step": 1651 }, { "epoch": 1.124574540503744, "grad_norm": 2.0206189155578613, "learning_rate": 9.18735478303458e-06, "loss": 0.7366, "step": 1652 }, { "epoch": 1.1252552756977536, "grad_norm": 1.8310730457305908, "learning_rate": 9.186124013647875e-06, "loss": 0.9095, "step": 1653 }, { "epoch": 1.125936010891763, "grad_norm": 1.7600774765014648, "learning_rate": 9.184892395521734e-06, "loss": 0.6861, "step": 1654 }, { "epoch": 1.1266167460857726, "grad_norm": 1.913411259651184, "learning_rate": 9.183659928905867e-06, "loss": 0.747, "step": 1655 }, { "epoch": 1.1272974812797822, "grad_norm": 1.814164161682129, "learning_rate": 9.182426614050157e-06, "loss": 0.8397, "step": 1656 }, { "epoch": 1.1279782164737917, "grad_norm": 1.6965473890304565, "learning_rate": 9.181192451204662e-06, "loss": 0.7879, "step": 1657 }, { "epoch": 1.1286589516678012, "grad_norm": 1.7506492137908936, "learning_rate": 9.179957440619606e-06, "loss": 0.8622, "step": 1658 }, { "epoch": 1.1293396868618109, "grad_norm": 1.8360580205917358, "learning_rate": 9.178721582545391e-06, "loss": 0.7651, "step": 1659 }, { "epoch": 1.1300204220558203, "grad_norm": 1.8918166160583496, "learning_rate": 9.177484877232588e-06, "loss": 0.7445, "step": 1660 }, { "epoch": 1.1307011572498298, "grad_norm": 1.6860038042068481, "learning_rate": 9.176247324931937e-06, "loss": 0.7831, "step": 1661 }, { "epoch": 1.1313818924438395, "grad_norm": 1.8171100616455078, "learning_rate": 9.175008925894355e-06, "loss": 0.7995, "step": 1662 }, { "epoch": 1.132062627637849, "grad_norm": 1.9272820949554443, "learning_rate": 9.173769680370928e-06, "loss": 0.7746, "step": 1663 }, { "epoch": 1.1327433628318584, "grad_norm": 1.8904575109481812, "learning_rate": 9.172529588612912e-06, "loss": 0.7552, "step": 1664 }, { "epoch": 1.1334240980258679, "grad_norm": 1.9194221496582031, "learning_rate": 9.171288650871742e-06, "loss": 0.8247, "step": 1665 }, { "epoch": 1.1341048332198775, "grad_norm": 1.9350415468215942, "learning_rate": 9.17004686739901e-06, "loss": 0.8083, "step": 1666 }, { "epoch": 1.134785568413887, "grad_norm": 1.5525730848312378, "learning_rate": 9.168804238446495e-06, "loss": 0.7958, "step": 1667 }, { "epoch": 1.1354663036078965, "grad_norm": 1.773350715637207, "learning_rate": 9.167560764266138e-06, "loss": 0.7846, "step": 1668 }, { "epoch": 1.1361470388019062, "grad_norm": 1.8412326574325562, "learning_rate": 9.166316445110058e-06, "loss": 0.8919, "step": 1669 }, { "epoch": 1.1368277739959156, "grad_norm": 1.9987083673477173, "learning_rate": 9.165071281230538e-06, "loss": 0.8279, "step": 1670 }, { "epoch": 1.137508509189925, "grad_norm": 1.7346954345703125, "learning_rate": 9.163825272880036e-06, "loss": 0.9227, "step": 1671 }, { "epoch": 1.1381892443839345, "grad_norm": 1.8564746379852295, "learning_rate": 9.162578420311182e-06, "loss": 0.8161, "step": 1672 }, { "epoch": 1.1388699795779442, "grad_norm": 1.8128962516784668, "learning_rate": 9.161330723776775e-06, "loss": 0.6956, "step": 1673 }, { "epoch": 1.1395507147719537, "grad_norm": 2.1118242740631104, "learning_rate": 9.160082183529788e-06, "loss": 0.7508, "step": 1674 }, { "epoch": 1.1402314499659632, "grad_norm": 1.9120557308197021, "learning_rate": 9.158832799823363e-06, "loss": 0.7215, "step": 1675 }, { "epoch": 1.1409121851599728, "grad_norm": 1.678755760192871, "learning_rate": 9.157582572910813e-06, "loss": 0.8806, "step": 1676 }, { "epoch": 1.1415929203539823, "grad_norm": 1.7770640850067139, "learning_rate": 9.15633150304562e-06, "loss": 0.8061, "step": 1677 }, { "epoch": 1.1422736555479918, "grad_norm": 1.8982157707214355, "learning_rate": 9.155079590481446e-06, "loss": 0.8372, "step": 1678 }, { "epoch": 1.1429543907420014, "grad_norm": 1.829430103302002, "learning_rate": 9.153826835472108e-06, "loss": 0.7644, "step": 1679 }, { "epoch": 1.143635125936011, "grad_norm": 2.013606071472168, "learning_rate": 9.15257323827161e-06, "loss": 0.8534, "step": 1680 }, { "epoch": 1.1443158611300204, "grad_norm": 1.8991175889968872, "learning_rate": 9.151318799134118e-06, "loss": 0.8443, "step": 1681 }, { "epoch": 1.14499659632403, "grad_norm": 1.762550711631775, "learning_rate": 9.150063518313968e-06, "loss": 0.8753, "step": 1682 }, { "epoch": 1.1456773315180395, "grad_norm": 1.590149164199829, "learning_rate": 9.148807396065671e-06, "loss": 0.8314, "step": 1683 }, { "epoch": 1.146358066712049, "grad_norm": 1.7471764087677002, "learning_rate": 9.147550432643906e-06, "loss": 0.7977, "step": 1684 }, { "epoch": 1.1470388019060584, "grad_norm": 1.899868130683899, "learning_rate": 9.146292628303523e-06, "loss": 0.6691, "step": 1685 }, { "epoch": 1.1477195371000681, "grad_norm": 1.7359877824783325, "learning_rate": 9.145033983299545e-06, "loss": 0.8084, "step": 1686 }, { "epoch": 1.1484002722940776, "grad_norm": 1.8152694702148438, "learning_rate": 9.143774497887158e-06, "loss": 0.7471, "step": 1687 }, { "epoch": 1.149081007488087, "grad_norm": 1.841660737991333, "learning_rate": 9.14251417232173e-06, "loss": 0.8465, "step": 1688 }, { "epoch": 1.1497617426820967, "grad_norm": 1.7655729055404663, "learning_rate": 9.14125300685879e-06, "loss": 0.7405, "step": 1689 }, { "epoch": 1.1504424778761062, "grad_norm": 1.797224760055542, "learning_rate": 9.139991001754037e-06, "loss": 0.8111, "step": 1690 }, { "epoch": 1.1511232130701157, "grad_norm": 1.897777795791626, "learning_rate": 9.138728157263347e-06, "loss": 0.7112, "step": 1691 }, { "epoch": 1.1518039482641251, "grad_norm": 1.891294002532959, "learning_rate": 9.137464473642762e-06, "loss": 0.742, "step": 1692 }, { "epoch": 1.1524846834581348, "grad_norm": 1.8665229082107544, "learning_rate": 9.136199951148495e-06, "loss": 0.6719, "step": 1693 }, { "epoch": 1.1531654186521443, "grad_norm": 1.8881504535675049, "learning_rate": 9.134934590036926e-06, "loss": 0.7287, "step": 1694 }, { "epoch": 1.1538461538461537, "grad_norm": 2.000664710998535, "learning_rate": 9.133668390564611e-06, "loss": 0.8439, "step": 1695 }, { "epoch": 1.1545268890401634, "grad_norm": 1.9496846199035645, "learning_rate": 9.132401352988272e-06, "loss": 0.7125, "step": 1696 }, { "epoch": 1.155207624234173, "grad_norm": 1.9355915784835815, "learning_rate": 9.131133477564802e-06, "loss": 0.8127, "step": 1697 }, { "epoch": 1.1558883594281824, "grad_norm": 1.8514153957366943, "learning_rate": 9.129864764551261e-06, "loss": 0.7852, "step": 1698 }, { "epoch": 1.156569094622192, "grad_norm": 1.8373408317565918, "learning_rate": 9.128595214204885e-06, "loss": 0.7149, "step": 1699 }, { "epoch": 1.1572498298162015, "grad_norm": 1.885533094406128, "learning_rate": 9.127324826783075e-06, "loss": 0.6745, "step": 1700 }, { "epoch": 1.157930565010211, "grad_norm": 1.8819025754928589, "learning_rate": 9.126053602543403e-06, "loss": 0.7929, "step": 1701 }, { "epoch": 1.1586113002042207, "grad_norm": 1.8844836950302124, "learning_rate": 9.124781541743609e-06, "loss": 0.8977, "step": 1702 }, { "epoch": 1.1592920353982301, "grad_norm": 1.725303053855896, "learning_rate": 9.123508644641604e-06, "loss": 0.832, "step": 1703 }, { "epoch": 1.1599727705922396, "grad_norm": 2.0042226314544678, "learning_rate": 9.122234911495472e-06, "loss": 0.8073, "step": 1704 }, { "epoch": 1.1606535057862493, "grad_norm": 2.3246262073516846, "learning_rate": 9.12096034256346e-06, "loss": 0.6846, "step": 1705 }, { "epoch": 1.1613342409802587, "grad_norm": 2.104496479034424, "learning_rate": 9.11968493810399e-06, "loss": 0.7416, "step": 1706 }, { "epoch": 1.1620149761742682, "grad_norm": 1.6774659156799316, "learning_rate": 9.118408698375649e-06, "loss": 0.7476, "step": 1707 }, { "epoch": 1.1626957113682777, "grad_norm": 1.8513365983963013, "learning_rate": 9.117131623637197e-06, "loss": 0.8577, "step": 1708 }, { "epoch": 1.1633764465622873, "grad_norm": 2.0386478900909424, "learning_rate": 9.11585371414756e-06, "loss": 0.8011, "step": 1709 }, { "epoch": 1.1640571817562968, "grad_norm": 1.8415758609771729, "learning_rate": 9.114574970165838e-06, "loss": 0.8808, "step": 1710 }, { "epoch": 1.1647379169503063, "grad_norm": 1.7992658615112305, "learning_rate": 9.113295391951295e-06, "loss": 0.9422, "step": 1711 }, { "epoch": 1.165418652144316, "grad_norm": 2.026799201965332, "learning_rate": 9.112014979763366e-06, "loss": 0.7542, "step": 1712 }, { "epoch": 1.1660993873383254, "grad_norm": 1.9632463455200195, "learning_rate": 9.110733733861657e-06, "loss": 0.7849, "step": 1713 }, { "epoch": 1.1667801225323349, "grad_norm": 1.826840877532959, "learning_rate": 9.10945165450594e-06, "loss": 0.7377, "step": 1714 }, { "epoch": 1.1674608577263443, "grad_norm": 1.7323476076126099, "learning_rate": 9.10816874195616e-06, "loss": 0.7476, "step": 1715 }, { "epoch": 1.168141592920354, "grad_norm": 1.6775234937667847, "learning_rate": 9.106884996472424e-06, "loss": 0.8218, "step": 1716 }, { "epoch": 1.1688223281143635, "grad_norm": 1.811280369758606, "learning_rate": 9.105600418315017e-06, "loss": 0.7861, "step": 1717 }, { "epoch": 1.169503063308373, "grad_norm": 1.910958170890808, "learning_rate": 9.104315007744385e-06, "loss": 0.826, "step": 1718 }, { "epoch": 1.1701837985023826, "grad_norm": 1.907768726348877, "learning_rate": 9.103028765021146e-06, "loss": 0.8434, "step": 1719 }, { "epoch": 1.170864533696392, "grad_norm": 1.9291532039642334, "learning_rate": 9.101741690406088e-06, "loss": 0.8152, "step": 1720 }, { "epoch": 1.1715452688904016, "grad_norm": 1.830837607383728, "learning_rate": 9.100453784160165e-06, "loss": 0.8123, "step": 1721 }, { "epoch": 1.1722260040844112, "grad_norm": 1.7684122323989868, "learning_rate": 9.099165046544501e-06, "loss": 0.8186, "step": 1722 }, { "epoch": 1.1729067392784207, "grad_norm": 1.8851134777069092, "learning_rate": 9.097875477820389e-06, "loss": 0.7138, "step": 1723 }, { "epoch": 1.1735874744724302, "grad_norm": 1.7178969383239746, "learning_rate": 9.096585078249289e-06, "loss": 0.7788, "step": 1724 }, { "epoch": 1.1742682096664399, "grad_norm": 1.557334303855896, "learning_rate": 9.095293848092833e-06, "loss": 0.7922, "step": 1725 }, { "epoch": 1.1749489448604493, "grad_norm": 1.7606419324874878, "learning_rate": 9.094001787612816e-06, "loss": 0.8108, "step": 1726 }, { "epoch": 1.1756296800544588, "grad_norm": 1.8642417192459106, "learning_rate": 9.092708897071206e-06, "loss": 0.8248, "step": 1727 }, { "epoch": 1.1763104152484685, "grad_norm": 1.6149094104766846, "learning_rate": 9.091415176730135e-06, "loss": 0.8789, "step": 1728 }, { "epoch": 1.176991150442478, "grad_norm": 1.9004523754119873, "learning_rate": 9.090120626851906e-06, "loss": 0.7351, "step": 1729 }, { "epoch": 1.1776718856364874, "grad_norm": 1.9429432153701782, "learning_rate": 9.088825247698992e-06, "loss": 0.7429, "step": 1730 }, { "epoch": 1.1783526208304969, "grad_norm": 1.6934906244277954, "learning_rate": 9.08752903953403e-06, "loss": 0.7919, "step": 1731 }, { "epoch": 1.1790333560245065, "grad_norm": 1.978026270866394, "learning_rate": 9.086232002619828e-06, "loss": 0.6306, "step": 1732 }, { "epoch": 1.179714091218516, "grad_norm": 1.7656826972961426, "learning_rate": 9.084934137219363e-06, "loss": 0.9033, "step": 1733 }, { "epoch": 1.1803948264125255, "grad_norm": 1.6386032104492188, "learning_rate": 9.083635443595774e-06, "loss": 0.8809, "step": 1734 }, { "epoch": 1.1810755616065352, "grad_norm": 1.884752631187439, "learning_rate": 9.082335922012375e-06, "loss": 0.8199, "step": 1735 }, { "epoch": 1.1817562968005446, "grad_norm": 1.8680825233459473, "learning_rate": 9.081035572732644e-06, "loss": 0.7702, "step": 1736 }, { "epoch": 1.182437031994554, "grad_norm": 1.6961694955825806, "learning_rate": 9.079734396020227e-06, "loss": 0.788, "step": 1737 }, { "epoch": 1.1831177671885635, "grad_norm": 1.7703497409820557, "learning_rate": 9.078432392138941e-06, "loss": 0.9067, "step": 1738 }, { "epoch": 1.1837985023825732, "grad_norm": 1.791306734085083, "learning_rate": 9.077129561352764e-06, "loss": 0.7678, "step": 1739 }, { "epoch": 1.1844792375765827, "grad_norm": 1.8933446407318115, "learning_rate": 9.075825903925849e-06, "loss": 0.7512, "step": 1740 }, { "epoch": 1.1851599727705922, "grad_norm": 1.863607406616211, "learning_rate": 9.074521420122513e-06, "loss": 0.7992, "step": 1741 }, { "epoch": 1.1858407079646018, "grad_norm": 1.8850502967834473, "learning_rate": 9.073216110207238e-06, "loss": 0.7894, "step": 1742 }, { "epoch": 1.1865214431586113, "grad_norm": 1.721366047859192, "learning_rate": 9.071909974444682e-06, "loss": 0.8694, "step": 1743 }, { "epoch": 1.1872021783526208, "grad_norm": 2.0423176288604736, "learning_rate": 9.07060301309966e-06, "loss": 0.7903, "step": 1744 }, { "epoch": 1.1878829135466304, "grad_norm": 1.917249083518982, "learning_rate": 9.069295226437163e-06, "loss": 0.7936, "step": 1745 }, { "epoch": 1.18856364874064, "grad_norm": 1.545013666152954, "learning_rate": 9.067986614722342e-06, "loss": 0.9672, "step": 1746 }, { "epoch": 1.1892443839346494, "grad_norm": 1.8363862037658691, "learning_rate": 9.066677178220519e-06, "loss": 0.8258, "step": 1747 }, { "epoch": 1.189925119128659, "grad_norm": 1.6768817901611328, "learning_rate": 9.065366917197187e-06, "loss": 0.8621, "step": 1748 }, { "epoch": 1.1906058543226685, "grad_norm": 1.9988384246826172, "learning_rate": 9.064055831918e-06, "loss": 0.8023, "step": 1749 }, { "epoch": 1.191286589516678, "grad_norm": 1.7048882246017456, "learning_rate": 9.062743922648783e-06, "loss": 0.7886, "step": 1750 }, { "epoch": 1.1919673247106874, "grad_norm": 1.7340853214263916, "learning_rate": 9.061431189655523e-06, "loss": 0.7721, "step": 1751 }, { "epoch": 1.1926480599046971, "grad_norm": 1.915126919746399, "learning_rate": 9.060117633204379e-06, "loss": 0.7688, "step": 1752 }, { "epoch": 1.1933287950987066, "grad_norm": 1.8252276182174683, "learning_rate": 9.058803253561676e-06, "loss": 0.7076, "step": 1753 }, { "epoch": 1.194009530292716, "grad_norm": 1.8087233304977417, "learning_rate": 9.057488050993906e-06, "loss": 0.7935, "step": 1754 }, { "epoch": 1.1946902654867257, "grad_norm": 1.8052319288253784, "learning_rate": 9.056172025767725e-06, "loss": 0.8517, "step": 1755 }, { "epoch": 1.1953710006807352, "grad_norm": 1.8458967208862305, "learning_rate": 9.05485517814996e-06, "loss": 0.8577, "step": 1756 }, { "epoch": 1.1960517358747447, "grad_norm": 1.8833473920822144, "learning_rate": 9.053537508407603e-06, "loss": 0.7663, "step": 1757 }, { "epoch": 1.1967324710687541, "grad_norm": 1.5747740268707275, "learning_rate": 9.05221901680781e-06, "loss": 0.9245, "step": 1758 }, { "epoch": 1.1974132062627638, "grad_norm": 1.7445180416107178, "learning_rate": 9.05089970361791e-06, "loss": 0.7249, "step": 1759 }, { "epoch": 1.1980939414567733, "grad_norm": 1.6585806608200073, "learning_rate": 9.04957956910539e-06, "loss": 0.7787, "step": 1760 }, { "epoch": 1.1987746766507827, "grad_norm": 1.820158839225769, "learning_rate": 9.048258613537912e-06, "loss": 0.8424, "step": 1761 }, { "epoch": 1.1994554118447924, "grad_norm": 1.6349824666976929, "learning_rate": 9.046936837183297e-06, "loss": 0.9158, "step": 1762 }, { "epoch": 1.200136147038802, "grad_norm": 1.9198825359344482, "learning_rate": 9.045614240309539e-06, "loss": 0.8035, "step": 1763 }, { "epoch": 1.2008168822328114, "grad_norm": 1.8081297874450684, "learning_rate": 9.044290823184796e-06, "loss": 0.7486, "step": 1764 }, { "epoch": 1.201497617426821, "grad_norm": 1.7645050287246704, "learning_rate": 9.042966586077389e-06, "loss": 0.8379, "step": 1765 }, { "epoch": 1.2021783526208305, "grad_norm": 1.8568711280822754, "learning_rate": 9.041641529255808e-06, "loss": 0.7115, "step": 1766 }, { "epoch": 1.20285908781484, "grad_norm": 1.697973370552063, "learning_rate": 9.040315652988712e-06, "loss": 0.7959, "step": 1767 }, { "epoch": 1.2035398230088497, "grad_norm": 1.7311322689056396, "learning_rate": 9.038988957544919e-06, "loss": 0.7421, "step": 1768 }, { "epoch": 1.2042205582028591, "grad_norm": 1.5669091939926147, "learning_rate": 9.037661443193422e-06, "loss": 0.8329, "step": 1769 }, { "epoch": 1.2049012933968686, "grad_norm": 1.8538806438446045, "learning_rate": 9.036333110203373e-06, "loss": 0.7758, "step": 1770 }, { "epoch": 1.2055820285908783, "grad_norm": 1.7788267135620117, "learning_rate": 9.035003958844094e-06, "loss": 0.813, "step": 1771 }, { "epoch": 1.2062627637848877, "grad_norm": 1.895897626876831, "learning_rate": 9.033673989385068e-06, "loss": 0.8269, "step": 1772 }, { "epoch": 1.2069434989788972, "grad_norm": 1.6547260284423828, "learning_rate": 9.03234320209595e-06, "loss": 0.9011, "step": 1773 }, { "epoch": 1.2076242341729067, "grad_norm": 1.9980626106262207, "learning_rate": 9.031011597246557e-06, "loss": 0.7571, "step": 1774 }, { "epoch": 1.2083049693669163, "grad_norm": 1.8157517910003662, "learning_rate": 9.029679175106874e-06, "loss": 0.6907, "step": 1775 }, { "epoch": 1.2089857045609258, "grad_norm": 1.7116400003433228, "learning_rate": 9.028345935947049e-06, "loss": 0.7157, "step": 1776 }, { "epoch": 1.2096664397549353, "grad_norm": 1.9688800573349, "learning_rate": 9.027011880037397e-06, "loss": 0.7885, "step": 1777 }, { "epoch": 1.210347174948945, "grad_norm": 1.919909954071045, "learning_rate": 9.025677007648402e-06, "loss": 0.784, "step": 1778 }, { "epoch": 1.2110279101429544, "grad_norm": 1.6314746141433716, "learning_rate": 9.024341319050704e-06, "loss": 0.8755, "step": 1779 }, { "epoch": 1.2117086453369639, "grad_norm": 1.869079828262329, "learning_rate": 9.023004814515121e-06, "loss": 0.7401, "step": 1780 }, { "epoch": 1.2123893805309733, "grad_norm": 1.8688088655471802, "learning_rate": 9.021667494312628e-06, "loss": 0.9015, "step": 1781 }, { "epoch": 1.213070115724983, "grad_norm": 1.9318856000900269, "learning_rate": 9.020329358714364e-06, "loss": 0.7468, "step": 1782 }, { "epoch": 1.2137508509189925, "grad_norm": 2.1092262268066406, "learning_rate": 9.018990407991643e-06, "loss": 0.8674, "step": 1783 }, { "epoch": 1.214431586113002, "grad_norm": 1.696569800376892, "learning_rate": 9.017650642415933e-06, "loss": 0.7363, "step": 1784 }, { "epoch": 1.2151123213070116, "grad_norm": 1.85198974609375, "learning_rate": 9.016310062258876e-06, "loss": 0.67, "step": 1785 }, { "epoch": 1.215793056501021, "grad_norm": 1.8115390539169312, "learning_rate": 9.014968667792275e-06, "loss": 0.836, "step": 1786 }, { "epoch": 1.2164737916950306, "grad_norm": 1.9156856536865234, "learning_rate": 9.013626459288097e-06, "loss": 0.6851, "step": 1787 }, { "epoch": 1.2171545268890402, "grad_norm": 1.5566480159759521, "learning_rate": 9.012283437018474e-06, "loss": 0.8498, "step": 1788 }, { "epoch": 1.2178352620830497, "grad_norm": 1.9076049327850342, "learning_rate": 9.01093960125571e-06, "loss": 0.7649, "step": 1789 }, { "epoch": 1.2185159972770592, "grad_norm": 1.7261030673980713, "learning_rate": 9.009594952272266e-06, "loss": 0.8501, "step": 1790 }, { "epoch": 1.2191967324710689, "grad_norm": 1.912793517112732, "learning_rate": 9.008249490340767e-06, "loss": 0.8221, "step": 1791 }, { "epoch": 1.2198774676650783, "grad_norm": 1.9346188306808472, "learning_rate": 9.006903215734011e-06, "loss": 0.7781, "step": 1792 }, { "epoch": 1.2205582028590878, "grad_norm": 2.0059704780578613, "learning_rate": 9.005556128724954e-06, "loss": 0.7743, "step": 1793 }, { "epoch": 1.2212389380530975, "grad_norm": 1.6838138103485107, "learning_rate": 9.004208229586721e-06, "loss": 0.8682, "step": 1794 }, { "epoch": 1.221919673247107, "grad_norm": 1.8348103761672974, "learning_rate": 9.002859518592598e-06, "loss": 0.7541, "step": 1795 }, { "epoch": 1.2226004084411164, "grad_norm": 1.9277808666229248, "learning_rate": 9.001509996016034e-06, "loss": 0.7428, "step": 1796 }, { "epoch": 1.2232811436351259, "grad_norm": 1.7585655450820923, "learning_rate": 9.000159662130649e-06, "loss": 0.8075, "step": 1797 }, { "epoch": 1.2239618788291355, "grad_norm": 1.8975517749786377, "learning_rate": 8.998808517210225e-06, "loss": 0.7748, "step": 1798 }, { "epoch": 1.224642614023145, "grad_norm": 1.807596206665039, "learning_rate": 8.997456561528706e-06, "loss": 0.7561, "step": 1799 }, { "epoch": 1.2253233492171545, "grad_norm": 1.8205633163452148, "learning_rate": 8.9961037953602e-06, "loss": 0.833, "step": 1800 }, { "epoch": 1.2260040844111642, "grad_norm": 1.6844879388809204, "learning_rate": 8.994750218978985e-06, "loss": 0.8797, "step": 1801 }, { "epoch": 1.2266848196051736, "grad_norm": 1.7155462503433228, "learning_rate": 8.993395832659497e-06, "loss": 0.8444, "step": 1802 }, { "epoch": 1.227365554799183, "grad_norm": 1.7321100234985352, "learning_rate": 8.99204063667634e-06, "loss": 0.785, "step": 1803 }, { "epoch": 1.2280462899931925, "grad_norm": 1.7533844709396362, "learning_rate": 8.99068463130428e-06, "loss": 0.8343, "step": 1804 }, { "epoch": 1.2287270251872022, "grad_norm": 1.7803937196731567, "learning_rate": 8.989327816818248e-06, "loss": 0.7461, "step": 1805 }, { "epoch": 1.2294077603812117, "grad_norm": 1.9051638841629028, "learning_rate": 8.98797019349334e-06, "loss": 0.7702, "step": 1806 }, { "epoch": 1.2300884955752212, "grad_norm": 1.8913166522979736, "learning_rate": 8.986611761604814e-06, "loss": 0.869, "step": 1807 }, { "epoch": 1.2307692307692308, "grad_norm": 1.770653486251831, "learning_rate": 8.985252521428094e-06, "loss": 0.7649, "step": 1808 }, { "epoch": 1.2314499659632403, "grad_norm": 1.844394326210022, "learning_rate": 8.983892473238765e-06, "loss": 0.7413, "step": 1809 }, { "epoch": 1.2321307011572498, "grad_norm": 1.8893649578094482, "learning_rate": 8.98253161731258e-06, "loss": 0.6954, "step": 1810 }, { "epoch": 1.2328114363512594, "grad_norm": 1.7332369089126587, "learning_rate": 8.981169953925452e-06, "loss": 0.839, "step": 1811 }, { "epoch": 1.233492171545269, "grad_norm": 1.6949418783187866, "learning_rate": 8.979807483353459e-06, "loss": 0.8303, "step": 1812 }, { "epoch": 1.2341729067392784, "grad_norm": 1.7570945024490356, "learning_rate": 8.978444205872844e-06, "loss": 0.7338, "step": 1813 }, { "epoch": 1.234853641933288, "grad_norm": 1.8226885795593262, "learning_rate": 8.977080121760012e-06, "loss": 0.8494, "step": 1814 }, { "epoch": 1.2355343771272975, "grad_norm": 1.7907154560089111, "learning_rate": 8.97571523129153e-06, "loss": 0.7699, "step": 1815 }, { "epoch": 1.236215112321307, "grad_norm": 1.8100851774215698, "learning_rate": 8.974349534744133e-06, "loss": 0.7474, "step": 1816 }, { "epoch": 1.2368958475153164, "grad_norm": 1.6914061307907104, "learning_rate": 8.972983032394716e-06, "loss": 0.7209, "step": 1817 }, { "epoch": 1.2375765827093261, "grad_norm": 1.7752726078033447, "learning_rate": 8.97161572452034e-06, "loss": 0.7926, "step": 1818 }, { "epoch": 1.2382573179033356, "grad_norm": 1.596378207206726, "learning_rate": 8.970247611398223e-06, "loss": 0.8291, "step": 1819 }, { "epoch": 1.238938053097345, "grad_norm": 1.9636428356170654, "learning_rate": 8.968878693305756e-06, "loss": 0.77, "step": 1820 }, { "epoch": 1.2396187882913547, "grad_norm": 1.8350647687911987, "learning_rate": 8.967508970520485e-06, "loss": 0.6914, "step": 1821 }, { "epoch": 1.2402995234853642, "grad_norm": 1.6634657382965088, "learning_rate": 8.966138443320124e-06, "loss": 0.9136, "step": 1822 }, { "epoch": 1.2409802586793737, "grad_norm": 1.7326914072036743, "learning_rate": 8.964767111982547e-06, "loss": 0.9084, "step": 1823 }, { "epoch": 1.2416609938733831, "grad_norm": 1.7170863151550293, "learning_rate": 8.96339497678579e-06, "loss": 0.8091, "step": 1824 }, { "epoch": 1.2423417290673928, "grad_norm": 1.7090586423873901, "learning_rate": 8.962022038008059e-06, "loss": 0.7988, "step": 1825 }, { "epoch": 1.2430224642614023, "grad_norm": 1.696249008178711, "learning_rate": 8.960648295927716e-06, "loss": 0.8317, "step": 1826 }, { "epoch": 1.2437031994554117, "grad_norm": 2.0414299964904785, "learning_rate": 8.95927375082329e-06, "loss": 0.9137, "step": 1827 }, { "epoch": 1.2443839346494214, "grad_norm": 1.7859768867492676, "learning_rate": 8.957898402973467e-06, "loss": 0.8896, "step": 1828 }, { "epoch": 1.245064669843431, "grad_norm": 1.709746241569519, "learning_rate": 8.956522252657102e-06, "loss": 0.9121, "step": 1829 }, { "epoch": 1.2457454050374404, "grad_norm": 1.8650195598602295, "learning_rate": 8.955145300153213e-06, "loss": 0.9162, "step": 1830 }, { "epoch": 1.24642614023145, "grad_norm": 1.7405368089675903, "learning_rate": 8.953767545740972e-06, "loss": 0.8005, "step": 1831 }, { "epoch": 1.2471068754254595, "grad_norm": 1.555091142654419, "learning_rate": 8.952388989699724e-06, "loss": 0.801, "step": 1832 }, { "epoch": 1.247787610619469, "grad_norm": 1.7168864011764526, "learning_rate": 8.951009632308973e-06, "loss": 0.8106, "step": 1833 }, { "epoch": 1.2484683458134787, "grad_norm": 2.036228656768799, "learning_rate": 8.949629473848382e-06, "loss": 0.7929, "step": 1834 }, { "epoch": 1.2491490810074881, "grad_norm": 1.6526601314544678, "learning_rate": 8.948248514597779e-06, "loss": 0.9248, "step": 1835 }, { "epoch": 1.2498298162014976, "grad_norm": 1.9601285457611084, "learning_rate": 8.946866754837156e-06, "loss": 0.8084, "step": 1836 }, { "epoch": 1.2505105513955073, "grad_norm": 1.9171876907348633, "learning_rate": 8.945484194846666e-06, "loss": 0.9121, "step": 1837 }, { "epoch": 1.2511912865895167, "grad_norm": 1.6902662515640259, "learning_rate": 8.944100834906622e-06, "loss": 0.9306, "step": 1838 }, { "epoch": 1.2518720217835262, "grad_norm": 1.726882815361023, "learning_rate": 8.942716675297502e-06, "loss": 0.7926, "step": 1839 }, { "epoch": 1.2525527569775359, "grad_norm": 1.7625555992126465, "learning_rate": 8.941331716299944e-06, "loss": 0.9213, "step": 1840 }, { "epoch": 1.2532334921715453, "grad_norm": 1.8049885034561157, "learning_rate": 8.939945958194753e-06, "loss": 0.8439, "step": 1841 }, { "epoch": 1.2539142273655548, "grad_norm": 1.6533465385437012, "learning_rate": 8.93855940126289e-06, "loss": 0.8109, "step": 1842 }, { "epoch": 1.2545949625595643, "grad_norm": 1.9602165222167969, "learning_rate": 8.937172045785478e-06, "loss": 0.7255, "step": 1843 }, { "epoch": 1.2552756977535737, "grad_norm": 1.8705602884292603, "learning_rate": 8.935783892043809e-06, "loss": 0.8218, "step": 1844 }, { "epoch": 1.2559564329475834, "grad_norm": 1.9260307550430298, "learning_rate": 8.934394940319329e-06, "loss": 0.852, "step": 1845 }, { "epoch": 1.2566371681415929, "grad_norm": 1.8944427967071533, "learning_rate": 8.933005190893651e-06, "loss": 0.8257, "step": 1846 }, { "epoch": 1.2573179033356023, "grad_norm": 1.5789194107055664, "learning_rate": 8.931614644048545e-06, "loss": 0.8486, "step": 1847 }, { "epoch": 1.257998638529612, "grad_norm": 1.8274271488189697, "learning_rate": 8.930223300065947e-06, "loss": 0.7349, "step": 1848 }, { "epoch": 1.2586793737236215, "grad_norm": 1.7154163122177124, "learning_rate": 8.928831159227952e-06, "loss": 0.9093, "step": 1849 }, { "epoch": 1.259360108917631, "grad_norm": 1.957992672920227, "learning_rate": 8.92743822181682e-06, "loss": 0.8377, "step": 1850 }, { "epoch": 1.2600408441116406, "grad_norm": 1.7728822231292725, "learning_rate": 8.926044488114966e-06, "loss": 0.7361, "step": 1851 }, { "epoch": 1.26072157930565, "grad_norm": 1.7152836322784424, "learning_rate": 8.924649958404973e-06, "loss": 0.8172, "step": 1852 }, { "epoch": 1.2614023144996596, "grad_norm": 1.8576616048812866, "learning_rate": 8.923254632969582e-06, "loss": 0.7462, "step": 1853 }, { "epoch": 1.2620830496936692, "grad_norm": 1.703702449798584, "learning_rate": 8.921858512091697e-06, "loss": 0.8424, "step": 1854 }, { "epoch": 1.2627637848876787, "grad_norm": 1.8465960025787354, "learning_rate": 8.920461596054382e-06, "loss": 0.8099, "step": 1855 }, { "epoch": 1.2634445200816882, "grad_norm": 1.8219668865203857, "learning_rate": 8.919063885140862e-06, "loss": 0.8155, "step": 1856 }, { "epoch": 1.2641252552756979, "grad_norm": 1.8600908517837524, "learning_rate": 8.917665379634524e-06, "loss": 0.7831, "step": 1857 }, { "epoch": 1.2648059904697073, "grad_norm": 1.9859881401062012, "learning_rate": 8.916266079818918e-06, "loss": 0.7975, "step": 1858 }, { "epoch": 1.2654867256637168, "grad_norm": 1.7000882625579834, "learning_rate": 8.91486598597775e-06, "loss": 0.7761, "step": 1859 }, { "epoch": 1.2661674608577265, "grad_norm": 1.6804572343826294, "learning_rate": 8.913465098394892e-06, "loss": 0.8159, "step": 1860 }, { "epoch": 1.266848196051736, "grad_norm": 1.8672455549240112, "learning_rate": 8.912063417354374e-06, "loss": 0.7517, "step": 1861 }, { "epoch": 1.2675289312457454, "grad_norm": 2.146207332611084, "learning_rate": 8.910660943140388e-06, "loss": 0.877, "step": 1862 }, { "epoch": 1.2682096664397549, "grad_norm": 1.8488119840621948, "learning_rate": 8.909257676037287e-06, "loss": 0.8507, "step": 1863 }, { "epoch": 1.2688904016337645, "grad_norm": 1.8091404438018799, "learning_rate": 8.907853616329582e-06, "loss": 0.8035, "step": 1864 }, { "epoch": 1.269571136827774, "grad_norm": 1.7483961582183838, "learning_rate": 8.90644876430195e-06, "loss": 0.8722, "step": 1865 }, { "epoch": 1.2702518720217835, "grad_norm": 1.9076073169708252, "learning_rate": 8.905043120239223e-06, "loss": 0.7731, "step": 1866 }, { "epoch": 1.270932607215793, "grad_norm": 1.746904730796814, "learning_rate": 8.9036366844264e-06, "loss": 0.8248, "step": 1867 }, { "epoch": 1.2716133424098026, "grad_norm": 1.9716392755508423, "learning_rate": 8.902229457148634e-06, "loss": 0.7183, "step": 1868 }, { "epoch": 1.272294077603812, "grad_norm": 1.9683860540390015, "learning_rate": 8.900821438691242e-06, "loss": 0.7683, "step": 1869 }, { "epoch": 1.2729748127978215, "grad_norm": 1.9496409893035889, "learning_rate": 8.899412629339699e-06, "loss": 0.8676, "step": 1870 }, { "epoch": 1.2736555479918312, "grad_norm": 1.833980679512024, "learning_rate": 8.898003029379646e-06, "loss": 0.7796, "step": 1871 }, { "epoch": 1.2743362831858407, "grad_norm": 1.6535853147506714, "learning_rate": 8.896592639096877e-06, "loss": 0.8483, "step": 1872 }, { "epoch": 1.2750170183798502, "grad_norm": 1.8071833848953247, "learning_rate": 8.89518145877735e-06, "loss": 0.7736, "step": 1873 }, { "epoch": 1.2756977535738598, "grad_norm": 1.8393594026565552, "learning_rate": 8.893769488707183e-06, "loss": 0.706, "step": 1874 }, { "epoch": 1.2763784887678693, "grad_norm": 1.753636360168457, "learning_rate": 8.892356729172656e-06, "loss": 0.8491, "step": 1875 }, { "epoch": 1.2770592239618788, "grad_norm": 1.86326003074646, "learning_rate": 8.890943180460203e-06, "loss": 0.719, "step": 1876 }, { "epoch": 1.2777399591558884, "grad_norm": 1.902328610420227, "learning_rate": 8.889528842856424e-06, "loss": 0.7188, "step": 1877 }, { "epoch": 1.278420694349898, "grad_norm": 1.804715871810913, "learning_rate": 8.88811371664808e-06, "loss": 0.8229, "step": 1878 }, { "epoch": 1.2791014295439074, "grad_norm": 1.7046537399291992, "learning_rate": 8.886697802122082e-06, "loss": 0.8955, "step": 1879 }, { "epoch": 1.279782164737917, "grad_norm": 1.7768580913543701, "learning_rate": 8.885281099565512e-06, "loss": 0.7601, "step": 1880 }, { "epoch": 1.2804628999319265, "grad_norm": 1.9451833963394165, "learning_rate": 8.883863609265609e-06, "loss": 0.7877, "step": 1881 }, { "epoch": 1.281143635125936, "grad_norm": 1.649100422859192, "learning_rate": 8.882445331509766e-06, "loss": 0.8786, "step": 1882 }, { "epoch": 1.2818243703199457, "grad_norm": 1.840621829032898, "learning_rate": 8.881026266585542e-06, "loss": 0.8906, "step": 1883 }, { "epoch": 1.2825051055139551, "grad_norm": 1.7798974514007568, "learning_rate": 8.879606414780652e-06, "loss": 0.8768, "step": 1884 }, { "epoch": 1.2831858407079646, "grad_norm": 1.8073973655700684, "learning_rate": 8.878185776382974e-06, "loss": 0.7739, "step": 1885 }, { "epoch": 1.283866575901974, "grad_norm": 1.7613645792007446, "learning_rate": 8.876764351680542e-06, "loss": 0.9378, "step": 1886 }, { "epoch": 1.2845473110959835, "grad_norm": 1.7837556600570679, "learning_rate": 8.87534214096155e-06, "loss": 0.8072, "step": 1887 }, { "epoch": 1.2852280462899932, "grad_norm": 1.964368462562561, "learning_rate": 8.873919144514355e-06, "loss": 0.7198, "step": 1888 }, { "epoch": 1.2859087814840027, "grad_norm": 1.8380415439605713, "learning_rate": 8.872495362627468e-06, "loss": 0.6872, "step": 1889 }, { "epoch": 1.2865895166780121, "grad_norm": 1.8229714632034302, "learning_rate": 8.871070795589562e-06, "loss": 0.7221, "step": 1890 }, { "epoch": 1.2872702518720218, "grad_norm": 1.7182371616363525, "learning_rate": 8.86964544368947e-06, "loss": 0.8003, "step": 1891 }, { "epoch": 1.2879509870660313, "grad_norm": 1.7677960395812988, "learning_rate": 8.86821930721618e-06, "loss": 0.8059, "step": 1892 }, { "epoch": 1.2886317222600407, "grad_norm": 1.855527400970459, "learning_rate": 8.866792386458848e-06, "loss": 0.7156, "step": 1893 }, { "epoch": 1.2893124574540504, "grad_norm": 1.768491268157959, "learning_rate": 8.865364681706776e-06, "loss": 0.7655, "step": 1894 }, { "epoch": 1.28999319264806, "grad_norm": 1.765334129333496, "learning_rate": 8.863936193249439e-06, "loss": 0.7677, "step": 1895 }, { "epoch": 1.2906739278420694, "grad_norm": 1.7127997875213623, "learning_rate": 8.86250692137646e-06, "loss": 0.8656, "step": 1896 }, { "epoch": 1.291354663036079, "grad_norm": 1.6272556781768799, "learning_rate": 8.861076866377625e-06, "loss": 0.9277, "step": 1897 }, { "epoch": 1.2920353982300885, "grad_norm": 1.8493058681488037, "learning_rate": 8.859646028542877e-06, "loss": 0.7199, "step": 1898 }, { "epoch": 1.292716133424098, "grad_norm": 1.6856224536895752, "learning_rate": 8.858214408162325e-06, "loss": 0.8547, "step": 1899 }, { "epoch": 1.2933968686181077, "grad_norm": 1.7471873760223389, "learning_rate": 8.856782005526226e-06, "loss": 0.7417, "step": 1900 }, { "epoch": 1.2940776038121171, "grad_norm": 1.770548701286316, "learning_rate": 8.855348820925001e-06, "loss": 0.7266, "step": 1901 }, { "epoch": 1.2947583390061266, "grad_norm": 1.843955397605896, "learning_rate": 8.853914854649232e-06, "loss": 0.6296, "step": 1902 }, { "epoch": 1.2954390742001363, "grad_norm": 1.8358956575393677, "learning_rate": 8.852480106989654e-06, "loss": 0.7894, "step": 1903 }, { "epoch": 1.2961198093941457, "grad_norm": 1.7494573593139648, "learning_rate": 8.851044578237163e-06, "loss": 0.8483, "step": 1904 }, { "epoch": 1.2968005445881552, "grad_norm": 1.7670060396194458, "learning_rate": 8.849608268682814e-06, "loss": 0.7524, "step": 1905 }, { "epoch": 1.2974812797821649, "grad_norm": 1.773266077041626, "learning_rate": 8.848171178617822e-06, "loss": 0.7613, "step": 1906 }, { "epoch": 1.2981620149761743, "grad_norm": 1.9790738821029663, "learning_rate": 8.846733308333555e-06, "loss": 0.7507, "step": 1907 }, { "epoch": 1.2988427501701838, "grad_norm": 1.6681153774261475, "learning_rate": 8.845294658121542e-06, "loss": 0.8801, "step": 1908 }, { "epoch": 1.2995234853641933, "grad_norm": 1.739410400390625, "learning_rate": 8.843855228273472e-06, "loss": 0.8593, "step": 1909 }, { "epoch": 1.3002042205582027, "grad_norm": 1.6264950037002563, "learning_rate": 8.84241501908119e-06, "loss": 0.9691, "step": 1910 }, { "epoch": 1.3008849557522124, "grad_norm": 1.78695809841156, "learning_rate": 8.8409740308367e-06, "loss": 0.8601, "step": 1911 }, { "epoch": 1.3015656909462219, "grad_norm": 1.7164034843444824, "learning_rate": 8.83953226383216e-06, "loss": 0.863, "step": 1912 }, { "epoch": 1.3022464261402313, "grad_norm": 1.687940239906311, "learning_rate": 8.838089718359895e-06, "loss": 0.7798, "step": 1913 }, { "epoch": 1.302927161334241, "grad_norm": 2.0350468158721924, "learning_rate": 8.836646394712376e-06, "loss": 0.702, "step": 1914 }, { "epoch": 1.3036078965282505, "grad_norm": 1.7618558406829834, "learning_rate": 8.835202293182242e-06, "loss": 0.7379, "step": 1915 }, { "epoch": 1.30428863172226, "grad_norm": 1.7772706747055054, "learning_rate": 8.833757414062284e-06, "loss": 0.8059, "step": 1916 }, { "epoch": 1.3049693669162696, "grad_norm": 1.6599663496017456, "learning_rate": 8.832311757645452e-06, "loss": 0.7121, "step": 1917 }, { "epoch": 1.305650102110279, "grad_norm": 1.7415881156921387, "learning_rate": 8.830865324224854e-06, "loss": 0.8819, "step": 1918 }, { "epoch": 1.3063308373042886, "grad_norm": 2.0003769397735596, "learning_rate": 8.829418114093756e-06, "loss": 0.7404, "step": 1919 }, { "epoch": 1.3070115724982982, "grad_norm": 1.9657526016235352, "learning_rate": 8.82797012754558e-06, "loss": 0.7934, "step": 1920 }, { "epoch": 1.3076923076923077, "grad_norm": 1.976892352104187, "learning_rate": 8.826521364873906e-06, "loss": 0.7341, "step": 1921 }, { "epoch": 1.3083730428863172, "grad_norm": 1.8056206703186035, "learning_rate": 8.825071826372474e-06, "loss": 0.7183, "step": 1922 }, { "epoch": 1.3090537780803269, "grad_norm": 1.8258512020111084, "learning_rate": 8.823621512335175e-06, "loss": 0.646, "step": 1923 }, { "epoch": 1.3097345132743363, "grad_norm": 1.9742459058761597, "learning_rate": 8.822170423056062e-06, "loss": 0.8082, "step": 1924 }, { "epoch": 1.3104152484683458, "grad_norm": 2.098227024078369, "learning_rate": 8.82071855882935e-06, "loss": 0.8527, "step": 1925 }, { "epoch": 1.3110959836623555, "grad_norm": 2.136073112487793, "learning_rate": 8.819265919949395e-06, "loss": 0.709, "step": 1926 }, { "epoch": 1.311776718856365, "grad_norm": 1.8401271104812622, "learning_rate": 8.81781250671073e-06, "loss": 0.8158, "step": 1927 }, { "epoch": 1.3124574540503744, "grad_norm": 2.028921365737915, "learning_rate": 8.816358319408031e-06, "loss": 0.7046, "step": 1928 }, { "epoch": 1.3131381892443839, "grad_norm": 1.906426191329956, "learning_rate": 8.814903358336137e-06, "loss": 0.7664, "step": 1929 }, { "epoch": 1.3138189244383935, "grad_norm": 2.0214881896972656, "learning_rate": 8.813447623790043e-06, "loss": 0.7898, "step": 1930 }, { "epoch": 1.314499659632403, "grad_norm": 1.8216445446014404, "learning_rate": 8.811991116064896e-06, "loss": 0.8092, "step": 1931 }, { "epoch": 1.3151803948264125, "grad_norm": 1.8624624013900757, "learning_rate": 8.81053383545601e-06, "loss": 0.8587, "step": 1932 }, { "epoch": 1.315861130020422, "grad_norm": 1.8125817775726318, "learning_rate": 8.809075782258842e-06, "loss": 0.8058, "step": 1933 }, { "epoch": 1.3165418652144316, "grad_norm": 1.8332306146621704, "learning_rate": 8.80761695676902e-06, "loss": 0.7783, "step": 1934 }, { "epoch": 1.317222600408441, "grad_norm": 1.8354462385177612, "learning_rate": 8.80615735928232e-06, "loss": 0.7546, "step": 1935 }, { "epoch": 1.3179033356024505, "grad_norm": 1.792738437652588, "learning_rate": 8.804696990094677e-06, "loss": 0.7947, "step": 1936 }, { "epoch": 1.3185840707964602, "grad_norm": 1.796228289604187, "learning_rate": 8.803235849502178e-06, "loss": 0.7864, "step": 1937 }, { "epoch": 1.3192648059904697, "grad_norm": 1.7752487659454346, "learning_rate": 8.801773937801075e-06, "loss": 0.7735, "step": 1938 }, { "epoch": 1.3199455411844792, "grad_norm": 1.6146351099014282, "learning_rate": 8.800311255287768e-06, "loss": 0.8233, "step": 1939 }, { "epoch": 1.3206262763784888, "grad_norm": 1.6486724615097046, "learning_rate": 8.79884780225882e-06, "loss": 0.806, "step": 1940 }, { "epoch": 1.3213070115724983, "grad_norm": 1.8034082651138306, "learning_rate": 8.797383579010943e-06, "loss": 0.8044, "step": 1941 }, { "epoch": 1.3219877467665078, "grad_norm": 1.707763910293579, "learning_rate": 8.795918585841013e-06, "loss": 0.8465, "step": 1942 }, { "epoch": 1.3226684819605175, "grad_norm": 1.7630115747451782, "learning_rate": 8.794452823046056e-06, "loss": 0.8626, "step": 1943 }, { "epoch": 1.323349217154527, "grad_norm": 1.6967958211898804, "learning_rate": 8.792986290923258e-06, "loss": 0.8149, "step": 1944 }, { "epoch": 1.3240299523485364, "grad_norm": 1.878867506980896, "learning_rate": 8.791518989769959e-06, "loss": 0.7285, "step": 1945 }, { "epoch": 1.324710687542546, "grad_norm": 1.942152976989746, "learning_rate": 8.790050919883655e-06, "loss": 0.8657, "step": 1946 }, { "epoch": 1.3253914227365555, "grad_norm": 1.7275232076644897, "learning_rate": 8.788582081561998e-06, "loss": 0.8513, "step": 1947 }, { "epoch": 1.326072157930565, "grad_norm": 1.8133277893066406, "learning_rate": 8.787112475102796e-06, "loss": 0.7759, "step": 1948 }, { "epoch": 1.3267528931245747, "grad_norm": 1.9056504964828491, "learning_rate": 8.785642100804013e-06, "loss": 0.7866, "step": 1949 }, { "epoch": 1.3274336283185841, "grad_norm": 1.8624910116195679, "learning_rate": 8.784170958963768e-06, "loss": 0.7563, "step": 1950 }, { "epoch": 1.3281143635125936, "grad_norm": 1.8661065101623535, "learning_rate": 8.782699049880336e-06, "loss": 0.8101, "step": 1951 }, { "epoch": 1.328795098706603, "grad_norm": 1.8501825332641602, "learning_rate": 8.781226373852148e-06, "loss": 0.6921, "step": 1952 }, { "epoch": 1.3294758339006125, "grad_norm": 1.8273192644119263, "learning_rate": 8.77975293117779e-06, "loss": 0.6913, "step": 1953 }, { "epoch": 1.3301565690946222, "grad_norm": 1.7428165674209595, "learning_rate": 8.778278722156003e-06, "loss": 0.7886, "step": 1954 }, { "epoch": 1.3308373042886317, "grad_norm": 1.7311029434204102, "learning_rate": 8.776803747085686e-06, "loss": 0.775, "step": 1955 }, { "epoch": 1.3315180394826411, "grad_norm": 1.634639859199524, "learning_rate": 8.775328006265886e-06, "loss": 0.8359, "step": 1956 }, { "epoch": 1.3321987746766508, "grad_norm": 1.952738642692566, "learning_rate": 8.773851499995815e-06, "loss": 0.7674, "step": 1957 }, { "epoch": 1.3328795098706603, "grad_norm": 1.8426547050476074, "learning_rate": 8.772374228574836e-06, "loss": 0.7761, "step": 1958 }, { "epoch": 1.3335602450646697, "grad_norm": 2.171156406402588, "learning_rate": 8.770896192302463e-06, "loss": 0.8204, "step": 1959 }, { "epoch": 1.3342409802586794, "grad_norm": 1.758969783782959, "learning_rate": 8.769417391478372e-06, "loss": 0.7815, "step": 1960 }, { "epoch": 1.334921715452689, "grad_norm": 1.9368314743041992, "learning_rate": 8.767937826402388e-06, "loss": 0.9358, "step": 1961 }, { "epoch": 1.3356024506466984, "grad_norm": 1.9434630870819092, "learning_rate": 8.766457497374498e-06, "loss": 0.6869, "step": 1962 }, { "epoch": 1.336283185840708, "grad_norm": 1.5959628820419312, "learning_rate": 8.764976404694837e-06, "loss": 0.8524, "step": 1963 }, { "epoch": 1.3369639210347175, "grad_norm": 1.8199204206466675, "learning_rate": 8.763494548663698e-06, "loss": 0.728, "step": 1964 }, { "epoch": 1.337644656228727, "grad_norm": 1.9696075916290283, "learning_rate": 8.762011929581526e-06, "loss": 0.8008, "step": 1965 }, { "epoch": 1.3383253914227367, "grad_norm": 1.6990128755569458, "learning_rate": 8.760528547748928e-06, "loss": 0.751, "step": 1966 }, { "epoch": 1.3390061266167461, "grad_norm": 1.654436707496643, "learning_rate": 8.759044403466657e-06, "loss": 0.8581, "step": 1967 }, { "epoch": 1.3396868618107556, "grad_norm": 1.8496148586273193, "learning_rate": 8.757559497035623e-06, "loss": 0.8363, "step": 1968 }, { "epoch": 1.3403675970047653, "grad_norm": 2.0422356128692627, "learning_rate": 8.756073828756895e-06, "loss": 0.7195, "step": 1969 }, { "epoch": 1.3410483321987747, "grad_norm": 1.5627080202102661, "learning_rate": 8.754587398931693e-06, "loss": 0.9698, "step": 1970 }, { "epoch": 1.3417290673927842, "grad_norm": 1.721325397491455, "learning_rate": 8.753100207861392e-06, "loss": 0.69, "step": 1971 }, { "epoch": 1.3424098025867937, "grad_norm": 2.0218400955200195, "learning_rate": 8.751612255847517e-06, "loss": 0.7779, "step": 1972 }, { "epoch": 1.3430905377808033, "grad_norm": 1.5639524459838867, "learning_rate": 8.750123543191754e-06, "loss": 0.9094, "step": 1973 }, { "epoch": 1.3437712729748128, "grad_norm": 1.7965649366378784, "learning_rate": 8.748634070195942e-06, "loss": 0.8663, "step": 1974 }, { "epoch": 1.3444520081688223, "grad_norm": 2.0496206283569336, "learning_rate": 8.747143837162071e-06, "loss": 0.6981, "step": 1975 }, { "epoch": 1.3451327433628317, "grad_norm": 1.7028836011886597, "learning_rate": 8.745652844392285e-06, "loss": 0.8036, "step": 1976 }, { "epoch": 1.3458134785568414, "grad_norm": 1.6878193616867065, "learning_rate": 8.744161092188887e-06, "loss": 0.8594, "step": 1977 }, { "epoch": 1.3464942137508509, "grad_norm": 1.720097303390503, "learning_rate": 8.74266858085433e-06, "loss": 0.7812, "step": 1978 }, { "epoch": 1.3471749489448603, "grad_norm": 1.7511255741119385, "learning_rate": 8.74117531069122e-06, "loss": 0.8441, "step": 1979 }, { "epoch": 1.34785568413887, "grad_norm": 1.7742167711257935, "learning_rate": 8.739681282002318e-06, "loss": 0.8644, "step": 1980 }, { "epoch": 1.3485364193328795, "grad_norm": 1.861372947692871, "learning_rate": 8.738186495090543e-06, "loss": 0.8159, "step": 1981 }, { "epoch": 1.349217154526889, "grad_norm": 1.9662986993789673, "learning_rate": 8.736690950258962e-06, "loss": 0.8208, "step": 1982 }, { "epoch": 1.3498978897208986, "grad_norm": 1.7332818508148193, "learning_rate": 8.735194647810797e-06, "loss": 0.7278, "step": 1983 }, { "epoch": 1.350578624914908, "grad_norm": 1.573482871055603, "learning_rate": 8.733697588049426e-06, "loss": 0.9285, "step": 1984 }, { "epoch": 1.3512593601089176, "grad_norm": 1.5292437076568604, "learning_rate": 8.732199771278376e-06, "loss": 0.8611, "step": 1985 }, { "epoch": 1.3519400953029272, "grad_norm": 1.687495231628418, "learning_rate": 8.730701197801333e-06, "loss": 0.7518, "step": 1986 }, { "epoch": 1.3526208304969367, "grad_norm": 1.9340274333953857, "learning_rate": 8.729201867922132e-06, "loss": 0.7966, "step": 1987 }, { "epoch": 1.3533015656909462, "grad_norm": 1.7857954502105713, "learning_rate": 8.727701781944766e-06, "loss": 0.7615, "step": 1988 }, { "epoch": 1.3539823008849559, "grad_norm": 2.1101977825164795, "learning_rate": 8.726200940173374e-06, "loss": 0.8844, "step": 1989 }, { "epoch": 1.3546630360789653, "grad_norm": 1.7063826322555542, "learning_rate": 8.724699342912258e-06, "loss": 0.8364, "step": 1990 }, { "epoch": 1.3553437712729748, "grad_norm": 1.8804603815078735, "learning_rate": 8.723196990465863e-06, "loss": 0.7439, "step": 1991 }, { "epoch": 1.3560245064669845, "grad_norm": 2.0141472816467285, "learning_rate": 8.721693883138795e-06, "loss": 0.8406, "step": 1992 }, { "epoch": 1.356705241660994, "grad_norm": 1.7353264093399048, "learning_rate": 8.720190021235808e-06, "loss": 0.8482, "step": 1993 }, { "epoch": 1.3573859768550034, "grad_norm": 1.8042001724243164, "learning_rate": 8.718685405061813e-06, "loss": 0.7139, "step": 1994 }, { "epoch": 1.3580667120490129, "grad_norm": 2.072834014892578, "learning_rate": 8.717180034921871e-06, "loss": 0.7584, "step": 1995 }, { "epoch": 1.3587474472430225, "grad_norm": 1.91839599609375, "learning_rate": 8.715673911121196e-06, "loss": 0.8271, "step": 1996 }, { "epoch": 1.359428182437032, "grad_norm": 1.689266562461853, "learning_rate": 8.714167033965156e-06, "loss": 0.7965, "step": 1997 }, { "epoch": 1.3601089176310415, "grad_norm": 1.8823288679122925, "learning_rate": 8.712659403759273e-06, "loss": 0.8335, "step": 1998 }, { "epoch": 1.360789652825051, "grad_norm": 2.072758913040161, "learning_rate": 8.711151020809217e-06, "loss": 0.7507, "step": 1999 }, { "epoch": 1.3614703880190606, "grad_norm": 1.8000452518463135, "learning_rate": 8.709641885420817e-06, "loss": 0.8047, "step": 2000 }, { "epoch": 1.36215112321307, "grad_norm": 1.7981770038604736, "learning_rate": 8.70813199790005e-06, "loss": 0.8763, "step": 2001 }, { "epoch": 1.3628318584070795, "grad_norm": 2.05167555809021, "learning_rate": 8.706621358553044e-06, "loss": 0.7821, "step": 2002 }, { "epoch": 1.3635125936010892, "grad_norm": 1.8129624128341675, "learning_rate": 8.705109967686086e-06, "loss": 0.7694, "step": 2003 }, { "epoch": 1.3641933287950987, "grad_norm": 1.8577157258987427, "learning_rate": 8.703597825605608e-06, "loss": 0.7508, "step": 2004 }, { "epoch": 1.3648740639891082, "grad_norm": 1.8039474487304688, "learning_rate": 8.702084932618202e-06, "loss": 0.7091, "step": 2005 }, { "epoch": 1.3655547991831178, "grad_norm": 1.772028923034668, "learning_rate": 8.700571289030603e-06, "loss": 0.8568, "step": 2006 }, { "epoch": 1.3662355343771273, "grad_norm": 1.665039300918579, "learning_rate": 8.699056895149708e-06, "loss": 0.8426, "step": 2007 }, { "epoch": 1.3669162695711368, "grad_norm": 1.7685844898223877, "learning_rate": 8.69754175128256e-06, "loss": 0.7442, "step": 2008 }, { "epoch": 1.3675970047651465, "grad_norm": 1.7572516202926636, "learning_rate": 8.696025857736355e-06, "loss": 0.7432, "step": 2009 }, { "epoch": 1.368277739959156, "grad_norm": 1.9056570529937744, "learning_rate": 8.69450921481844e-06, "loss": 0.8113, "step": 2010 }, { "epoch": 1.3689584751531654, "grad_norm": 1.8013091087341309, "learning_rate": 8.692991822836316e-06, "loss": 0.8711, "step": 2011 }, { "epoch": 1.369639210347175, "grad_norm": 1.7383626699447632, "learning_rate": 8.691473682097637e-06, "loss": 0.9169, "step": 2012 }, { "epoch": 1.3703199455411845, "grad_norm": 1.7186850309371948, "learning_rate": 8.689954792910207e-06, "loss": 0.8542, "step": 2013 }, { "epoch": 1.371000680735194, "grad_norm": 1.6688395738601685, "learning_rate": 8.68843515558198e-06, "loss": 0.8055, "step": 2014 }, { "epoch": 1.3716814159292037, "grad_norm": 1.8139400482177734, "learning_rate": 8.686914770421063e-06, "loss": 0.7642, "step": 2015 }, { "epoch": 1.3723621511232131, "grad_norm": 1.771032691001892, "learning_rate": 8.685393637735718e-06, "loss": 0.7549, "step": 2016 }, { "epoch": 1.3730428863172226, "grad_norm": 2.0077173709869385, "learning_rate": 8.683871757834353e-06, "loss": 0.7069, "step": 2017 }, { "epoch": 1.373723621511232, "grad_norm": 1.6014620065689087, "learning_rate": 8.68234913102553e-06, "loss": 0.8256, "step": 2018 }, { "epoch": 1.3744043567052415, "grad_norm": 1.7792038917541504, "learning_rate": 8.680825757617966e-06, "loss": 0.8692, "step": 2019 }, { "epoch": 1.3750850918992512, "grad_norm": 1.9630568027496338, "learning_rate": 8.67930163792052e-06, "loss": 0.8663, "step": 2020 }, { "epoch": 1.3757658270932607, "grad_norm": 1.688126802444458, "learning_rate": 8.677776772242216e-06, "loss": 0.9011, "step": 2021 }, { "epoch": 1.3764465622872701, "grad_norm": 1.6736291646957397, "learning_rate": 8.676251160892216e-06, "loss": 0.8634, "step": 2022 }, { "epoch": 1.3771272974812798, "grad_norm": 1.7002289295196533, "learning_rate": 8.67472480417984e-06, "loss": 0.7271, "step": 2023 }, { "epoch": 1.3778080326752893, "grad_norm": 1.718001127243042, "learning_rate": 8.673197702414559e-06, "loss": 0.7971, "step": 2024 }, { "epoch": 1.3784887678692987, "grad_norm": 1.799842357635498, "learning_rate": 8.67166985590599e-06, "loss": 0.7076, "step": 2025 }, { "epoch": 1.3791695030633084, "grad_norm": 1.9075911045074463, "learning_rate": 8.670141264963912e-06, "loss": 0.8105, "step": 2026 }, { "epoch": 1.379850238257318, "grad_norm": 1.6192092895507812, "learning_rate": 8.668611929898242e-06, "loss": 0.7994, "step": 2027 }, { "epoch": 1.3805309734513274, "grad_norm": 1.892288088798523, "learning_rate": 8.667081851019054e-06, "loss": 0.909, "step": 2028 }, { "epoch": 1.381211708645337, "grad_norm": 1.6486752033233643, "learning_rate": 8.665551028636576e-06, "loss": 0.8313, "step": 2029 }, { "epoch": 1.3818924438393465, "grad_norm": 1.7937345504760742, "learning_rate": 8.66401946306118e-06, "loss": 0.7917, "step": 2030 }, { "epoch": 1.382573179033356, "grad_norm": 1.7676059007644653, "learning_rate": 8.662487154603394e-06, "loss": 0.7947, "step": 2031 }, { "epoch": 1.3832539142273657, "grad_norm": 1.632727026939392, "learning_rate": 8.660954103573893e-06, "loss": 0.7511, "step": 2032 }, { "epoch": 1.3839346494213751, "grad_norm": 1.8094213008880615, "learning_rate": 8.659420310283507e-06, "loss": 0.8463, "step": 2033 }, { "epoch": 1.3846153846153846, "grad_norm": 1.8271234035491943, "learning_rate": 8.657885775043208e-06, "loss": 0.862, "step": 2034 }, { "epoch": 1.3852961198093943, "grad_norm": 1.827435851097107, "learning_rate": 8.65635049816413e-06, "loss": 0.8988, "step": 2035 }, { "epoch": 1.3859768550034037, "grad_norm": 1.6704943180084229, "learning_rate": 8.654814479957547e-06, "loss": 0.7585, "step": 2036 }, { "epoch": 1.3866575901974132, "grad_norm": 1.809334635734558, "learning_rate": 8.653277720734891e-06, "loss": 0.847, "step": 2037 }, { "epoch": 1.3873383253914227, "grad_norm": 1.8469113111495972, "learning_rate": 8.651740220807739e-06, "loss": 0.7286, "step": 2038 }, { "epoch": 1.3880190605854323, "grad_norm": 1.7890795469284058, "learning_rate": 8.65020198048782e-06, "loss": 0.7073, "step": 2039 }, { "epoch": 1.3886997957794418, "grad_norm": 1.6373296976089478, "learning_rate": 8.648663000087015e-06, "loss": 0.8191, "step": 2040 }, { "epoch": 1.3893805309734513, "grad_norm": 2.0166778564453125, "learning_rate": 8.647123279917352e-06, "loss": 0.7618, "step": 2041 }, { "epoch": 1.3900612661674607, "grad_norm": 1.8517189025878906, "learning_rate": 8.645582820291009e-06, "loss": 0.7405, "step": 2042 }, { "epoch": 1.3907420013614704, "grad_norm": 1.7127386331558228, "learning_rate": 8.64404162152032e-06, "loss": 0.7946, "step": 2043 }, { "epoch": 1.3914227365554799, "grad_norm": 1.7182601690292358, "learning_rate": 8.642499683917759e-06, "loss": 0.8788, "step": 2044 }, { "epoch": 1.3921034717494893, "grad_norm": 1.8044195175170898, "learning_rate": 8.640957007795956e-06, "loss": 0.7706, "step": 2045 }, { "epoch": 1.392784206943499, "grad_norm": 1.8199208974838257, "learning_rate": 8.639413593467691e-06, "loss": 0.9083, "step": 2046 }, { "epoch": 1.3934649421375085, "grad_norm": 1.7420903444290161, "learning_rate": 8.637869441245892e-06, "loss": 0.8873, "step": 2047 }, { "epoch": 1.394145677331518, "grad_norm": 1.6853636503219604, "learning_rate": 8.636324551443636e-06, "loss": 0.8405, "step": 2048 }, { "epoch": 1.3948264125255276, "grad_norm": 1.8400466442108154, "learning_rate": 8.634778924374152e-06, "loss": 0.8978, "step": 2049 }, { "epoch": 1.395507147719537, "grad_norm": 1.7075140476226807, "learning_rate": 8.633232560350814e-06, "loss": 0.9137, "step": 2050 }, { "epoch": 1.3961878829135466, "grad_norm": 1.905367374420166, "learning_rate": 8.631685459687152e-06, "loss": 0.691, "step": 2051 }, { "epoch": 1.3968686181075562, "grad_norm": 1.9247220754623413, "learning_rate": 8.630137622696839e-06, "loss": 0.7041, "step": 2052 }, { "epoch": 1.3975493533015657, "grad_norm": 1.8249679803848267, "learning_rate": 8.6285890496937e-06, "loss": 0.728, "step": 2053 }, { "epoch": 1.3982300884955752, "grad_norm": 1.7326068878173828, "learning_rate": 8.627039740991712e-06, "loss": 0.8022, "step": 2054 }, { "epoch": 1.3989108236895849, "grad_norm": 1.7637526988983154, "learning_rate": 8.625489696904996e-06, "loss": 0.7779, "step": 2055 }, { "epoch": 1.3995915588835943, "grad_norm": 1.7044557332992554, "learning_rate": 8.623938917747824e-06, "loss": 0.7883, "step": 2056 }, { "epoch": 1.4002722940776038, "grad_norm": 1.8050575256347656, "learning_rate": 8.62238740383462e-06, "loss": 0.7986, "step": 2057 }, { "epoch": 1.4009530292716135, "grad_norm": 1.7521151304244995, "learning_rate": 8.620835155479951e-06, "loss": 0.7946, "step": 2058 }, { "epoch": 1.401633764465623, "grad_norm": 1.8615994453430176, "learning_rate": 8.619282172998539e-06, "loss": 0.7495, "step": 2059 }, { "epoch": 1.4023144996596324, "grad_norm": 1.7418365478515625, "learning_rate": 8.617728456705253e-06, "loss": 0.8807, "step": 2060 }, { "epoch": 1.4029952348536419, "grad_norm": 1.8295193910598755, "learning_rate": 8.616174006915105e-06, "loss": 0.8634, "step": 2061 }, { "epoch": 1.4036759700476515, "grad_norm": 1.8453336954116821, "learning_rate": 8.614618823943268e-06, "loss": 0.7181, "step": 2062 }, { "epoch": 1.404356705241661, "grad_norm": 1.8091472387313843, "learning_rate": 8.613062908105051e-06, "loss": 0.8306, "step": 2063 }, { "epoch": 1.4050374404356705, "grad_norm": 1.9002230167388916, "learning_rate": 8.611506259715918e-06, "loss": 0.7747, "step": 2064 }, { "epoch": 1.40571817562968, "grad_norm": 1.81411874294281, "learning_rate": 8.609948879091482e-06, "loss": 0.8424, "step": 2065 }, { "epoch": 1.4063989108236896, "grad_norm": 2.051539659500122, "learning_rate": 8.608390766547504e-06, "loss": 0.7435, "step": 2066 }, { "epoch": 1.407079646017699, "grad_norm": 2.144313335418701, "learning_rate": 8.606831922399888e-06, "loss": 0.7616, "step": 2067 }, { "epoch": 1.4077603812117085, "grad_norm": 1.952027678489685, "learning_rate": 8.605272346964692e-06, "loss": 0.8637, "step": 2068 }, { "epoch": 1.4084411164057182, "grad_norm": 1.9252525568008423, "learning_rate": 8.603712040558126e-06, "loss": 0.8736, "step": 2069 }, { "epoch": 1.4091218515997277, "grad_norm": 1.760406732559204, "learning_rate": 8.602151003496536e-06, "loss": 0.851, "step": 2070 }, { "epoch": 1.4098025867937372, "grad_norm": 2.1336095333099365, "learning_rate": 8.600589236096428e-06, "loss": 0.819, "step": 2071 }, { "epoch": 1.4104833219877468, "grad_norm": 2.262968063354492, "learning_rate": 8.59902673867445e-06, "loss": 0.8024, "step": 2072 }, { "epoch": 1.4111640571817563, "grad_norm": 2.037644147872925, "learning_rate": 8.597463511547399e-06, "loss": 0.852, "step": 2073 }, { "epoch": 1.4118447923757658, "grad_norm": 2.2812256813049316, "learning_rate": 8.59589955503222e-06, "loss": 0.7467, "step": 2074 }, { "epoch": 1.4125255275697755, "grad_norm": 1.8576240539550781, "learning_rate": 8.594334869446008e-06, "loss": 0.8025, "step": 2075 }, { "epoch": 1.413206262763785, "grad_norm": 1.8539983034133911, "learning_rate": 8.592769455106002e-06, "loss": 0.8217, "step": 2076 }, { "epoch": 1.4138869979577944, "grad_norm": 1.6584193706512451, "learning_rate": 8.591203312329594e-06, "loss": 0.8069, "step": 2077 }, { "epoch": 1.414567733151804, "grad_norm": 1.914306640625, "learning_rate": 8.589636441434317e-06, "loss": 0.852, "step": 2078 }, { "epoch": 1.4152484683458135, "grad_norm": 1.8988767862319946, "learning_rate": 8.588068842737856e-06, "loss": 0.664, "step": 2079 }, { "epoch": 1.415929203539823, "grad_norm": 2.0618896484375, "learning_rate": 8.586500516558044e-06, "loss": 0.7911, "step": 2080 }, { "epoch": 1.4166099387338327, "grad_norm": 1.96096670627594, "learning_rate": 8.584931463212859e-06, "loss": 0.8098, "step": 2081 }, { "epoch": 1.4172906739278421, "grad_norm": 1.79002046585083, "learning_rate": 8.583361683020426e-06, "loss": 0.8009, "step": 2082 }, { "epoch": 1.4179714091218516, "grad_norm": 1.7163114547729492, "learning_rate": 8.581791176299022e-06, "loss": 0.7138, "step": 2083 }, { "epoch": 1.418652144315861, "grad_norm": 1.8238147497177124, "learning_rate": 8.580219943367068e-06, "loss": 0.7009, "step": 2084 }, { "epoch": 1.4193328795098705, "grad_norm": 1.6848605871200562, "learning_rate": 8.578647984543132e-06, "loss": 0.9143, "step": 2085 }, { "epoch": 1.4200136147038802, "grad_norm": 1.878157615661621, "learning_rate": 8.577075300145928e-06, "loss": 0.7957, "step": 2086 }, { "epoch": 1.4206943498978897, "grad_norm": 1.8565421104431152, "learning_rate": 8.57550189049432e-06, "loss": 0.8384, "step": 2087 }, { "epoch": 1.4213750850918991, "grad_norm": 1.8938097953796387, "learning_rate": 8.573927755907318e-06, "loss": 0.8355, "step": 2088 }, { "epoch": 1.4220558202859088, "grad_norm": 1.6289873123168945, "learning_rate": 8.57235289670408e-06, "loss": 0.8078, "step": 2089 }, { "epoch": 1.4227365554799183, "grad_norm": 1.6919738054275513, "learning_rate": 8.570777313203908e-06, "loss": 0.8479, "step": 2090 }, { "epoch": 1.4234172906739277, "grad_norm": 1.993880033493042, "learning_rate": 8.569201005726255e-06, "loss": 0.8153, "step": 2091 }, { "epoch": 1.4240980258679374, "grad_norm": 1.6595864295959473, "learning_rate": 8.567623974590714e-06, "loss": 0.93, "step": 2092 }, { "epoch": 1.424778761061947, "grad_norm": 2.0285804271698, "learning_rate": 8.566046220117032e-06, "loss": 0.7855, "step": 2093 }, { "epoch": 1.4254594962559564, "grad_norm": 1.812151551246643, "learning_rate": 8.5644677426251e-06, "loss": 0.8316, "step": 2094 }, { "epoch": 1.426140231449966, "grad_norm": 1.8221303224563599, "learning_rate": 8.562888542434956e-06, "loss": 0.8479, "step": 2095 }, { "epoch": 1.4268209666439755, "grad_norm": 1.7984673976898193, "learning_rate": 8.561308619866782e-06, "loss": 0.8304, "step": 2096 }, { "epoch": 1.427501701837985, "grad_norm": 1.6362931728363037, "learning_rate": 8.559727975240909e-06, "loss": 0.8224, "step": 2097 }, { "epoch": 1.4281824370319947, "grad_norm": 1.8169142007827759, "learning_rate": 8.558146608877814e-06, "loss": 0.7907, "step": 2098 }, { "epoch": 1.4288631722260041, "grad_norm": 1.6487138271331787, "learning_rate": 8.55656452109812e-06, "loss": 0.8227, "step": 2099 }, { "epoch": 1.4295439074200136, "grad_norm": 1.925169587135315, "learning_rate": 8.554981712222597e-06, "loss": 0.7692, "step": 2100 }, { "epoch": 1.4302246426140233, "grad_norm": 1.7605595588684082, "learning_rate": 8.553398182572158e-06, "loss": 0.7417, "step": 2101 }, { "epoch": 1.4309053778080327, "grad_norm": 1.7500487565994263, "learning_rate": 8.551813932467867e-06, "loss": 0.7711, "step": 2102 }, { "epoch": 1.4315861130020422, "grad_norm": 1.724517822265625, "learning_rate": 8.550228962230934e-06, "loss": 0.8402, "step": 2103 }, { "epoch": 1.4322668481960517, "grad_norm": 2.045761823654175, "learning_rate": 8.548643272182707e-06, "loss": 0.8233, "step": 2104 }, { "epoch": 1.4329475833900613, "grad_norm": 1.814577579498291, "learning_rate": 8.547056862644689e-06, "loss": 0.7664, "step": 2105 }, { "epoch": 1.4336283185840708, "grad_norm": 1.6444884538650513, "learning_rate": 8.545469733938528e-06, "loss": 0.8474, "step": 2106 }, { "epoch": 1.4343090537780803, "grad_norm": 1.7253211736679077, "learning_rate": 8.54388188638601e-06, "loss": 0.8187, "step": 2107 }, { "epoch": 1.4349897889720897, "grad_norm": 1.8200703859329224, "learning_rate": 8.542293320309077e-06, "loss": 0.8771, "step": 2108 }, { "epoch": 1.4356705241660994, "grad_norm": 1.8716150522232056, "learning_rate": 8.540704036029807e-06, "loss": 0.6634, "step": 2109 }, { "epoch": 1.4363512593601089, "grad_norm": 1.8505499362945557, "learning_rate": 8.539114033870434e-06, "loss": 0.7528, "step": 2110 }, { "epoch": 1.4370319945541183, "grad_norm": 1.6958401203155518, "learning_rate": 8.537523314153328e-06, "loss": 0.7895, "step": 2111 }, { "epoch": 1.437712729748128, "grad_norm": 1.7717608213424683, "learning_rate": 8.535931877201012e-06, "loss": 0.6769, "step": 2112 }, { "epoch": 1.4383934649421375, "grad_norm": 1.9160428047180176, "learning_rate": 8.534339723336144e-06, "loss": 0.672, "step": 2113 }, { "epoch": 1.439074200136147, "grad_norm": 1.7046043872833252, "learning_rate": 8.532746852881543e-06, "loss": 0.7792, "step": 2114 }, { "epoch": 1.4397549353301566, "grad_norm": 1.7759287357330322, "learning_rate": 8.53115326616016e-06, "loss": 0.8539, "step": 2115 }, { "epoch": 1.440435670524166, "grad_norm": 1.945239782333374, "learning_rate": 8.529558963495094e-06, "loss": 0.8013, "step": 2116 }, { "epoch": 1.4411164057181756, "grad_norm": 1.881378412246704, "learning_rate": 8.527963945209594e-06, "loss": 0.8396, "step": 2117 }, { "epoch": 1.4417971409121852, "grad_norm": 1.708617091178894, "learning_rate": 8.526368211627051e-06, "loss": 0.7712, "step": 2118 }, { "epoch": 1.4424778761061947, "grad_norm": 1.7345023155212402, "learning_rate": 8.524771763071e-06, "loss": 0.8181, "step": 2119 }, { "epoch": 1.4431586113002042, "grad_norm": 1.8769413232803345, "learning_rate": 8.523174599865122e-06, "loss": 0.7137, "step": 2120 }, { "epoch": 1.4438393464942139, "grad_norm": 1.7991527318954468, "learning_rate": 8.521576722333242e-06, "loss": 0.7286, "step": 2121 }, { "epoch": 1.4445200816882233, "grad_norm": 2.14487886428833, "learning_rate": 8.519978130799334e-06, "loss": 0.8261, "step": 2122 }, { "epoch": 1.4452008168822328, "grad_norm": 1.7407773733139038, "learning_rate": 8.51837882558751e-06, "loss": 0.787, "step": 2123 }, { "epoch": 1.4458815520762425, "grad_norm": 1.7194985151290894, "learning_rate": 8.516778807022032e-06, "loss": 0.8129, "step": 2124 }, { "epoch": 1.446562287270252, "grad_norm": 1.7615214586257935, "learning_rate": 8.515178075427306e-06, "loss": 0.7691, "step": 2125 }, { "epoch": 1.4472430224642614, "grad_norm": 1.6991764307022095, "learning_rate": 8.513576631127876e-06, "loss": 0.8248, "step": 2126 }, { "epoch": 1.4479237576582709, "grad_norm": 1.7834466695785522, "learning_rate": 8.511974474448442e-06, "loss": 0.805, "step": 2127 }, { "epoch": 1.4486044928522805, "grad_norm": 1.7033905982971191, "learning_rate": 8.510371605713841e-06, "loss": 0.7277, "step": 2128 }, { "epoch": 1.44928522804629, "grad_norm": 1.753442406654358, "learning_rate": 8.508768025249052e-06, "loss": 0.7261, "step": 2129 }, { "epoch": 1.4499659632402995, "grad_norm": 1.8306305408477783, "learning_rate": 8.507163733379207e-06, "loss": 0.8115, "step": 2130 }, { "epoch": 1.450646698434309, "grad_norm": 1.5935041904449463, "learning_rate": 8.505558730429574e-06, "loss": 0.8658, "step": 2131 }, { "epoch": 1.4513274336283186, "grad_norm": 1.7606227397918701, "learning_rate": 8.50395301672557e-06, "loss": 0.8363, "step": 2132 }, { "epoch": 1.452008168822328, "grad_norm": 1.814082384109497, "learning_rate": 8.502346592592753e-06, "loss": 0.7868, "step": 2133 }, { "epoch": 1.4526889040163375, "grad_norm": 1.721394658088684, "learning_rate": 8.500739458356827e-06, "loss": 0.8698, "step": 2134 }, { "epoch": 1.4533696392103472, "grad_norm": 1.6673563718795776, "learning_rate": 8.499131614343643e-06, "loss": 0.8407, "step": 2135 }, { "epoch": 1.4540503744043567, "grad_norm": 1.7766505479812622, "learning_rate": 8.497523060879188e-06, "loss": 0.7669, "step": 2136 }, { "epoch": 1.4547311095983662, "grad_norm": 1.7690352201461792, "learning_rate": 8.495913798289596e-06, "loss": 0.7908, "step": 2137 }, { "epoch": 1.4554118447923758, "grad_norm": 1.7787977457046509, "learning_rate": 8.494303826901154e-06, "loss": 0.7506, "step": 2138 }, { "epoch": 1.4560925799863853, "grad_norm": 1.8070435523986816, "learning_rate": 8.492693147040275e-06, "loss": 0.8437, "step": 2139 }, { "epoch": 1.4567733151803948, "grad_norm": 1.6993917226791382, "learning_rate": 8.491081759033533e-06, "loss": 0.8299, "step": 2140 }, { "epoch": 1.4574540503744045, "grad_norm": 1.4760217666625977, "learning_rate": 8.489469663207634e-06, "loss": 0.8586, "step": 2141 }, { "epoch": 1.458134785568414, "grad_norm": 1.8428229093551636, "learning_rate": 8.487856859889432e-06, "loss": 0.8008, "step": 2142 }, { "epoch": 1.4588155207624234, "grad_norm": 1.8554409742355347, "learning_rate": 8.486243349405923e-06, "loss": 0.7461, "step": 2143 }, { "epoch": 1.459496255956433, "grad_norm": 1.9021655321121216, "learning_rate": 8.484629132084252e-06, "loss": 0.6987, "step": 2144 }, { "epoch": 1.4601769911504425, "grad_norm": 2.17982816696167, "learning_rate": 8.483014208251698e-06, "loss": 0.7908, "step": 2145 }, { "epoch": 1.460857726344452, "grad_norm": 1.7572228908538818, "learning_rate": 8.481398578235689e-06, "loss": 0.9175, "step": 2146 }, { "epoch": 1.4615384615384617, "grad_norm": 1.8814198970794678, "learning_rate": 8.479782242363796e-06, "loss": 0.8287, "step": 2147 }, { "epoch": 1.4622191967324711, "grad_norm": 1.6640598773956299, "learning_rate": 8.47816520096373e-06, "loss": 0.8151, "step": 2148 }, { "epoch": 1.4628999319264806, "grad_norm": 1.7185397148132324, "learning_rate": 8.476547454363351e-06, "loss": 0.7665, "step": 2149 }, { "epoch": 1.46358066712049, "grad_norm": 1.887022852897644, "learning_rate": 8.474929002890654e-06, "loss": 0.7205, "step": 2150 }, { "epoch": 1.4642614023144995, "grad_norm": 1.7207322120666504, "learning_rate": 8.473309846873785e-06, "loss": 0.9122, "step": 2151 }, { "epoch": 1.4649421375085092, "grad_norm": 2.0421595573425293, "learning_rate": 8.471689986641028e-06, "loss": 0.6957, "step": 2152 }, { "epoch": 1.4656228727025187, "grad_norm": 1.7097938060760498, "learning_rate": 8.470069422520807e-06, "loss": 0.6903, "step": 2153 }, { "epoch": 1.4663036078965281, "grad_norm": 1.9559435844421387, "learning_rate": 8.468448154841695e-06, "loss": 0.8328, "step": 2154 }, { "epoch": 1.4669843430905378, "grad_norm": 1.9999405145645142, "learning_rate": 8.466826183932408e-06, "loss": 0.7766, "step": 2155 }, { "epoch": 1.4676650782845473, "grad_norm": 1.7464784383773804, "learning_rate": 8.465203510121798e-06, "loss": 0.804, "step": 2156 }, { "epoch": 1.4683458134785567, "grad_norm": 1.6523939371109009, "learning_rate": 8.463580133738864e-06, "loss": 0.9135, "step": 2157 }, { "epoch": 1.4690265486725664, "grad_norm": 1.9985041618347168, "learning_rate": 8.461956055112747e-06, "loss": 0.7446, "step": 2158 }, { "epoch": 1.469707283866576, "grad_norm": 1.7408066987991333, "learning_rate": 8.460331274572729e-06, "loss": 0.8183, "step": 2159 }, { "epoch": 1.4703880190605854, "grad_norm": 1.752076268196106, "learning_rate": 8.458705792448236e-06, "loss": 0.8401, "step": 2160 }, { "epoch": 1.471068754254595, "grad_norm": 1.660658359527588, "learning_rate": 8.457079609068837e-06, "loss": 0.895, "step": 2161 }, { "epoch": 1.4717494894486045, "grad_norm": 1.9064807891845703, "learning_rate": 8.455452724764242e-06, "loss": 0.7092, "step": 2162 }, { "epoch": 1.472430224642614, "grad_norm": 1.968873381614685, "learning_rate": 8.4538251398643e-06, "loss": 0.7372, "step": 2163 }, { "epoch": 1.4731109598366237, "grad_norm": 1.8200806379318237, "learning_rate": 8.452196854699005e-06, "loss": 0.7944, "step": 2164 }, { "epoch": 1.4737916950306331, "grad_norm": 1.90835702419281, "learning_rate": 8.450567869598496e-06, "loss": 0.8093, "step": 2165 }, { "epoch": 1.4744724302246426, "grad_norm": 1.9094480276107788, "learning_rate": 8.448938184893049e-06, "loss": 0.8064, "step": 2166 }, { "epoch": 1.4751531654186523, "grad_norm": 1.7393875122070312, "learning_rate": 8.447307800913084e-06, "loss": 0.8457, "step": 2167 }, { "epoch": 1.4758339006126617, "grad_norm": 1.6285825967788696, "learning_rate": 8.445676717989162e-06, "loss": 0.7759, "step": 2168 }, { "epoch": 1.4765146358066712, "grad_norm": 1.9390610456466675, "learning_rate": 8.444044936451986e-06, "loss": 0.765, "step": 2169 }, { "epoch": 1.4771953710006807, "grad_norm": 2.004735231399536, "learning_rate": 8.442412456632401e-06, "loss": 0.8446, "step": 2170 }, { "epoch": 1.4778761061946903, "grad_norm": 1.8914666175842285, "learning_rate": 8.440779278861395e-06, "loss": 0.8611, "step": 2171 }, { "epoch": 1.4785568413886998, "grad_norm": 1.9086116552352905, "learning_rate": 8.439145403470092e-06, "loss": 0.7849, "step": 2172 }, { "epoch": 1.4792375765827093, "grad_norm": 1.8507188558578491, "learning_rate": 8.437510830789767e-06, "loss": 0.8009, "step": 2173 }, { "epoch": 1.4799183117767187, "grad_norm": 1.8990041017532349, "learning_rate": 8.435875561151824e-06, "loss": 0.7656, "step": 2174 }, { "epoch": 1.4805990469707284, "grad_norm": 1.8089560270309448, "learning_rate": 8.43423959488782e-06, "loss": 0.7591, "step": 2175 }, { "epoch": 1.4812797821647379, "grad_norm": 1.728843092918396, "learning_rate": 8.432602932329448e-06, "loss": 0.825, "step": 2176 }, { "epoch": 1.4819605173587473, "grad_norm": 1.9071193933486938, "learning_rate": 8.430965573808538e-06, "loss": 0.6791, "step": 2177 }, { "epoch": 1.482641252552757, "grad_norm": 1.7538779973983765, "learning_rate": 8.429327519657071e-06, "loss": 0.8648, "step": 2178 }, { "epoch": 1.4833219877467665, "grad_norm": 1.850931167602539, "learning_rate": 8.427688770207162e-06, "loss": 0.8043, "step": 2179 }, { "epoch": 1.484002722940776, "grad_norm": 1.744612455368042, "learning_rate": 8.426049325791066e-06, "loss": 0.8232, "step": 2180 }, { "epoch": 1.4846834581347856, "grad_norm": 1.8960398435592651, "learning_rate": 8.424409186741185e-06, "loss": 0.734, "step": 2181 }, { "epoch": 1.485364193328795, "grad_norm": 1.6879431009292603, "learning_rate": 8.422768353390054e-06, "loss": 0.8222, "step": 2182 }, { "epoch": 1.4860449285228046, "grad_norm": 1.6777678728103638, "learning_rate": 8.421126826070358e-06, "loss": 0.8675, "step": 2183 }, { "epoch": 1.4867256637168142, "grad_norm": 1.8702871799468994, "learning_rate": 8.419484605114914e-06, "loss": 0.7443, "step": 2184 }, { "epoch": 1.4874063989108237, "grad_norm": 1.6739017963409424, "learning_rate": 8.417841690856685e-06, "loss": 0.8612, "step": 2185 }, { "epoch": 1.4880871341048332, "grad_norm": 1.895918369293213, "learning_rate": 8.416198083628773e-06, "loss": 0.7895, "step": 2186 }, { "epoch": 1.4887678692988429, "grad_norm": 1.9290286302566528, "learning_rate": 8.414553783764421e-06, "loss": 0.7812, "step": 2187 }, { "epoch": 1.4894486044928523, "grad_norm": 1.9708138704299927, "learning_rate": 8.41290879159701e-06, "loss": 0.7066, "step": 2188 }, { "epoch": 1.4901293396868618, "grad_norm": 1.697966456413269, "learning_rate": 8.411263107460064e-06, "loss": 0.8728, "step": 2189 }, { "epoch": 1.4908100748808715, "grad_norm": 1.9019428491592407, "learning_rate": 8.409616731687247e-06, "loss": 0.7647, "step": 2190 }, { "epoch": 1.491490810074881, "grad_norm": 1.7353793382644653, "learning_rate": 8.407969664612364e-06, "loss": 0.9355, "step": 2191 }, { "epoch": 1.4921715452688904, "grad_norm": 1.7253633737564087, "learning_rate": 8.406321906569355e-06, "loss": 0.7747, "step": 2192 }, { "epoch": 1.4928522804628999, "grad_norm": 1.8729705810546875, "learning_rate": 8.40467345789231e-06, "loss": 0.6623, "step": 2193 }, { "epoch": 1.4935330156569093, "grad_norm": 1.7984545230865479, "learning_rate": 8.403024318915446e-06, "loss": 0.8763, "step": 2194 }, { "epoch": 1.494213750850919, "grad_norm": 1.6198101043701172, "learning_rate": 8.401374489973133e-06, "loss": 0.8811, "step": 2195 }, { "epoch": 1.4948944860449285, "grad_norm": 1.7445077896118164, "learning_rate": 8.399723971399874e-06, "loss": 0.8501, "step": 2196 }, { "epoch": 1.495575221238938, "grad_norm": 1.7179118394851685, "learning_rate": 8.39807276353031e-06, "loss": 0.7845, "step": 2197 }, { "epoch": 1.4962559564329476, "grad_norm": 1.9132195711135864, "learning_rate": 8.396420866699224e-06, "loss": 0.7934, "step": 2198 }, { "epoch": 1.496936691626957, "grad_norm": 1.8926072120666504, "learning_rate": 8.394768281241543e-06, "loss": 0.8113, "step": 2199 }, { "epoch": 1.4976174268209665, "grad_norm": 1.7118656635284424, "learning_rate": 8.393115007492327e-06, "loss": 0.8447, "step": 2200 }, { "epoch": 1.4982981620149762, "grad_norm": 1.9866474866867065, "learning_rate": 8.39146104578678e-06, "loss": 0.6979, "step": 2201 }, { "epoch": 1.4989788972089857, "grad_norm": 1.729413390159607, "learning_rate": 8.389806396460244e-06, "loss": 0.65, "step": 2202 }, { "epoch": 1.4996596324029952, "grad_norm": 1.759820580482483, "learning_rate": 8.388151059848197e-06, "loss": 0.8876, "step": 2203 }, { "epoch": 1.5003403675970048, "grad_norm": 1.9128665924072266, "learning_rate": 8.386495036286264e-06, "loss": 0.819, "step": 2204 }, { "epoch": 1.5010211027910143, "grad_norm": 1.6822680234909058, "learning_rate": 8.384838326110202e-06, "loss": 0.8819, "step": 2205 }, { "epoch": 1.5017018379850238, "grad_norm": 1.737204670906067, "learning_rate": 8.383180929655912e-06, "loss": 0.7553, "step": 2206 }, { "epoch": 1.5023825731790335, "grad_norm": 1.956997275352478, "learning_rate": 8.38152284725943e-06, "loss": 0.7162, "step": 2207 }, { "epoch": 1.503063308373043, "grad_norm": 1.8564026355743408, "learning_rate": 8.379864079256935e-06, "loss": 0.7146, "step": 2208 }, { "epoch": 1.5037440435670524, "grad_norm": 1.848971962928772, "learning_rate": 8.378204625984743e-06, "loss": 0.8484, "step": 2209 }, { "epoch": 1.504424778761062, "grad_norm": 1.8176857233047485, "learning_rate": 8.376544487779307e-06, "loss": 0.7464, "step": 2210 }, { "epoch": 1.5051055139550715, "grad_norm": 1.7423337697982788, "learning_rate": 8.374883664977225e-06, "loss": 0.7407, "step": 2211 }, { "epoch": 1.505786249149081, "grad_norm": 1.7773797512054443, "learning_rate": 8.373222157915229e-06, "loss": 0.7522, "step": 2212 }, { "epoch": 1.5064669843430907, "grad_norm": 2.056070327758789, "learning_rate": 8.371559966930189e-06, "loss": 0.7613, "step": 2213 }, { "epoch": 1.5071477195371, "grad_norm": 1.8515726327896118, "learning_rate": 8.369897092359115e-06, "loss": 0.8427, "step": 2214 }, { "epoch": 1.5078284547311096, "grad_norm": 1.8661030530929565, "learning_rate": 8.368233534539158e-06, "loss": 0.701, "step": 2215 }, { "epoch": 1.5085091899251193, "grad_norm": 1.7306723594665527, "learning_rate": 8.366569293807605e-06, "loss": 0.7756, "step": 2216 }, { "epoch": 1.5091899251191285, "grad_norm": 1.749058723449707, "learning_rate": 8.36490437050188e-06, "loss": 0.8073, "step": 2217 }, { "epoch": 1.5098706603131382, "grad_norm": 1.8016445636749268, "learning_rate": 8.36323876495955e-06, "loss": 0.7278, "step": 2218 }, { "epoch": 1.5105513955071477, "grad_norm": 2.0961356163024902, "learning_rate": 8.361572477518315e-06, "loss": 0.7992, "step": 2219 }, { "epoch": 1.5112321307011571, "grad_norm": 1.8491392135620117, "learning_rate": 8.359905508516018e-06, "loss": 0.8063, "step": 2220 }, { "epoch": 1.5119128658951668, "grad_norm": 1.7524839639663696, "learning_rate": 8.358237858290637e-06, "loss": 0.845, "step": 2221 }, { "epoch": 1.5125936010891763, "grad_norm": 1.7484389543533325, "learning_rate": 8.356569527180292e-06, "loss": 0.8222, "step": 2222 }, { "epoch": 1.5132743362831858, "grad_norm": 1.815079689025879, "learning_rate": 8.354900515523232e-06, "loss": 0.9482, "step": 2223 }, { "epoch": 1.5139550714771954, "grad_norm": 1.688677191734314, "learning_rate": 8.353230823657857e-06, "loss": 0.7057, "step": 2224 }, { "epoch": 1.514635806671205, "grad_norm": 1.7774726152420044, "learning_rate": 8.351560451922693e-06, "loss": 0.8485, "step": 2225 }, { "epoch": 1.5153165418652144, "grad_norm": 1.9337109327316284, "learning_rate": 8.349889400656412e-06, "loss": 0.8694, "step": 2226 }, { "epoch": 1.515997277059224, "grad_norm": 1.7528162002563477, "learning_rate": 8.34821767019782e-06, "loss": 0.7909, "step": 2227 }, { "epoch": 1.5166780122532335, "grad_norm": 1.8071355819702148, "learning_rate": 8.346545260885862e-06, "loss": 0.8421, "step": 2228 }, { "epoch": 1.517358747447243, "grad_norm": 1.9151734113693237, "learning_rate": 8.344872173059618e-06, "loss": 0.7941, "step": 2229 }, { "epoch": 1.5180394826412527, "grad_norm": 1.6971666812896729, "learning_rate": 8.343198407058311e-06, "loss": 0.9792, "step": 2230 }, { "epoch": 1.5187202178352621, "grad_norm": 1.9258787631988525, "learning_rate": 8.341523963221294e-06, "loss": 0.6506, "step": 2231 }, { "epoch": 1.5194009530292716, "grad_norm": 1.9489420652389526, "learning_rate": 8.339848841888065e-06, "loss": 0.8869, "step": 2232 }, { "epoch": 1.5200816882232813, "grad_norm": 1.7416017055511475, "learning_rate": 8.338173043398255e-06, "loss": 0.7626, "step": 2233 }, { "epoch": 1.5207624234172905, "grad_norm": 1.6563433408737183, "learning_rate": 8.336496568091633e-06, "loss": 0.8583, "step": 2234 }, { "epoch": 1.5214431586113002, "grad_norm": 1.7787909507751465, "learning_rate": 8.334819416308105e-06, "loss": 0.7816, "step": 2235 }, { "epoch": 1.5221238938053099, "grad_norm": 2.0323665142059326, "learning_rate": 8.333141588387717e-06, "loss": 0.7369, "step": 2236 }, { "epoch": 1.5228046289993191, "grad_norm": 2.0381579399108887, "learning_rate": 8.331463084670645e-06, "loss": 0.8849, "step": 2237 }, { "epoch": 1.5234853641933288, "grad_norm": 1.8362658023834229, "learning_rate": 8.329783905497212e-06, "loss": 0.8217, "step": 2238 }, { "epoch": 1.5241660993873385, "grad_norm": 1.908246636390686, "learning_rate": 8.32810405120787e-06, "loss": 0.779, "step": 2239 }, { "epoch": 1.5248468345813477, "grad_norm": 1.902554988861084, "learning_rate": 8.326423522143213e-06, "loss": 0.8314, "step": 2240 }, { "epoch": 1.5255275697753574, "grad_norm": 1.7556418180465698, "learning_rate": 8.324742318643966e-06, "loss": 0.7496, "step": 2241 }, { "epoch": 1.5262083049693669, "grad_norm": 1.75586998462677, "learning_rate": 8.323060441050995e-06, "loss": 0.8039, "step": 2242 }, { "epoch": 1.5268890401633763, "grad_norm": 1.7346041202545166, "learning_rate": 8.321377889705302e-06, "loss": 0.8546, "step": 2243 }, { "epoch": 1.527569775357386, "grad_norm": 1.7866281270980835, "learning_rate": 8.319694664948026e-06, "loss": 0.7999, "step": 2244 }, { "epoch": 1.5282505105513955, "grad_norm": 1.8700850009918213, "learning_rate": 8.318010767120442e-06, "loss": 0.7732, "step": 2245 }, { "epoch": 1.528931245745405, "grad_norm": 2.043004274368286, "learning_rate": 8.31632619656396e-06, "loss": 0.7432, "step": 2246 }, { "epoch": 1.5296119809394146, "grad_norm": 2.023174524307251, "learning_rate": 8.31464095362013e-06, "loss": 0.8371, "step": 2247 }, { "epoch": 1.530292716133424, "grad_norm": 1.982476830482483, "learning_rate": 8.312955038630635e-06, "loss": 0.735, "step": 2248 }, { "epoch": 1.5309734513274336, "grad_norm": 1.9649931192398071, "learning_rate": 8.311268451937296e-06, "loss": 0.7872, "step": 2249 }, { "epoch": 1.5316541865214433, "grad_norm": 1.935408592224121, "learning_rate": 8.309581193882068e-06, "loss": 0.753, "step": 2250 }, { "epoch": 1.5323349217154527, "grad_norm": 1.7679026126861572, "learning_rate": 8.307893264807044e-06, "loss": 0.817, "step": 2251 }, { "epoch": 1.5330156569094622, "grad_norm": 1.7891566753387451, "learning_rate": 8.306204665054455e-06, "loss": 0.948, "step": 2252 }, { "epoch": 1.5336963921034719, "grad_norm": 1.569275975227356, "learning_rate": 8.304515394966663e-06, "loss": 0.9197, "step": 2253 }, { "epoch": 1.5343771272974813, "grad_norm": 1.580511450767517, "learning_rate": 8.302825454886172e-06, "loss": 0.8705, "step": 2254 }, { "epoch": 1.5350578624914908, "grad_norm": 1.9122852087020874, "learning_rate": 8.301134845155614e-06, "loss": 0.7988, "step": 2255 }, { "epoch": 1.5357385976855005, "grad_norm": 1.931699514389038, "learning_rate": 8.299443566117763e-06, "loss": 0.8098, "step": 2256 }, { "epoch": 1.5364193328795097, "grad_norm": 1.7165136337280273, "learning_rate": 8.29775161811553e-06, "loss": 0.9925, "step": 2257 }, { "epoch": 1.5371000680735194, "grad_norm": 1.7605162858963013, "learning_rate": 8.296059001491955e-06, "loss": 0.7639, "step": 2258 }, { "epoch": 1.537780803267529, "grad_norm": 1.7977447509765625, "learning_rate": 8.294365716590218e-06, "loss": 0.9002, "step": 2259 }, { "epoch": 1.5384615384615383, "grad_norm": 1.7801203727722168, "learning_rate": 8.292671763753635e-06, "loss": 0.9819, "step": 2260 }, { "epoch": 1.539142273655548, "grad_norm": 1.866524577140808, "learning_rate": 8.290977143325653e-06, "loss": 0.7523, "step": 2261 }, { "epoch": 1.5398230088495575, "grad_norm": 1.8625298738479614, "learning_rate": 8.28928185564986e-06, "loss": 0.8097, "step": 2262 }, { "epoch": 1.540503744043567, "grad_norm": 1.9170506000518799, "learning_rate": 8.287585901069977e-06, "loss": 0.7473, "step": 2263 }, { "epoch": 1.5411844792375766, "grad_norm": 2.13421630859375, "learning_rate": 8.285889279929857e-06, "loss": 0.8427, "step": 2264 }, { "epoch": 1.541865214431586, "grad_norm": 1.8210307359695435, "learning_rate": 8.284191992573496e-06, "loss": 0.8091, "step": 2265 }, { "epoch": 1.5425459496255955, "grad_norm": 1.7398158311843872, "learning_rate": 8.282494039345015e-06, "loss": 0.8156, "step": 2266 }, { "epoch": 1.5432266848196052, "grad_norm": 2.001664161682129, "learning_rate": 8.280795420588679e-06, "loss": 0.6845, "step": 2267 }, { "epoch": 1.5439074200136147, "grad_norm": 1.8016870021820068, "learning_rate": 8.27909613664888e-06, "loss": 0.8253, "step": 2268 }, { "epoch": 1.5445881552076242, "grad_norm": 1.6702110767364502, "learning_rate": 8.277396187870153e-06, "loss": 0.7741, "step": 2269 }, { "epoch": 1.5452688904016338, "grad_norm": 1.7641340494155884, "learning_rate": 8.275695574597162e-06, "loss": 0.8514, "step": 2270 }, { "epoch": 1.5459496255956433, "grad_norm": 1.817059874534607, "learning_rate": 8.273994297174707e-06, "loss": 0.8053, "step": 2271 }, { "epoch": 1.5466303607896528, "grad_norm": 1.7234628200531006, "learning_rate": 8.272292355947724e-06, "loss": 0.7334, "step": 2272 }, { "epoch": 1.5473110959836625, "grad_norm": 1.6974937915802002, "learning_rate": 8.270589751261285e-06, "loss": 0.8349, "step": 2273 }, { "epoch": 1.547991831177672, "grad_norm": 1.7534457445144653, "learning_rate": 8.268886483460587e-06, "loss": 0.7484, "step": 2274 }, { "epoch": 1.5486725663716814, "grad_norm": 1.6490323543548584, "learning_rate": 8.267182552890978e-06, "loss": 0.7931, "step": 2275 }, { "epoch": 1.549353301565691, "grad_norm": 1.9270710945129395, "learning_rate": 8.265477959897924e-06, "loss": 0.7201, "step": 2276 }, { "epoch": 1.5500340367597005, "grad_norm": 1.6581310033798218, "learning_rate": 8.263772704827036e-06, "loss": 0.7842, "step": 2277 }, { "epoch": 1.55071477195371, "grad_norm": 1.7209326028823853, "learning_rate": 8.262066788024052e-06, "loss": 0.836, "step": 2278 }, { "epoch": 1.5513955071477197, "grad_norm": 1.64900541305542, "learning_rate": 8.260360209834852e-06, "loss": 0.8351, "step": 2279 }, { "epoch": 1.552076242341729, "grad_norm": 2.031986951828003, "learning_rate": 8.258652970605444e-06, "loss": 0.7525, "step": 2280 }, { "epoch": 1.5527569775357386, "grad_norm": 2.072218179702759, "learning_rate": 8.256945070681972e-06, "loss": 0.7481, "step": 2281 }, { "epoch": 1.5534377127297483, "grad_norm": 1.941495656967163, "learning_rate": 8.255236510410714e-06, "loss": 0.8079, "step": 2282 }, { "epoch": 1.5541184479237575, "grad_norm": 1.648929238319397, "learning_rate": 8.253527290138082e-06, "loss": 0.8791, "step": 2283 }, { "epoch": 1.5547991831177672, "grad_norm": 1.6614593267440796, "learning_rate": 8.25181741021062e-06, "loss": 0.8629, "step": 2284 }, { "epoch": 1.5554799183117767, "grad_norm": 1.836179256439209, "learning_rate": 8.250106870975008e-06, "loss": 0.7616, "step": 2285 }, { "epoch": 1.5561606535057861, "grad_norm": 1.9389572143554688, "learning_rate": 8.24839567277806e-06, "loss": 0.7187, "step": 2286 }, { "epoch": 1.5568413886997958, "grad_norm": 1.6452401876449585, "learning_rate": 8.24668381596672e-06, "loss": 0.8244, "step": 2287 }, { "epoch": 1.5575221238938053, "grad_norm": 1.8101783990859985, "learning_rate": 8.244971300888074e-06, "loss": 0.7885, "step": 2288 }, { "epoch": 1.5582028590878148, "grad_norm": 1.62465500831604, "learning_rate": 8.243258127889328e-06, "loss": 0.9386, "step": 2289 }, { "epoch": 1.5588835942818244, "grad_norm": 1.713287591934204, "learning_rate": 8.241544297317835e-06, "loss": 0.8318, "step": 2290 }, { "epoch": 1.559564329475834, "grad_norm": 1.7628713846206665, "learning_rate": 8.239829809521071e-06, "loss": 0.8299, "step": 2291 }, { "epoch": 1.5602450646698434, "grad_norm": 1.670150876045227, "learning_rate": 8.238114664846653e-06, "loss": 0.8289, "step": 2292 }, { "epoch": 1.560925799863853, "grad_norm": 1.607551097869873, "learning_rate": 8.236398863642326e-06, "loss": 0.9199, "step": 2293 }, { "epoch": 1.5616065350578625, "grad_norm": 1.7923020124435425, "learning_rate": 8.23468240625597e-06, "loss": 0.7332, "step": 2294 }, { "epoch": 1.562287270251872, "grad_norm": 1.937216877937317, "learning_rate": 8.232965293035598e-06, "loss": 0.8136, "step": 2295 }, { "epoch": 1.5629680054458817, "grad_norm": 1.6586296558380127, "learning_rate": 8.231247524329354e-06, "loss": 0.8449, "step": 2296 }, { "epoch": 1.5636487406398911, "grad_norm": 1.8154340982437134, "learning_rate": 8.22952910048552e-06, "loss": 0.8373, "step": 2297 }, { "epoch": 1.5643294758339006, "grad_norm": 1.8728797435760498, "learning_rate": 8.227810021852505e-06, "loss": 0.7403, "step": 2298 }, { "epoch": 1.5650102110279103, "grad_norm": 1.8811229467391968, "learning_rate": 8.226090288778855e-06, "loss": 0.7133, "step": 2299 }, { "epoch": 1.5656909462219195, "grad_norm": 1.7917637825012207, "learning_rate": 8.224369901613247e-06, "loss": 0.8019, "step": 2300 }, { "epoch": 1.5663716814159292, "grad_norm": 1.6834642887115479, "learning_rate": 8.22264886070449e-06, "loss": 0.79, "step": 2301 }, { "epoch": 1.5670524166099389, "grad_norm": 1.8581550121307373, "learning_rate": 8.220927166401525e-06, "loss": 0.6826, "step": 2302 }, { "epoch": 1.5677331518039481, "grad_norm": 1.6096782684326172, "learning_rate": 8.21920481905343e-06, "loss": 0.8748, "step": 2303 }, { "epoch": 1.5684138869979578, "grad_norm": 1.8527843952178955, "learning_rate": 8.217481819009408e-06, "loss": 0.721, "step": 2304 }, { "epoch": 1.5690946221919673, "grad_norm": 1.8109945058822632, "learning_rate": 8.215758166618801e-06, "loss": 0.7036, "step": 2305 }, { "epoch": 1.5697753573859767, "grad_norm": 1.9865758419036865, "learning_rate": 8.214033862231081e-06, "loss": 0.7865, "step": 2306 }, { "epoch": 1.5704560925799864, "grad_norm": 1.9212039709091187, "learning_rate": 8.21230890619585e-06, "loss": 0.7245, "step": 2307 }, { "epoch": 1.5711368277739959, "grad_norm": 1.9783092737197876, "learning_rate": 8.210583298862844e-06, "loss": 0.7718, "step": 2308 }, { "epoch": 1.5718175629680053, "grad_norm": 1.8439719676971436, "learning_rate": 8.208857040581934e-06, "loss": 0.8841, "step": 2309 }, { "epoch": 1.572498298162015, "grad_norm": 1.9308481216430664, "learning_rate": 8.207130131703117e-06, "loss": 0.7936, "step": 2310 }, { "epoch": 1.5731790333560245, "grad_norm": 1.6885991096496582, "learning_rate": 8.205402572576527e-06, "loss": 0.8446, "step": 2311 }, { "epoch": 1.573859768550034, "grad_norm": 1.613070011138916, "learning_rate": 8.203674363552424e-06, "loss": 0.7925, "step": 2312 }, { "epoch": 1.5745405037440436, "grad_norm": 1.7831635475158691, "learning_rate": 8.201945504981207e-06, "loss": 0.8423, "step": 2313 }, { "epoch": 1.575221238938053, "grad_norm": 1.8902089595794678, "learning_rate": 8.200215997213406e-06, "loss": 0.7517, "step": 2314 }, { "epoch": 1.5759019741320626, "grad_norm": 1.6343927383422852, "learning_rate": 8.198485840599673e-06, "loss": 0.844, "step": 2315 }, { "epoch": 1.5765827093260723, "grad_norm": 1.7525306940078735, "learning_rate": 8.196755035490802e-06, "loss": 0.7611, "step": 2316 }, { "epoch": 1.5772634445200817, "grad_norm": 1.6396183967590332, "learning_rate": 8.195023582237715e-06, "loss": 0.9001, "step": 2317 }, { "epoch": 1.5779441797140912, "grad_norm": 1.8372339010238647, "learning_rate": 8.193291481191464e-06, "loss": 0.7146, "step": 2318 }, { "epoch": 1.5786249149081009, "grad_norm": 1.6421735286712646, "learning_rate": 8.191558732703235e-06, "loss": 0.8665, "step": 2319 }, { "epoch": 1.5793056501021103, "grad_norm": 1.8254510164260864, "learning_rate": 8.189825337124343e-06, "loss": 0.6871, "step": 2320 }, { "epoch": 1.5799863852961198, "grad_norm": 1.7792116403579712, "learning_rate": 8.188091294806237e-06, "loss": 0.8281, "step": 2321 }, { "epoch": 1.5806671204901295, "grad_norm": 1.894643783569336, "learning_rate": 8.186356606100493e-06, "loss": 0.7929, "step": 2322 }, { "epoch": 1.5813478556841387, "grad_norm": 1.7292810678482056, "learning_rate": 8.184621271358823e-06, "loss": 0.8022, "step": 2323 }, { "epoch": 1.5820285908781484, "grad_norm": 1.5725524425506592, "learning_rate": 8.182885290933063e-06, "loss": 0.9458, "step": 2324 }, { "epoch": 1.582709326072158, "grad_norm": 1.5734728574752808, "learning_rate": 8.181148665175187e-06, "loss": 0.8173, "step": 2325 }, { "epoch": 1.5833900612661673, "grad_norm": 1.6863117218017578, "learning_rate": 8.179411394437297e-06, "loss": 0.8317, "step": 2326 }, { "epoch": 1.584070796460177, "grad_norm": 1.9740571975708008, "learning_rate": 8.177673479071626e-06, "loss": 0.6875, "step": 2327 }, { "epoch": 1.5847515316541865, "grad_norm": 1.9136521816253662, "learning_rate": 8.175934919430534e-06, "loss": 0.7727, "step": 2328 }, { "epoch": 1.585432266848196, "grad_norm": 1.7398738861083984, "learning_rate": 8.174195715866523e-06, "loss": 0.7938, "step": 2329 }, { "epoch": 1.5861130020422056, "grad_norm": 1.7713494300842285, "learning_rate": 8.172455868732207e-06, "loss": 0.702, "step": 2330 }, { "epoch": 1.586793737236215, "grad_norm": 1.8174902200698853, "learning_rate": 8.17071537838035e-06, "loss": 0.8951, "step": 2331 }, { "epoch": 1.5874744724302245, "grad_norm": 1.855499267578125, "learning_rate": 8.168974245163835e-06, "loss": 0.7624, "step": 2332 }, { "epoch": 1.5881552076242342, "grad_norm": 1.9894458055496216, "learning_rate": 8.167232469435676e-06, "loss": 0.7823, "step": 2333 }, { "epoch": 1.5888359428182437, "grad_norm": 1.8498797416687012, "learning_rate": 8.165490051549022e-06, "loss": 0.8693, "step": 2334 }, { "epoch": 1.5895166780122532, "grad_norm": 1.825763463973999, "learning_rate": 8.163746991857147e-06, "loss": 0.7386, "step": 2335 }, { "epoch": 1.5901974132062628, "grad_norm": 1.749941110610962, "learning_rate": 8.162003290713458e-06, "loss": 0.852, "step": 2336 }, { "epoch": 1.5908781484002723, "grad_norm": 1.892744779586792, "learning_rate": 8.160258948471491e-06, "loss": 0.7207, "step": 2337 }, { "epoch": 1.5915588835942818, "grad_norm": 1.6501765251159668, "learning_rate": 8.158513965484915e-06, "loss": 0.7602, "step": 2338 }, { "epoch": 1.5922396187882915, "grad_norm": 1.629441738128662, "learning_rate": 8.156768342107526e-06, "loss": 0.7232, "step": 2339 }, { "epoch": 1.592920353982301, "grad_norm": 1.7378742694854736, "learning_rate": 8.15502207869325e-06, "loss": 0.8361, "step": 2340 }, { "epoch": 1.5936010891763104, "grad_norm": 1.7258751392364502, "learning_rate": 8.15327517559614e-06, "loss": 0.8486, "step": 2341 }, { "epoch": 1.59428182437032, "grad_norm": 1.6817883253097534, "learning_rate": 8.151527633170384e-06, "loss": 0.8543, "step": 2342 }, { "epoch": 1.5949625595643295, "grad_norm": 1.7319146394729614, "learning_rate": 8.149779451770298e-06, "loss": 0.7854, "step": 2343 }, { "epoch": 1.595643294758339, "grad_norm": 1.6992779970169067, "learning_rate": 8.14803063175033e-06, "loss": 0.7827, "step": 2344 }, { "epoch": 1.5963240299523487, "grad_norm": 1.8918389081954956, "learning_rate": 8.146281173465048e-06, "loss": 0.7459, "step": 2345 }, { "epoch": 1.597004765146358, "grad_norm": 1.6404815912246704, "learning_rate": 8.144531077269157e-06, "loss": 0.874, "step": 2346 }, { "epoch": 1.5976855003403676, "grad_norm": 1.8994060754776, "learning_rate": 8.142780343517496e-06, "loss": 0.8019, "step": 2347 }, { "epoch": 1.5983662355343773, "grad_norm": 1.7898178100585938, "learning_rate": 8.141028972565021e-06, "loss": 0.715, "step": 2348 }, { "epoch": 1.5990469707283865, "grad_norm": 1.7970589399337769, "learning_rate": 8.139276964766827e-06, "loss": 0.7384, "step": 2349 }, { "epoch": 1.5997277059223962, "grad_norm": 1.5544366836547852, "learning_rate": 8.137524320478131e-06, "loss": 0.8233, "step": 2350 }, { "epoch": 1.6004084411164057, "grad_norm": 1.8173701763153076, "learning_rate": 8.135771040054289e-06, "loss": 0.8907, "step": 2351 }, { "epoch": 1.6010891763104151, "grad_norm": 1.8614137172698975, "learning_rate": 8.134017123850774e-06, "loss": 0.7322, "step": 2352 }, { "epoch": 1.6017699115044248, "grad_norm": 1.838544487953186, "learning_rate": 8.1322625722232e-06, "loss": 0.8235, "step": 2353 }, { "epoch": 1.6024506466984343, "grad_norm": 1.5702792406082153, "learning_rate": 8.130507385527295e-06, "loss": 0.9488, "step": 2354 }, { "epoch": 1.6031313818924438, "grad_norm": 1.6995023488998413, "learning_rate": 8.12875156411893e-06, "loss": 0.9243, "step": 2355 }, { "epoch": 1.6038121170864534, "grad_norm": 1.807737112045288, "learning_rate": 8.1269951083541e-06, "loss": 0.6247, "step": 2356 }, { "epoch": 1.604492852280463, "grad_norm": 1.572104573249817, "learning_rate": 8.125238018588923e-06, "loss": 0.7745, "step": 2357 }, { "epoch": 1.6051735874744724, "grad_norm": 1.748547077178955, "learning_rate": 8.123480295179654e-06, "loss": 0.838, "step": 2358 }, { "epoch": 1.605854322668482, "grad_norm": 1.64974844455719, "learning_rate": 8.12172193848267e-06, "loss": 0.7494, "step": 2359 }, { "epoch": 1.6065350578624915, "grad_norm": 1.6221238374710083, "learning_rate": 8.119962948854481e-06, "loss": 0.8367, "step": 2360 }, { "epoch": 1.607215793056501, "grad_norm": 1.8565505743026733, "learning_rate": 8.118203326651722e-06, "loss": 0.7902, "step": 2361 }, { "epoch": 1.6078965282505107, "grad_norm": 1.5774387121200562, "learning_rate": 8.116443072231156e-06, "loss": 0.8461, "step": 2362 }, { "epoch": 1.6085772634445201, "grad_norm": 1.597070574760437, "learning_rate": 8.114682185949681e-06, "loss": 0.7885, "step": 2363 }, { "epoch": 1.6092579986385296, "grad_norm": 1.9330346584320068, "learning_rate": 8.112920668164313e-06, "loss": 0.7995, "step": 2364 }, { "epoch": 1.6099387338325393, "grad_norm": 1.8401787281036377, "learning_rate": 8.111158519232203e-06, "loss": 0.8003, "step": 2365 }, { "epoch": 1.6106194690265485, "grad_norm": 1.9462956190109253, "learning_rate": 8.109395739510626e-06, "loss": 0.7105, "step": 2366 }, { "epoch": 1.6113002042205582, "grad_norm": 1.8045681715011597, "learning_rate": 8.107632329356989e-06, "loss": 0.8214, "step": 2367 }, { "epoch": 1.6119809394145679, "grad_norm": 1.746605634689331, "learning_rate": 8.105868289128824e-06, "loss": 0.7723, "step": 2368 }, { "epoch": 1.6126616746085771, "grad_norm": 1.6552221775054932, "learning_rate": 8.10410361918379e-06, "loss": 0.7853, "step": 2369 }, { "epoch": 1.6133424098025868, "grad_norm": 1.682644009590149, "learning_rate": 8.102338319879678e-06, "loss": 0.8023, "step": 2370 }, { "epoch": 1.6140231449965963, "grad_norm": 1.6843271255493164, "learning_rate": 8.100572391574399e-06, "loss": 0.7994, "step": 2371 }, { "epoch": 1.6147038801906057, "grad_norm": 1.6815407276153564, "learning_rate": 8.098805834626003e-06, "loss": 0.8569, "step": 2372 }, { "epoch": 1.6153846153846154, "grad_norm": 1.831900954246521, "learning_rate": 8.097038649392654e-06, "loss": 0.8477, "step": 2373 }, { "epoch": 1.6160653505786249, "grad_norm": 1.9639742374420166, "learning_rate": 8.095270836232652e-06, "loss": 0.6168, "step": 2374 }, { "epoch": 1.6167460857726343, "grad_norm": 1.8038982152938843, "learning_rate": 8.093502395504422e-06, "loss": 0.7529, "step": 2375 }, { "epoch": 1.617426820966644, "grad_norm": 1.62727689743042, "learning_rate": 8.091733327566517e-06, "loss": 0.8164, "step": 2376 }, { "epoch": 1.6181075561606535, "grad_norm": 1.7934695482254028, "learning_rate": 8.08996363277762e-06, "loss": 0.7978, "step": 2377 }, { "epoch": 1.618788291354663, "grad_norm": 1.753796935081482, "learning_rate": 8.088193311496533e-06, "loss": 0.9698, "step": 2378 }, { "epoch": 1.6194690265486726, "grad_norm": 1.8568596839904785, "learning_rate": 8.086422364082192e-06, "loss": 0.7812, "step": 2379 }, { "epoch": 1.620149761742682, "grad_norm": 1.6381210088729858, "learning_rate": 8.084650790893657e-06, "loss": 0.7842, "step": 2380 }, { "epoch": 1.6208304969366916, "grad_norm": 1.6137648820877075, "learning_rate": 8.082878592290116e-06, "loss": 0.9007, "step": 2381 }, { "epoch": 1.6215112321307013, "grad_norm": 1.645877718925476, "learning_rate": 8.081105768630883e-06, "loss": 0.7957, "step": 2382 }, { "epoch": 1.6221919673247107, "grad_norm": 1.77944815158844, "learning_rate": 8.079332320275399e-06, "loss": 0.7867, "step": 2383 }, { "epoch": 1.6228727025187202, "grad_norm": 2.112971067428589, "learning_rate": 8.077558247583233e-06, "loss": 0.6231, "step": 2384 }, { "epoch": 1.6235534377127299, "grad_norm": 1.815826654434204, "learning_rate": 8.07578355091408e-06, "loss": 0.7788, "step": 2385 }, { "epoch": 1.6242341729067393, "grad_norm": 1.6672799587249756, "learning_rate": 8.07400823062776e-06, "loss": 0.7994, "step": 2386 }, { "epoch": 1.6249149081007488, "grad_norm": 1.9647133350372314, "learning_rate": 8.072232287084219e-06, "loss": 0.7696, "step": 2387 }, { "epoch": 1.6255956432947585, "grad_norm": 1.9051156044006348, "learning_rate": 8.070455720643533e-06, "loss": 0.7903, "step": 2388 }, { "epoch": 1.6262763784887677, "grad_norm": 2.029189109802246, "learning_rate": 8.068678531665899e-06, "loss": 0.7023, "step": 2389 }, { "epoch": 1.6269571136827774, "grad_norm": 1.7322806119918823, "learning_rate": 8.066900720511647e-06, "loss": 0.7902, "step": 2390 }, { "epoch": 1.627637848876787, "grad_norm": 1.7583985328674316, "learning_rate": 8.065122287541228e-06, "loss": 0.8266, "step": 2391 }, { "epoch": 1.6283185840707963, "grad_norm": 1.790358066558838, "learning_rate": 8.063343233115218e-06, "loss": 0.8169, "step": 2392 }, { "epoch": 1.628999319264806, "grad_norm": 1.7467390298843384, "learning_rate": 8.061563557594325e-06, "loss": 0.84, "step": 2393 }, { "epoch": 1.6296800544588155, "grad_norm": 1.7154995203018188, "learning_rate": 8.059783261339377e-06, "loss": 0.8377, "step": 2394 }, { "epoch": 1.630360789652825, "grad_norm": 1.713818073272705, "learning_rate": 8.05800234471133e-06, "loss": 0.8342, "step": 2395 }, { "epoch": 1.6310415248468346, "grad_norm": 1.69582998752594, "learning_rate": 8.05622080807127e-06, "loss": 0.8461, "step": 2396 }, { "epoch": 1.631722260040844, "grad_norm": 1.8034582138061523, "learning_rate": 8.0544386517804e-06, "loss": 0.7733, "step": 2397 }, { "epoch": 1.6324029952348535, "grad_norm": 1.869802474975586, "learning_rate": 8.052655876200058e-06, "loss": 0.8021, "step": 2398 }, { "epoch": 1.6330837304288632, "grad_norm": 1.803203821182251, "learning_rate": 8.050872481691699e-06, "loss": 0.7883, "step": 2399 }, { "epoch": 1.6337644656228727, "grad_norm": 2.0419607162475586, "learning_rate": 8.049088468616908e-06, "loss": 0.7247, "step": 2400 }, { "epoch": 1.6344452008168822, "grad_norm": 1.983041524887085, "learning_rate": 8.047303837337396e-06, "loss": 0.7781, "step": 2401 }, { "epoch": 1.6351259360108918, "grad_norm": 1.9360939264297485, "learning_rate": 8.045518588214997e-06, "loss": 0.7283, "step": 2402 }, { "epoch": 1.6358066712049013, "grad_norm": 1.81977117061615, "learning_rate": 8.043732721611672e-06, "loss": 0.8309, "step": 2403 }, { "epoch": 1.6364874063989108, "grad_norm": 1.7253180742263794, "learning_rate": 8.041946237889509e-06, "loss": 0.8408, "step": 2404 }, { "epoch": 1.6371681415929205, "grad_norm": 1.8173128366470337, "learning_rate": 8.040159137410715e-06, "loss": 0.7992, "step": 2405 }, { "epoch": 1.63784887678693, "grad_norm": 1.8887847661972046, "learning_rate": 8.038371420537626e-06, "loss": 0.8531, "step": 2406 }, { "epoch": 1.6385296119809394, "grad_norm": 1.9277549982070923, "learning_rate": 8.036583087632706e-06, "loss": 0.7753, "step": 2407 }, { "epoch": 1.639210347174949, "grad_norm": 2.048386335372925, "learning_rate": 8.034794139058539e-06, "loss": 0.7333, "step": 2408 }, { "epoch": 1.6398910823689585, "grad_norm": 1.9572253227233887, "learning_rate": 8.033004575177836e-06, "loss": 0.7049, "step": 2409 }, { "epoch": 1.640571817562968, "grad_norm": 1.6175979375839233, "learning_rate": 8.03121439635343e-06, "loss": 0.8124, "step": 2410 }, { "epoch": 1.6412525527569777, "grad_norm": 1.7572028636932373, "learning_rate": 8.029423602948283e-06, "loss": 0.7306, "step": 2411 }, { "epoch": 1.641933287950987, "grad_norm": 1.8136630058288574, "learning_rate": 8.027632195325481e-06, "loss": 0.7355, "step": 2412 }, { "epoch": 1.6426140231449966, "grad_norm": 1.7013225555419922, "learning_rate": 8.02584017384823e-06, "loss": 0.8914, "step": 2413 }, { "epoch": 1.6432947583390063, "grad_norm": 1.8380881547927856, "learning_rate": 8.024047538879864e-06, "loss": 0.8343, "step": 2414 }, { "epoch": 1.6439754935330155, "grad_norm": 1.8001923561096191, "learning_rate": 8.022254290783841e-06, "loss": 0.7361, "step": 2415 }, { "epoch": 1.6446562287270252, "grad_norm": 1.7439666986465454, "learning_rate": 8.020460429923747e-06, "loss": 0.7636, "step": 2416 }, { "epoch": 1.6453369639210347, "grad_norm": 1.9474244117736816, "learning_rate": 8.018665956663284e-06, "loss": 0.6974, "step": 2417 }, { "epoch": 1.6460176991150441, "grad_norm": 1.7307599782943726, "learning_rate": 8.016870871366285e-06, "loss": 0.7912, "step": 2418 }, { "epoch": 1.6466984343090538, "grad_norm": 2.02425217628479, "learning_rate": 8.015075174396703e-06, "loss": 0.6426, "step": 2419 }, { "epoch": 1.6473791695030633, "grad_norm": 1.76828932762146, "learning_rate": 8.013278866118617e-06, "loss": 0.9197, "step": 2420 }, { "epoch": 1.6480599046970728, "grad_norm": 1.7297213077545166, "learning_rate": 8.011481946896232e-06, "loss": 0.7719, "step": 2421 }, { "epoch": 1.6487406398910824, "grad_norm": 1.8095216751098633, "learning_rate": 8.009684417093872e-06, "loss": 0.782, "step": 2422 }, { "epoch": 1.649421375085092, "grad_norm": 1.7371517419815063, "learning_rate": 8.007886277075987e-06, "loss": 0.8631, "step": 2423 }, { "epoch": 1.6501021102791014, "grad_norm": 1.7470587491989136, "learning_rate": 8.006087527207154e-06, "loss": 0.7373, "step": 2424 }, { "epoch": 1.650782845473111, "grad_norm": 1.9707919359207153, "learning_rate": 8.004288167852068e-06, "loss": 0.8139, "step": 2425 }, { "epoch": 1.6514635806671205, "grad_norm": 1.9305102825164795, "learning_rate": 8.002488199375552e-06, "loss": 0.7644, "step": 2426 }, { "epoch": 1.65214431586113, "grad_norm": 1.735705852508545, "learning_rate": 8.000687622142553e-06, "loss": 0.9073, "step": 2427 }, { "epoch": 1.6528250510551397, "grad_norm": 1.9640743732452393, "learning_rate": 7.998886436518134e-06, "loss": 0.7901, "step": 2428 }, { "epoch": 1.6535057862491491, "grad_norm": 1.9679532051086426, "learning_rate": 7.997084642867488e-06, "loss": 0.7036, "step": 2429 }, { "epoch": 1.6541865214431586, "grad_norm": 1.9508812427520752, "learning_rate": 7.995282241555931e-06, "loss": 0.6833, "step": 2430 }, { "epoch": 1.6548672566371683, "grad_norm": 1.7067590951919556, "learning_rate": 7.993479232948904e-06, "loss": 0.8981, "step": 2431 }, { "epoch": 1.6555479918311775, "grad_norm": 1.8163373470306396, "learning_rate": 7.991675617411961e-06, "loss": 0.7627, "step": 2432 }, { "epoch": 1.6562287270251872, "grad_norm": 1.8233033418655396, "learning_rate": 7.989871395310793e-06, "loss": 0.7828, "step": 2433 }, { "epoch": 1.6569094622191969, "grad_norm": 1.68712317943573, "learning_rate": 7.988066567011206e-06, "loss": 0.7595, "step": 2434 }, { "epoch": 1.6575901974132061, "grad_norm": 2.059189558029175, "learning_rate": 7.986261132879125e-06, "loss": 0.7251, "step": 2435 }, { "epoch": 1.6582709326072158, "grad_norm": 1.8482327461242676, "learning_rate": 7.98445509328061e-06, "loss": 0.7363, "step": 2436 }, { "epoch": 1.6589516678012253, "grad_norm": 1.8052335977554321, "learning_rate": 7.982648448581832e-06, "loss": 0.75, "step": 2437 }, { "epoch": 1.6596324029952347, "grad_norm": 1.7759044170379639, "learning_rate": 7.980841199149091e-06, "loss": 0.7837, "step": 2438 }, { "epoch": 1.6603131381892444, "grad_norm": 1.8882311582565308, "learning_rate": 7.979033345348808e-06, "loss": 0.7444, "step": 2439 }, { "epoch": 1.6609938733832539, "grad_norm": 1.7354198694229126, "learning_rate": 7.977224887547527e-06, "loss": 0.8428, "step": 2440 }, { "epoch": 1.6616746085772633, "grad_norm": 1.6430028676986694, "learning_rate": 7.975415826111914e-06, "loss": 0.8575, "step": 2441 }, { "epoch": 1.662355343771273, "grad_norm": 1.9734504222869873, "learning_rate": 7.973606161408756e-06, "loss": 0.8056, "step": 2442 }, { "epoch": 1.6630360789652825, "grad_norm": 1.670733094215393, "learning_rate": 7.971795893804965e-06, "loss": 0.8917, "step": 2443 }, { "epoch": 1.663716814159292, "grad_norm": 1.8400344848632812, "learning_rate": 7.969985023667575e-06, "loss": 0.7335, "step": 2444 }, { "epoch": 1.6643975493533016, "grad_norm": 1.5705645084381104, "learning_rate": 7.968173551363738e-06, "loss": 0.9583, "step": 2445 }, { "epoch": 1.665078284547311, "grad_norm": 1.822453498840332, "learning_rate": 7.966361477260733e-06, "loss": 0.7553, "step": 2446 }, { "epoch": 1.6657590197413206, "grad_norm": 1.699543833732605, "learning_rate": 7.96454880172596e-06, "loss": 0.8707, "step": 2447 }, { "epoch": 1.6664397549353303, "grad_norm": 1.6949071884155273, "learning_rate": 7.96273552512694e-06, "loss": 0.8322, "step": 2448 }, { "epoch": 1.6671204901293397, "grad_norm": 1.7985422611236572, "learning_rate": 7.960921647831314e-06, "loss": 0.8335, "step": 2449 }, { "epoch": 1.6678012253233492, "grad_norm": 1.724686622619629, "learning_rate": 7.95910717020685e-06, "loss": 0.8225, "step": 2450 }, { "epoch": 1.6684819605173589, "grad_norm": 1.6630074977874756, "learning_rate": 7.957292092621432e-06, "loss": 0.8143, "step": 2451 }, { "epoch": 1.6691626957113683, "grad_norm": 1.647949457168579, "learning_rate": 7.955476415443069e-06, "loss": 0.9154, "step": 2452 }, { "epoch": 1.6698434309053778, "grad_norm": 1.659745693206787, "learning_rate": 7.95366013903989e-06, "loss": 0.8073, "step": 2453 }, { "epoch": 1.6705241660993875, "grad_norm": 1.8379991054534912, "learning_rate": 7.951843263780148e-06, "loss": 0.7564, "step": 2454 }, { "epoch": 1.6712049012933967, "grad_norm": 1.8036913871765137, "learning_rate": 7.950025790032214e-06, "loss": 0.8735, "step": 2455 }, { "epoch": 1.6718856364874064, "grad_norm": 1.7611589431762695, "learning_rate": 7.948207718164584e-06, "loss": 0.8394, "step": 2456 }, { "epoch": 1.672566371681416, "grad_norm": 1.895580768585205, "learning_rate": 7.946389048545871e-06, "loss": 0.8887, "step": 2457 }, { "epoch": 1.6732471068754253, "grad_norm": 1.8188705444335938, "learning_rate": 7.944569781544814e-06, "loss": 0.702, "step": 2458 }, { "epoch": 1.673927842069435, "grad_norm": 1.7419298887252808, "learning_rate": 7.942749917530266e-06, "loss": 0.7836, "step": 2459 }, { "epoch": 1.6746085772634445, "grad_norm": 1.7909866571426392, "learning_rate": 7.940929456871211e-06, "loss": 0.7583, "step": 2460 }, { "epoch": 1.675289312457454, "grad_norm": 1.801190733909607, "learning_rate": 7.939108399936747e-06, "loss": 0.6776, "step": 2461 }, { "epoch": 1.6759700476514636, "grad_norm": 1.7265524864196777, "learning_rate": 7.937286747096093e-06, "loss": 0.871, "step": 2462 }, { "epoch": 1.676650782845473, "grad_norm": 1.9631811380386353, "learning_rate": 7.93546449871859e-06, "loss": 0.9143, "step": 2463 }, { "epoch": 1.6773315180394825, "grad_norm": 1.8406404256820679, "learning_rate": 7.933641655173703e-06, "loss": 0.7583, "step": 2464 }, { "epoch": 1.6780122532334922, "grad_norm": 1.7036089897155762, "learning_rate": 7.931818216831014e-06, "loss": 0.7192, "step": 2465 }, { "epoch": 1.6786929884275017, "grad_norm": 1.6474921703338623, "learning_rate": 7.929994184060221e-06, "loss": 0.7367, "step": 2466 }, { "epoch": 1.6793737236215112, "grad_norm": 1.870328426361084, "learning_rate": 7.928169557231156e-06, "loss": 0.7693, "step": 2467 }, { "epoch": 1.6800544588155208, "grad_norm": 1.8573375940322876, "learning_rate": 7.926344336713758e-06, "loss": 0.7751, "step": 2468 }, { "epoch": 1.6807351940095303, "grad_norm": 1.7213656902313232, "learning_rate": 7.924518522878093e-06, "loss": 0.8794, "step": 2469 }, { "epoch": 1.6814159292035398, "grad_norm": 1.7637957334518433, "learning_rate": 7.922692116094349e-06, "loss": 0.8899, "step": 2470 }, { "epoch": 1.6820966643975495, "grad_norm": 1.7812601327896118, "learning_rate": 7.920865116732824e-06, "loss": 0.7166, "step": 2471 }, { "epoch": 1.682777399591559, "grad_norm": 1.7243927717208862, "learning_rate": 7.91903752516395e-06, "loss": 0.7026, "step": 2472 }, { "epoch": 1.6834581347855684, "grad_norm": 1.634993314743042, "learning_rate": 7.91720934175827e-06, "loss": 0.8747, "step": 2473 }, { "epoch": 1.684138869979578, "grad_norm": 1.8468903303146362, "learning_rate": 7.91538056688645e-06, "loss": 0.766, "step": 2474 }, { "epoch": 1.6848196051735873, "grad_norm": 1.632820725440979, "learning_rate": 7.913551200919274e-06, "loss": 0.7401, "step": 2475 }, { "epoch": 1.685500340367597, "grad_norm": 1.6969746351242065, "learning_rate": 7.911721244227647e-06, "loss": 0.8457, "step": 2476 }, { "epoch": 1.6861810755616067, "grad_norm": 1.6739591360092163, "learning_rate": 7.909890697182598e-06, "loss": 0.8823, "step": 2477 }, { "epoch": 1.686861810755616, "grad_norm": 1.6903080940246582, "learning_rate": 7.908059560155267e-06, "loss": 0.8382, "step": 2478 }, { "epoch": 1.6875425459496256, "grad_norm": 1.715610146522522, "learning_rate": 7.90622783351692e-06, "loss": 0.7843, "step": 2479 }, { "epoch": 1.6882232811436353, "grad_norm": 1.6657848358154297, "learning_rate": 7.90439551763894e-06, "loss": 0.8285, "step": 2480 }, { "epoch": 1.6889040163376445, "grad_norm": 2.1570088863372803, "learning_rate": 7.90256261289283e-06, "loss": 0.7832, "step": 2481 }, { "epoch": 1.6895847515316542, "grad_norm": 1.7887022495269775, "learning_rate": 7.900729119650215e-06, "loss": 0.7996, "step": 2482 }, { "epoch": 1.6902654867256637, "grad_norm": 1.6294970512390137, "learning_rate": 7.898895038282836e-06, "loss": 0.8545, "step": 2483 }, { "epoch": 1.6909462219196731, "grad_norm": 1.6509472131729126, "learning_rate": 7.897060369162552e-06, "loss": 0.7547, "step": 2484 }, { "epoch": 1.6916269571136828, "grad_norm": 1.7189313173294067, "learning_rate": 7.895225112661347e-06, "loss": 0.7838, "step": 2485 }, { "epoch": 1.6923076923076923, "grad_norm": 1.7508127689361572, "learning_rate": 7.893389269151319e-06, "loss": 0.7979, "step": 2486 }, { "epoch": 1.6929884275017018, "grad_norm": 1.9025278091430664, "learning_rate": 7.891552839004685e-06, "loss": 0.703, "step": 2487 }, { "epoch": 1.6936691626957114, "grad_norm": 1.74802565574646, "learning_rate": 7.889715822593783e-06, "loss": 0.8243, "step": 2488 }, { "epoch": 1.694349897889721, "grad_norm": 1.8319567441940308, "learning_rate": 7.887878220291072e-06, "loss": 0.7957, "step": 2489 }, { "epoch": 1.6950306330837304, "grad_norm": 1.802986741065979, "learning_rate": 7.886040032469122e-06, "loss": 0.7564, "step": 2490 }, { "epoch": 1.69571136827774, "grad_norm": 1.656380295753479, "learning_rate": 7.884201259500631e-06, "loss": 0.8043, "step": 2491 }, { "epoch": 1.6963921034717495, "grad_norm": 1.8712739944458008, "learning_rate": 7.88236190175841e-06, "loss": 0.7694, "step": 2492 }, { "epoch": 1.697072838665759, "grad_norm": 2.022509813308716, "learning_rate": 7.880521959615392e-06, "loss": 0.8079, "step": 2493 }, { "epoch": 1.6977535738597687, "grad_norm": 1.7409125566482544, "learning_rate": 7.878681433444624e-06, "loss": 0.7746, "step": 2494 }, { "epoch": 1.6984343090537781, "grad_norm": 1.894756555557251, "learning_rate": 7.876840323619273e-06, "loss": 0.8267, "step": 2495 }, { "epoch": 1.6991150442477876, "grad_norm": 1.732977271080017, "learning_rate": 7.87499863051263e-06, "loss": 0.8026, "step": 2496 }, { "epoch": 1.6997957794417973, "grad_norm": 1.7951855659484863, "learning_rate": 7.873156354498092e-06, "loss": 0.725, "step": 2497 }, { "epoch": 1.7004765146358065, "grad_norm": 1.9720333814620972, "learning_rate": 7.87131349594919e-06, "loss": 0.7639, "step": 2498 }, { "epoch": 1.7011572498298162, "grad_norm": 1.8214280605316162, "learning_rate": 7.869470055239558e-06, "loss": 0.7986, "step": 2499 }, { "epoch": 1.7018379850238259, "grad_norm": 2.0003273487091064, "learning_rate": 7.86762603274296e-06, "loss": 0.7284, "step": 2500 }, { "epoch": 1.7025187202178351, "grad_norm": 1.7462916374206543, "learning_rate": 7.865781428833269e-06, "loss": 0.8319, "step": 2501 }, { "epoch": 1.7031994554118448, "grad_norm": 1.963153600692749, "learning_rate": 7.863936243884482e-06, "loss": 0.7362, "step": 2502 }, { "epoch": 1.7038801906058543, "grad_norm": 1.879460334777832, "learning_rate": 7.862090478270711e-06, "loss": 0.7671, "step": 2503 }, { "epoch": 1.7045609257998637, "grad_norm": 1.7345749139785767, "learning_rate": 7.860244132366184e-06, "loss": 0.8041, "step": 2504 }, { "epoch": 1.7052416609938734, "grad_norm": 1.846419095993042, "learning_rate": 7.858397206545251e-06, "loss": 0.8602, "step": 2505 }, { "epoch": 1.7059223961878829, "grad_norm": 1.862433910369873, "learning_rate": 7.856549701182379e-06, "loss": 0.8485, "step": 2506 }, { "epoch": 1.7066031313818923, "grad_norm": 1.9441109895706177, "learning_rate": 7.85470161665215e-06, "loss": 0.8934, "step": 2507 }, { "epoch": 1.707283866575902, "grad_norm": 1.7455615997314453, "learning_rate": 7.852852953329263e-06, "loss": 0.7998, "step": 2508 }, { "epoch": 1.7079646017699115, "grad_norm": 1.561189889907837, "learning_rate": 7.851003711588532e-06, "loss": 0.8307, "step": 2509 }, { "epoch": 1.708645336963921, "grad_norm": 1.692819595336914, "learning_rate": 7.8491538918049e-06, "loss": 0.8599, "step": 2510 }, { "epoch": 1.7093260721579306, "grad_norm": 1.5336296558380127, "learning_rate": 7.847303494353416e-06, "loss": 0.8576, "step": 2511 }, { "epoch": 1.71000680735194, "grad_norm": 1.8300433158874512, "learning_rate": 7.845452519609246e-06, "loss": 0.8174, "step": 2512 }, { "epoch": 1.7106875425459496, "grad_norm": 1.7581491470336914, "learning_rate": 7.84360096794768e-06, "loss": 0.7707, "step": 2513 }, { "epoch": 1.7113682777399593, "grad_norm": 1.7738981246948242, "learning_rate": 7.841748839744121e-06, "loss": 0.8439, "step": 2514 }, { "epoch": 1.7120490129339687, "grad_norm": 1.627707600593567, "learning_rate": 7.839896135374088e-06, "loss": 0.8583, "step": 2515 }, { "epoch": 1.7127297481279782, "grad_norm": 1.8145734071731567, "learning_rate": 7.83804285521322e-06, "loss": 0.9402, "step": 2516 }, { "epoch": 1.7134104833219879, "grad_norm": 1.965145468711853, "learning_rate": 7.836188999637268e-06, "loss": 0.8307, "step": 2517 }, { "epoch": 1.7140912185159973, "grad_norm": 1.7169644832611084, "learning_rate": 7.834334569022106e-06, "loss": 0.8291, "step": 2518 }, { "epoch": 1.7147719537100068, "grad_norm": 1.7085050344467163, "learning_rate": 7.832479563743717e-06, "loss": 0.8726, "step": 2519 }, { "epoch": 1.7154526889040165, "grad_norm": 1.871971845626831, "learning_rate": 7.830623984178207e-06, "loss": 0.8447, "step": 2520 }, { "epoch": 1.7161334240980257, "grad_norm": 1.9465378522872925, "learning_rate": 7.828767830701796e-06, "loss": 0.8897, "step": 2521 }, { "epoch": 1.7168141592920354, "grad_norm": 1.707287311553955, "learning_rate": 7.82691110369082e-06, "loss": 0.7883, "step": 2522 }, { "epoch": 1.717494894486045, "grad_norm": 1.8441665172576904, "learning_rate": 7.825053803521731e-06, "loss": 0.8538, "step": 2523 }, { "epoch": 1.7181756296800543, "grad_norm": 1.711458683013916, "learning_rate": 7.823195930571099e-06, "loss": 0.7129, "step": 2524 }, { "epoch": 1.718856364874064, "grad_norm": 1.883597493171692, "learning_rate": 7.821337485215607e-06, "loss": 0.7864, "step": 2525 }, { "epoch": 1.7195371000680735, "grad_norm": 1.555824637413025, "learning_rate": 7.819478467832058e-06, "loss": 0.9321, "step": 2526 }, { "epoch": 1.720217835262083, "grad_norm": 1.7402998208999634, "learning_rate": 7.81761887879737e-06, "loss": 0.8598, "step": 2527 }, { "epoch": 1.7208985704560926, "grad_norm": 1.6222047805786133, "learning_rate": 7.815758718488573e-06, "loss": 0.7926, "step": 2528 }, { "epoch": 1.721579305650102, "grad_norm": 1.5488550662994385, "learning_rate": 7.813897987282816e-06, "loss": 0.9153, "step": 2529 }, { "epoch": 1.7222600408441116, "grad_norm": 1.8017361164093018, "learning_rate": 7.812036685557367e-06, "loss": 0.7654, "step": 2530 }, { "epoch": 1.7229407760381212, "grad_norm": 1.829958200454712, "learning_rate": 7.810174813689601e-06, "loss": 0.7154, "step": 2531 }, { "epoch": 1.7236215112321307, "grad_norm": 1.9432095289230347, "learning_rate": 7.808312372057019e-06, "loss": 0.7605, "step": 2532 }, { "epoch": 1.7243022464261402, "grad_norm": 1.6423197984695435, "learning_rate": 7.806449361037229e-06, "loss": 0.8682, "step": 2533 }, { "epoch": 1.7249829816201498, "grad_norm": 1.9420750141143799, "learning_rate": 7.804585781007958e-06, "loss": 0.7762, "step": 2534 }, { "epoch": 1.7256637168141593, "grad_norm": 1.76565682888031, "learning_rate": 7.802721632347048e-06, "loss": 0.8407, "step": 2535 }, { "epoch": 1.7263444520081688, "grad_norm": 1.679046869277954, "learning_rate": 7.800856915432458e-06, "loss": 0.872, "step": 2536 }, { "epoch": 1.7270251872021785, "grad_norm": 1.9070783853530884, "learning_rate": 7.79899163064226e-06, "loss": 0.7756, "step": 2537 }, { "epoch": 1.727705922396188, "grad_norm": 1.7260403633117676, "learning_rate": 7.797125778354642e-06, "loss": 0.7734, "step": 2538 }, { "epoch": 1.7283866575901974, "grad_norm": 1.8351374864578247, "learning_rate": 7.795259358947903e-06, "loss": 0.7564, "step": 2539 }, { "epoch": 1.729067392784207, "grad_norm": 1.7058830261230469, "learning_rate": 7.793392372800466e-06, "loss": 0.8591, "step": 2540 }, { "epoch": 1.7297481279782163, "grad_norm": 1.735472559928894, "learning_rate": 7.791524820290862e-06, "loss": 0.7822, "step": 2541 }, { "epoch": 1.730428863172226, "grad_norm": 1.8072477579116821, "learning_rate": 7.78965670179774e-06, "loss": 0.7833, "step": 2542 }, { "epoch": 1.7311095983662357, "grad_norm": 1.781920075416565, "learning_rate": 7.787788017699855e-06, "loss": 0.8001, "step": 2543 }, { "epoch": 1.731790333560245, "grad_norm": 1.7180391550064087, "learning_rate": 7.785918768376093e-06, "loss": 0.8384, "step": 2544 }, { "epoch": 1.7324710687542546, "grad_norm": 1.8434438705444336, "learning_rate": 7.784048954205441e-06, "loss": 0.84, "step": 2545 }, { "epoch": 1.7331518039482643, "grad_norm": 1.6449155807495117, "learning_rate": 7.782178575567005e-06, "loss": 0.8578, "step": 2546 }, { "epoch": 1.7338325391422735, "grad_norm": 1.7722909450531006, "learning_rate": 7.780307632840009e-06, "loss": 0.6137, "step": 2547 }, { "epoch": 1.7345132743362832, "grad_norm": 1.7589815855026245, "learning_rate": 7.778436126403784e-06, "loss": 0.8593, "step": 2548 }, { "epoch": 1.7351940095302927, "grad_norm": 1.8951897621154785, "learning_rate": 7.77656405663778e-06, "loss": 0.8867, "step": 2549 }, { "epoch": 1.7358747447243021, "grad_norm": 1.5933266878128052, "learning_rate": 7.774691423921561e-06, "loss": 0.8085, "step": 2550 }, { "epoch": 1.7365554799183118, "grad_norm": 1.6717510223388672, "learning_rate": 7.772818228634805e-06, "loss": 0.7914, "step": 2551 }, { "epoch": 1.7372362151123213, "grad_norm": 1.7361047267913818, "learning_rate": 7.770944471157302e-06, "loss": 0.7676, "step": 2552 }, { "epoch": 1.7379169503063308, "grad_norm": 1.645896553993225, "learning_rate": 7.76907015186896e-06, "loss": 0.8413, "step": 2553 }, { "epoch": 1.7385976855003404, "grad_norm": 1.8233970403671265, "learning_rate": 7.767195271149797e-06, "loss": 0.9139, "step": 2554 }, { "epoch": 1.73927842069435, "grad_norm": 1.7174938917160034, "learning_rate": 7.765319829379944e-06, "loss": 0.8131, "step": 2555 }, { "epoch": 1.7399591558883594, "grad_norm": 1.891914963722229, "learning_rate": 7.763443826939654e-06, "loss": 0.805, "step": 2556 }, { "epoch": 1.740639891082369, "grad_norm": 1.7526402473449707, "learning_rate": 7.761567264209282e-06, "loss": 0.7836, "step": 2557 }, { "epoch": 1.7413206262763785, "grad_norm": 1.6399041414260864, "learning_rate": 7.759690141569305e-06, "loss": 0.8727, "step": 2558 }, { "epoch": 1.742001361470388, "grad_norm": 1.620285987854004, "learning_rate": 7.757812459400311e-06, "loss": 0.8716, "step": 2559 }, { "epoch": 1.7426820966643977, "grad_norm": 1.5758781433105469, "learning_rate": 7.755934218083e-06, "loss": 0.9242, "step": 2560 }, { "epoch": 1.7433628318584071, "grad_norm": 1.750198245048523, "learning_rate": 7.754055417998189e-06, "loss": 0.8034, "step": 2561 }, { "epoch": 1.7440435670524166, "grad_norm": 1.6607283353805542, "learning_rate": 7.752176059526803e-06, "loss": 0.8348, "step": 2562 }, { "epoch": 1.7447243022464263, "grad_norm": 1.6341544389724731, "learning_rate": 7.750296143049885e-06, "loss": 0.8916, "step": 2563 }, { "epoch": 1.7454050374404355, "grad_norm": 1.7386082410812378, "learning_rate": 7.74841566894859e-06, "loss": 0.7578, "step": 2564 }, { "epoch": 1.7460857726344452, "grad_norm": 1.6513925790786743, "learning_rate": 7.746534637604184e-06, "loss": 0.7866, "step": 2565 }, { "epoch": 1.7467665078284549, "grad_norm": 1.8572880029678345, "learning_rate": 7.744653049398045e-06, "loss": 0.7974, "step": 2566 }, { "epoch": 1.7474472430224641, "grad_norm": 1.9446083307266235, "learning_rate": 7.742770904711672e-06, "loss": 0.7415, "step": 2567 }, { "epoch": 1.7481279782164738, "grad_norm": 1.612648844718933, "learning_rate": 7.740888203926666e-06, "loss": 0.8234, "step": 2568 }, { "epoch": 1.7488087134104833, "grad_norm": 1.684876799583435, "learning_rate": 7.739004947424748e-06, "loss": 0.7479, "step": 2569 }, { "epoch": 1.7494894486044927, "grad_norm": 1.8031882047653198, "learning_rate": 7.73712113558775e-06, "loss": 0.7288, "step": 2570 }, { "epoch": 1.7501701837985024, "grad_norm": 1.6747169494628906, "learning_rate": 7.735236768797617e-06, "loss": 0.8649, "step": 2571 }, { "epoch": 1.7508509189925119, "grad_norm": 1.632043480873108, "learning_rate": 7.733351847436403e-06, "loss": 0.7579, "step": 2572 }, { "epoch": 1.7515316541865213, "grad_norm": 1.7511889934539795, "learning_rate": 7.731466371886277e-06, "loss": 0.8367, "step": 2573 }, { "epoch": 1.752212389380531, "grad_norm": 1.770559310913086, "learning_rate": 7.729580342529522e-06, "loss": 0.6748, "step": 2574 }, { "epoch": 1.7528931245745405, "grad_norm": 2.082787275314331, "learning_rate": 7.727693759748531e-06, "loss": 0.6628, "step": 2575 }, { "epoch": 1.75357385976855, "grad_norm": 1.7039954662322998, "learning_rate": 7.725806623925812e-06, "loss": 0.7017, "step": 2576 }, { "epoch": 1.7542545949625596, "grad_norm": 1.617893934249878, "learning_rate": 7.723918935443981e-06, "loss": 0.8931, "step": 2577 }, { "epoch": 1.754935330156569, "grad_norm": 1.8945093154907227, "learning_rate": 7.722030694685768e-06, "loss": 0.7569, "step": 2578 }, { "epoch": 1.7556160653505786, "grad_norm": 1.9452893733978271, "learning_rate": 7.720141902034013e-06, "loss": 0.8388, "step": 2579 }, { "epoch": 1.7562968005445883, "grad_norm": 1.6394075155258179, "learning_rate": 7.718252557871677e-06, "loss": 0.925, "step": 2580 }, { "epoch": 1.7569775357385977, "grad_norm": 1.670300841331482, "learning_rate": 7.716362662581818e-06, "loss": 0.765, "step": 2581 }, { "epoch": 1.7576582709326072, "grad_norm": 1.9268478155136108, "learning_rate": 7.714472216547617e-06, "loss": 0.7459, "step": 2582 }, { "epoch": 1.7583390061266169, "grad_norm": 1.6571829319000244, "learning_rate": 7.712581220152363e-06, "loss": 0.769, "step": 2583 }, { "epoch": 1.7590197413206263, "grad_norm": 1.8751558065414429, "learning_rate": 7.710689673779456e-06, "loss": 0.8235, "step": 2584 }, { "epoch": 1.7597004765146358, "grad_norm": 1.9165197610855103, "learning_rate": 7.708797577812412e-06, "loss": 0.7809, "step": 2585 }, { "epoch": 1.7603812117086455, "grad_norm": 1.8205937147140503, "learning_rate": 7.706904932634849e-06, "loss": 0.7644, "step": 2586 }, { "epoch": 1.7610619469026547, "grad_norm": 1.801159143447876, "learning_rate": 7.705011738630506e-06, "loss": 0.7443, "step": 2587 }, { "epoch": 1.7617426820966644, "grad_norm": 1.7106508016586304, "learning_rate": 7.703117996183227e-06, "loss": 0.8304, "step": 2588 }, { "epoch": 1.762423417290674, "grad_norm": 1.9311275482177734, "learning_rate": 7.701223705676972e-06, "loss": 0.8196, "step": 2589 }, { "epoch": 1.7631041524846833, "grad_norm": 1.8265399932861328, "learning_rate": 7.699328867495807e-06, "loss": 0.8496, "step": 2590 }, { "epoch": 1.763784887678693, "grad_norm": 1.7533042430877686, "learning_rate": 7.697433482023915e-06, "loss": 0.7407, "step": 2591 }, { "epoch": 1.7644656228727025, "grad_norm": 1.55569326877594, "learning_rate": 7.695537549645582e-06, "loss": 0.9305, "step": 2592 }, { "epoch": 1.765146358066712, "grad_norm": 1.6220964193344116, "learning_rate": 7.693641070745214e-06, "loss": 0.8277, "step": 2593 }, { "epoch": 1.7658270932607216, "grad_norm": 1.7504369020462036, "learning_rate": 7.69174404570732e-06, "loss": 0.7915, "step": 2594 }, { "epoch": 1.766507828454731, "grad_norm": 1.79012930393219, "learning_rate": 7.689846474916528e-06, "loss": 0.7172, "step": 2595 }, { "epoch": 1.7671885636487406, "grad_norm": 1.942908763885498, "learning_rate": 7.687948358757567e-06, "loss": 0.767, "step": 2596 }, { "epoch": 1.7678692988427502, "grad_norm": 1.7737069129943848, "learning_rate": 7.686049697615281e-06, "loss": 0.7252, "step": 2597 }, { "epoch": 1.7685500340367597, "grad_norm": 1.8163846731185913, "learning_rate": 7.68415049187463e-06, "loss": 0.7469, "step": 2598 }, { "epoch": 1.7692307692307692, "grad_norm": 1.738236904144287, "learning_rate": 7.682250741920674e-06, "loss": 0.8804, "step": 2599 }, { "epoch": 1.7699115044247788, "grad_norm": 1.6400165557861328, "learning_rate": 7.68035044813859e-06, "loss": 0.7848, "step": 2600 }, { "epoch": 1.7705922396187883, "grad_norm": 2.0280697345733643, "learning_rate": 7.678449610913663e-06, "loss": 0.7111, "step": 2601 }, { "epoch": 1.7712729748127978, "grad_norm": 1.6703040599822998, "learning_rate": 7.676548230631293e-06, "loss": 0.7401, "step": 2602 }, { "epoch": 1.7719537100068075, "grad_norm": 1.877907156944275, "learning_rate": 7.674646307676981e-06, "loss": 0.766, "step": 2603 }, { "epoch": 1.772634445200817, "grad_norm": 1.8106203079223633, "learning_rate": 7.672743842436346e-06, "loss": 0.8308, "step": 2604 }, { "epoch": 1.7733151803948264, "grad_norm": 1.7409579753875732, "learning_rate": 7.670840835295116e-06, "loss": 0.758, "step": 2605 }, { "epoch": 1.773995915588836, "grad_norm": 1.6317248344421387, "learning_rate": 7.668937286639122e-06, "loss": 0.8778, "step": 2606 }, { "epoch": 1.7746766507828453, "grad_norm": 1.64655339717865, "learning_rate": 7.667033196854314e-06, "loss": 0.7708, "step": 2607 }, { "epoch": 1.775357385976855, "grad_norm": 1.8784682750701904, "learning_rate": 7.665128566326746e-06, "loss": 0.7527, "step": 2608 }, { "epoch": 1.7760381211708647, "grad_norm": 1.6727584600448608, "learning_rate": 7.66322339544258e-06, "loss": 0.9484, "step": 2609 }, { "epoch": 1.776718856364874, "grad_norm": 1.7360280752182007, "learning_rate": 7.661317684588097e-06, "loss": 0.7714, "step": 2610 }, { "epoch": 1.7773995915588836, "grad_norm": 1.551582932472229, "learning_rate": 7.659411434149675e-06, "loss": 0.8593, "step": 2611 }, { "epoch": 1.778080326752893, "grad_norm": 1.699325680732727, "learning_rate": 7.657504644513814e-06, "loss": 0.8574, "step": 2612 }, { "epoch": 1.7787610619469025, "grad_norm": 1.750726580619812, "learning_rate": 7.655597316067109e-06, "loss": 0.8241, "step": 2613 }, { "epoch": 1.7794417971409122, "grad_norm": 1.8275166749954224, "learning_rate": 7.653689449196277e-06, "loss": 0.7292, "step": 2614 }, { "epoch": 1.7801225323349217, "grad_norm": 1.6992918252944946, "learning_rate": 7.65178104428814e-06, "loss": 0.8313, "step": 2615 }, { "epoch": 1.7808032675289311, "grad_norm": 1.792649507522583, "learning_rate": 7.649872101729627e-06, "loss": 0.767, "step": 2616 }, { "epoch": 1.7814840027229408, "grad_norm": 1.7851312160491943, "learning_rate": 7.647962621907776e-06, "loss": 0.773, "step": 2617 }, { "epoch": 1.7821647379169503, "grad_norm": 1.9604570865631104, "learning_rate": 7.646052605209738e-06, "loss": 0.7186, "step": 2618 }, { "epoch": 1.7828454731109598, "grad_norm": 1.8540939092636108, "learning_rate": 7.64414205202277e-06, "loss": 0.6611, "step": 2619 }, { "epoch": 1.7835262083049694, "grad_norm": 1.7843067646026611, "learning_rate": 7.642230962734234e-06, "loss": 0.7078, "step": 2620 }, { "epoch": 1.784206943498979, "grad_norm": 1.911370038986206, "learning_rate": 7.640319337731608e-06, "loss": 0.8487, "step": 2621 }, { "epoch": 1.7848876786929884, "grad_norm": 1.836601734161377, "learning_rate": 7.638407177402474e-06, "loss": 0.8081, "step": 2622 }, { "epoch": 1.785568413886998, "grad_norm": 1.7752511501312256, "learning_rate": 7.636494482134527e-06, "loss": 0.8743, "step": 2623 }, { "epoch": 1.7862491490810075, "grad_norm": 1.737402081489563, "learning_rate": 7.634581252315563e-06, "loss": 0.8231, "step": 2624 }, { "epoch": 1.786929884275017, "grad_norm": 1.6785993576049805, "learning_rate": 7.632667488333492e-06, "loss": 0.8513, "step": 2625 }, { "epoch": 1.7876106194690267, "grad_norm": 1.6740846633911133, "learning_rate": 7.630753190576331e-06, "loss": 0.8614, "step": 2626 }, { "epoch": 1.7882913546630361, "grad_norm": 1.6210122108459473, "learning_rate": 7.628838359432205e-06, "loss": 0.8774, "step": 2627 }, { "epoch": 1.7889720898570456, "grad_norm": 1.6806203126907349, "learning_rate": 7.626922995289348e-06, "loss": 0.7716, "step": 2628 }, { "epoch": 1.7896528250510553, "grad_norm": 1.648146152496338, "learning_rate": 7.625007098536102e-06, "loss": 0.8319, "step": 2629 }, { "epoch": 1.7903335602450645, "grad_norm": 1.870981216430664, "learning_rate": 7.623090669560913e-06, "loss": 0.795, "step": 2630 }, { "epoch": 1.7910142954390742, "grad_norm": 1.7867432832717896, "learning_rate": 7.621173708752343e-06, "loss": 0.7472, "step": 2631 }, { "epoch": 1.7916950306330839, "grad_norm": 1.9273545742034912, "learning_rate": 7.619256216499054e-06, "loss": 0.7399, "step": 2632 }, { "epoch": 1.7923757658270931, "grad_norm": 1.9313890933990479, "learning_rate": 7.617338193189818e-06, "loss": 0.7907, "step": 2633 }, { "epoch": 1.7930565010211028, "grad_norm": 1.683150291442871, "learning_rate": 7.615419639213519e-06, "loss": 0.8366, "step": 2634 }, { "epoch": 1.7937372362151123, "grad_norm": 1.6491832733154297, "learning_rate": 7.613500554959141e-06, "loss": 0.8199, "step": 2635 }, { "epoch": 1.7944179714091217, "grad_norm": 1.635238528251648, "learning_rate": 7.611580940815783e-06, "loss": 0.8332, "step": 2636 }, { "epoch": 1.7950987066031314, "grad_norm": 1.8454440832138062, "learning_rate": 7.6096607971726475e-06, "loss": 0.8318, "step": 2637 }, { "epoch": 1.7957794417971409, "grad_norm": 1.715005874633789, "learning_rate": 7.6077401244190444e-06, "loss": 0.7494, "step": 2638 }, { "epoch": 1.7964601769911503, "grad_norm": 1.9000036716461182, "learning_rate": 7.605818922944388e-06, "loss": 0.8191, "step": 2639 }, { "epoch": 1.79714091218516, "grad_norm": 1.7934578657150269, "learning_rate": 7.6038971931382074e-06, "loss": 0.707, "step": 2640 }, { "epoch": 1.7978216473791695, "grad_norm": 1.7043282985687256, "learning_rate": 7.601974935390136e-06, "loss": 0.8036, "step": 2641 }, { "epoch": 1.798502382573179, "grad_norm": 2.0630741119384766, "learning_rate": 7.60005215008991e-06, "loss": 0.6564, "step": 2642 }, { "epoch": 1.7991831177671886, "grad_norm": 1.980199933052063, "learning_rate": 7.598128837627372e-06, "loss": 0.8104, "step": 2643 }, { "epoch": 1.799863852961198, "grad_norm": 1.7245131731033325, "learning_rate": 7.596204998392482e-06, "loss": 0.8893, "step": 2644 }, { "epoch": 1.8005445881552076, "grad_norm": 1.7827709913253784, "learning_rate": 7.594280632775296e-06, "loss": 0.7003, "step": 2645 }, { "epoch": 1.8012253233492173, "grad_norm": 1.7674510478973389, "learning_rate": 7.59235574116598e-06, "loss": 0.786, "step": 2646 }, { "epoch": 1.8019060585432267, "grad_norm": 1.471795678138733, "learning_rate": 7.590430323954807e-06, "loss": 0.934, "step": 2647 }, { "epoch": 1.8025867937372362, "grad_norm": 1.788412094116211, "learning_rate": 7.588504381532158e-06, "loss": 0.6651, "step": 2648 }, { "epoch": 1.8032675289312459, "grad_norm": 1.8392119407653809, "learning_rate": 7.58657791428852e-06, "loss": 0.7793, "step": 2649 }, { "epoch": 1.8039482641252553, "grad_norm": 1.9857620000839233, "learning_rate": 7.584650922614483e-06, "loss": 0.7207, "step": 2650 }, { "epoch": 1.8046289993192648, "grad_norm": 1.668925166130066, "learning_rate": 7.582723406900747e-06, "loss": 0.8014, "step": 2651 }, { "epoch": 1.8053097345132745, "grad_norm": 1.617610216140747, "learning_rate": 7.580795367538116e-06, "loss": 0.7876, "step": 2652 }, { "epoch": 1.8059904697072837, "grad_norm": 1.8677465915679932, "learning_rate": 7.5788668049175025e-06, "loss": 0.912, "step": 2653 }, { "epoch": 1.8066712049012934, "grad_norm": 1.740816593170166, "learning_rate": 7.576937719429925e-06, "loss": 0.7257, "step": 2654 }, { "epoch": 1.807351940095303, "grad_norm": 1.6560370922088623, "learning_rate": 7.575008111466504e-06, "loss": 0.8133, "step": 2655 }, { "epoch": 1.8080326752893123, "grad_norm": 1.7565138339996338, "learning_rate": 7.573077981418471e-06, "loss": 0.8379, "step": 2656 }, { "epoch": 1.808713410483322, "grad_norm": 1.6817413568496704, "learning_rate": 7.571147329677161e-06, "loss": 0.7534, "step": 2657 }, { "epoch": 1.8093941456773315, "grad_norm": 1.723950743675232, "learning_rate": 7.5692161566340135e-06, "loss": 0.7184, "step": 2658 }, { "epoch": 1.810074880871341, "grad_norm": 1.821458339691162, "learning_rate": 7.567284462680575e-06, "loss": 0.7832, "step": 2659 }, { "epoch": 1.8107556160653506, "grad_norm": 1.9784542322158813, "learning_rate": 7.5653522482085006e-06, "loss": 0.7261, "step": 2660 }, { "epoch": 1.81143635125936, "grad_norm": 1.747141718864441, "learning_rate": 7.5634195136095455e-06, "loss": 0.8365, "step": 2661 }, { "epoch": 1.8121170864533696, "grad_norm": 1.6025664806365967, "learning_rate": 7.561486259275574e-06, "loss": 0.8298, "step": 2662 }, { "epoch": 1.8127978216473792, "grad_norm": 1.6368603706359863, "learning_rate": 7.559552485598553e-06, "loss": 0.8738, "step": 2663 }, { "epoch": 1.8134785568413887, "grad_norm": 1.8294923305511475, "learning_rate": 7.557618192970559e-06, "loss": 0.7626, "step": 2664 }, { "epoch": 1.8141592920353982, "grad_norm": 1.949337363243103, "learning_rate": 7.555683381783771e-06, "loss": 0.663, "step": 2665 }, { "epoch": 1.8148400272294078, "grad_norm": 1.725303053855896, "learning_rate": 7.553748052430469e-06, "loss": 0.8396, "step": 2666 }, { "epoch": 1.8155207624234173, "grad_norm": 1.67246675491333, "learning_rate": 7.5518122053030485e-06, "loss": 0.7248, "step": 2667 }, { "epoch": 1.8162014976174268, "grad_norm": 1.9089330434799194, "learning_rate": 7.549875840794e-06, "loss": 0.806, "step": 2668 }, { "epoch": 1.8168822328114365, "grad_norm": 1.9743975400924683, "learning_rate": 7.547938959295922e-06, "loss": 0.7605, "step": 2669 }, { "epoch": 1.817562968005446, "grad_norm": 1.7165164947509766, "learning_rate": 7.546001561201521e-06, "loss": 0.7238, "step": 2670 }, { "epoch": 1.8182437031994554, "grad_norm": 1.756553053855896, "learning_rate": 7.5440636469036045e-06, "loss": 0.8381, "step": 2671 }, { "epoch": 1.818924438393465, "grad_norm": 1.885688304901123, "learning_rate": 7.542125216795086e-06, "loss": 0.7531, "step": 2672 }, { "epoch": 1.8196051735874743, "grad_norm": 1.938738465309143, "learning_rate": 7.540186271268983e-06, "loss": 0.704, "step": 2673 }, { "epoch": 1.820285908781484, "grad_norm": 1.8137843608856201, "learning_rate": 7.5382468107184214e-06, "loss": 0.8056, "step": 2674 }, { "epoch": 1.8209666439754937, "grad_norm": 1.7880568504333496, "learning_rate": 7.536306835536622e-06, "loss": 0.9028, "step": 2675 }, { "epoch": 1.821647379169503, "grad_norm": 1.726976752281189, "learning_rate": 7.534366346116922e-06, "loss": 0.7967, "step": 2676 }, { "epoch": 1.8223281143635126, "grad_norm": 1.760287880897522, "learning_rate": 7.532425342852753e-06, "loss": 0.8725, "step": 2677 }, { "epoch": 1.823008849557522, "grad_norm": 1.961280345916748, "learning_rate": 7.530483826137655e-06, "loss": 0.7714, "step": 2678 }, { "epoch": 1.8236895847515315, "grad_norm": 1.7734508514404297, "learning_rate": 7.528541796365274e-06, "loss": 0.8019, "step": 2679 }, { "epoch": 1.8243703199455412, "grad_norm": 1.7850489616394043, "learning_rate": 7.526599253929357e-06, "loss": 0.7329, "step": 2680 }, { "epoch": 1.8250510551395507, "grad_norm": 1.514609932899475, "learning_rate": 7.524656199223757e-06, "loss": 0.85, "step": 2681 }, { "epoch": 1.8257317903335601, "grad_norm": 1.8483469486236572, "learning_rate": 7.522712632642424e-06, "loss": 0.7284, "step": 2682 }, { "epoch": 1.8264125255275698, "grad_norm": 1.7801095247268677, "learning_rate": 7.520768554579425e-06, "loss": 0.8232, "step": 2683 }, { "epoch": 1.8270932607215793, "grad_norm": 2.0548830032348633, "learning_rate": 7.518823965428918e-06, "loss": 0.7617, "step": 2684 }, { "epoch": 1.8277739959155888, "grad_norm": 1.8757493495941162, "learning_rate": 7.516878865585171e-06, "loss": 0.7357, "step": 2685 }, { "epoch": 1.8284547311095984, "grad_norm": 1.8435817956924438, "learning_rate": 7.514933255442556e-06, "loss": 0.9173, "step": 2686 }, { "epoch": 1.829135466303608, "grad_norm": 1.6977425813674927, "learning_rate": 7.512987135395544e-06, "loss": 0.6938, "step": 2687 }, { "epoch": 1.8298162014976174, "grad_norm": 1.852387547492981, "learning_rate": 7.5110405058387155e-06, "loss": 0.7921, "step": 2688 }, { "epoch": 1.830496936691627, "grad_norm": 1.8470711708068848, "learning_rate": 7.509093367166748e-06, "loss": 0.7844, "step": 2689 }, { "epoch": 1.8311776718856365, "grad_norm": 1.7987585067749023, "learning_rate": 7.507145719774426e-06, "loss": 0.7881, "step": 2690 }, { "epoch": 1.831858407079646, "grad_norm": 1.9987812042236328, "learning_rate": 7.505197564056637e-06, "loss": 0.7921, "step": 2691 }, { "epoch": 1.8325391422736557, "grad_norm": 1.6804507970809937, "learning_rate": 7.5032489004083685e-06, "loss": 0.7764, "step": 2692 }, { "epoch": 1.8332198774676651, "grad_norm": 1.686163067817688, "learning_rate": 7.501299729224718e-06, "loss": 0.8185, "step": 2693 }, { "epoch": 1.8339006126616746, "grad_norm": 1.721206545829773, "learning_rate": 7.499350050900877e-06, "loss": 0.8634, "step": 2694 }, { "epoch": 1.8345813478556843, "grad_norm": 1.856860637664795, "learning_rate": 7.497399865832145e-06, "loss": 0.8115, "step": 2695 }, { "epoch": 1.8352620830496935, "grad_norm": 1.6931121349334717, "learning_rate": 7.495449174413925e-06, "loss": 0.7586, "step": 2696 }, { "epoch": 1.8359428182437032, "grad_norm": 1.6570583581924438, "learning_rate": 7.493497977041718e-06, "loss": 0.8753, "step": 2697 }, { "epoch": 1.8366235534377129, "grad_norm": 1.6390507221221924, "learning_rate": 7.491546274111136e-06, "loss": 0.8374, "step": 2698 }, { "epoch": 1.8373042886317221, "grad_norm": 1.571345329284668, "learning_rate": 7.48959406601788e-06, "loss": 0.8961, "step": 2699 }, { "epoch": 1.8379850238257318, "grad_norm": 1.72049081325531, "learning_rate": 7.48764135315777e-06, "loss": 0.7845, "step": 2700 }, { "epoch": 1.8386657590197413, "grad_norm": 1.8488277196884155, "learning_rate": 7.485688135926713e-06, "loss": 0.7811, "step": 2701 }, { "epoch": 1.8393464942137507, "grad_norm": 1.6583435535430908, "learning_rate": 7.483734414720729e-06, "loss": 0.9086, "step": 2702 }, { "epoch": 1.8400272294077604, "grad_norm": 1.8840577602386475, "learning_rate": 7.481780189935937e-06, "loss": 0.8544, "step": 2703 }, { "epoch": 1.8407079646017699, "grad_norm": 1.747352123260498, "learning_rate": 7.4798254619685516e-06, "loss": 0.8206, "step": 2704 }, { "epoch": 1.8413886997957793, "grad_norm": 1.8124855756759644, "learning_rate": 7.477870231214903e-06, "loss": 0.8014, "step": 2705 }, { "epoch": 1.842069434989789, "grad_norm": 1.8260139226913452, "learning_rate": 7.47591449807141e-06, "loss": 0.7312, "step": 2706 }, { "epoch": 1.8427501701837985, "grad_norm": 1.7266888618469238, "learning_rate": 7.4739582629346e-06, "loss": 0.7349, "step": 2707 }, { "epoch": 1.843430905377808, "grad_norm": 1.9423335790634155, "learning_rate": 7.472001526201102e-06, "loss": 0.7388, "step": 2708 }, { "epoch": 1.8441116405718176, "grad_norm": 1.6491508483886719, "learning_rate": 7.470044288267645e-06, "loss": 0.84, "step": 2709 }, { "epoch": 1.844792375765827, "grad_norm": 1.7127912044525146, "learning_rate": 7.468086549531062e-06, "loss": 0.8842, "step": 2710 }, { "epoch": 1.8454731109598366, "grad_norm": 1.8193634748458862, "learning_rate": 7.466128310388283e-06, "loss": 0.8092, "step": 2711 }, { "epoch": 1.8461538461538463, "grad_norm": 1.7773395776748657, "learning_rate": 7.464169571236344e-06, "loss": 0.7824, "step": 2712 }, { "epoch": 1.8468345813478557, "grad_norm": 1.7466846704483032, "learning_rate": 7.462210332472379e-06, "loss": 0.8006, "step": 2713 }, { "epoch": 1.8475153165418652, "grad_norm": 1.8853434324264526, "learning_rate": 7.460250594493628e-06, "loss": 0.7431, "step": 2714 }, { "epoch": 1.8481960517358749, "grad_norm": 1.8028336763381958, "learning_rate": 7.4582903576974265e-06, "loss": 0.7194, "step": 2715 }, { "epoch": 1.8488767869298843, "grad_norm": 1.7340656518936157, "learning_rate": 7.456329622481214e-06, "loss": 0.8526, "step": 2716 }, { "epoch": 1.8495575221238938, "grad_norm": 1.6792300939559937, "learning_rate": 7.454368389242533e-06, "loss": 0.7826, "step": 2717 }, { "epoch": 1.8502382573179035, "grad_norm": 1.7159817218780518, "learning_rate": 7.452406658379024e-06, "loss": 0.8547, "step": 2718 }, { "epoch": 1.8509189925119127, "grad_norm": 1.9313410520553589, "learning_rate": 7.450444430288427e-06, "loss": 0.7595, "step": 2719 }, { "epoch": 1.8515997277059224, "grad_norm": 1.6709007024765015, "learning_rate": 7.448481705368588e-06, "loss": 0.7731, "step": 2720 }, { "epoch": 1.852280462899932, "grad_norm": 1.8137847185134888, "learning_rate": 7.446518484017448e-06, "loss": 0.7482, "step": 2721 }, { "epoch": 1.8529611980939413, "grad_norm": 1.9283092021942139, "learning_rate": 7.444554766633054e-06, "loss": 0.7898, "step": 2722 }, { "epoch": 1.853641933287951, "grad_norm": 1.8999979496002197, "learning_rate": 7.4425905536135505e-06, "loss": 0.7714, "step": 2723 }, { "epoch": 1.8543226684819605, "grad_norm": 1.7638972997665405, "learning_rate": 7.440625845357182e-06, "loss": 0.7746, "step": 2724 }, { "epoch": 1.85500340367597, "grad_norm": 1.8476344347000122, "learning_rate": 7.4386606422622945e-06, "loss": 0.7485, "step": 2725 }, { "epoch": 1.8556841388699796, "grad_norm": 1.675310730934143, "learning_rate": 7.436694944727335e-06, "loss": 0.8074, "step": 2726 }, { "epoch": 1.856364874063989, "grad_norm": 1.895275592803955, "learning_rate": 7.43472875315085e-06, "loss": 0.7723, "step": 2727 }, { "epoch": 1.8570456092579986, "grad_norm": 1.705544352531433, "learning_rate": 7.432762067931485e-06, "loss": 0.8482, "step": 2728 }, { "epoch": 1.8577263444520082, "grad_norm": 1.7122825384140015, "learning_rate": 7.430794889467988e-06, "loss": 0.7555, "step": 2729 }, { "epoch": 1.8584070796460177, "grad_norm": 1.6485517024993896, "learning_rate": 7.428827218159205e-06, "loss": 0.8738, "step": 2730 }, { "epoch": 1.8590878148400272, "grad_norm": 1.6372240781784058, "learning_rate": 7.426859054404084e-06, "loss": 0.7488, "step": 2731 }, { "epoch": 1.8597685500340368, "grad_norm": 1.8058421611785889, "learning_rate": 7.4248903986016695e-06, "loss": 0.8621, "step": 2732 }, { "epoch": 1.8604492852280463, "grad_norm": 1.747322678565979, "learning_rate": 7.422921251151109e-06, "loss": 0.8377, "step": 2733 }, { "epoch": 1.8611300204220558, "grad_norm": 1.754956603050232, "learning_rate": 7.420951612451648e-06, "loss": 0.7802, "step": 2734 }, { "epoch": 1.8618107556160655, "grad_norm": 1.7666006088256836, "learning_rate": 7.418981482902632e-06, "loss": 0.8327, "step": 2735 }, { "epoch": 1.862491490810075, "grad_norm": 1.8718327283859253, "learning_rate": 7.4170108629035064e-06, "loss": 0.7657, "step": 2736 }, { "epoch": 1.8631722260040844, "grad_norm": 1.860566258430481, "learning_rate": 7.415039752853816e-06, "loss": 0.801, "step": 2737 }, { "epoch": 1.863852961198094, "grad_norm": 1.74509596824646, "learning_rate": 7.413068153153205e-06, "loss": 0.79, "step": 2738 }, { "epoch": 1.8645336963921033, "grad_norm": 1.9037972688674927, "learning_rate": 7.411096064201414e-06, "loss": 0.8548, "step": 2739 }, { "epoch": 1.865214431586113, "grad_norm": 1.727889060974121, "learning_rate": 7.409123486398288e-06, "loss": 0.7598, "step": 2740 }, { "epoch": 1.8658951667801227, "grad_norm": 1.6185383796691895, "learning_rate": 7.40715042014377e-06, "loss": 0.9296, "step": 2741 }, { "epoch": 1.866575901974132, "grad_norm": 1.747281789779663, "learning_rate": 7.405176865837894e-06, "loss": 0.7984, "step": 2742 }, { "epoch": 1.8672566371681416, "grad_norm": 1.747428059577942, "learning_rate": 7.403202823880807e-06, "loss": 0.8059, "step": 2743 }, { "epoch": 1.867937372362151, "grad_norm": 1.8502094745635986, "learning_rate": 7.401228294672743e-06, "loss": 0.8208, "step": 2744 }, { "epoch": 1.8686181075561605, "grad_norm": 1.8325895071029663, "learning_rate": 7.39925327861404e-06, "loss": 0.8045, "step": 2745 }, { "epoch": 1.8692988427501702, "grad_norm": 1.8713085651397705, "learning_rate": 7.397277776105134e-06, "loss": 0.8669, "step": 2746 }, { "epoch": 1.8699795779441797, "grad_norm": 1.7304210662841797, "learning_rate": 7.395301787546558e-06, "loss": 0.706, "step": 2747 }, { "epoch": 1.8706603131381891, "grad_norm": 1.859573245048523, "learning_rate": 7.393325313338949e-06, "loss": 0.8962, "step": 2748 }, { "epoch": 1.8713410483321988, "grad_norm": 1.6986818313598633, "learning_rate": 7.391348353883035e-06, "loss": 0.7586, "step": 2749 }, { "epoch": 1.8720217835262083, "grad_norm": 1.856252908706665, "learning_rate": 7.389370909579646e-06, "loss": 0.836, "step": 2750 }, { "epoch": 1.8727025187202178, "grad_norm": 1.695708990097046, "learning_rate": 7.3873929808297096e-06, "loss": 0.8225, "step": 2751 }, { "epoch": 1.8733832539142274, "grad_norm": 1.8040062189102173, "learning_rate": 7.385414568034256e-06, "loss": 0.7682, "step": 2752 }, { "epoch": 1.874063989108237, "grad_norm": 1.8552956581115723, "learning_rate": 7.383435671594405e-06, "loss": 0.8281, "step": 2753 }, { "epoch": 1.8747447243022464, "grad_norm": 1.7873077392578125, "learning_rate": 7.381456291911383e-06, "loss": 0.7842, "step": 2754 }, { "epoch": 1.875425459496256, "grad_norm": 1.6373225450515747, "learning_rate": 7.379476429386507e-06, "loss": 0.8472, "step": 2755 }, { "epoch": 1.8761061946902655, "grad_norm": 1.7842363119125366, "learning_rate": 7.377496084421199e-06, "loss": 0.7753, "step": 2756 }, { "epoch": 1.876786929884275, "grad_norm": 1.715883493423462, "learning_rate": 7.375515257416973e-06, "loss": 0.8822, "step": 2757 }, { "epoch": 1.8774676650782847, "grad_norm": 1.803513765335083, "learning_rate": 7.373533948775444e-06, "loss": 0.8386, "step": 2758 }, { "epoch": 1.8781484002722941, "grad_norm": 1.8479281663894653, "learning_rate": 7.371552158898322e-06, "loss": 0.823, "step": 2759 }, { "epoch": 1.8788291354663036, "grad_norm": 1.9742456674575806, "learning_rate": 7.369569888187418e-06, "loss": 0.7729, "step": 2760 }, { "epoch": 1.8795098706603133, "grad_norm": 1.9160962104797363, "learning_rate": 7.3675871370446384e-06, "loss": 0.6656, "step": 2761 }, { "epoch": 1.8801906058543225, "grad_norm": 1.7846499681472778, "learning_rate": 7.365603905871988e-06, "loss": 0.7391, "step": 2762 }, { "epoch": 1.8808713410483322, "grad_norm": 1.954660415649414, "learning_rate": 7.363620195071566e-06, "loss": 0.783, "step": 2763 }, { "epoch": 1.8815520762423419, "grad_norm": 1.9691507816314697, "learning_rate": 7.361636005045571e-06, "loss": 0.8188, "step": 2764 }, { "epoch": 1.8822328114363511, "grad_norm": 1.9296690225601196, "learning_rate": 7.359651336196302e-06, "loss": 0.8953, "step": 2765 }, { "epoch": 1.8829135466303608, "grad_norm": 1.919702172279358, "learning_rate": 7.3576661889261505e-06, "loss": 0.7338, "step": 2766 }, { "epoch": 1.8835942818243703, "grad_norm": 1.9929457902908325, "learning_rate": 7.355680563637606e-06, "loss": 0.7382, "step": 2767 }, { "epoch": 1.8842750170183797, "grad_norm": 1.8413846492767334, "learning_rate": 7.353694460733254e-06, "loss": 0.7973, "step": 2768 }, { "epoch": 1.8849557522123894, "grad_norm": 1.8622519969940186, "learning_rate": 7.351707880615781e-06, "loss": 0.7071, "step": 2769 }, { "epoch": 1.8856364874063989, "grad_norm": 1.7084943056106567, "learning_rate": 7.349720823687964e-06, "loss": 0.9057, "step": 2770 }, { "epoch": 1.8863172226004083, "grad_norm": 1.710242748260498, "learning_rate": 7.347733290352685e-06, "loss": 0.7404, "step": 2771 }, { "epoch": 1.886997957794418, "grad_norm": 1.8136059045791626, "learning_rate": 7.345745281012915e-06, "loss": 0.8992, "step": 2772 }, { "epoch": 1.8876786929884275, "grad_norm": 1.9745222330093384, "learning_rate": 7.343756796071722e-06, "loss": 0.8774, "step": 2773 }, { "epoch": 1.888359428182437, "grad_norm": 1.92790949344635, "learning_rate": 7.341767835932277e-06, "loss": 0.7452, "step": 2774 }, { "epoch": 1.8890401633764466, "grad_norm": 1.4426816701889038, "learning_rate": 7.339778400997838e-06, "loss": 1.0199, "step": 2775 }, { "epoch": 1.889720898570456, "grad_norm": 1.8585561513900757, "learning_rate": 7.337788491671769e-06, "loss": 0.817, "step": 2776 }, { "epoch": 1.8904016337644656, "grad_norm": 1.6898162364959717, "learning_rate": 7.335798108357522e-06, "loss": 0.803, "step": 2777 }, { "epoch": 1.8910823689584753, "grad_norm": 1.8130083084106445, "learning_rate": 7.3338072514586515e-06, "loss": 0.8173, "step": 2778 }, { "epoch": 1.8917631041524847, "grad_norm": 2.0462896823883057, "learning_rate": 7.3318159213788045e-06, "loss": 0.7567, "step": 2779 }, { "epoch": 1.8924438393464942, "grad_norm": 2.0159687995910645, "learning_rate": 7.329824118521721e-06, "loss": 0.7337, "step": 2780 }, { "epoch": 1.8931245745405039, "grad_norm": 1.8293230533599854, "learning_rate": 7.327831843291245e-06, "loss": 0.7568, "step": 2781 }, { "epoch": 1.893805309734513, "grad_norm": 1.669201135635376, "learning_rate": 7.325839096091307e-06, "loss": 0.7937, "step": 2782 }, { "epoch": 1.8944860449285228, "grad_norm": 1.8503495454788208, "learning_rate": 7.323845877325944e-06, "loss": 0.7872, "step": 2783 }, { "epoch": 1.8951667801225325, "grad_norm": 1.7838006019592285, "learning_rate": 7.3218521873992765e-06, "loss": 0.7218, "step": 2784 }, { "epoch": 1.8958475153165417, "grad_norm": 1.8031785488128662, "learning_rate": 7.3198580267155285e-06, "loss": 0.7695, "step": 2785 }, { "epoch": 1.8965282505105514, "grad_norm": 1.7962548732757568, "learning_rate": 7.317863395679019e-06, "loss": 0.6961, "step": 2786 }, { "epoch": 1.897208985704561, "grad_norm": 1.5992131233215332, "learning_rate": 7.31586829469416e-06, "loss": 0.7508, "step": 2787 }, { "epoch": 1.8978897208985703, "grad_norm": 1.85475754737854, "learning_rate": 7.313872724165459e-06, "loss": 0.753, "step": 2788 }, { "epoch": 1.89857045609258, "grad_norm": 1.809073567390442, "learning_rate": 7.311876684497517e-06, "loss": 0.7239, "step": 2789 }, { "epoch": 1.8992511912865895, "grad_norm": 1.7686972618103027, "learning_rate": 7.309880176095036e-06, "loss": 0.7817, "step": 2790 }, { "epoch": 1.899931926480599, "grad_norm": 1.652297019958496, "learning_rate": 7.307883199362808e-06, "loss": 0.8243, "step": 2791 }, { "epoch": 1.9006126616746086, "grad_norm": 2.1377692222595215, "learning_rate": 7.305885754705721e-06, "loss": 0.778, "step": 2792 }, { "epoch": 1.901293396868618, "grad_norm": 1.5918478965759277, "learning_rate": 7.3038878425287605e-06, "loss": 0.8579, "step": 2793 }, { "epoch": 1.9019741320626276, "grad_norm": 1.690873384475708, "learning_rate": 7.301889463237e-06, "loss": 0.8142, "step": 2794 }, { "epoch": 1.9026548672566372, "grad_norm": 1.5838255882263184, "learning_rate": 7.299890617235616e-06, "loss": 0.9023, "step": 2795 }, { "epoch": 1.9033356024506467, "grad_norm": 1.7517509460449219, "learning_rate": 7.297891304929876e-06, "loss": 0.8183, "step": 2796 }, { "epoch": 1.9040163376446562, "grad_norm": 1.8436532020568848, "learning_rate": 7.29589152672514e-06, "loss": 0.803, "step": 2797 }, { "epoch": 1.9046970728386659, "grad_norm": 1.7648372650146484, "learning_rate": 7.293891283026865e-06, "loss": 0.7532, "step": 2798 }, { "epoch": 1.9053778080326753, "grad_norm": 1.612411618232727, "learning_rate": 7.291890574240601e-06, "loss": 0.8925, "step": 2799 }, { "epoch": 1.9060585432266848, "grad_norm": 1.592145562171936, "learning_rate": 7.289889400771996e-06, "loss": 0.863, "step": 2800 }, { "epoch": 1.9067392784206945, "grad_norm": 1.7865883111953735, "learning_rate": 7.287887763026786e-06, "loss": 0.8077, "step": 2801 }, { "epoch": 1.907420013614704, "grad_norm": 1.6361021995544434, "learning_rate": 7.285885661410805e-06, "loss": 0.8073, "step": 2802 }, { "epoch": 1.9081007488087134, "grad_norm": 1.704775094985962, "learning_rate": 7.28388309632998e-06, "loss": 0.7776, "step": 2803 }, { "epoch": 1.908781484002723, "grad_norm": 1.6047133207321167, "learning_rate": 7.281880068190335e-06, "loss": 1.0095, "step": 2804 }, { "epoch": 1.9094622191967323, "grad_norm": 1.6995229721069336, "learning_rate": 7.279876577397984e-06, "loss": 0.8387, "step": 2805 }, { "epoch": 1.910142954390742, "grad_norm": 1.5961852073669434, "learning_rate": 7.277872624359134e-06, "loss": 0.8117, "step": 2806 }, { "epoch": 1.9108236895847517, "grad_norm": 1.7874257564544678, "learning_rate": 7.275868209480092e-06, "loss": 0.7376, "step": 2807 }, { "epoch": 1.911504424778761, "grad_norm": 1.552878737449646, "learning_rate": 7.273863333167251e-06, "loss": 0.8492, "step": 2808 }, { "epoch": 1.9121851599727706, "grad_norm": 1.747031807899475, "learning_rate": 7.2718579958271005e-06, "loss": 0.7306, "step": 2809 }, { "epoch": 1.91286589516678, "grad_norm": 1.8146899938583374, "learning_rate": 7.269852197866227e-06, "loss": 0.7001, "step": 2810 }, { "epoch": 1.9135466303607895, "grad_norm": 1.8955130577087402, "learning_rate": 7.267845939691305e-06, "loss": 0.8074, "step": 2811 }, { "epoch": 1.9142273655547992, "grad_norm": 1.8120982646942139, "learning_rate": 7.265839221709105e-06, "loss": 0.8103, "step": 2812 }, { "epoch": 1.9149081007488087, "grad_norm": 1.6937648057937622, "learning_rate": 7.2638320443264935e-06, "loss": 0.8288, "step": 2813 }, { "epoch": 1.9155888359428181, "grad_norm": 1.80818510055542, "learning_rate": 7.261824407950423e-06, "loss": 0.7941, "step": 2814 }, { "epoch": 1.9162695711368278, "grad_norm": 1.7125511169433594, "learning_rate": 7.259816312987945e-06, "loss": 0.8333, "step": 2815 }, { "epoch": 1.9169503063308373, "grad_norm": 1.725570797920227, "learning_rate": 7.2578077598462e-06, "loss": 0.8248, "step": 2816 }, { "epoch": 1.9176310415248468, "grad_norm": 1.6958179473876953, "learning_rate": 7.255798748932428e-06, "loss": 0.8791, "step": 2817 }, { "epoch": 1.9183117767188564, "grad_norm": 1.6644607782363892, "learning_rate": 7.253789280653953e-06, "loss": 0.7337, "step": 2818 }, { "epoch": 1.918992511912866, "grad_norm": 1.9427552223205566, "learning_rate": 7.251779355418198e-06, "loss": 0.8624, "step": 2819 }, { "epoch": 1.9196732471068754, "grad_norm": 1.6316802501678467, "learning_rate": 7.249768973632675e-06, "loss": 0.8132, "step": 2820 }, { "epoch": 1.920353982300885, "grad_norm": 1.7183152437210083, "learning_rate": 7.247758135704993e-06, "loss": 0.771, "step": 2821 }, { "epoch": 1.9210347174948945, "grad_norm": 1.6396516561508179, "learning_rate": 7.245746842042849e-06, "loss": 0.8253, "step": 2822 }, { "epoch": 1.921715452688904, "grad_norm": 1.6029472351074219, "learning_rate": 7.243735093054033e-06, "loss": 0.7749, "step": 2823 }, { "epoch": 1.9223961878829137, "grad_norm": 1.7850459814071655, "learning_rate": 7.241722889146431e-06, "loss": 0.7995, "step": 2824 }, { "epoch": 1.9230769230769231, "grad_norm": 2.0484113693237305, "learning_rate": 7.239710230728017e-06, "loss": 0.8577, "step": 2825 }, { "epoch": 1.9237576582709326, "grad_norm": 1.8495620489120483, "learning_rate": 7.23769711820686e-06, "loss": 0.8043, "step": 2826 }, { "epoch": 1.9244383934649423, "grad_norm": 1.569958209991455, "learning_rate": 7.235683551991119e-06, "loss": 0.8131, "step": 2827 }, { "epoch": 1.9251191286589515, "grad_norm": 1.7429513931274414, "learning_rate": 7.233669532489046e-06, "loss": 0.7783, "step": 2828 }, { "epoch": 1.9257998638529612, "grad_norm": 1.592317819595337, "learning_rate": 7.231655060108984e-06, "loss": 0.9191, "step": 2829 }, { "epoch": 1.9264805990469709, "grad_norm": 1.9768010377883911, "learning_rate": 7.229640135259371e-06, "loss": 0.6771, "step": 2830 }, { "epoch": 1.9271613342409801, "grad_norm": 1.8046330213546753, "learning_rate": 7.227624758348732e-06, "loss": 0.7695, "step": 2831 }, { "epoch": 1.9278420694349898, "grad_norm": 1.9820218086242676, "learning_rate": 7.225608929785688e-06, "loss": 0.7775, "step": 2832 }, { "epoch": 1.9285228046289993, "grad_norm": 1.8687870502471924, "learning_rate": 7.223592649978948e-06, "loss": 0.8197, "step": 2833 }, { "epoch": 1.9292035398230087, "grad_norm": 1.80500066280365, "learning_rate": 7.221575919337314e-06, "loss": 0.6843, "step": 2834 }, { "epoch": 1.9298842750170184, "grad_norm": 1.7136720418930054, "learning_rate": 7.21955873826968e-06, "loss": 0.7956, "step": 2835 }, { "epoch": 1.9305650102110279, "grad_norm": 1.7580629587173462, "learning_rate": 7.217541107185032e-06, "loss": 0.8873, "step": 2836 }, { "epoch": 1.9312457454050374, "grad_norm": 1.88291335105896, "learning_rate": 7.215523026492444e-06, "loss": 0.82, "step": 2837 }, { "epoch": 1.931926480599047, "grad_norm": 1.9307456016540527, "learning_rate": 7.213504496601085e-06, "loss": 0.7022, "step": 2838 }, { "epoch": 1.9326072157930565, "grad_norm": 1.7825682163238525, "learning_rate": 7.211485517920212e-06, "loss": 0.8945, "step": 2839 }, { "epoch": 1.933287950987066, "grad_norm": 1.8261511325836182, "learning_rate": 7.209466090859173e-06, "loss": 0.8053, "step": 2840 }, { "epoch": 1.9339686861810756, "grad_norm": 1.6752681732177734, "learning_rate": 7.207446215827412e-06, "loss": 0.8908, "step": 2841 }, { "epoch": 1.934649421375085, "grad_norm": 2.018066167831421, "learning_rate": 7.2054258932344565e-06, "loss": 0.6992, "step": 2842 }, { "epoch": 1.9353301565690946, "grad_norm": 1.8708107471466064, "learning_rate": 7.20340512348993e-06, "loss": 0.7686, "step": 2843 }, { "epoch": 1.9360108917631043, "grad_norm": 1.7577095031738281, "learning_rate": 7.201383907003545e-06, "loss": 0.8267, "step": 2844 }, { "epoch": 1.9366916269571137, "grad_norm": 1.8133455514907837, "learning_rate": 7.199362244185104e-06, "loss": 0.6962, "step": 2845 }, { "epoch": 1.9373723621511232, "grad_norm": 1.8508495092391968, "learning_rate": 7.1973401354445e-06, "loss": 0.6774, "step": 2846 }, { "epoch": 1.9380530973451329, "grad_norm": 1.847523808479309, "learning_rate": 7.195317581191719e-06, "loss": 0.7356, "step": 2847 }, { "epoch": 1.938733832539142, "grad_norm": 1.7117900848388672, "learning_rate": 7.193294581836833e-06, "loss": 0.7997, "step": 2848 }, { "epoch": 1.9394145677331518, "grad_norm": 1.875726580619812, "learning_rate": 7.1912711377900055e-06, "loss": 0.7864, "step": 2849 }, { "epoch": 1.9400953029271615, "grad_norm": 1.8782844543457031, "learning_rate": 7.189247249461494e-06, "loss": 0.7673, "step": 2850 }, { "epoch": 1.9407760381211707, "grad_norm": 1.8555155992507935, "learning_rate": 7.187222917261641e-06, "loss": 0.7718, "step": 2851 }, { "epoch": 1.9414567733151804, "grad_norm": 1.8225739002227783, "learning_rate": 7.1851981416008845e-06, "loss": 0.7534, "step": 2852 }, { "epoch": 1.94213750850919, "grad_norm": 1.742647647857666, "learning_rate": 7.183172922889745e-06, "loss": 0.6981, "step": 2853 }, { "epoch": 1.9428182437031993, "grad_norm": 1.7990604639053345, "learning_rate": 7.181147261538837e-06, "loss": 0.8164, "step": 2854 }, { "epoch": 1.943498978897209, "grad_norm": 1.833019495010376, "learning_rate": 7.179121157958869e-06, "loss": 0.7157, "step": 2855 }, { "epoch": 1.9441797140912185, "grad_norm": 1.8602229356765747, "learning_rate": 7.177094612560631e-06, "loss": 0.9031, "step": 2856 }, { "epoch": 1.944860449285228, "grad_norm": 1.8686991930007935, "learning_rate": 7.17506762575501e-06, "loss": 0.7054, "step": 2857 }, { "epoch": 1.9455411844792376, "grad_norm": 1.7978456020355225, "learning_rate": 7.173040197952974e-06, "loss": 0.7515, "step": 2858 }, { "epoch": 1.946221919673247, "grad_norm": 1.7229881286621094, "learning_rate": 7.171012329565589e-06, "loss": 0.7871, "step": 2859 }, { "epoch": 1.9469026548672566, "grad_norm": 1.599086880683899, "learning_rate": 7.1689840210040055e-06, "loss": 0.7213, "step": 2860 }, { "epoch": 1.9475833900612662, "grad_norm": 1.8016854524612427, "learning_rate": 7.166955272679464e-06, "loss": 0.8433, "step": 2861 }, { "epoch": 1.9482641252552757, "grad_norm": 1.948005199432373, "learning_rate": 7.164926085003296e-06, "loss": 0.942, "step": 2862 }, { "epoch": 1.9489448604492852, "grad_norm": 1.7938951253890991, "learning_rate": 7.162896458386919e-06, "loss": 0.7585, "step": 2863 }, { "epoch": 1.9496255956432949, "grad_norm": 1.761570930480957, "learning_rate": 7.160866393241843e-06, "loss": 0.8171, "step": 2864 }, { "epoch": 1.9503063308373043, "grad_norm": 1.6675010919570923, "learning_rate": 7.158835889979663e-06, "loss": 0.815, "step": 2865 }, { "epoch": 1.9509870660313138, "grad_norm": 1.6432188749313354, "learning_rate": 7.156804949012066e-06, "loss": 0.8263, "step": 2866 }, { "epoch": 1.9516678012253235, "grad_norm": 1.87940514087677, "learning_rate": 7.154773570750826e-06, "loss": 0.7028, "step": 2867 }, { "epoch": 1.952348536419333, "grad_norm": 1.945691466331482, "learning_rate": 7.152741755607807e-06, "loss": 0.9057, "step": 2868 }, { "epoch": 1.9530292716133424, "grad_norm": 1.695189118385315, "learning_rate": 7.1507095039949624e-06, "loss": 0.7712, "step": 2869 }, { "epoch": 1.953710006807352, "grad_norm": 1.6128841638565063, "learning_rate": 7.148676816324329e-06, "loss": 0.7758, "step": 2870 }, { "epoch": 1.9543907420013613, "grad_norm": 1.5792992115020752, "learning_rate": 7.146643693008039e-06, "loss": 0.8312, "step": 2871 }, { "epoch": 1.955071477195371, "grad_norm": 1.724109172821045, "learning_rate": 7.1446101344583065e-06, "loss": 0.8243, "step": 2872 }, { "epoch": 1.9557522123893807, "grad_norm": 1.67349112033844, "learning_rate": 7.142576141087438e-06, "loss": 0.7188, "step": 2873 }, { "epoch": 1.95643294758339, "grad_norm": 1.886015772819519, "learning_rate": 7.140541713307829e-06, "loss": 0.8279, "step": 2874 }, { "epoch": 1.9571136827773996, "grad_norm": 2.0207290649414062, "learning_rate": 7.138506851531959e-06, "loss": 0.8737, "step": 2875 }, { "epoch": 1.957794417971409, "grad_norm": 1.6592105627059937, "learning_rate": 7.1364715561723965e-06, "loss": 0.8314, "step": 2876 }, { "epoch": 1.9584751531654185, "grad_norm": 1.7382521629333496, "learning_rate": 7.134435827641801e-06, "loss": 0.8378, "step": 2877 }, { "epoch": 1.9591558883594282, "grad_norm": 1.7959740161895752, "learning_rate": 7.132399666352918e-06, "loss": 0.8246, "step": 2878 }, { "epoch": 1.9598366235534377, "grad_norm": 1.7587445974349976, "learning_rate": 7.13036307271858e-06, "loss": 0.7931, "step": 2879 }, { "epoch": 1.9605173587474471, "grad_norm": 1.8933953046798706, "learning_rate": 7.1283260471517054e-06, "loss": 0.7259, "step": 2880 }, { "epoch": 1.9611980939414568, "grad_norm": 1.766183853149414, "learning_rate": 7.126288590065309e-06, "loss": 0.7618, "step": 2881 }, { "epoch": 1.9618788291354663, "grad_norm": 1.7410143613815308, "learning_rate": 7.124250701872479e-06, "loss": 0.8537, "step": 2882 }, { "epoch": 1.9625595643294758, "grad_norm": 1.7109252214431763, "learning_rate": 7.1222123829864025e-06, "loss": 0.7121, "step": 2883 }, { "epoch": 1.9632402995234854, "grad_norm": 1.6483322381973267, "learning_rate": 7.12017363382035e-06, "loss": 0.8112, "step": 2884 }, { "epoch": 1.963921034717495, "grad_norm": 1.6033583879470825, "learning_rate": 7.118134454787677e-06, "loss": 0.8786, "step": 2885 }, { "epoch": 1.9646017699115044, "grad_norm": 1.7404989004135132, "learning_rate": 7.1160948463018305e-06, "loss": 0.7362, "step": 2886 }, { "epoch": 1.965282505105514, "grad_norm": 1.8341553211212158, "learning_rate": 7.114054808776343e-06, "loss": 0.744, "step": 2887 }, { "epoch": 1.9659632402995235, "grad_norm": 1.708665370941162, "learning_rate": 7.112014342624834e-06, "loss": 0.8618, "step": 2888 }, { "epoch": 1.966643975493533, "grad_norm": 1.6553136110305786, "learning_rate": 7.109973448261004e-06, "loss": 0.8406, "step": 2889 }, { "epoch": 1.9673247106875427, "grad_norm": 1.7124706506729126, "learning_rate": 7.107932126098651e-06, "loss": 0.7634, "step": 2890 }, { "epoch": 1.9680054458815521, "grad_norm": 1.6861218214035034, "learning_rate": 7.105890376551654e-06, "loss": 0.8371, "step": 2891 }, { "epoch": 1.9686861810755616, "grad_norm": 1.7665544748306274, "learning_rate": 7.103848200033977e-06, "loss": 0.7644, "step": 2892 }, { "epoch": 1.9693669162695713, "grad_norm": 1.8271983861923218, "learning_rate": 7.101805596959675e-06, "loss": 0.6023, "step": 2893 }, { "epoch": 1.9700476514635805, "grad_norm": 1.7189196348190308, "learning_rate": 7.099762567742884e-06, "loss": 0.8387, "step": 2894 }, { "epoch": 1.9707283866575902, "grad_norm": 1.7732257843017578, "learning_rate": 7.097719112797833e-06, "loss": 0.7803, "step": 2895 }, { "epoch": 1.9714091218515999, "grad_norm": 1.8122026920318604, "learning_rate": 7.095675232538831e-06, "loss": 0.8621, "step": 2896 }, { "epoch": 1.9720898570456091, "grad_norm": 1.9244285821914673, "learning_rate": 7.093630927380277e-06, "loss": 0.8121, "step": 2897 }, { "epoch": 1.9727705922396188, "grad_norm": 1.8652448654174805, "learning_rate": 7.091586197736658e-06, "loss": 0.8213, "step": 2898 }, { "epoch": 1.9734513274336283, "grad_norm": 1.5706276893615723, "learning_rate": 7.0895410440225386e-06, "loss": 0.8182, "step": 2899 }, { "epoch": 1.9741320626276377, "grad_norm": 1.751887321472168, "learning_rate": 7.087495466652581e-06, "loss": 0.8246, "step": 2900 }, { "epoch": 1.9748127978216474, "grad_norm": 1.5715776681900024, "learning_rate": 7.085449466041523e-06, "loss": 0.7787, "step": 2901 }, { "epoch": 1.9754935330156569, "grad_norm": 1.8459107875823975, "learning_rate": 7.083403042604196e-06, "loss": 0.816, "step": 2902 }, { "epoch": 1.9761742682096664, "grad_norm": 1.7805607318878174, "learning_rate": 7.08135619675551e-06, "loss": 0.8482, "step": 2903 }, { "epoch": 1.976855003403676, "grad_norm": 1.6235923767089844, "learning_rate": 7.079308928910467e-06, "loss": 0.8446, "step": 2904 }, { "epoch": 1.9775357385976855, "grad_norm": 1.6189749240875244, "learning_rate": 7.077261239484152e-06, "loss": 0.7254, "step": 2905 }, { "epoch": 1.978216473791695, "grad_norm": 1.9885027408599854, "learning_rate": 7.075213128891733e-06, "loss": 0.7741, "step": 2906 }, { "epoch": 1.9788972089857046, "grad_norm": 1.7213140726089478, "learning_rate": 7.07316459754847e-06, "loss": 0.9133, "step": 2907 }, { "epoch": 1.979577944179714, "grad_norm": 1.7205501794815063, "learning_rate": 7.0711156458697e-06, "loss": 0.7751, "step": 2908 }, { "epoch": 1.9802586793737236, "grad_norm": 1.9712382555007935, "learning_rate": 7.0690662742708515e-06, "loss": 0.7286, "step": 2909 }, { "epoch": 1.9809394145677333, "grad_norm": 1.9404445886611938, "learning_rate": 7.067016483167435e-06, "loss": 0.801, "step": 2910 }, { "epoch": 1.9816201497617427, "grad_norm": 1.7093870639801025, "learning_rate": 7.0649662729750465e-06, "loss": 0.836, "step": 2911 }, { "epoch": 1.9823008849557522, "grad_norm": 1.8558094501495361, "learning_rate": 7.062915644109372e-06, "loss": 0.7798, "step": 2912 }, { "epoch": 1.9829816201497619, "grad_norm": 1.9491991996765137, "learning_rate": 7.06086459698617e-06, "loss": 0.7092, "step": 2913 }, { "epoch": 1.983662355343771, "grad_norm": 1.7241017818450928, "learning_rate": 7.0588131320212985e-06, "loss": 0.7672, "step": 2914 }, { "epoch": 1.9843430905377808, "grad_norm": 1.8397436141967773, "learning_rate": 7.056761249630688e-06, "loss": 0.74, "step": 2915 }, { "epoch": 1.9850238257317905, "grad_norm": 1.9019598960876465, "learning_rate": 7.054708950230364e-06, "loss": 0.7877, "step": 2916 }, { "epoch": 1.9857045609257997, "grad_norm": 1.6258995532989502, "learning_rate": 7.05265623423643e-06, "loss": 0.9277, "step": 2917 }, { "epoch": 1.9863852961198094, "grad_norm": 1.7701892852783203, "learning_rate": 7.050603102065074e-06, "loss": 0.7921, "step": 2918 }, { "epoch": 1.9870660313138189, "grad_norm": 1.6391781568527222, "learning_rate": 7.04854955413257e-06, "loss": 0.811, "step": 2919 }, { "epoch": 1.9877467665078283, "grad_norm": 1.8145726919174194, "learning_rate": 7.046495590855278e-06, "loss": 0.75, "step": 2920 }, { "epoch": 1.988427501701838, "grad_norm": 1.8985766172409058, "learning_rate": 7.0444412126496385e-06, "loss": 0.8217, "step": 2921 }, { "epoch": 1.9891082368958475, "grad_norm": 1.70517897605896, "learning_rate": 7.0423864199321814e-06, "loss": 0.7099, "step": 2922 }, { "epoch": 1.989788972089857, "grad_norm": 2.0369985103607178, "learning_rate": 7.040331213119511e-06, "loss": 0.6383, "step": 2923 }, { "epoch": 1.9904697072838666, "grad_norm": 1.80495023727417, "learning_rate": 7.0382755926283295e-06, "loss": 0.7705, "step": 2924 }, { "epoch": 1.991150442477876, "grad_norm": 1.9145821332931519, "learning_rate": 7.03621955887541e-06, "loss": 0.7876, "step": 2925 }, { "epoch": 1.9918311776718856, "grad_norm": 1.7223951816558838, "learning_rate": 7.034163112277618e-06, "loss": 0.7788, "step": 2926 }, { "epoch": 1.9925119128658952, "grad_norm": 1.7618240118026733, "learning_rate": 7.0321062532518954e-06, "loss": 0.867, "step": 2927 }, { "epoch": 1.9931926480599047, "grad_norm": 1.8372892141342163, "learning_rate": 7.030048982215275e-06, "loss": 0.7595, "step": 2928 }, { "epoch": 1.9938733832539142, "grad_norm": 1.8256860971450806, "learning_rate": 7.027991299584869e-06, "loss": 0.7511, "step": 2929 }, { "epoch": 1.9945541184479239, "grad_norm": 1.8315863609313965, "learning_rate": 7.025933205777873e-06, "loss": 0.7822, "step": 2930 }, { "epoch": 1.9952348536419333, "grad_norm": 1.867229700088501, "learning_rate": 7.02387470121157e-06, "loss": 0.761, "step": 2931 }, { "epoch": 1.9959155888359428, "grad_norm": 1.7393608093261719, "learning_rate": 7.021815786303318e-06, "loss": 0.8158, "step": 2932 }, { "epoch": 1.9965963240299525, "grad_norm": 1.7693288326263428, "learning_rate": 7.019756461470568e-06, "loss": 0.7446, "step": 2933 }, { "epoch": 1.997277059223962, "grad_norm": 1.759114384651184, "learning_rate": 7.017696727130847e-06, "loss": 0.7908, "step": 2934 }, { "epoch": 1.9979577944179714, "grad_norm": 2.0460214614868164, "learning_rate": 7.015636583701769e-06, "loss": 0.6993, "step": 2935 }, { "epoch": 1.998638529611981, "grad_norm": 2.1293678283691406, "learning_rate": 7.013576031601029e-06, "loss": 0.7767, "step": 2936 }, { "epoch": 1.9993192648059903, "grad_norm": 1.9124666452407837, "learning_rate": 7.011515071246404e-06, "loss": 0.759, "step": 2937 }, { "epoch": 2.0, "grad_norm": 1.7940168380737305, "learning_rate": 7.009453703055758e-06, "loss": 0.6425, "step": 2938 }, { "epoch": 2.0006807351940097, "grad_norm": 1.819746971130371, "learning_rate": 7.007391927447032e-06, "loss": 0.5209, "step": 2939 }, { "epoch": 2.001361470388019, "grad_norm": 1.7137569189071655, "learning_rate": 7.005329744838253e-06, "loss": 0.583, "step": 2940 }, { "epoch": 2.0020422055820286, "grad_norm": 1.5689502954483032, "learning_rate": 7.00326715564753e-06, "loss": 0.7384, "step": 2941 }, { "epoch": 2.0027229407760383, "grad_norm": 1.7118428945541382, "learning_rate": 7.0012041602930554e-06, "loss": 0.4904, "step": 2942 }, { "epoch": 2.0034036759700475, "grad_norm": 1.867104411125183, "learning_rate": 6.999140759193103e-06, "loss": 0.5654, "step": 2943 }, { "epoch": 2.0040844111640572, "grad_norm": 1.9212673902511597, "learning_rate": 6.997076952766025e-06, "loss": 0.6803, "step": 2944 }, { "epoch": 2.004765146358067, "grad_norm": 2.2173402309417725, "learning_rate": 6.995012741430263e-06, "loss": 0.59, "step": 2945 }, { "epoch": 2.005445881552076, "grad_norm": 2.050142765045166, "learning_rate": 6.992948125604339e-06, "loss": 0.5607, "step": 2946 }, { "epoch": 2.006126616746086, "grad_norm": 1.8232518434524536, "learning_rate": 6.99088310570685e-06, "loss": 0.6506, "step": 2947 }, { "epoch": 2.0068073519400955, "grad_norm": 1.7151535749435425, "learning_rate": 6.9888176821564846e-06, "loss": 0.6264, "step": 2948 }, { "epoch": 2.0074880871341048, "grad_norm": 1.808930516242981, "learning_rate": 6.986751855372006e-06, "loss": 0.4923, "step": 2949 }, { "epoch": 2.0081688223281144, "grad_norm": 1.7146693468093872, "learning_rate": 6.984685625772264e-06, "loss": 0.6657, "step": 2950 }, { "epoch": 2.0088495575221237, "grad_norm": 1.6326191425323486, "learning_rate": 6.982618993776187e-06, "loss": 0.6384, "step": 2951 }, { "epoch": 2.0095302927161334, "grad_norm": 1.6393593549728394, "learning_rate": 6.980551959802786e-06, "loss": 0.5103, "step": 2952 }, { "epoch": 2.010211027910143, "grad_norm": 1.8084907531738281, "learning_rate": 6.978484524271154e-06, "loss": 0.641, "step": 2953 }, { "epoch": 2.0108917631041523, "grad_norm": 1.836901068687439, "learning_rate": 6.976416687600463e-06, "loss": 0.5601, "step": 2954 }, { "epoch": 2.011572498298162, "grad_norm": 1.6830275058746338, "learning_rate": 6.974348450209972e-06, "loss": 0.7111, "step": 2955 }, { "epoch": 2.0122532334921717, "grad_norm": 1.6503545045852661, "learning_rate": 6.972279812519015e-06, "loss": 0.6034, "step": 2956 }, { "epoch": 2.012933968686181, "grad_norm": 1.7501308917999268, "learning_rate": 6.970210774947011e-06, "loss": 0.5499, "step": 2957 }, { "epoch": 2.0136147038801906, "grad_norm": 1.7605208158493042, "learning_rate": 6.9681413379134545e-06, "loss": 0.6286, "step": 2958 }, { "epoch": 2.0142954390742003, "grad_norm": 1.8164918422698975, "learning_rate": 6.96607150183793e-06, "loss": 0.4615, "step": 2959 }, { "epoch": 2.0149761742682095, "grad_norm": 1.8404436111450195, "learning_rate": 6.964001267140098e-06, "loss": 0.5315, "step": 2960 }, { "epoch": 2.015656909462219, "grad_norm": 1.7482715845108032, "learning_rate": 6.961930634239698e-06, "loss": 0.5339, "step": 2961 }, { "epoch": 2.016337644656229, "grad_norm": 1.7646394968032837, "learning_rate": 6.9598596035565525e-06, "loss": 0.472, "step": 2962 }, { "epoch": 2.017018379850238, "grad_norm": 1.612497329711914, "learning_rate": 6.957788175510565e-06, "loss": 0.7178, "step": 2963 }, { "epoch": 2.017699115044248, "grad_norm": 1.6621955633163452, "learning_rate": 6.9557163505217185e-06, "loss": 0.5318, "step": 2964 }, { "epoch": 2.0183798502382575, "grad_norm": 1.7460108995437622, "learning_rate": 6.953644129010077e-06, "loss": 0.4931, "step": 2965 }, { "epoch": 2.0190605854322667, "grad_norm": 1.6870344877243042, "learning_rate": 6.951571511395785e-06, "loss": 0.6078, "step": 2966 }, { "epoch": 2.0197413206262764, "grad_norm": 1.6707649230957031, "learning_rate": 6.949498498099067e-06, "loss": 0.5693, "step": 2967 }, { "epoch": 2.020422055820286, "grad_norm": 1.6484426259994507, "learning_rate": 6.947425089540226e-06, "loss": 0.6078, "step": 2968 }, { "epoch": 2.0211027910142954, "grad_norm": 1.5417038202285767, "learning_rate": 6.945351286139652e-06, "loss": 0.5647, "step": 2969 }, { "epoch": 2.021783526208305, "grad_norm": 1.597579836845398, "learning_rate": 6.943277088317805e-06, "loss": 0.7349, "step": 2970 }, { "epoch": 2.0224642614023143, "grad_norm": 1.5908490419387817, "learning_rate": 6.941202496495231e-06, "loss": 0.6147, "step": 2971 }, { "epoch": 2.023144996596324, "grad_norm": 1.5973423719406128, "learning_rate": 6.939127511092556e-06, "loss": 0.6865, "step": 2972 }, { "epoch": 2.0238257317903336, "grad_norm": 1.6845381259918213, "learning_rate": 6.937052132530484e-06, "loss": 0.668, "step": 2973 }, { "epoch": 2.024506466984343, "grad_norm": 1.7220251560211182, "learning_rate": 6.9349763612298015e-06, "loss": 0.6179, "step": 2974 }, { "epoch": 2.0251872021783526, "grad_norm": 1.6276259422302246, "learning_rate": 6.932900197611369e-06, "loss": 0.6743, "step": 2975 }, { "epoch": 2.0258679373723623, "grad_norm": 1.751701831817627, "learning_rate": 6.930823642096133e-06, "loss": 0.6066, "step": 2976 }, { "epoch": 2.0265486725663715, "grad_norm": 1.5663812160491943, "learning_rate": 6.928746695105114e-06, "loss": 0.6307, "step": 2977 }, { "epoch": 2.027229407760381, "grad_norm": 1.6025577783584595, "learning_rate": 6.926669357059416e-06, "loss": 0.5835, "step": 2978 }, { "epoch": 2.027910142954391, "grad_norm": 1.8581326007843018, "learning_rate": 6.92459162838022e-06, "loss": 0.5148, "step": 2979 }, { "epoch": 2.0285908781484, "grad_norm": 1.523162841796875, "learning_rate": 6.922513509488787e-06, "loss": 0.7263, "step": 2980 }, { "epoch": 2.02927161334241, "grad_norm": 1.8100074529647827, "learning_rate": 6.920435000806459e-06, "loss": 0.5866, "step": 2981 }, { "epoch": 2.0299523485364195, "grad_norm": 1.6701836585998535, "learning_rate": 6.918356102754652e-06, "loss": 0.5195, "step": 2982 }, { "epoch": 2.0306330837304287, "grad_norm": 1.7131465673446655, "learning_rate": 6.916276815754865e-06, "loss": 0.5161, "step": 2983 }, { "epoch": 2.0313138189244384, "grad_norm": 1.8266576528549194, "learning_rate": 6.914197140228675e-06, "loss": 0.5152, "step": 2984 }, { "epoch": 2.031994554118448, "grad_norm": 1.596634864807129, "learning_rate": 6.912117076597738e-06, "loss": 0.5218, "step": 2985 }, { "epoch": 2.0326752893124573, "grad_norm": 1.6074268817901611, "learning_rate": 6.910036625283789e-06, "loss": 0.525, "step": 2986 }, { "epoch": 2.033356024506467, "grad_norm": 1.560288667678833, "learning_rate": 6.907955786708639e-06, "loss": 0.7838, "step": 2987 }, { "epoch": 2.0340367597004767, "grad_norm": 1.7191994190216064, "learning_rate": 6.905874561294179e-06, "loss": 0.5064, "step": 2988 }, { "epoch": 2.034717494894486, "grad_norm": 1.7606227397918701, "learning_rate": 6.903792949462381e-06, "loss": 0.6244, "step": 2989 }, { "epoch": 2.0353982300884956, "grad_norm": 1.6830495595932007, "learning_rate": 6.9017109516352924e-06, "loss": 0.597, "step": 2990 }, { "epoch": 2.0360789652825053, "grad_norm": 1.6226011514663696, "learning_rate": 6.899628568235039e-06, "loss": 0.6319, "step": 2991 }, { "epoch": 2.0367597004765146, "grad_norm": 1.6791309118270874, "learning_rate": 6.897545799683826e-06, "loss": 0.6339, "step": 2992 }, { "epoch": 2.0374404356705242, "grad_norm": 1.6677559614181519, "learning_rate": 6.895462646403936e-06, "loss": 0.4754, "step": 2993 }, { "epoch": 2.0381211708645335, "grad_norm": 1.699583888053894, "learning_rate": 6.893379108817731e-06, "loss": 0.652, "step": 2994 }, { "epoch": 2.038801906058543, "grad_norm": 1.5707311630249023, "learning_rate": 6.891295187347648e-06, "loss": 0.59, "step": 2995 }, { "epoch": 2.039482641252553, "grad_norm": 1.6056503057479858, "learning_rate": 6.889210882416202e-06, "loss": 0.5935, "step": 2996 }, { "epoch": 2.040163376446562, "grad_norm": 1.6257386207580566, "learning_rate": 6.8871261944459896e-06, "loss": 0.6627, "step": 2997 }, { "epoch": 2.040844111640572, "grad_norm": 1.6738842725753784, "learning_rate": 6.885041123859684e-06, "loss": 0.6363, "step": 2998 }, { "epoch": 2.0415248468345815, "grad_norm": 1.6245492696762085, "learning_rate": 6.882955671080031e-06, "loss": 0.6011, "step": 2999 }, { "epoch": 2.0422055820285907, "grad_norm": 1.634541392326355, "learning_rate": 6.88086983652986e-06, "loss": 0.7342, "step": 3000 }, { "epoch": 2.0428863172226004, "grad_norm": 1.6229352951049805, "learning_rate": 6.878783620632073e-06, "loss": 0.6257, "step": 3001 }, { "epoch": 2.04356705241661, "grad_norm": 1.5353341102600098, "learning_rate": 6.876697023809656e-06, "loss": 0.7532, "step": 3002 }, { "epoch": 2.0442477876106193, "grad_norm": 1.6381473541259766, "learning_rate": 6.874610046485664e-06, "loss": 0.498, "step": 3003 }, { "epoch": 2.044928522804629, "grad_norm": 1.7802536487579346, "learning_rate": 6.872522689083234e-06, "loss": 0.5479, "step": 3004 }, { "epoch": 2.0456092579986387, "grad_norm": 1.6660314798355103, "learning_rate": 6.87043495202558e-06, "loss": 0.5301, "step": 3005 }, { "epoch": 2.046289993192648, "grad_norm": 1.6844851970672607, "learning_rate": 6.868346835735991e-06, "loss": 0.6021, "step": 3006 }, { "epoch": 2.0469707283866576, "grad_norm": 1.8061378002166748, "learning_rate": 6.866258340637836e-06, "loss": 0.513, "step": 3007 }, { "epoch": 2.0476514635806673, "grad_norm": 1.5346113443374634, "learning_rate": 6.864169467154555e-06, "loss": 0.5915, "step": 3008 }, { "epoch": 2.0483321987746765, "grad_norm": 1.673775315284729, "learning_rate": 6.862080215709672e-06, "loss": 0.5426, "step": 3009 }, { "epoch": 2.0490129339686862, "grad_norm": 1.7866554260253906, "learning_rate": 6.859990586726782e-06, "loss": 0.5298, "step": 3010 }, { "epoch": 2.049693669162696, "grad_norm": 1.5657507181167603, "learning_rate": 6.85790058062956e-06, "loss": 0.5681, "step": 3011 }, { "epoch": 2.050374404356705, "grad_norm": 1.5415165424346924, "learning_rate": 6.855810197841757e-06, "loss": 0.5939, "step": 3012 }, { "epoch": 2.051055139550715, "grad_norm": 1.6973795890808105, "learning_rate": 6.8537194387871965e-06, "loss": 0.5114, "step": 3013 }, { "epoch": 2.0517358747447245, "grad_norm": 1.6654752492904663, "learning_rate": 6.851628303889782e-06, "loss": 0.5484, "step": 3014 }, { "epoch": 2.0524166099387338, "grad_norm": 1.6992075443267822, "learning_rate": 6.849536793573495e-06, "loss": 0.554, "step": 3015 }, { "epoch": 2.0530973451327434, "grad_norm": 1.6948543787002563, "learning_rate": 6.8474449082623885e-06, "loss": 0.6265, "step": 3016 }, { "epoch": 2.0537780803267527, "grad_norm": 1.5453487634658813, "learning_rate": 6.8453526483805955e-06, "loss": 0.8113, "step": 3017 }, { "epoch": 2.0544588155207624, "grad_norm": 1.7551565170288086, "learning_rate": 6.8432600143523185e-06, "loss": 0.5878, "step": 3018 }, { "epoch": 2.055139550714772, "grad_norm": 1.7359585762023926, "learning_rate": 6.841167006601848e-06, "loss": 0.5203, "step": 3019 }, { "epoch": 2.0558202859087813, "grad_norm": 1.627562165260315, "learning_rate": 6.839073625553535e-06, "loss": 0.5051, "step": 3020 }, { "epoch": 2.056501021102791, "grad_norm": 1.5308499336242676, "learning_rate": 6.836979871631819e-06, "loss": 0.7109, "step": 3021 }, { "epoch": 2.0571817562968007, "grad_norm": 1.736485481262207, "learning_rate": 6.834885745261209e-06, "loss": 0.5915, "step": 3022 }, { "epoch": 2.05786249149081, "grad_norm": 1.643240213394165, "learning_rate": 6.8327912468662875e-06, "loss": 0.6095, "step": 3023 }, { "epoch": 2.0585432266848196, "grad_norm": 1.7523272037506104, "learning_rate": 6.83069637687172e-06, "loss": 0.5669, "step": 3024 }, { "epoch": 2.0592239618788293, "grad_norm": 1.6498976945877075, "learning_rate": 6.82860113570224e-06, "loss": 0.6664, "step": 3025 }, { "epoch": 2.0599046970728385, "grad_norm": 1.6686065196990967, "learning_rate": 6.82650552378266e-06, "loss": 0.5532, "step": 3026 }, { "epoch": 2.060585432266848, "grad_norm": 1.6480698585510254, "learning_rate": 6.824409541537864e-06, "loss": 0.5051, "step": 3027 }, { "epoch": 2.061266167460858, "grad_norm": 1.560442328453064, "learning_rate": 6.822313189392818e-06, "loss": 0.5964, "step": 3028 }, { "epoch": 2.061946902654867, "grad_norm": 1.5727131366729736, "learning_rate": 6.820216467772558e-06, "loss": 0.739, "step": 3029 }, { "epoch": 2.062627637848877, "grad_norm": 1.6508278846740723, "learning_rate": 6.818119377102192e-06, "loss": 0.6773, "step": 3030 }, { "epoch": 2.0633083730428865, "grad_norm": 1.9078161716461182, "learning_rate": 6.816021917806911e-06, "loss": 0.5491, "step": 3031 }, { "epoch": 2.0639891082368957, "grad_norm": 1.8862922191619873, "learning_rate": 6.813924090311974e-06, "loss": 0.4671, "step": 3032 }, { "epoch": 2.0646698434309054, "grad_norm": 1.7006813287734985, "learning_rate": 6.811825895042719e-06, "loss": 0.5352, "step": 3033 }, { "epoch": 2.065350578624915, "grad_norm": 1.536283254623413, "learning_rate": 6.809727332424553e-06, "loss": 0.6274, "step": 3034 }, { "epoch": 2.0660313138189244, "grad_norm": 1.7896775007247925, "learning_rate": 6.807628402882963e-06, "loss": 0.5424, "step": 3035 }, { "epoch": 2.066712049012934, "grad_norm": 1.797182321548462, "learning_rate": 6.805529106843508e-06, "loss": 0.5978, "step": 3036 }, { "epoch": 2.0673927842069437, "grad_norm": 1.7506415843963623, "learning_rate": 6.8034294447318225e-06, "loss": 0.6894, "step": 3037 }, { "epoch": 2.068073519400953, "grad_norm": 1.5975204706192017, "learning_rate": 6.801329416973614e-06, "loss": 0.6141, "step": 3038 }, { "epoch": 2.0687542545949626, "grad_norm": 1.7071483135223389, "learning_rate": 6.799229023994665e-06, "loss": 0.5898, "step": 3039 }, { "epoch": 2.069434989788972, "grad_norm": 1.8884223699569702, "learning_rate": 6.7971282662208295e-06, "loss": 0.506, "step": 3040 }, { "epoch": 2.0701157249829816, "grad_norm": 1.7555350065231323, "learning_rate": 6.795027144078039e-06, "loss": 0.5741, "step": 3041 }, { "epoch": 2.0707964601769913, "grad_norm": 1.6440653800964355, "learning_rate": 6.792925657992297e-06, "loss": 0.6401, "step": 3042 }, { "epoch": 2.0714771953710005, "grad_norm": 1.7073456048965454, "learning_rate": 6.790823808389684e-06, "loss": 0.69, "step": 3043 }, { "epoch": 2.07215793056501, "grad_norm": 1.664690375328064, "learning_rate": 6.788721595696346e-06, "loss": 0.4782, "step": 3044 }, { "epoch": 2.07283866575902, "grad_norm": 1.6649388074874878, "learning_rate": 6.786619020338513e-06, "loss": 0.6524, "step": 3045 }, { "epoch": 2.073519400953029, "grad_norm": 1.691908836364746, "learning_rate": 6.784516082742481e-06, "loss": 0.5559, "step": 3046 }, { "epoch": 2.074200136147039, "grad_norm": 1.7367854118347168, "learning_rate": 6.782412783334622e-06, "loss": 0.6344, "step": 3047 }, { "epoch": 2.0748808713410485, "grad_norm": 1.6855542659759521, "learning_rate": 6.780309122541382e-06, "loss": 0.6196, "step": 3048 }, { "epoch": 2.0755616065350577, "grad_norm": 1.7150938510894775, "learning_rate": 6.778205100789279e-06, "loss": 0.6369, "step": 3049 }, { "epoch": 2.0762423417290674, "grad_norm": 1.7017879486083984, "learning_rate": 6.7761007185049075e-06, "loss": 0.5305, "step": 3050 }, { "epoch": 2.076923076923077, "grad_norm": 1.6577025651931763, "learning_rate": 6.7739959761149285e-06, "loss": 0.6015, "step": 3051 }, { "epoch": 2.0776038121170863, "grad_norm": 1.839878797531128, "learning_rate": 6.771890874046081e-06, "loss": 0.619, "step": 3052 }, { "epoch": 2.078284547311096, "grad_norm": 1.6683135032653809, "learning_rate": 6.769785412725178e-06, "loss": 0.4946, "step": 3053 }, { "epoch": 2.0789652825051057, "grad_norm": 1.8559664487838745, "learning_rate": 6.767679592579101e-06, "loss": 0.5734, "step": 3054 }, { "epoch": 2.079646017699115, "grad_norm": 1.6814104318618774, "learning_rate": 6.765573414034808e-06, "loss": 0.5391, "step": 3055 }, { "epoch": 2.0803267528931246, "grad_norm": 1.6612370014190674, "learning_rate": 6.763466877519327e-06, "loss": 0.6208, "step": 3056 }, { "epoch": 2.0810074880871343, "grad_norm": 1.749238133430481, "learning_rate": 6.76135998345976e-06, "loss": 0.4533, "step": 3057 }, { "epoch": 2.0816882232811436, "grad_norm": 1.7867285013198853, "learning_rate": 6.75925273228328e-06, "loss": 0.5792, "step": 3058 }, { "epoch": 2.0823689584751532, "grad_norm": 1.6970901489257812, "learning_rate": 6.757145124417135e-06, "loss": 0.6537, "step": 3059 }, { "epoch": 2.0830496936691625, "grad_norm": 1.5900508165359497, "learning_rate": 6.755037160288646e-06, "loss": 0.6537, "step": 3060 }, { "epoch": 2.083730428863172, "grad_norm": 1.65217125415802, "learning_rate": 6.752928840325198e-06, "loss": 0.6439, "step": 3061 }, { "epoch": 2.084411164057182, "grad_norm": 1.620593786239624, "learning_rate": 6.750820164954262e-06, "loss": 0.5929, "step": 3062 }, { "epoch": 2.085091899251191, "grad_norm": 1.796843409538269, "learning_rate": 6.748711134603366e-06, "loss": 0.5507, "step": 3063 }, { "epoch": 2.085772634445201, "grad_norm": 1.6398738622665405, "learning_rate": 6.746601749700124e-06, "loss": 0.5384, "step": 3064 }, { "epoch": 2.0864533696392105, "grad_norm": 1.7843806743621826, "learning_rate": 6.744492010672208e-06, "loss": 0.5412, "step": 3065 }, { "epoch": 2.0871341048332197, "grad_norm": 1.6245583295822144, "learning_rate": 6.742381917947374e-06, "loss": 0.6853, "step": 3066 }, { "epoch": 2.0878148400272294, "grad_norm": 1.6103706359863281, "learning_rate": 6.740271471953446e-06, "loss": 0.6875, "step": 3067 }, { "epoch": 2.088495575221239, "grad_norm": 1.728540301322937, "learning_rate": 6.738160673118313e-06, "loss": 0.6959, "step": 3068 }, { "epoch": 2.0891763104152483, "grad_norm": 1.7006137371063232, "learning_rate": 6.736049521869946e-06, "loss": 0.7169, "step": 3069 }, { "epoch": 2.089857045609258, "grad_norm": 1.8515459299087524, "learning_rate": 6.733938018636378e-06, "loss": 0.5654, "step": 3070 }, { "epoch": 2.0905377808032677, "grad_norm": 1.8067259788513184, "learning_rate": 6.7318261638457215e-06, "loss": 0.5989, "step": 3071 }, { "epoch": 2.091218515997277, "grad_norm": 1.6226645708084106, "learning_rate": 6.729713957926154e-06, "loss": 0.6226, "step": 3072 }, { "epoch": 2.0918992511912866, "grad_norm": 1.723095178604126, "learning_rate": 6.727601401305928e-06, "loss": 0.5569, "step": 3073 }, { "epoch": 2.0925799863852963, "grad_norm": 1.5520389080047607, "learning_rate": 6.725488494413363e-06, "loss": 0.519, "step": 3074 }, { "epoch": 2.0932607215793055, "grad_norm": 1.5749284029006958, "learning_rate": 6.723375237676857e-06, "loss": 0.6229, "step": 3075 }, { "epoch": 2.0939414567733152, "grad_norm": 1.8362972736358643, "learning_rate": 6.721261631524871e-06, "loss": 0.5385, "step": 3076 }, { "epoch": 2.094622191967325, "grad_norm": 1.653497338294983, "learning_rate": 6.719147676385941e-06, "loss": 0.5433, "step": 3077 }, { "epoch": 2.095302927161334, "grad_norm": 1.8336832523345947, "learning_rate": 6.717033372688671e-06, "loss": 0.5816, "step": 3078 }, { "epoch": 2.095983662355344, "grad_norm": 1.8390763998031616, "learning_rate": 6.714918720861741e-06, "loss": 0.5035, "step": 3079 }, { "epoch": 2.096664397549353, "grad_norm": 1.636027216911316, "learning_rate": 6.712803721333895e-06, "loss": 0.6127, "step": 3080 }, { "epoch": 2.0973451327433628, "grad_norm": 1.8930662870407104, "learning_rate": 6.710688374533952e-06, "loss": 0.5758, "step": 3081 }, { "epoch": 2.0980258679373724, "grad_norm": 1.7879307270050049, "learning_rate": 6.7085726808908004e-06, "loss": 0.4327, "step": 3082 }, { "epoch": 2.0987066031313817, "grad_norm": 1.8568922281265259, "learning_rate": 6.706456640833396e-06, "loss": 0.4341, "step": 3083 }, { "epoch": 2.0993873383253914, "grad_norm": 1.6641017198562622, "learning_rate": 6.70434025479077e-06, "loss": 0.5518, "step": 3084 }, { "epoch": 2.100068073519401, "grad_norm": 1.6977899074554443, "learning_rate": 6.70222352319202e-06, "loss": 0.5722, "step": 3085 }, { "epoch": 2.1007488087134103, "grad_norm": 1.7598390579223633, "learning_rate": 6.7001064464663165e-06, "loss": 0.6158, "step": 3086 }, { "epoch": 2.10142954390742, "grad_norm": 1.6737849712371826, "learning_rate": 6.697989025042893e-06, "loss": 0.621, "step": 3087 }, { "epoch": 2.1021102791014297, "grad_norm": 1.9180747270584106, "learning_rate": 6.695871259351065e-06, "loss": 0.4829, "step": 3088 }, { "epoch": 2.102791014295439, "grad_norm": 1.6509289741516113, "learning_rate": 6.693753149820207e-06, "loss": 0.6468, "step": 3089 }, { "epoch": 2.1034717494894486, "grad_norm": 1.7767964601516724, "learning_rate": 6.6916346968797675e-06, "loss": 0.5974, "step": 3090 }, { "epoch": 2.1041524846834583, "grad_norm": 1.6869996786117554, "learning_rate": 6.6895159009592645e-06, "loss": 0.6021, "step": 3091 }, { "epoch": 2.1048332198774675, "grad_norm": 1.6705690622329712, "learning_rate": 6.6873967624882845e-06, "loss": 0.4727, "step": 3092 }, { "epoch": 2.105513955071477, "grad_norm": 1.6921648979187012, "learning_rate": 6.685277281896486e-06, "loss": 0.6362, "step": 3093 }, { "epoch": 2.106194690265487, "grad_norm": 1.6491239070892334, "learning_rate": 6.683157459613594e-06, "loss": 0.6286, "step": 3094 }, { "epoch": 2.106875425459496, "grad_norm": 1.6521931886672974, "learning_rate": 6.681037296069405e-06, "loss": 0.5671, "step": 3095 }, { "epoch": 2.107556160653506, "grad_norm": 1.7405585050582886, "learning_rate": 6.6789167916937794e-06, "loss": 0.6129, "step": 3096 }, { "epoch": 2.1082368958475155, "grad_norm": 1.8214110136032104, "learning_rate": 6.676795946916656e-06, "loss": 0.4768, "step": 3097 }, { "epoch": 2.1089176310415247, "grad_norm": 1.6467983722686768, "learning_rate": 6.6746747621680365e-06, "loss": 0.5874, "step": 3098 }, { "epoch": 2.1095983662355344, "grad_norm": 1.7244584560394287, "learning_rate": 6.67255323787799e-06, "loss": 0.5407, "step": 3099 }, { "epoch": 2.110279101429544, "grad_norm": 1.741273283958435, "learning_rate": 6.67043137447666e-06, "loss": 0.5414, "step": 3100 }, { "epoch": 2.1109598366235534, "grad_norm": 1.4851932525634766, "learning_rate": 6.668309172394253e-06, "loss": 0.6953, "step": 3101 }, { "epoch": 2.111640571817563, "grad_norm": 1.5580824613571167, "learning_rate": 6.6661866320610495e-06, "loss": 0.5637, "step": 3102 }, { "epoch": 2.1123213070115723, "grad_norm": 1.543746829032898, "learning_rate": 6.664063753907394e-06, "loss": 0.7246, "step": 3103 }, { "epoch": 2.113002042205582, "grad_norm": 1.748598337173462, "learning_rate": 6.6619405383637015e-06, "loss": 0.4728, "step": 3104 }, { "epoch": 2.1136827773995917, "grad_norm": 1.7352399826049805, "learning_rate": 6.659816985860457e-06, "loss": 0.557, "step": 3105 }, { "epoch": 2.114363512593601, "grad_norm": 1.7695428133010864, "learning_rate": 6.6576930968282095e-06, "loss": 0.6448, "step": 3106 }, { "epoch": 2.1150442477876106, "grad_norm": 1.9290945529937744, "learning_rate": 6.655568871697582e-06, "loss": 0.4767, "step": 3107 }, { "epoch": 2.1157249829816203, "grad_norm": 1.683479905128479, "learning_rate": 6.653444310899261e-06, "loss": 0.634, "step": 3108 }, { "epoch": 2.1164057181756295, "grad_norm": 1.7410858869552612, "learning_rate": 6.651319414864003e-06, "loss": 0.4561, "step": 3109 }, { "epoch": 2.117086453369639, "grad_norm": 1.6214455366134644, "learning_rate": 6.64919418402263e-06, "loss": 0.6085, "step": 3110 }, { "epoch": 2.117767188563649, "grad_norm": 1.5842441320419312, "learning_rate": 6.647068618806037e-06, "loss": 0.6725, "step": 3111 }, { "epoch": 2.118447923757658, "grad_norm": 1.5498934984207153, "learning_rate": 6.644942719645184e-06, "loss": 0.6948, "step": 3112 }, { "epoch": 2.119128658951668, "grad_norm": 1.641795039176941, "learning_rate": 6.6428164869710945e-06, "loss": 0.5792, "step": 3113 }, { "epoch": 2.1198093941456775, "grad_norm": 1.7519782781600952, "learning_rate": 6.640689921214868e-06, "loss": 0.5464, "step": 3114 }, { "epoch": 2.1204901293396867, "grad_norm": 1.6047507524490356, "learning_rate": 6.638563022807665e-06, "loss": 0.7187, "step": 3115 }, { "epoch": 2.1211708645336964, "grad_norm": 1.6488611698150635, "learning_rate": 6.636435792180714e-06, "loss": 0.6432, "step": 3116 }, { "epoch": 2.121851599727706, "grad_norm": 1.6006407737731934, "learning_rate": 6.634308229765316e-06, "loss": 0.7164, "step": 3117 }, { "epoch": 2.1225323349217153, "grad_norm": 1.7135791778564453, "learning_rate": 6.632180335992834e-06, "loss": 0.6957, "step": 3118 }, { "epoch": 2.123213070115725, "grad_norm": 1.6930832862854004, "learning_rate": 6.6300521112947004e-06, "loss": 0.6561, "step": 3119 }, { "epoch": 2.1238938053097347, "grad_norm": 1.6370903253555298, "learning_rate": 6.627923556102412e-06, "loss": 0.5897, "step": 3120 }, { "epoch": 2.124574540503744, "grad_norm": 1.680694818496704, "learning_rate": 6.6257946708475374e-06, "loss": 0.6825, "step": 3121 }, { "epoch": 2.1252552756977536, "grad_norm": 1.6728345155715942, "learning_rate": 6.623665455961707e-06, "loss": 0.5497, "step": 3122 }, { "epoch": 2.1259360108917633, "grad_norm": 1.6695177555084229, "learning_rate": 6.6215359118766244e-06, "loss": 0.5891, "step": 3123 }, { "epoch": 2.1266167460857726, "grad_norm": 1.654948353767395, "learning_rate": 6.619406039024053e-06, "loss": 0.579, "step": 3124 }, { "epoch": 2.1272974812797822, "grad_norm": 1.599854826927185, "learning_rate": 6.617275837835826e-06, "loss": 0.6164, "step": 3125 }, { "epoch": 2.1279782164737915, "grad_norm": 1.5927391052246094, "learning_rate": 6.615145308743844e-06, "loss": 0.6349, "step": 3126 }, { "epoch": 2.128658951667801, "grad_norm": 1.7360401153564453, "learning_rate": 6.613014452180073e-06, "loss": 0.6669, "step": 3127 }, { "epoch": 2.129339686861811, "grad_norm": 1.8037124872207642, "learning_rate": 6.610883268576545e-06, "loss": 0.5789, "step": 3128 }, { "epoch": 2.13002042205582, "grad_norm": 1.8084478378295898, "learning_rate": 6.608751758365362e-06, "loss": 0.5262, "step": 3129 }, { "epoch": 2.13070115724983, "grad_norm": 1.5697745084762573, "learning_rate": 6.606619921978683e-06, "loss": 0.642, "step": 3130 }, { "epoch": 2.1313818924438395, "grad_norm": 1.7358685731887817, "learning_rate": 6.604487759848746e-06, "loss": 0.513, "step": 3131 }, { "epoch": 2.1320626276378487, "grad_norm": 1.8093022108078003, "learning_rate": 6.602355272407843e-06, "loss": 0.5853, "step": 3132 }, { "epoch": 2.1327433628318584, "grad_norm": 1.7037056684494019, "learning_rate": 6.600222460088341e-06, "loss": 0.4867, "step": 3133 }, { "epoch": 2.133424098025868, "grad_norm": 1.7147245407104492, "learning_rate": 6.598089323322666e-06, "loss": 0.5256, "step": 3134 }, { "epoch": 2.1341048332198773, "grad_norm": 1.7261013984680176, "learning_rate": 6.595955862543314e-06, "loss": 0.6737, "step": 3135 }, { "epoch": 2.134785568413887, "grad_norm": 1.626957654953003, "learning_rate": 6.5938220781828474e-06, "loss": 0.6248, "step": 3136 }, { "epoch": 2.1354663036078967, "grad_norm": 1.8717573881149292, "learning_rate": 6.59168797067389e-06, "loss": 0.5063, "step": 3137 }, { "epoch": 2.136147038801906, "grad_norm": 1.6472129821777344, "learning_rate": 6.589553540449136e-06, "loss": 0.5581, "step": 3138 }, { "epoch": 2.1368277739959156, "grad_norm": 1.8903288841247559, "learning_rate": 6.58741878794134e-06, "loss": 0.3253, "step": 3139 }, { "epoch": 2.1375085091899253, "grad_norm": 1.83865487575531, "learning_rate": 6.585283713583326e-06, "loss": 0.4922, "step": 3140 }, { "epoch": 2.1381892443839345, "grad_norm": 1.6248998641967773, "learning_rate": 6.583148317807982e-06, "loss": 0.5159, "step": 3141 }, { "epoch": 2.1388699795779442, "grad_norm": 1.7228058576583862, "learning_rate": 6.5810126010482585e-06, "loss": 0.5712, "step": 3142 }, { "epoch": 2.139550714771954, "grad_norm": 1.7561026811599731, "learning_rate": 6.578876563737176e-06, "loss": 0.6196, "step": 3143 }, { "epoch": 2.140231449965963, "grad_norm": 1.6219005584716797, "learning_rate": 6.5767402063078164e-06, "loss": 0.6733, "step": 3144 }, { "epoch": 2.140912185159973, "grad_norm": 1.5814098119735718, "learning_rate": 6.5746035291933295e-06, "loss": 0.6318, "step": 3145 }, { "epoch": 2.1415929203539825, "grad_norm": 1.6211259365081787, "learning_rate": 6.5724665328269256e-06, "loss": 0.5836, "step": 3146 }, { "epoch": 2.1422736555479918, "grad_norm": 1.6487840414047241, "learning_rate": 6.5703292176418824e-06, "loss": 0.6067, "step": 3147 }, { "epoch": 2.1429543907420014, "grad_norm": 1.6425853967666626, "learning_rate": 6.568191584071543e-06, "loss": 0.5834, "step": 3148 }, { "epoch": 2.1436351259360107, "grad_norm": 1.7612661123275757, "learning_rate": 6.566053632549313e-06, "loss": 0.3886, "step": 3149 }, { "epoch": 2.1443158611300204, "grad_norm": 1.7374627590179443, "learning_rate": 6.563915363508665e-06, "loss": 0.5574, "step": 3150 }, { "epoch": 2.14499659632403, "grad_norm": 1.553765892982483, "learning_rate": 6.561776777383133e-06, "loss": 0.6582, "step": 3151 }, { "epoch": 2.1456773315180393, "grad_norm": 1.847841501235962, "learning_rate": 6.559637874606317e-06, "loss": 0.5232, "step": 3152 }, { "epoch": 2.146358066712049, "grad_norm": 1.7647347450256348, "learning_rate": 6.5574986556118816e-06, "loss": 0.6232, "step": 3153 }, { "epoch": 2.1470388019060587, "grad_norm": 1.6779283285140991, "learning_rate": 6.555359120833554e-06, "loss": 0.5972, "step": 3154 }, { "epoch": 2.147719537100068, "grad_norm": 1.6032204627990723, "learning_rate": 6.553219270705129e-06, "loss": 0.6513, "step": 3155 }, { "epoch": 2.1484002722940776, "grad_norm": 1.796297311782837, "learning_rate": 6.5510791056604584e-06, "loss": 0.5613, "step": 3156 }, { "epoch": 2.1490810074880873, "grad_norm": 1.744573712348938, "learning_rate": 6.5489386261334655e-06, "loss": 0.499, "step": 3157 }, { "epoch": 2.1497617426820965, "grad_norm": 1.7285763025283813, "learning_rate": 6.5467978325581315e-06, "loss": 0.6519, "step": 3158 }, { "epoch": 2.150442477876106, "grad_norm": 1.6052192449569702, "learning_rate": 6.544656725368506e-06, "loss": 0.5558, "step": 3159 }, { "epoch": 2.151123213070116, "grad_norm": 1.6399272680282593, "learning_rate": 6.5425153049986985e-06, "loss": 0.5722, "step": 3160 }, { "epoch": 2.151803948264125, "grad_norm": 1.7431713342666626, "learning_rate": 6.540373571882882e-06, "loss": 0.5777, "step": 3161 }, { "epoch": 2.152484683458135, "grad_norm": 1.5597895383834839, "learning_rate": 6.5382315264552986e-06, "loss": 0.662, "step": 3162 }, { "epoch": 2.1531654186521445, "grad_norm": 1.875118374824524, "learning_rate": 6.536089169150246e-06, "loss": 0.5583, "step": 3163 }, { "epoch": 2.1538461538461537, "grad_norm": 1.7566728591918945, "learning_rate": 6.533946500402088e-06, "loss": 0.5517, "step": 3164 }, { "epoch": 2.1545268890401634, "grad_norm": 1.5536041259765625, "learning_rate": 6.531803520645254e-06, "loss": 0.6993, "step": 3165 }, { "epoch": 2.155207624234173, "grad_norm": 1.5563647747039795, "learning_rate": 6.529660230314235e-06, "loss": 0.703, "step": 3166 }, { "epoch": 2.1558883594281824, "grad_norm": 1.6415802240371704, "learning_rate": 6.527516629843585e-06, "loss": 0.632, "step": 3167 }, { "epoch": 2.156569094622192, "grad_norm": 1.7872430086135864, "learning_rate": 6.525372719667917e-06, "loss": 0.6371, "step": 3168 }, { "epoch": 2.1572498298162017, "grad_norm": 1.596190333366394, "learning_rate": 6.5232285002219134e-06, "loss": 0.6787, "step": 3169 }, { "epoch": 2.157930565010211, "grad_norm": 1.7005844116210938, "learning_rate": 6.521083971940314e-06, "loss": 0.4727, "step": 3170 }, { "epoch": 2.1586113002042207, "grad_norm": 1.6519434452056885, "learning_rate": 6.518939135257926e-06, "loss": 0.6102, "step": 3171 }, { "epoch": 2.15929203539823, "grad_norm": 1.745961308479309, "learning_rate": 6.5167939906096134e-06, "loss": 0.4957, "step": 3172 }, { "epoch": 2.1599727705922396, "grad_norm": 1.6736737489700317, "learning_rate": 6.514648538430308e-06, "loss": 0.5233, "step": 3173 }, { "epoch": 2.1606535057862493, "grad_norm": 1.7915436029434204, "learning_rate": 6.512502779155e-06, "loss": 0.5074, "step": 3174 }, { "epoch": 2.1613342409802585, "grad_norm": 1.6989293098449707, "learning_rate": 6.510356713218744e-06, "loss": 0.6304, "step": 3175 }, { "epoch": 2.162014976174268, "grad_norm": 1.7857725620269775, "learning_rate": 6.508210341056659e-06, "loss": 0.6454, "step": 3176 }, { "epoch": 2.162695711368278, "grad_norm": 1.7205603122711182, "learning_rate": 6.5060636631039185e-06, "loss": 0.6584, "step": 3177 }, { "epoch": 2.163376446562287, "grad_norm": 1.8674653768539429, "learning_rate": 6.503916679795767e-06, "loss": 0.569, "step": 3178 }, { "epoch": 2.164057181756297, "grad_norm": 1.6001702547073364, "learning_rate": 6.501769391567504e-06, "loss": 0.5832, "step": 3179 }, { "epoch": 2.1647379169503065, "grad_norm": 1.7821202278137207, "learning_rate": 6.499621798854495e-06, "loss": 0.5268, "step": 3180 }, { "epoch": 2.1654186521443157, "grad_norm": 1.615452766418457, "learning_rate": 6.497473902092167e-06, "loss": 0.6908, "step": 3181 }, { "epoch": 2.1660993873383254, "grad_norm": 1.4881073236465454, "learning_rate": 6.495325701716004e-06, "loss": 0.7095, "step": 3182 }, { "epoch": 2.166780122532335, "grad_norm": 1.6427441835403442, "learning_rate": 6.49317719816156e-06, "loss": 0.6072, "step": 3183 }, { "epoch": 2.1674608577263443, "grad_norm": 1.6114517450332642, "learning_rate": 6.49102839186444e-06, "loss": 0.6936, "step": 3184 }, { "epoch": 2.168141592920354, "grad_norm": 1.7280491590499878, "learning_rate": 6.4888792832603205e-06, "loss": 0.564, "step": 3185 }, { "epoch": 2.1688223281143637, "grad_norm": 1.779703140258789, "learning_rate": 6.486729872784933e-06, "loss": 0.5516, "step": 3186 }, { "epoch": 2.169503063308373, "grad_norm": 1.6392055749893188, "learning_rate": 6.48458016087407e-06, "loss": 0.6311, "step": 3187 }, { "epoch": 2.1701837985023826, "grad_norm": 1.792136788368225, "learning_rate": 6.482430147963592e-06, "loss": 0.6155, "step": 3188 }, { "epoch": 2.170864533696392, "grad_norm": 1.7808635234832764, "learning_rate": 6.480279834489409e-06, "loss": 0.5846, "step": 3189 }, { "epoch": 2.1715452688904016, "grad_norm": 1.7451127767562866, "learning_rate": 6.478129220887504e-06, "loss": 0.5149, "step": 3190 }, { "epoch": 2.1722260040844112, "grad_norm": 1.6034884452819824, "learning_rate": 6.475978307593912e-06, "loss": 0.706, "step": 3191 }, { "epoch": 2.172906739278421, "grad_norm": 1.612652063369751, "learning_rate": 6.473827095044734e-06, "loss": 0.5868, "step": 3192 }, { "epoch": 2.17358747447243, "grad_norm": 1.5778957605361938, "learning_rate": 6.471675583676129e-06, "loss": 0.6699, "step": 3193 }, { "epoch": 2.17426820966644, "grad_norm": 1.6126539707183838, "learning_rate": 6.469523773924318e-06, "loss": 0.6343, "step": 3194 }, { "epoch": 2.174948944860449, "grad_norm": 1.6018924713134766, "learning_rate": 6.467371666225581e-06, "loss": 0.6072, "step": 3195 }, { "epoch": 2.175629680054459, "grad_norm": 1.7963252067565918, "learning_rate": 6.465219261016259e-06, "loss": 0.5618, "step": 3196 }, { "epoch": 2.1763104152484685, "grad_norm": 1.6338430643081665, "learning_rate": 6.463066558732754e-06, "loss": 0.5473, "step": 3197 }, { "epoch": 2.1769911504424777, "grad_norm": 1.814839243888855, "learning_rate": 6.46091355981153e-06, "loss": 0.5787, "step": 3198 }, { "epoch": 2.1776718856364874, "grad_norm": 1.843941330909729, "learning_rate": 6.458760264689103e-06, "loss": 0.4359, "step": 3199 }, { "epoch": 2.178352620830497, "grad_norm": 1.7066292762756348, "learning_rate": 6.456606673802064e-06, "loss": 0.562, "step": 3200 }, { "epoch": 2.1790333560245063, "grad_norm": 1.7847150564193726, "learning_rate": 6.454452787587047e-06, "loss": 0.634, "step": 3201 }, { "epoch": 2.179714091218516, "grad_norm": 1.7709543704986572, "learning_rate": 6.452298606480758e-06, "loss": 0.6446, "step": 3202 }, { "epoch": 2.1803948264125257, "grad_norm": 1.5541850328445435, "learning_rate": 6.4501441309199575e-06, "loss": 0.6775, "step": 3203 }, { "epoch": 2.181075561606535, "grad_norm": 1.6920193433761597, "learning_rate": 6.447989361341464e-06, "loss": 0.67, "step": 3204 }, { "epoch": 2.1817562968005446, "grad_norm": 1.7403095960617065, "learning_rate": 6.445834298182165e-06, "loss": 0.596, "step": 3205 }, { "epoch": 2.1824370319945543, "grad_norm": 1.65636146068573, "learning_rate": 6.443678941878996e-06, "loss": 0.6246, "step": 3206 }, { "epoch": 2.1831177671885635, "grad_norm": 1.753917932510376, "learning_rate": 6.4415232928689595e-06, "loss": 0.5666, "step": 3207 }, { "epoch": 2.1837985023825732, "grad_norm": 1.5861040353775024, "learning_rate": 6.439367351589111e-06, "loss": 0.7345, "step": 3208 }, { "epoch": 2.184479237576583, "grad_norm": 1.6064836978912354, "learning_rate": 6.437211118476574e-06, "loss": 0.5952, "step": 3209 }, { "epoch": 2.185159972770592, "grad_norm": 1.7879904508590698, "learning_rate": 6.435054593968522e-06, "loss": 0.5489, "step": 3210 }, { "epoch": 2.185840707964602, "grad_norm": 1.7136664390563965, "learning_rate": 6.432897778502194e-06, "loss": 0.521, "step": 3211 }, { "epoch": 2.186521443158611, "grad_norm": 1.783652663230896, "learning_rate": 6.430740672514886e-06, "loss": 0.5718, "step": 3212 }, { "epoch": 2.1872021783526208, "grad_norm": 1.7146905660629272, "learning_rate": 6.428583276443952e-06, "loss": 0.5786, "step": 3213 }, { "epoch": 2.1878829135466304, "grad_norm": 1.838173270225525, "learning_rate": 6.426425590726806e-06, "loss": 0.5209, "step": 3214 }, { "epoch": 2.1885636487406397, "grad_norm": 1.677155613899231, "learning_rate": 6.424267615800918e-06, "loss": 0.6144, "step": 3215 }, { "epoch": 2.1892443839346494, "grad_norm": 1.6775258779525757, "learning_rate": 6.422109352103822e-06, "loss": 0.5974, "step": 3216 }, { "epoch": 2.189925119128659, "grad_norm": 1.7508891820907593, "learning_rate": 6.419950800073105e-06, "loss": 0.4381, "step": 3217 }, { "epoch": 2.1906058543226683, "grad_norm": 1.6874788999557495, "learning_rate": 6.417791960146416e-06, "loss": 0.5462, "step": 3218 }, { "epoch": 2.191286589516678, "grad_norm": 1.7556638717651367, "learning_rate": 6.415632832761463e-06, "loss": 0.614, "step": 3219 }, { "epoch": 2.1919673247106877, "grad_norm": 1.600484848022461, "learning_rate": 6.413473418356007e-06, "loss": 0.6952, "step": 3220 }, { "epoch": 2.192648059904697, "grad_norm": 1.6341949701309204, "learning_rate": 6.411313717367872e-06, "loss": 0.6552, "step": 3221 }, { "epoch": 2.1933287950987066, "grad_norm": 1.7567468881607056, "learning_rate": 6.409153730234939e-06, "loss": 0.5417, "step": 3222 }, { "epoch": 2.1940095302927163, "grad_norm": 1.6452672481536865, "learning_rate": 6.406993457395148e-06, "loss": 0.5299, "step": 3223 }, { "epoch": 2.1946902654867255, "grad_norm": 1.7116883993148804, "learning_rate": 6.404832899286495e-06, "loss": 0.655, "step": 3224 }, { "epoch": 2.195371000680735, "grad_norm": 1.737140417098999, "learning_rate": 6.402672056347033e-06, "loss": 0.4581, "step": 3225 }, { "epoch": 2.196051735874745, "grad_norm": 1.6750195026397705, "learning_rate": 6.400510929014876e-06, "loss": 0.6788, "step": 3226 }, { "epoch": 2.196732471068754, "grad_norm": 1.7717173099517822, "learning_rate": 6.398349517728193e-06, "loss": 0.5327, "step": 3227 }, { "epoch": 2.197413206262764, "grad_norm": 1.8158873319625854, "learning_rate": 6.396187822925214e-06, "loss": 0.4689, "step": 3228 }, { "epoch": 2.1980939414567735, "grad_norm": 1.6428399085998535, "learning_rate": 6.394025845044222e-06, "loss": 0.6385, "step": 3229 }, { "epoch": 2.1987746766507827, "grad_norm": 1.8073034286499023, "learning_rate": 6.391863584523558e-06, "loss": 0.4092, "step": 3230 }, { "epoch": 2.1994554118447924, "grad_norm": 1.6187959909439087, "learning_rate": 6.389701041801626e-06, "loss": 0.5924, "step": 3231 }, { "epoch": 2.200136147038802, "grad_norm": 1.801143765449524, "learning_rate": 6.387538217316879e-06, "loss": 0.5399, "step": 3232 }, { "epoch": 2.2008168822328114, "grad_norm": 1.7121758460998535, "learning_rate": 6.385375111507833e-06, "loss": 0.6301, "step": 3233 }, { "epoch": 2.201497617426821, "grad_norm": 1.6989582777023315, "learning_rate": 6.383211724813058e-06, "loss": 0.5845, "step": 3234 }, { "epoch": 2.2021783526208303, "grad_norm": 1.7534607648849487, "learning_rate": 6.381048057671184e-06, "loss": 0.52, "step": 3235 }, { "epoch": 2.20285908781484, "grad_norm": 1.6950205564498901, "learning_rate": 6.378884110520896e-06, "loss": 0.6208, "step": 3236 }, { "epoch": 2.2035398230088497, "grad_norm": 1.787691354751587, "learning_rate": 6.376719883800933e-06, "loss": 0.4712, "step": 3237 }, { "epoch": 2.204220558202859, "grad_norm": 1.7929701805114746, "learning_rate": 6.374555377950093e-06, "loss": 0.6868, "step": 3238 }, { "epoch": 2.2049012933968686, "grad_norm": 1.8106334209442139, "learning_rate": 6.372390593407236e-06, "loss": 0.5961, "step": 3239 }, { "epoch": 2.2055820285908783, "grad_norm": 1.7828395366668701, "learning_rate": 6.37022553061127e-06, "loss": 0.5491, "step": 3240 }, { "epoch": 2.2062627637848875, "grad_norm": 1.7691236734390259, "learning_rate": 6.368060190001163e-06, "loss": 0.4314, "step": 3241 }, { "epoch": 2.206943498978897, "grad_norm": 1.8034842014312744, "learning_rate": 6.365894572015939e-06, "loss": 0.6409, "step": 3242 }, { "epoch": 2.207624234172907, "grad_norm": 1.7119710445404053, "learning_rate": 6.363728677094681e-06, "loss": 0.5769, "step": 3243 }, { "epoch": 2.208304969366916, "grad_norm": 1.8999630212783813, "learning_rate": 6.361562505676522e-06, "loss": 0.4501, "step": 3244 }, { "epoch": 2.208985704560926, "grad_norm": 1.7269392013549805, "learning_rate": 6.359396058200657e-06, "loss": 0.6054, "step": 3245 }, { "epoch": 2.2096664397549355, "grad_norm": 1.5932297706604004, "learning_rate": 6.357229335106334e-06, "loss": 0.6033, "step": 3246 }, { "epoch": 2.2103471749489447, "grad_norm": 1.6233351230621338, "learning_rate": 6.355062336832858e-06, "loss": 0.7012, "step": 3247 }, { "epoch": 2.2110279101429544, "grad_norm": 1.7244259119033813, "learning_rate": 6.352895063819589e-06, "loss": 0.5029, "step": 3248 }, { "epoch": 2.211708645336964, "grad_norm": 1.5502840280532837, "learning_rate": 6.350727516505943e-06, "loss": 0.6782, "step": 3249 }, { "epoch": 2.2123893805309733, "grad_norm": 1.8594316244125366, "learning_rate": 6.348559695331394e-06, "loss": 0.5636, "step": 3250 }, { "epoch": 2.213070115724983, "grad_norm": 1.7144724130630493, "learning_rate": 6.346391600735463e-06, "loss": 0.6954, "step": 3251 }, { "epoch": 2.2137508509189927, "grad_norm": 1.7055552005767822, "learning_rate": 6.344223233157739e-06, "loss": 0.6898, "step": 3252 }, { "epoch": 2.214431586113002, "grad_norm": 1.6008086204528809, "learning_rate": 6.342054593037857e-06, "loss": 0.5598, "step": 3253 }, { "epoch": 2.2151123213070116, "grad_norm": 1.7800447940826416, "learning_rate": 6.339885680815512e-06, "loss": 0.5001, "step": 3254 }, { "epoch": 2.2157930565010213, "grad_norm": 1.7060147523880005, "learning_rate": 6.33771649693045e-06, "loss": 0.5123, "step": 3255 }, { "epoch": 2.2164737916950306, "grad_norm": 1.7199054956436157, "learning_rate": 6.335547041822475e-06, "loss": 0.5535, "step": 3256 }, { "epoch": 2.2171545268890402, "grad_norm": 1.7095869779586792, "learning_rate": 6.3333773159314484e-06, "loss": 0.6057, "step": 3257 }, { "epoch": 2.2178352620830495, "grad_norm": 1.4738425016403198, "learning_rate": 6.331207319697279e-06, "loss": 0.792, "step": 3258 }, { "epoch": 2.218515997277059, "grad_norm": 1.7373448610305786, "learning_rate": 6.329037053559937e-06, "loss": 0.578, "step": 3259 }, { "epoch": 2.219196732471069, "grad_norm": 1.4804328680038452, "learning_rate": 6.326866517959448e-06, "loss": 0.6537, "step": 3260 }, { "epoch": 2.219877467665078, "grad_norm": 1.6256515979766846, "learning_rate": 6.324695713335886e-06, "loss": 0.6928, "step": 3261 }, { "epoch": 2.220558202859088, "grad_norm": 1.6089930534362793, "learning_rate": 6.322524640129385e-06, "loss": 0.6253, "step": 3262 }, { "epoch": 2.2212389380530975, "grad_norm": 1.557381272315979, "learning_rate": 6.3203532987801295e-06, "loss": 0.634, "step": 3263 }, { "epoch": 2.2219196732471067, "grad_norm": 1.8744850158691406, "learning_rate": 6.318181689728362e-06, "loss": 0.5496, "step": 3264 }, { "epoch": 2.2226004084411164, "grad_norm": 1.6481014490127563, "learning_rate": 6.316009813414378e-06, "loss": 0.7209, "step": 3265 }, { "epoch": 2.223281143635126, "grad_norm": 1.6801620721817017, "learning_rate": 6.313837670278526e-06, "loss": 0.5318, "step": 3266 }, { "epoch": 2.2239618788291353, "grad_norm": 1.820267915725708, "learning_rate": 6.3116652607612106e-06, "loss": 0.4867, "step": 3267 }, { "epoch": 2.224642614023145, "grad_norm": 1.5366498231887817, "learning_rate": 6.3094925853028854e-06, "loss": 0.6382, "step": 3268 }, { "epoch": 2.2253233492171547, "grad_norm": 1.8103556632995605, "learning_rate": 6.307319644344068e-06, "loss": 0.549, "step": 3269 }, { "epoch": 2.226004084411164, "grad_norm": 1.5482430458068848, "learning_rate": 6.305146438325319e-06, "loss": 0.6158, "step": 3270 }, { "epoch": 2.2266848196051736, "grad_norm": 1.682381272315979, "learning_rate": 6.302972967687258e-06, "loss": 0.6022, "step": 3271 }, { "epoch": 2.2273655547991833, "grad_norm": 1.7282600402832031, "learning_rate": 6.3007992328705615e-06, "loss": 0.5021, "step": 3272 }, { "epoch": 2.2280462899931925, "grad_norm": 1.6832523345947266, "learning_rate": 6.298625234315949e-06, "loss": 0.6149, "step": 3273 }, { "epoch": 2.2287270251872022, "grad_norm": 1.5456088781356812, "learning_rate": 6.2964509724642065e-06, "loss": 0.6747, "step": 3274 }, { "epoch": 2.229407760381212, "grad_norm": 1.6509549617767334, "learning_rate": 6.294276447756162e-06, "loss": 0.5903, "step": 3275 }, { "epoch": 2.230088495575221, "grad_norm": 1.8025189638137817, "learning_rate": 6.2921016606327065e-06, "loss": 0.5363, "step": 3276 }, { "epoch": 2.230769230769231, "grad_norm": 1.6708446741104126, "learning_rate": 6.289926611534776e-06, "loss": 0.6168, "step": 3277 }, { "epoch": 2.2314499659632405, "grad_norm": 1.8088417053222656, "learning_rate": 6.287751300903364e-06, "loss": 0.4178, "step": 3278 }, { "epoch": 2.2321307011572498, "grad_norm": 1.7514736652374268, "learning_rate": 6.2855757291795176e-06, "loss": 0.5797, "step": 3279 }, { "epoch": 2.2328114363512594, "grad_norm": 1.7028244733810425, "learning_rate": 6.283399896804333e-06, "loss": 0.6769, "step": 3280 }, { "epoch": 2.2334921715452687, "grad_norm": 1.8005659580230713, "learning_rate": 6.281223804218964e-06, "loss": 0.5846, "step": 3281 }, { "epoch": 2.2341729067392784, "grad_norm": 1.7007434368133545, "learning_rate": 6.279047451864613e-06, "loss": 0.5593, "step": 3282 }, { "epoch": 2.234853641933288, "grad_norm": 1.7228730916976929, "learning_rate": 6.276870840182538e-06, "loss": 0.526, "step": 3283 }, { "epoch": 2.2355343771272973, "grad_norm": 1.7581148147583008, "learning_rate": 6.27469396961405e-06, "loss": 0.6376, "step": 3284 }, { "epoch": 2.236215112321307, "grad_norm": 1.8685530424118042, "learning_rate": 6.272516840600506e-06, "loss": 0.4218, "step": 3285 }, { "epoch": 2.2368958475153167, "grad_norm": 1.966852068901062, "learning_rate": 6.270339453583325e-06, "loss": 0.4533, "step": 3286 }, { "epoch": 2.237576582709326, "grad_norm": 1.7009028196334839, "learning_rate": 6.268161809003972e-06, "loss": 0.5588, "step": 3287 }, { "epoch": 2.2382573179033356, "grad_norm": 1.7519787549972534, "learning_rate": 6.265983907303967e-06, "loss": 0.6501, "step": 3288 }, { "epoch": 2.2389380530973453, "grad_norm": 1.7343864440917969, "learning_rate": 6.26380574892488e-06, "loss": 0.7001, "step": 3289 }, { "epoch": 2.2396187882913545, "grad_norm": 1.6230045557022095, "learning_rate": 6.261627334308333e-06, "loss": 0.5909, "step": 3290 }, { "epoch": 2.240299523485364, "grad_norm": 1.6984361410140991, "learning_rate": 6.259448663896001e-06, "loss": 0.5119, "step": 3291 }, { "epoch": 2.240980258679374, "grad_norm": 1.827405571937561, "learning_rate": 6.257269738129614e-06, "loss": 0.4743, "step": 3292 }, { "epoch": 2.241660993873383, "grad_norm": 1.5364898443222046, "learning_rate": 6.25509055745095e-06, "loss": 0.6774, "step": 3293 }, { "epoch": 2.242341729067393, "grad_norm": 1.7474076747894287, "learning_rate": 6.252911122301835e-06, "loss": 0.5809, "step": 3294 }, { "epoch": 2.2430224642614025, "grad_norm": 1.7102717161178589, "learning_rate": 6.250731433124157e-06, "loss": 0.6111, "step": 3295 }, { "epoch": 2.2437031994554117, "grad_norm": 1.6946073770523071, "learning_rate": 6.248551490359844e-06, "loss": 0.6026, "step": 3296 }, { "epoch": 2.2443839346494214, "grad_norm": 1.665970802307129, "learning_rate": 6.246371294450884e-06, "loss": 0.6557, "step": 3297 }, { "epoch": 2.2450646698434307, "grad_norm": 1.756345510482788, "learning_rate": 6.244190845839314e-06, "loss": 0.5905, "step": 3298 }, { "epoch": 2.2457454050374404, "grad_norm": 1.6221520900726318, "learning_rate": 6.242010144967218e-06, "loss": 0.6016, "step": 3299 }, { "epoch": 2.24642614023145, "grad_norm": 1.6544057130813599, "learning_rate": 6.2398291922767375e-06, "loss": 0.602, "step": 3300 }, { "epoch": 2.2471068754254597, "grad_norm": 1.6444439888000488, "learning_rate": 6.23764798821006e-06, "loss": 0.5811, "step": 3301 }, { "epoch": 2.247787610619469, "grad_norm": 1.622017502784729, "learning_rate": 6.235466533209427e-06, "loss": 0.6239, "step": 3302 }, { "epoch": 2.2484683458134787, "grad_norm": 1.7332381010055542, "learning_rate": 6.233284827717131e-06, "loss": 0.554, "step": 3303 }, { "epoch": 2.249149081007488, "grad_norm": 1.6683319807052612, "learning_rate": 6.231102872175512e-06, "loss": 0.6553, "step": 3304 }, { "epoch": 2.2498298162014976, "grad_norm": 1.5752919912338257, "learning_rate": 6.2289206670269665e-06, "loss": 0.7531, "step": 3305 }, { "epoch": 2.2505105513955073, "grad_norm": 1.647162675857544, "learning_rate": 6.226738212713935e-06, "loss": 0.5748, "step": 3306 }, { "epoch": 2.2511912865895165, "grad_norm": 1.6148697137832642, "learning_rate": 6.22455550967891e-06, "loss": 0.6029, "step": 3307 }, { "epoch": 2.251872021783526, "grad_norm": 1.7513160705566406, "learning_rate": 6.22237255836444e-06, "loss": 0.5142, "step": 3308 }, { "epoch": 2.252552756977536, "grad_norm": 1.66556715965271, "learning_rate": 6.220189359213118e-06, "loss": 0.5468, "step": 3309 }, { "epoch": 2.253233492171545, "grad_norm": 1.6758350133895874, "learning_rate": 6.2180059126675906e-06, "loss": 0.6403, "step": 3310 }, { "epoch": 2.253914227365555, "grad_norm": 1.8544570207595825, "learning_rate": 6.215822219170551e-06, "loss": 0.559, "step": 3311 }, { "epoch": 2.2545949625595645, "grad_norm": 1.516754388809204, "learning_rate": 6.213638279164744e-06, "loss": 0.5893, "step": 3312 }, { "epoch": 2.2552756977535737, "grad_norm": 1.7057185173034668, "learning_rate": 6.211454093092968e-06, "loss": 0.5991, "step": 3313 }, { "epoch": 2.2559564329475834, "grad_norm": 1.624015212059021, "learning_rate": 6.209269661398065e-06, "loss": 0.6569, "step": 3314 }, { "epoch": 2.256637168141593, "grad_norm": 1.6183562278747559, "learning_rate": 6.207084984522932e-06, "loss": 0.6511, "step": 3315 }, { "epoch": 2.2573179033356023, "grad_norm": 1.738445520401001, "learning_rate": 6.204900062910513e-06, "loss": 0.6169, "step": 3316 }, { "epoch": 2.257998638529612, "grad_norm": 1.7047204971313477, "learning_rate": 6.202714897003801e-06, "loss": 0.6409, "step": 3317 }, { "epoch": 2.2586793737236217, "grad_norm": 1.6434377431869507, "learning_rate": 6.200529487245842e-06, "loss": 0.7172, "step": 3318 }, { "epoch": 2.259360108917631, "grad_norm": 1.5235899686813354, "learning_rate": 6.1983438340797295e-06, "loss": 0.6515, "step": 3319 }, { "epoch": 2.2600408441116406, "grad_norm": 1.7627555131912231, "learning_rate": 6.1961579379486035e-06, "loss": 0.5249, "step": 3320 }, { "epoch": 2.26072157930565, "grad_norm": 1.7225078344345093, "learning_rate": 6.193971799295658e-06, "loss": 0.5516, "step": 3321 }, { "epoch": 2.2614023144996596, "grad_norm": 1.5461757183074951, "learning_rate": 6.191785418564133e-06, "loss": 0.6803, "step": 3322 }, { "epoch": 2.2620830496936692, "grad_norm": 1.6790553331375122, "learning_rate": 6.18959879619732e-06, "loss": 0.6085, "step": 3323 }, { "epoch": 2.262763784887679, "grad_norm": 1.7426890134811401, "learning_rate": 6.187411932638558e-06, "loss": 0.5701, "step": 3324 }, { "epoch": 2.263444520081688, "grad_norm": 1.8225438594818115, "learning_rate": 6.1852248283312334e-06, "loss": 0.4571, "step": 3325 }, { "epoch": 2.264125255275698, "grad_norm": 1.7724688053131104, "learning_rate": 6.183037483718784e-06, "loss": 0.6672, "step": 3326 }, { "epoch": 2.264805990469707, "grad_norm": 1.838683009147644, "learning_rate": 6.180849899244696e-06, "loss": 0.578, "step": 3327 }, { "epoch": 2.265486725663717, "grad_norm": 1.6591756343841553, "learning_rate": 6.1786620753525025e-06, "loss": 0.6499, "step": 3328 }, { "epoch": 2.2661674608577265, "grad_norm": 1.7171980142593384, "learning_rate": 6.176474012485787e-06, "loss": 0.5452, "step": 3329 }, { "epoch": 2.2668481960517357, "grad_norm": 1.6255955696105957, "learning_rate": 6.17428571108818e-06, "loss": 0.6219, "step": 3330 }, { "epoch": 2.2675289312457454, "grad_norm": 1.601577639579773, "learning_rate": 6.1720971716033625e-06, "loss": 0.6591, "step": 3331 }, { "epoch": 2.268209666439755, "grad_norm": 1.8289920091629028, "learning_rate": 6.1699083944750616e-06, "loss": 0.5137, "step": 3332 }, { "epoch": 2.2688904016337643, "grad_norm": 1.634009599685669, "learning_rate": 6.1677193801470515e-06, "loss": 0.7477, "step": 3333 }, { "epoch": 2.269571136827774, "grad_norm": 1.6744579076766968, "learning_rate": 6.165530129063159e-06, "loss": 0.6329, "step": 3334 }, { "epoch": 2.2702518720217837, "grad_norm": 1.696149468421936, "learning_rate": 6.163340641667255e-06, "loss": 0.5297, "step": 3335 }, { "epoch": 2.270932607215793, "grad_norm": 1.5514239072799683, "learning_rate": 6.16115091840326e-06, "loss": 0.7096, "step": 3336 }, { "epoch": 2.2716133424098026, "grad_norm": 1.6850864887237549, "learning_rate": 6.15896095971514e-06, "loss": 0.5676, "step": 3337 }, { "epoch": 2.2722940776038123, "grad_norm": 1.732816219329834, "learning_rate": 6.156770766046915e-06, "loss": 0.5956, "step": 3338 }, { "epoch": 2.2729748127978215, "grad_norm": 1.9952950477600098, "learning_rate": 6.154580337842644e-06, "loss": 0.5771, "step": 3339 }, { "epoch": 2.2736555479918312, "grad_norm": 1.6514453887939453, "learning_rate": 6.152389675546438e-06, "loss": 0.5858, "step": 3340 }, { "epoch": 2.274336283185841, "grad_norm": 1.8415656089782715, "learning_rate": 6.150198779602459e-06, "loss": 0.4959, "step": 3341 }, { "epoch": 2.27501701837985, "grad_norm": 1.7564787864685059, "learning_rate": 6.148007650454906e-06, "loss": 0.5135, "step": 3342 }, { "epoch": 2.27569775357386, "grad_norm": 1.7730683088302612, "learning_rate": 6.145816288548039e-06, "loss": 0.4285, "step": 3343 }, { "epoch": 2.276378488767869, "grad_norm": 1.64808189868927, "learning_rate": 6.143624694326154e-06, "loss": 0.6925, "step": 3344 }, { "epoch": 2.2770592239618788, "grad_norm": 1.7447229623794556, "learning_rate": 6.1414328682336e-06, "loss": 0.5338, "step": 3345 }, { "epoch": 2.2777399591558884, "grad_norm": 1.7696208953857422, "learning_rate": 6.139240810714768e-06, "loss": 0.563, "step": 3346 }, { "epoch": 2.278420694349898, "grad_norm": 1.6999070644378662, "learning_rate": 6.137048522214104e-06, "loss": 0.5426, "step": 3347 }, { "epoch": 2.2791014295439074, "grad_norm": 1.7290987968444824, "learning_rate": 6.134856003176093e-06, "loss": 0.5865, "step": 3348 }, { "epoch": 2.279782164737917, "grad_norm": 1.7392810583114624, "learning_rate": 6.13266325404527e-06, "loss": 0.5843, "step": 3349 }, { "epoch": 2.2804628999319263, "grad_norm": 1.722198247909546, "learning_rate": 6.130470275266216e-06, "loss": 0.6217, "step": 3350 }, { "epoch": 2.281143635125936, "grad_norm": 1.7425649166107178, "learning_rate": 6.128277067283559e-06, "loss": 0.5815, "step": 3351 }, { "epoch": 2.2818243703199457, "grad_norm": 1.685489535331726, "learning_rate": 6.1260836305419755e-06, "loss": 0.5169, "step": 3352 }, { "epoch": 2.282505105513955, "grad_norm": 1.8042200803756714, "learning_rate": 6.123889965486184e-06, "loss": 0.5931, "step": 3353 }, { "epoch": 2.2831858407079646, "grad_norm": 1.8233375549316406, "learning_rate": 6.121696072560951e-06, "loss": 0.5542, "step": 3354 }, { "epoch": 2.2838665759019743, "grad_norm": 1.6373584270477295, "learning_rate": 6.119501952211091e-06, "loss": 0.4874, "step": 3355 }, { "epoch": 2.2845473110959835, "grad_norm": 1.6303040981292725, "learning_rate": 6.117307604881464e-06, "loss": 0.596, "step": 3356 }, { "epoch": 2.285228046289993, "grad_norm": 1.8518363237380981, "learning_rate": 6.115113031016975e-06, "loss": 0.4609, "step": 3357 }, { "epoch": 2.285908781484003, "grad_norm": 1.8168962001800537, "learning_rate": 6.112918231062575e-06, "loss": 0.6441, "step": 3358 }, { "epoch": 2.286589516678012, "grad_norm": 1.6081284284591675, "learning_rate": 6.110723205463259e-06, "loss": 0.645, "step": 3359 }, { "epoch": 2.287270251872022, "grad_norm": 1.72564697265625, "learning_rate": 6.108527954664074e-06, "loss": 0.5491, "step": 3360 }, { "epoch": 2.2879509870660315, "grad_norm": 1.7378007173538208, "learning_rate": 6.106332479110105e-06, "loss": 0.5653, "step": 3361 }, { "epoch": 2.2886317222600407, "grad_norm": 1.779646635055542, "learning_rate": 6.10413677924649e-06, "loss": 0.6193, "step": 3362 }, { "epoch": 2.2893124574540504, "grad_norm": 1.5338401794433594, "learning_rate": 6.101940855518403e-06, "loss": 0.6907, "step": 3363 }, { "epoch": 2.28999319264806, "grad_norm": 1.7438279390335083, "learning_rate": 6.099744708371076e-06, "loss": 0.5848, "step": 3364 }, { "epoch": 2.2906739278420694, "grad_norm": 1.7048439979553223, "learning_rate": 6.097548338249773e-06, "loss": 0.5585, "step": 3365 }, { "epoch": 2.291354663036079, "grad_norm": 1.8428218364715576, "learning_rate": 6.095351745599813e-06, "loss": 0.5063, "step": 3366 }, { "epoch": 2.2920353982300883, "grad_norm": 1.7486330270767212, "learning_rate": 6.093154930866556e-06, "loss": 0.4816, "step": 3367 }, { "epoch": 2.292716133424098, "grad_norm": 1.6441946029663086, "learning_rate": 6.0909578944954064e-06, "loss": 0.5963, "step": 3368 }, { "epoch": 2.2933968686181077, "grad_norm": 1.51164710521698, "learning_rate": 6.088760636931817e-06, "loss": 0.682, "step": 3369 }, { "epoch": 2.294077603812117, "grad_norm": 1.7893821001052856, "learning_rate": 6.086563158621282e-06, "loss": 0.4698, "step": 3370 }, { "epoch": 2.2947583390061266, "grad_norm": 1.685953974723816, "learning_rate": 6.0843654600093404e-06, "loss": 0.5276, "step": 3371 }, { "epoch": 2.2954390742001363, "grad_norm": 1.6867303848266602, "learning_rate": 6.082167541541578e-06, "loss": 0.6881, "step": 3372 }, { "epoch": 2.2961198093941455, "grad_norm": 1.6995413303375244, "learning_rate": 6.079969403663626e-06, "loss": 0.5886, "step": 3373 }, { "epoch": 2.296800544588155, "grad_norm": 1.8587154150009155, "learning_rate": 6.077771046821157e-06, "loss": 0.5102, "step": 3374 }, { "epoch": 2.297481279782165, "grad_norm": 1.7930963039398193, "learning_rate": 6.075572471459888e-06, "loss": 0.6094, "step": 3375 }, { "epoch": 2.298162014976174, "grad_norm": 1.759119987487793, "learning_rate": 6.073373678025583e-06, "loss": 0.5842, "step": 3376 }, { "epoch": 2.298842750170184, "grad_norm": 1.7354413270950317, "learning_rate": 6.071174666964048e-06, "loss": 0.549, "step": 3377 }, { "epoch": 2.2995234853641935, "grad_norm": 1.7395658493041992, "learning_rate": 6.068975438721135e-06, "loss": 0.6919, "step": 3378 }, { "epoch": 2.3002042205582027, "grad_norm": 1.6450656652450562, "learning_rate": 6.066775993742739e-06, "loss": 0.5889, "step": 3379 }, { "epoch": 2.3008849557522124, "grad_norm": 1.7938807010650635, "learning_rate": 6.064576332474799e-06, "loss": 0.5377, "step": 3380 }, { "epoch": 2.301565690946222, "grad_norm": 1.6893302202224731, "learning_rate": 6.062376455363295e-06, "loss": 0.6348, "step": 3381 }, { "epoch": 2.3022464261402313, "grad_norm": 1.7813901901245117, "learning_rate": 6.060176362854259e-06, "loss": 0.5422, "step": 3382 }, { "epoch": 2.302927161334241, "grad_norm": 1.7682064771652222, "learning_rate": 6.057976055393757e-06, "loss": 0.5658, "step": 3383 }, { "epoch": 2.3036078965282503, "grad_norm": 1.7442927360534668, "learning_rate": 6.055775533427903e-06, "loss": 0.491, "step": 3384 }, { "epoch": 2.30428863172226, "grad_norm": 1.7523385286331177, "learning_rate": 6.053574797402855e-06, "loss": 0.6519, "step": 3385 }, { "epoch": 2.3049693669162696, "grad_norm": 1.6687389612197876, "learning_rate": 6.051373847764814e-06, "loss": 0.4265, "step": 3386 }, { "epoch": 2.3056501021102793, "grad_norm": 1.5831876993179321, "learning_rate": 6.049172684960024e-06, "loss": 0.6078, "step": 3387 }, { "epoch": 2.3063308373042886, "grad_norm": 1.721645712852478, "learning_rate": 6.046971309434773e-06, "loss": 0.5015, "step": 3388 }, { "epoch": 2.3070115724982982, "grad_norm": 1.5750806331634521, "learning_rate": 6.044769721635388e-06, "loss": 0.7512, "step": 3389 }, { "epoch": 2.3076923076923075, "grad_norm": 1.834031581878662, "learning_rate": 6.042567922008249e-06, "loss": 0.4956, "step": 3390 }, { "epoch": 2.308373042886317, "grad_norm": 1.6727581024169922, "learning_rate": 6.040365910999765e-06, "loss": 0.4651, "step": 3391 }, { "epoch": 2.309053778080327, "grad_norm": 1.607083797454834, "learning_rate": 6.038163689056399e-06, "loss": 0.6827, "step": 3392 }, { "epoch": 2.309734513274336, "grad_norm": 1.6767067909240723, "learning_rate": 6.035961256624652e-06, "loss": 0.6597, "step": 3393 }, { "epoch": 2.310415248468346, "grad_norm": 1.8215669393539429, "learning_rate": 6.03375861415107e-06, "loss": 0.5167, "step": 3394 }, { "epoch": 2.3110959836623555, "grad_norm": 1.6250237226486206, "learning_rate": 6.0315557620822395e-06, "loss": 0.759, "step": 3395 }, { "epoch": 2.3117767188563647, "grad_norm": 1.5659812688827515, "learning_rate": 6.029352700864789e-06, "loss": 0.5783, "step": 3396 }, { "epoch": 2.3124574540503744, "grad_norm": 1.7151551246643066, "learning_rate": 6.0271494309453915e-06, "loss": 0.4521, "step": 3397 }, { "epoch": 2.313138189244384, "grad_norm": 1.5966377258300781, "learning_rate": 6.024945952770761e-06, "loss": 0.7258, "step": 3398 }, { "epoch": 2.3138189244383933, "grad_norm": 1.7551816701889038, "learning_rate": 6.022742266787656e-06, "loss": 0.5783, "step": 3399 }, { "epoch": 2.314499659632403, "grad_norm": 1.6224644184112549, "learning_rate": 6.020538373442873e-06, "loss": 0.6616, "step": 3400 }, { "epoch": 2.3151803948264127, "grad_norm": 1.621463418006897, "learning_rate": 6.018334273183255e-06, "loss": 0.6386, "step": 3401 }, { "epoch": 2.315861130020422, "grad_norm": 1.691468596458435, "learning_rate": 6.016129966455683e-06, "loss": 0.5057, "step": 3402 }, { "epoch": 2.3165418652144316, "grad_norm": 1.6291131973266602, "learning_rate": 6.013925453707082e-06, "loss": 0.6131, "step": 3403 }, { "epoch": 2.3172226004084413, "grad_norm": 1.7531459331512451, "learning_rate": 6.011720735384419e-06, "loss": 0.6661, "step": 3404 }, { "epoch": 2.3179033356024505, "grad_norm": 1.7895934581756592, "learning_rate": 6.009515811934703e-06, "loss": 0.5121, "step": 3405 }, { "epoch": 2.3185840707964602, "grad_norm": 1.665787696838379, "learning_rate": 6.00731068380498e-06, "loss": 0.5853, "step": 3406 }, { "epoch": 2.3192648059904695, "grad_norm": 1.753064513206482, "learning_rate": 6.005105351442346e-06, "loss": 0.5539, "step": 3407 }, { "epoch": 2.319945541184479, "grad_norm": 1.5809077024459839, "learning_rate": 6.00289981529393e-06, "loss": 0.705, "step": 3408 }, { "epoch": 2.320626276378489, "grad_norm": 1.6455422639846802, "learning_rate": 6.000694075806908e-06, "loss": 0.5847, "step": 3409 }, { "epoch": 2.3213070115724985, "grad_norm": 1.7561895847320557, "learning_rate": 5.998488133428495e-06, "loss": 0.5929, "step": 3410 }, { "epoch": 2.3219877467665078, "grad_norm": 1.7462778091430664, "learning_rate": 5.996281988605945e-06, "loss": 0.596, "step": 3411 }, { "epoch": 2.3226684819605175, "grad_norm": 1.6855920553207397, "learning_rate": 5.994075641786559e-06, "loss": 0.7301, "step": 3412 }, { "epoch": 2.3233492171545267, "grad_norm": 1.6720209121704102, "learning_rate": 5.9918690934176726e-06, "loss": 0.6388, "step": 3413 }, { "epoch": 2.3240299523485364, "grad_norm": 1.5393896102905273, "learning_rate": 5.989662343946666e-06, "loss": 0.7116, "step": 3414 }, { "epoch": 2.324710687542546, "grad_norm": 1.6357321739196777, "learning_rate": 5.987455393820958e-06, "loss": 0.5948, "step": 3415 }, { "epoch": 2.3253914227365553, "grad_norm": 1.6705459356307983, "learning_rate": 5.98524824348801e-06, "loss": 0.563, "step": 3416 }, { "epoch": 2.326072157930565, "grad_norm": 1.736079454421997, "learning_rate": 5.983040893395324e-06, "loss": 0.5831, "step": 3417 }, { "epoch": 2.3267528931245747, "grad_norm": 1.7002805471420288, "learning_rate": 5.980833343990441e-06, "loss": 0.5805, "step": 3418 }, { "epoch": 2.327433628318584, "grad_norm": 1.689969539642334, "learning_rate": 5.978625595720942e-06, "loss": 0.5758, "step": 3419 }, { "epoch": 2.3281143635125936, "grad_norm": 1.7199506759643555, "learning_rate": 5.976417649034451e-06, "loss": 0.5549, "step": 3420 }, { "epoch": 2.3287950987066033, "grad_norm": 1.6580244302749634, "learning_rate": 5.974209504378632e-06, "loss": 0.5894, "step": 3421 }, { "epoch": 2.3294758339006125, "grad_norm": 1.706562876701355, "learning_rate": 5.972001162201184e-06, "loss": 0.5059, "step": 3422 }, { "epoch": 2.330156569094622, "grad_norm": 1.7272790670394897, "learning_rate": 5.9697926229498514e-06, "loss": 0.59, "step": 3423 }, { "epoch": 2.330837304288632, "grad_norm": 1.6664886474609375, "learning_rate": 5.967583887072419e-06, "loss": 0.6346, "step": 3424 }, { "epoch": 2.331518039482641, "grad_norm": 1.673944354057312, "learning_rate": 5.9653749550167064e-06, "loss": 0.6538, "step": 3425 }, { "epoch": 2.332198774676651, "grad_norm": 1.7883881330490112, "learning_rate": 5.963165827230579e-06, "loss": 0.6129, "step": 3426 }, { "epoch": 2.3328795098706605, "grad_norm": 1.521995186805725, "learning_rate": 5.9609565041619366e-06, "loss": 0.6467, "step": 3427 }, { "epoch": 2.3335602450646697, "grad_norm": 1.5314208269119263, "learning_rate": 5.958746986258722e-06, "loss": 0.7446, "step": 3428 }, { "epoch": 2.3342409802586794, "grad_norm": 1.7991422414779663, "learning_rate": 5.956537273968916e-06, "loss": 0.4975, "step": 3429 }, { "epoch": 2.3349217154526887, "grad_norm": 1.800383448600769, "learning_rate": 5.95432736774054e-06, "loss": 0.6145, "step": 3430 }, { "epoch": 2.3356024506466984, "grad_norm": 1.749886155128479, "learning_rate": 5.952117268021653e-06, "loss": 0.4497, "step": 3431 }, { "epoch": 2.336283185840708, "grad_norm": 1.7171543836593628, "learning_rate": 5.949906975260353e-06, "loss": 0.6366, "step": 3432 }, { "epoch": 2.3369639210347177, "grad_norm": 1.660375714302063, "learning_rate": 5.947696489904784e-06, "loss": 0.6155, "step": 3433 }, { "epoch": 2.337644656228727, "grad_norm": 1.884089708328247, "learning_rate": 5.945485812403116e-06, "loss": 0.5423, "step": 3434 }, { "epoch": 2.3383253914227367, "grad_norm": 1.653560996055603, "learning_rate": 5.94327494320357e-06, "loss": 0.5913, "step": 3435 }, { "epoch": 2.339006126616746, "grad_norm": 1.6691961288452148, "learning_rate": 5.941063882754401e-06, "loss": 0.5736, "step": 3436 }, { "epoch": 2.3396868618107556, "grad_norm": 1.6090718507766724, "learning_rate": 5.938852631503899e-06, "loss": 0.6509, "step": 3437 }, { "epoch": 2.3403675970047653, "grad_norm": 1.7599045038223267, "learning_rate": 5.936641189900401e-06, "loss": 0.4721, "step": 3438 }, { "epoch": 2.3410483321987745, "grad_norm": 1.6683765649795532, "learning_rate": 5.934429558392277e-06, "loss": 0.4882, "step": 3439 }, { "epoch": 2.341729067392784, "grad_norm": 1.686785101890564, "learning_rate": 5.932217737427936e-06, "loss": 0.529, "step": 3440 }, { "epoch": 2.342409802586794, "grad_norm": 1.6463260650634766, "learning_rate": 5.9300057274558275e-06, "loss": 0.5899, "step": 3441 }, { "epoch": 2.343090537780803, "grad_norm": 1.5275212526321411, "learning_rate": 5.927793528924436e-06, "loss": 0.5891, "step": 3442 }, { "epoch": 2.343771272974813, "grad_norm": 1.543819785118103, "learning_rate": 5.92558114228229e-06, "loss": 0.6504, "step": 3443 }, { "epoch": 2.3444520081688225, "grad_norm": 1.7506548166275024, "learning_rate": 5.923368567977948e-06, "loss": 0.6381, "step": 3444 }, { "epoch": 2.3451327433628317, "grad_norm": 1.7586716413497925, "learning_rate": 5.921155806460013e-06, "loss": 0.5443, "step": 3445 }, { "epoch": 2.3458134785568414, "grad_norm": 1.611623764038086, "learning_rate": 5.918942858177123e-06, "loss": 0.6167, "step": 3446 }, { "epoch": 2.346494213750851, "grad_norm": 1.6596251726150513, "learning_rate": 5.916729723577955e-06, "loss": 0.7085, "step": 3447 }, { "epoch": 2.3471749489448603, "grad_norm": 1.7168669700622559, "learning_rate": 5.914516403111226e-06, "loss": 0.5916, "step": 3448 }, { "epoch": 2.34785568413887, "grad_norm": 1.7608697414398193, "learning_rate": 5.912302897225684e-06, "loss": 0.5704, "step": 3449 }, { "epoch": 2.3485364193328797, "grad_norm": 1.6569148302078247, "learning_rate": 5.9100892063701225e-06, "loss": 0.7293, "step": 3450 }, { "epoch": 2.349217154526889, "grad_norm": 1.61326265335083, "learning_rate": 5.907875330993365e-06, "loss": 0.6709, "step": 3451 }, { "epoch": 2.3498978897208986, "grad_norm": 1.6438663005828857, "learning_rate": 5.905661271544282e-06, "loss": 0.6351, "step": 3452 }, { "epoch": 2.350578624914908, "grad_norm": 1.6800849437713623, "learning_rate": 5.903447028471769e-06, "loss": 0.4969, "step": 3453 }, { "epoch": 2.3512593601089176, "grad_norm": 1.7663898468017578, "learning_rate": 5.901232602224767e-06, "loss": 0.7388, "step": 3454 }, { "epoch": 2.3519400953029272, "grad_norm": 1.7241020202636719, "learning_rate": 5.899017993252256e-06, "loss": 0.592, "step": 3455 }, { "epoch": 2.352620830496937, "grad_norm": 1.6928784847259521, "learning_rate": 5.896803202003247e-06, "loss": 0.5512, "step": 3456 }, { "epoch": 2.353301565690946, "grad_norm": 1.7296010255813599, "learning_rate": 5.894588228926791e-06, "loss": 0.6698, "step": 3457 }, { "epoch": 2.353982300884956, "grad_norm": 1.6403826475143433, "learning_rate": 5.892373074471972e-06, "loss": 0.4459, "step": 3458 }, { "epoch": 2.354663036078965, "grad_norm": 1.710844874382019, "learning_rate": 5.8901577390879195e-06, "loss": 0.4848, "step": 3459 }, { "epoch": 2.355343771272975, "grad_norm": 1.7175081968307495, "learning_rate": 5.887942223223789e-06, "loss": 0.5529, "step": 3460 }, { "epoch": 2.3560245064669845, "grad_norm": 1.8705408573150635, "learning_rate": 5.885726527328782e-06, "loss": 0.5248, "step": 3461 }, { "epoch": 2.3567052416609937, "grad_norm": 1.7116166353225708, "learning_rate": 5.88351065185213e-06, "loss": 0.6423, "step": 3462 }, { "epoch": 2.3573859768550034, "grad_norm": 1.7926498651504517, "learning_rate": 5.881294597243103e-06, "loss": 0.5725, "step": 3463 }, { "epoch": 2.358066712049013, "grad_norm": 1.8406262397766113, "learning_rate": 5.879078363951011e-06, "loss": 0.4782, "step": 3464 }, { "epoch": 2.3587474472430223, "grad_norm": 1.5954807996749878, "learning_rate": 5.876861952425192e-06, "loss": 0.6552, "step": 3465 }, { "epoch": 2.359428182437032, "grad_norm": 1.6341099739074707, "learning_rate": 5.874645363115028e-06, "loss": 0.5417, "step": 3466 }, { "epoch": 2.3601089176310417, "grad_norm": 1.7003774642944336, "learning_rate": 5.872428596469932e-06, "loss": 0.5211, "step": 3467 }, { "epoch": 2.360789652825051, "grad_norm": 1.6340274810791016, "learning_rate": 5.8702116529393575e-06, "loss": 0.6831, "step": 3468 }, { "epoch": 2.3614703880190606, "grad_norm": 1.67527437210083, "learning_rate": 5.867994532972789e-06, "loss": 0.5017, "step": 3469 }, { "epoch": 2.3621511232130703, "grad_norm": 1.7314624786376953, "learning_rate": 5.8657772370197496e-06, "loss": 0.5963, "step": 3470 }, { "epoch": 2.3628318584070795, "grad_norm": 1.6555176973342896, "learning_rate": 5.863559765529797e-06, "loss": 0.5797, "step": 3471 }, { "epoch": 2.3635125936010892, "grad_norm": 1.70497465133667, "learning_rate": 5.8613421189525265e-06, "loss": 0.6014, "step": 3472 }, { "epoch": 2.364193328795099, "grad_norm": 1.5389991998672485, "learning_rate": 5.8591242977375665e-06, "loss": 0.6461, "step": 3473 }, { "epoch": 2.364874063989108, "grad_norm": 1.5994642972946167, "learning_rate": 5.856906302334583e-06, "loss": 0.5586, "step": 3474 }, { "epoch": 2.365554799183118, "grad_norm": 1.5899848937988281, "learning_rate": 5.854688133193273e-06, "loss": 0.6519, "step": 3475 }, { "epoch": 2.366235534377127, "grad_norm": 1.7593914270401, "learning_rate": 5.852469790763375e-06, "loss": 0.5493, "step": 3476 }, { "epoch": 2.3669162695711368, "grad_norm": 1.5663985013961792, "learning_rate": 5.850251275494658e-06, "loss": 0.6135, "step": 3477 }, { "epoch": 2.3675970047651465, "grad_norm": 1.674272894859314, "learning_rate": 5.848032587836927e-06, "loss": 0.5345, "step": 3478 }, { "epoch": 2.368277739959156, "grad_norm": 1.8377268314361572, "learning_rate": 5.845813728240025e-06, "loss": 0.5446, "step": 3479 }, { "epoch": 2.3689584751531654, "grad_norm": 1.7629966735839844, "learning_rate": 5.843594697153823e-06, "loss": 0.5026, "step": 3480 }, { "epoch": 2.369639210347175, "grad_norm": 1.7900903224945068, "learning_rate": 5.841375495028234e-06, "loss": 0.5186, "step": 3481 }, { "epoch": 2.3703199455411843, "grad_norm": 1.8426179885864258, "learning_rate": 5.839156122313202e-06, "loss": 0.5525, "step": 3482 }, { "epoch": 2.371000680735194, "grad_norm": 1.6331005096435547, "learning_rate": 5.836936579458707e-06, "loss": 0.7009, "step": 3483 }, { "epoch": 2.3716814159292037, "grad_norm": 1.6459729671478271, "learning_rate": 5.83471686691476e-06, "loss": 0.6114, "step": 3484 }, { "epoch": 2.372362151123213, "grad_norm": 1.6749944686889648, "learning_rate": 5.832496985131413e-06, "loss": 0.576, "step": 3485 }, { "epoch": 2.3730428863172226, "grad_norm": 1.4838861227035522, "learning_rate": 5.830276934558747e-06, "loss": 0.5992, "step": 3486 }, { "epoch": 2.3737236215112323, "grad_norm": 1.6878842115402222, "learning_rate": 5.828056715646878e-06, "loss": 0.555, "step": 3487 }, { "epoch": 2.3744043567052415, "grad_norm": 1.7058285474777222, "learning_rate": 5.825836328845957e-06, "loss": 0.575, "step": 3488 }, { "epoch": 2.375085091899251, "grad_norm": 1.6073956489562988, "learning_rate": 5.823615774606169e-06, "loss": 0.7864, "step": 3489 }, { "epoch": 2.375765827093261, "grad_norm": 1.6310449838638306, "learning_rate": 5.821395053377736e-06, "loss": 0.5796, "step": 3490 }, { "epoch": 2.37644656228727, "grad_norm": 1.8002878427505493, "learning_rate": 5.819174165610905e-06, "loss": 0.5284, "step": 3491 }, { "epoch": 2.37712729748128, "grad_norm": 1.7016223669052124, "learning_rate": 5.816953111755966e-06, "loss": 0.5669, "step": 3492 }, { "epoch": 2.377808032675289, "grad_norm": 1.7508561611175537, "learning_rate": 5.814731892263239e-06, "loss": 0.5398, "step": 3493 }, { "epoch": 2.3784887678692987, "grad_norm": 1.7365268468856812, "learning_rate": 5.812510507583075e-06, "loss": 0.6415, "step": 3494 }, { "epoch": 2.3791695030633084, "grad_norm": 1.8450062274932861, "learning_rate": 5.810288958165867e-06, "loss": 0.6116, "step": 3495 }, { "epoch": 2.379850238257318, "grad_norm": 1.6313186883926392, "learning_rate": 5.80806724446203e-06, "loss": 0.7359, "step": 3496 }, { "epoch": 2.3805309734513274, "grad_norm": 1.581355094909668, "learning_rate": 5.805845366922021e-06, "loss": 0.767, "step": 3497 }, { "epoch": 2.381211708645337, "grad_norm": 1.9034756422042847, "learning_rate": 5.803623325996325e-06, "loss": 0.4644, "step": 3498 }, { "epoch": 2.3818924438393463, "grad_norm": 1.6873457431793213, "learning_rate": 5.801401122135464e-06, "loss": 0.6266, "step": 3499 }, { "epoch": 2.382573179033356, "grad_norm": 1.696513056755066, "learning_rate": 5.799178755789992e-06, "loss": 0.5977, "step": 3500 }, { "epoch": 2.3832539142273657, "grad_norm": 1.7121741771697998, "learning_rate": 5.796956227410491e-06, "loss": 0.5304, "step": 3501 }, { "epoch": 2.383934649421375, "grad_norm": 1.8512837886810303, "learning_rate": 5.7947335374475855e-06, "loss": 0.4947, "step": 3502 }, { "epoch": 2.3846153846153846, "grad_norm": 1.691514253616333, "learning_rate": 5.7925106863519244e-06, "loss": 0.5902, "step": 3503 }, { "epoch": 2.3852961198093943, "grad_norm": 1.7304060459136963, "learning_rate": 5.790287674574193e-06, "loss": 0.5696, "step": 3504 }, { "epoch": 2.3859768550034035, "grad_norm": 1.6517354249954224, "learning_rate": 5.78806450256511e-06, "loss": 0.6251, "step": 3505 }, { "epoch": 2.386657590197413, "grad_norm": 1.5829025506973267, "learning_rate": 5.7858411707754195e-06, "loss": 0.7, "step": 3506 }, { "epoch": 2.387338325391423, "grad_norm": 1.6892437934875488, "learning_rate": 5.78361767965591e-06, "loss": 0.7334, "step": 3507 }, { "epoch": 2.388019060585432, "grad_norm": 1.8680258989334106, "learning_rate": 5.781394029657392e-06, "loss": 0.4294, "step": 3508 }, { "epoch": 2.388699795779442, "grad_norm": 1.6507850885391235, "learning_rate": 5.779170221230713e-06, "loss": 0.6158, "step": 3509 }, { "epoch": 2.3893805309734515, "grad_norm": 1.623785138130188, "learning_rate": 5.776946254826753e-06, "loss": 0.6201, "step": 3510 }, { "epoch": 2.3900612661674607, "grad_norm": 1.747822642326355, "learning_rate": 5.7747221308964204e-06, "loss": 0.3815, "step": 3511 }, { "epoch": 2.3907420013614704, "grad_norm": 1.6328426599502563, "learning_rate": 5.772497849890661e-06, "loss": 0.6954, "step": 3512 }, { "epoch": 2.39142273655548, "grad_norm": 1.7529633045196533, "learning_rate": 5.770273412260444e-06, "loss": 0.5878, "step": 3513 }, { "epoch": 2.3921034717494893, "grad_norm": 1.6198710203170776, "learning_rate": 5.76804881845678e-06, "loss": 0.6892, "step": 3514 }, { "epoch": 2.392784206943499, "grad_norm": 1.732377529144287, "learning_rate": 5.765824068930706e-06, "loss": 0.6272, "step": 3515 }, { "epoch": 2.3934649421375083, "grad_norm": 1.7185535430908203, "learning_rate": 5.76359916413329e-06, "loss": 0.6606, "step": 3516 }, { "epoch": 2.394145677331518, "grad_norm": 1.5393660068511963, "learning_rate": 5.761374104515636e-06, "loss": 0.5916, "step": 3517 }, { "epoch": 2.3948264125255276, "grad_norm": 1.7944257259368896, "learning_rate": 5.759148890528871e-06, "loss": 0.6071, "step": 3518 }, { "epoch": 2.3955071477195373, "grad_norm": 1.6895498037338257, "learning_rate": 5.7569235226241645e-06, "loss": 0.5288, "step": 3519 }, { "epoch": 2.3961878829135466, "grad_norm": 1.830141305923462, "learning_rate": 5.754698001252708e-06, "loss": 0.5807, "step": 3520 }, { "epoch": 2.3968686181075562, "grad_norm": 1.6579687595367432, "learning_rate": 5.752472326865729e-06, "loss": 0.5998, "step": 3521 }, { "epoch": 2.3975493533015655, "grad_norm": 1.7992374897003174, "learning_rate": 5.750246499914483e-06, "loss": 0.4147, "step": 3522 }, { "epoch": 2.398230088495575, "grad_norm": 1.7589335441589355, "learning_rate": 5.748020520850257e-06, "loss": 0.4116, "step": 3523 }, { "epoch": 2.398910823689585, "grad_norm": 1.827810525894165, "learning_rate": 5.745794390124374e-06, "loss": 0.4749, "step": 3524 }, { "epoch": 2.399591558883594, "grad_norm": 1.769269585609436, "learning_rate": 5.743568108188179e-06, "loss": 0.5529, "step": 3525 }, { "epoch": 2.400272294077604, "grad_norm": 1.8177247047424316, "learning_rate": 5.741341675493055e-06, "loss": 0.4651, "step": 3526 }, { "epoch": 2.4009530292716135, "grad_norm": 1.7896478176116943, "learning_rate": 5.739115092490411e-06, "loss": 0.5855, "step": 3527 }, { "epoch": 2.4016337644656227, "grad_norm": 1.700242519378662, "learning_rate": 5.736888359631689e-06, "loss": 0.5977, "step": 3528 }, { "epoch": 2.4023144996596324, "grad_norm": 1.5820130109786987, "learning_rate": 5.7346614773683605e-06, "loss": 0.7409, "step": 3529 }, { "epoch": 2.402995234853642, "grad_norm": 1.7091574668884277, "learning_rate": 5.7324344461519266e-06, "loss": 0.5153, "step": 3530 }, { "epoch": 2.4036759700476513, "grad_norm": 1.5685755014419556, "learning_rate": 5.73020726643392e-06, "loss": 0.6263, "step": 3531 }, { "epoch": 2.404356705241661, "grad_norm": 1.808545708656311, "learning_rate": 5.727979938665904e-06, "loss": 0.4995, "step": 3532 }, { "epoch": 2.4050374404356707, "grad_norm": 1.6483482122421265, "learning_rate": 5.72575246329947e-06, "loss": 0.4548, "step": 3533 }, { "epoch": 2.40571817562968, "grad_norm": 1.7612248659133911, "learning_rate": 5.723524840786239e-06, "loss": 0.5646, "step": 3534 }, { "epoch": 2.4063989108236896, "grad_norm": 1.7114144563674927, "learning_rate": 5.7212970715778635e-06, "loss": 0.6352, "step": 3535 }, { "epoch": 2.4070796460176993, "grad_norm": 1.63351571559906, "learning_rate": 5.719069156126026e-06, "loss": 0.4906, "step": 3536 }, { "epoch": 2.4077603812117085, "grad_norm": 1.697208285331726, "learning_rate": 5.7168410948824375e-06, "loss": 0.5488, "step": 3537 }, { "epoch": 2.4084411164057182, "grad_norm": 1.5348261594772339, "learning_rate": 5.714612888298838e-06, "loss": 0.7941, "step": 3538 }, { "epoch": 2.4091218515997275, "grad_norm": 1.6649630069732666, "learning_rate": 5.712384536826999e-06, "loss": 0.4965, "step": 3539 }, { "epoch": 2.409802586793737, "grad_norm": 1.7353122234344482, "learning_rate": 5.7101560409187205e-06, "loss": 0.4175, "step": 3540 }, { "epoch": 2.410483321987747, "grad_norm": 1.7398337125778198, "learning_rate": 5.70792740102583e-06, "loss": 0.5357, "step": 3541 }, { "epoch": 2.4111640571817565, "grad_norm": 1.6089965105056763, "learning_rate": 5.705698617600186e-06, "loss": 0.6418, "step": 3542 }, { "epoch": 2.4118447923757658, "grad_norm": 1.7616028785705566, "learning_rate": 5.703469691093678e-06, "loss": 0.4923, "step": 3543 }, { "epoch": 2.4125255275697755, "grad_norm": 1.793269157409668, "learning_rate": 5.701240621958219e-06, "loss": 0.6034, "step": 3544 }, { "epoch": 2.4132062627637847, "grad_norm": 1.6649165153503418, "learning_rate": 5.699011410645758e-06, "loss": 0.599, "step": 3545 }, { "epoch": 2.4138869979577944, "grad_norm": 1.6913350820541382, "learning_rate": 5.696782057608265e-06, "loss": 0.6436, "step": 3546 }, { "epoch": 2.414567733151804, "grad_norm": 1.593889832496643, "learning_rate": 5.694552563297746e-06, "loss": 0.6513, "step": 3547 }, { "epoch": 2.4152484683458133, "grad_norm": 1.642962098121643, "learning_rate": 5.692322928166233e-06, "loss": 0.6698, "step": 3548 }, { "epoch": 2.415929203539823, "grad_norm": 1.5915372371673584, "learning_rate": 5.690093152665781e-06, "loss": 0.6994, "step": 3549 }, { "epoch": 2.4166099387338327, "grad_norm": 1.64937162399292, "learning_rate": 5.6878632372484845e-06, "loss": 0.5569, "step": 3550 }, { "epoch": 2.417290673927842, "grad_norm": 1.6895848512649536, "learning_rate": 5.685633182366457e-06, "loss": 0.6182, "step": 3551 }, { "epoch": 2.4179714091218516, "grad_norm": 1.6122103929519653, "learning_rate": 5.683402988471843e-06, "loss": 0.657, "step": 3552 }, { "epoch": 2.4186521443158613, "grad_norm": 1.6482419967651367, "learning_rate": 5.681172656016818e-06, "loss": 0.6657, "step": 3553 }, { "epoch": 2.4193328795098705, "grad_norm": 1.7843023538589478, "learning_rate": 5.678942185453583e-06, "loss": 0.5233, "step": 3554 }, { "epoch": 2.42001361470388, "grad_norm": 1.6980156898498535, "learning_rate": 5.676711577234367e-06, "loss": 0.4989, "step": 3555 }, { "epoch": 2.42069434989789, "grad_norm": 1.5519723892211914, "learning_rate": 5.674480831811426e-06, "loss": 0.6622, "step": 3556 }, { "epoch": 2.421375085091899, "grad_norm": 1.809416651725769, "learning_rate": 5.672249949637048e-06, "loss": 0.5244, "step": 3557 }, { "epoch": 2.422055820285909, "grad_norm": 1.5986659526824951, "learning_rate": 5.670018931163543e-06, "loss": 0.6459, "step": 3558 }, { "epoch": 2.4227365554799185, "grad_norm": 1.5823942422866821, "learning_rate": 5.667787776843253e-06, "loss": 0.6123, "step": 3559 }, { "epoch": 2.4234172906739277, "grad_norm": 1.742327332496643, "learning_rate": 5.665556487128545e-06, "loss": 0.5607, "step": 3560 }, { "epoch": 2.4240980258679374, "grad_norm": 1.620632290840149, "learning_rate": 5.663325062471816e-06, "loss": 0.586, "step": 3561 }, { "epoch": 2.4247787610619467, "grad_norm": 1.6105681657791138, "learning_rate": 5.661093503325487e-06, "loss": 0.7009, "step": 3562 }, { "epoch": 2.4254594962559564, "grad_norm": 1.8003917932510376, "learning_rate": 5.6588618101420105e-06, "loss": 0.6253, "step": 3563 }, { "epoch": 2.426140231449966, "grad_norm": 1.7472947835922241, "learning_rate": 5.656629983373863e-06, "loss": 0.5985, "step": 3564 }, { "epoch": 2.4268209666439757, "grad_norm": 1.5671948194503784, "learning_rate": 5.654398023473546e-06, "loss": 0.6677, "step": 3565 }, { "epoch": 2.427501701837985, "grad_norm": 1.8344810009002686, "learning_rate": 5.652165930893594e-06, "loss": 0.4934, "step": 3566 }, { "epoch": 2.4281824370319947, "grad_norm": 1.5467911958694458, "learning_rate": 5.649933706086564e-06, "loss": 0.6553, "step": 3567 }, { "epoch": 2.428863172226004, "grad_norm": 1.6306895017623901, "learning_rate": 5.6477013495050416e-06, "loss": 0.5618, "step": 3568 }, { "epoch": 2.4295439074200136, "grad_norm": 1.7619037628173828, "learning_rate": 5.64546886160164e-06, "loss": 0.597, "step": 3569 }, { "epoch": 2.4302246426140233, "grad_norm": 1.7765706777572632, "learning_rate": 5.643236242828993e-06, "loss": 0.5023, "step": 3570 }, { "epoch": 2.4309053778080325, "grad_norm": 1.6212549209594727, "learning_rate": 5.641003493639771e-06, "loss": 0.6259, "step": 3571 }, { "epoch": 2.431586113002042, "grad_norm": 1.708725929260254, "learning_rate": 5.638770614486662e-06, "loss": 0.6784, "step": 3572 }, { "epoch": 2.432266848196052, "grad_norm": 1.4468801021575928, "learning_rate": 5.636537605822385e-06, "loss": 0.6006, "step": 3573 }, { "epoch": 2.432947583390061, "grad_norm": 1.6133061647415161, "learning_rate": 5.634304468099685e-06, "loss": 0.6422, "step": 3574 }, { "epoch": 2.433628318584071, "grad_norm": 1.468697190284729, "learning_rate": 5.632071201771328e-06, "loss": 0.6929, "step": 3575 }, { "epoch": 2.4343090537780805, "grad_norm": 1.6301229000091553, "learning_rate": 5.629837807290116e-06, "loss": 0.6308, "step": 3576 }, { "epoch": 2.4349897889720897, "grad_norm": 1.6735575199127197, "learning_rate": 5.627604285108866e-06, "loss": 0.6238, "step": 3577 }, { "epoch": 2.4356705241660994, "grad_norm": 1.5446808338165283, "learning_rate": 5.625370635680429e-06, "loss": 0.7036, "step": 3578 }, { "epoch": 2.436351259360109, "grad_norm": 1.7488200664520264, "learning_rate": 5.623136859457679e-06, "loss": 0.5051, "step": 3579 }, { "epoch": 2.4370319945541183, "grad_norm": 1.7483700513839722, "learning_rate": 5.620902956893515e-06, "loss": 0.5691, "step": 3580 }, { "epoch": 2.437712729748128, "grad_norm": 1.7900806665420532, "learning_rate": 5.618668928440862e-06, "loss": 0.5602, "step": 3581 }, { "epoch": 2.4383934649421377, "grad_norm": 1.6821171045303345, "learning_rate": 5.6164347745526715e-06, "loss": 0.6724, "step": 3582 }, { "epoch": 2.439074200136147, "grad_norm": 1.9014800786972046, "learning_rate": 5.6142004956819184e-06, "loss": 0.6433, "step": 3583 }, { "epoch": 2.4397549353301566, "grad_norm": 1.7302199602127075, "learning_rate": 5.611966092281605e-06, "loss": 0.6217, "step": 3584 }, { "epoch": 2.440435670524166, "grad_norm": 1.5773066282272339, "learning_rate": 5.609731564804758e-06, "loss": 0.6577, "step": 3585 }, { "epoch": 2.4411164057181756, "grad_norm": 1.6758995056152344, "learning_rate": 5.60749691370443e-06, "loss": 0.7405, "step": 3586 }, { "epoch": 2.4417971409121852, "grad_norm": 1.68914794921875, "learning_rate": 5.605262139433696e-06, "loss": 0.5631, "step": 3587 }, { "epoch": 2.442477876106195, "grad_norm": 1.8081867694854736, "learning_rate": 5.603027242445661e-06, "loss": 0.5688, "step": 3588 }, { "epoch": 2.443158611300204, "grad_norm": 1.6906061172485352, "learning_rate": 5.60079222319345e-06, "loss": 0.5408, "step": 3589 }, { "epoch": 2.443839346494214, "grad_norm": 1.7359365224838257, "learning_rate": 5.598557082130216e-06, "loss": 0.531, "step": 3590 }, { "epoch": 2.444520081688223, "grad_norm": 1.6611970663070679, "learning_rate": 5.5963218197091315e-06, "loss": 0.6007, "step": 3591 }, { "epoch": 2.445200816882233, "grad_norm": 1.720848560333252, "learning_rate": 5.594086436383402e-06, "loss": 0.586, "step": 3592 }, { "epoch": 2.4458815520762425, "grad_norm": 1.633137583732605, "learning_rate": 5.5918509326062495e-06, "loss": 0.61, "step": 3593 }, { "epoch": 2.4465622872702517, "grad_norm": 1.7032188177108765, "learning_rate": 5.5896153088309265e-06, "loss": 0.5844, "step": 3594 }, { "epoch": 2.4472430224642614, "grad_norm": 1.788820743560791, "learning_rate": 5.587379565510708e-06, "loss": 0.6154, "step": 3595 }, { "epoch": 2.447923757658271, "grad_norm": 1.6959232091903687, "learning_rate": 5.585143703098888e-06, "loss": 0.5553, "step": 3596 }, { "epoch": 2.4486044928522803, "grad_norm": 1.8056923151016235, "learning_rate": 5.5829077220487935e-06, "loss": 0.559, "step": 3597 }, { "epoch": 2.44928522804629, "grad_norm": 1.717313289642334, "learning_rate": 5.580671622813768e-06, "loss": 0.5368, "step": 3598 }, { "epoch": 2.4499659632402997, "grad_norm": 1.7696808576583862, "learning_rate": 5.578435405847185e-06, "loss": 0.5229, "step": 3599 }, { "epoch": 2.450646698434309, "grad_norm": 1.6926233768463135, "learning_rate": 5.576199071602436e-06, "loss": 0.5916, "step": 3600 }, { "epoch": 2.4513274336283186, "grad_norm": 1.6443920135498047, "learning_rate": 5.573962620532941e-06, "loss": 0.6133, "step": 3601 }, { "epoch": 2.4520081688223283, "grad_norm": 1.741858720779419, "learning_rate": 5.5717260530921435e-06, "loss": 0.5626, "step": 3602 }, { "epoch": 2.4526889040163375, "grad_norm": 1.7003549337387085, "learning_rate": 5.5694893697335065e-06, "loss": 0.6778, "step": 3603 }, { "epoch": 2.4533696392103472, "grad_norm": 1.7429951429367065, "learning_rate": 5.567252570910519e-06, "loss": 0.6018, "step": 3604 }, { "epoch": 2.454050374404357, "grad_norm": 1.6375383138656616, "learning_rate": 5.565015657076693e-06, "loss": 0.6874, "step": 3605 }, { "epoch": 2.454731109598366, "grad_norm": 1.6974499225616455, "learning_rate": 5.562778628685568e-06, "loss": 0.5746, "step": 3606 }, { "epoch": 2.455411844792376, "grad_norm": 1.7929972410202026, "learning_rate": 5.5605414861907005e-06, "loss": 0.4405, "step": 3607 }, { "epoch": 2.456092579986385, "grad_norm": 1.8671679496765137, "learning_rate": 5.558304230045671e-06, "loss": 0.4599, "step": 3608 }, { "epoch": 2.4567733151803948, "grad_norm": 1.6701147556304932, "learning_rate": 5.556066860704088e-06, "loss": 0.5082, "step": 3609 }, { "epoch": 2.4574540503744045, "grad_norm": 1.702349305152893, "learning_rate": 5.553829378619576e-06, "loss": 0.6976, "step": 3610 }, { "epoch": 2.4581347855684137, "grad_norm": 1.6791064739227295, "learning_rate": 5.551591784245789e-06, "loss": 0.7143, "step": 3611 }, { "epoch": 2.4588155207624234, "grad_norm": 1.8315212726593018, "learning_rate": 5.549354078036401e-06, "loss": 0.4892, "step": 3612 }, { "epoch": 2.459496255956433, "grad_norm": 1.7246397733688354, "learning_rate": 5.547116260445105e-06, "loss": 0.5681, "step": 3613 }, { "epoch": 2.4601769911504423, "grad_norm": 1.7568998336791992, "learning_rate": 5.5448783319256235e-06, "loss": 0.4905, "step": 3614 }, { "epoch": 2.460857726344452, "grad_norm": 1.6547613143920898, "learning_rate": 5.542640292931696e-06, "loss": 0.7172, "step": 3615 }, { "epoch": 2.4615384615384617, "grad_norm": 1.78394615650177, "learning_rate": 5.540402143917088e-06, "loss": 0.6201, "step": 3616 }, { "epoch": 2.462219196732471, "grad_norm": 1.6534305810928345, "learning_rate": 5.538163885335584e-06, "loss": 0.6515, "step": 3617 }, { "epoch": 2.4628999319264806, "grad_norm": 1.723238468170166, "learning_rate": 5.535925517640992e-06, "loss": 0.5617, "step": 3618 }, { "epoch": 2.4635806671204903, "grad_norm": 1.6863316297531128, "learning_rate": 5.533687041287146e-06, "loss": 0.5878, "step": 3619 }, { "epoch": 2.4642614023144995, "grad_norm": 1.6530506610870361, "learning_rate": 5.531448456727895e-06, "loss": 0.5987, "step": 3620 }, { "epoch": 2.464942137508509, "grad_norm": 1.6247215270996094, "learning_rate": 5.529209764417115e-06, "loss": 0.5246, "step": 3621 }, { "epoch": 2.465622872702519, "grad_norm": 1.7163945436477661, "learning_rate": 5.526970964808702e-06, "loss": 0.5823, "step": 3622 }, { "epoch": 2.466303607896528, "grad_norm": 1.5970346927642822, "learning_rate": 5.5247320583565745e-06, "loss": 0.6498, "step": 3623 }, { "epoch": 2.466984343090538, "grad_norm": 1.5266828536987305, "learning_rate": 5.522493045514672e-06, "loss": 0.5969, "step": 3624 }, { "epoch": 2.467665078284547, "grad_norm": 1.7769255638122559, "learning_rate": 5.520253926736956e-06, "loss": 0.557, "step": 3625 }, { "epoch": 2.4683458134785567, "grad_norm": 1.7074337005615234, "learning_rate": 5.518014702477409e-06, "loss": 0.6148, "step": 3626 }, { "epoch": 2.4690265486725664, "grad_norm": 1.6280937194824219, "learning_rate": 5.515775373190035e-06, "loss": 0.5535, "step": 3627 }, { "epoch": 2.469707283866576, "grad_norm": 1.6713712215423584, "learning_rate": 5.513535939328861e-06, "loss": 0.522, "step": 3628 }, { "epoch": 2.4703880190605854, "grad_norm": 1.7955822944641113, "learning_rate": 5.511296401347933e-06, "loss": 0.5814, "step": 3629 }, { "epoch": 2.471068754254595, "grad_norm": 1.7720530033111572, "learning_rate": 5.509056759701317e-06, "loss": 0.6269, "step": 3630 }, { "epoch": 2.4717494894486043, "grad_norm": 1.68267822265625, "learning_rate": 5.506817014843104e-06, "loss": 0.5742, "step": 3631 }, { "epoch": 2.472430224642614, "grad_norm": 1.696266770362854, "learning_rate": 5.504577167227403e-06, "loss": 0.5601, "step": 3632 }, { "epoch": 2.4731109598366237, "grad_norm": 1.762630820274353, "learning_rate": 5.502337217308346e-06, "loss": 0.5367, "step": 3633 }, { "epoch": 2.473791695030633, "grad_norm": 1.6847487688064575, "learning_rate": 5.500097165540082e-06, "loss": 0.5327, "step": 3634 }, { "epoch": 2.4744724302246426, "grad_norm": 1.6969794034957886, "learning_rate": 5.497857012376783e-06, "loss": 0.6312, "step": 3635 }, { "epoch": 2.4751531654186523, "grad_norm": 1.8111789226531982, "learning_rate": 5.4956167582726425e-06, "loss": 0.612, "step": 3636 }, { "epoch": 2.4758339006126615, "grad_norm": 1.705918788909912, "learning_rate": 5.493376403681874e-06, "loss": 0.5711, "step": 3637 }, { "epoch": 2.476514635806671, "grad_norm": 1.9070310592651367, "learning_rate": 5.491135949058713e-06, "loss": 0.5115, "step": 3638 }, { "epoch": 2.477195371000681, "grad_norm": 1.7237331867218018, "learning_rate": 5.488895394857407e-06, "loss": 0.7362, "step": 3639 }, { "epoch": 2.47787610619469, "grad_norm": 1.6752125024795532, "learning_rate": 5.486654741532234e-06, "loss": 0.5577, "step": 3640 }, { "epoch": 2.4785568413887, "grad_norm": 1.7968122959136963, "learning_rate": 5.484413989537489e-06, "loss": 0.4473, "step": 3641 }, { "epoch": 2.4792375765827095, "grad_norm": 1.6412549018859863, "learning_rate": 5.482173139327483e-06, "loss": 0.622, "step": 3642 }, { "epoch": 2.4799183117767187, "grad_norm": 1.6981031894683838, "learning_rate": 5.479932191356554e-06, "loss": 0.5856, "step": 3643 }, { "epoch": 2.4805990469707284, "grad_norm": 1.718885064125061, "learning_rate": 5.477691146079049e-06, "loss": 0.4936, "step": 3644 }, { "epoch": 2.481279782164738, "grad_norm": 1.763420581817627, "learning_rate": 5.475450003949349e-06, "loss": 0.5454, "step": 3645 }, { "epoch": 2.4819605173587473, "grad_norm": 1.7298154830932617, "learning_rate": 5.473208765421841e-06, "loss": 0.5281, "step": 3646 }, { "epoch": 2.482641252552757, "grad_norm": 1.6202119588851929, "learning_rate": 5.470967430950942e-06, "loss": 0.6446, "step": 3647 }, { "epoch": 2.4833219877467663, "grad_norm": 1.7169721126556396, "learning_rate": 5.46872600099108e-06, "loss": 0.5808, "step": 3648 }, { "epoch": 2.484002722940776, "grad_norm": 1.7053310871124268, "learning_rate": 5.466484475996709e-06, "loss": 0.5087, "step": 3649 }, { "epoch": 2.4846834581347856, "grad_norm": 1.7339998483657837, "learning_rate": 5.4642428564223016e-06, "loss": 0.5072, "step": 3650 }, { "epoch": 2.4853641933287953, "grad_norm": 1.7533084154129028, "learning_rate": 5.462001142722344e-06, "loss": 0.4713, "step": 3651 }, { "epoch": 2.4860449285228046, "grad_norm": 1.7817580699920654, "learning_rate": 5.459759335351345e-06, "loss": 0.4911, "step": 3652 }, { "epoch": 2.4867256637168142, "grad_norm": 1.6675771474838257, "learning_rate": 5.457517434763835e-06, "loss": 0.6084, "step": 3653 }, { "epoch": 2.4874063989108235, "grad_norm": 1.686280369758606, "learning_rate": 5.45527544141436e-06, "loss": 0.6313, "step": 3654 }, { "epoch": 2.488087134104833, "grad_norm": 1.5442748069763184, "learning_rate": 5.4530333557574864e-06, "loss": 0.7772, "step": 3655 }, { "epoch": 2.488767869298843, "grad_norm": 1.6236342191696167, "learning_rate": 5.450791178247795e-06, "loss": 0.6202, "step": 3656 }, { "epoch": 2.489448604492852, "grad_norm": 1.830047845840454, "learning_rate": 5.448548909339895e-06, "loss": 0.5823, "step": 3657 }, { "epoch": 2.490129339686862, "grad_norm": 1.7002383470535278, "learning_rate": 5.4463065494884015e-06, "loss": 0.5246, "step": 3658 }, { "epoch": 2.4908100748808715, "grad_norm": 1.7711108922958374, "learning_rate": 5.444064099147958e-06, "loss": 0.5609, "step": 3659 }, { "epoch": 2.4914908100748807, "grad_norm": 1.7184243202209473, "learning_rate": 5.441821558773224e-06, "loss": 0.6295, "step": 3660 }, { "epoch": 2.4921715452688904, "grad_norm": 1.7274812459945679, "learning_rate": 5.43957892881887e-06, "loss": 0.57, "step": 3661 }, { "epoch": 2.4928522804629, "grad_norm": 1.713869333267212, "learning_rate": 5.437336209739599e-06, "loss": 0.5179, "step": 3662 }, { "epoch": 2.4935330156569093, "grad_norm": 1.792547583580017, "learning_rate": 5.435093401990118e-06, "loss": 0.5198, "step": 3663 }, { "epoch": 2.494213750850919, "grad_norm": 1.7303553819656372, "learning_rate": 5.43285050602516e-06, "loss": 0.6392, "step": 3664 }, { "epoch": 2.4948944860449287, "grad_norm": 1.7948049306869507, "learning_rate": 5.430607522299471e-06, "loss": 0.5747, "step": 3665 }, { "epoch": 2.495575221238938, "grad_norm": 1.632614254951477, "learning_rate": 5.428364451267823e-06, "loss": 0.7572, "step": 3666 }, { "epoch": 2.4962559564329476, "grad_norm": 1.7079707384109497, "learning_rate": 5.426121293384994e-06, "loss": 0.5909, "step": 3667 }, { "epoch": 2.4969366916269573, "grad_norm": 1.7064765691757202, "learning_rate": 5.4238780491057895e-06, "loss": 0.6143, "step": 3668 }, { "epoch": 2.4976174268209665, "grad_norm": 1.7074707746505737, "learning_rate": 5.421634718885027e-06, "loss": 0.6222, "step": 3669 }, { "epoch": 2.4982981620149762, "grad_norm": 1.707060694694519, "learning_rate": 5.4193913031775445e-06, "loss": 0.5358, "step": 3670 }, { "epoch": 2.4989788972089855, "grad_norm": 1.9160534143447876, "learning_rate": 5.417147802438196e-06, "loss": 0.5149, "step": 3671 }, { "epoch": 2.499659632402995, "grad_norm": 1.7744646072387695, "learning_rate": 5.414904217121852e-06, "loss": 0.5928, "step": 3672 }, { "epoch": 2.500340367597005, "grad_norm": 1.726027011871338, "learning_rate": 5.4126605476834e-06, "loss": 0.6985, "step": 3673 }, { "epoch": 2.5010211027910145, "grad_norm": 1.8082573413848877, "learning_rate": 5.410416794577747e-06, "loss": 0.6199, "step": 3674 }, { "epoch": 2.5017018379850238, "grad_norm": 1.7816988229751587, "learning_rate": 5.408172958259815e-06, "loss": 0.4589, "step": 3675 }, { "epoch": 2.5023825731790335, "grad_norm": 1.714120626449585, "learning_rate": 5.405929039184545e-06, "loss": 0.5834, "step": 3676 }, { "epoch": 2.5030633083730427, "grad_norm": 1.8117706775665283, "learning_rate": 5.40368503780689e-06, "loss": 0.5224, "step": 3677 }, { "epoch": 2.5037440435670524, "grad_norm": 1.887943148612976, "learning_rate": 5.401440954581825e-06, "loss": 0.4472, "step": 3678 }, { "epoch": 2.504424778761062, "grad_norm": 1.7421400547027588, "learning_rate": 5.399196789964337e-06, "loss": 0.5518, "step": 3679 }, { "epoch": 2.5051055139550717, "grad_norm": 1.7633353471755981, "learning_rate": 5.396952544409435e-06, "loss": 0.6988, "step": 3680 }, { "epoch": 2.505786249149081, "grad_norm": 1.8049448728561401, "learning_rate": 5.39470821837214e-06, "loss": 0.5118, "step": 3681 }, { "epoch": 2.5064669843430907, "grad_norm": 1.647830843925476, "learning_rate": 5.392463812307488e-06, "loss": 0.5912, "step": 3682 }, { "epoch": 2.5071477195371, "grad_norm": 1.7611640691757202, "learning_rate": 5.39021932667054e-06, "loss": 0.4233, "step": 3683 }, { "epoch": 2.5078284547311096, "grad_norm": 1.6951605081558228, "learning_rate": 5.38797476191636e-06, "loss": 0.5615, "step": 3684 }, { "epoch": 2.5085091899251193, "grad_norm": 1.7608226537704468, "learning_rate": 5.3857301185000384e-06, "loss": 0.4877, "step": 3685 }, { "epoch": 2.5091899251191285, "grad_norm": 1.7370105981826782, "learning_rate": 5.38348539687668e-06, "loss": 0.6031, "step": 3686 }, { "epoch": 2.509870660313138, "grad_norm": 1.738217830657959, "learning_rate": 5.3812405975013995e-06, "loss": 0.654, "step": 3687 }, { "epoch": 2.5105513955071475, "grad_norm": 1.7915644645690918, "learning_rate": 5.378995720829334e-06, "loss": 0.4998, "step": 3688 }, { "epoch": 2.511232130701157, "grad_norm": 1.728161096572876, "learning_rate": 5.376750767315634e-06, "loss": 0.6975, "step": 3689 }, { "epoch": 2.511912865895167, "grad_norm": 1.971840262413025, "learning_rate": 5.374505737415463e-06, "loss": 0.3801, "step": 3690 }, { "epoch": 2.5125936010891765, "grad_norm": 1.8144725561141968, "learning_rate": 5.372260631584006e-06, "loss": 0.4865, "step": 3691 }, { "epoch": 2.5132743362831858, "grad_norm": 1.6986329555511475, "learning_rate": 5.370015450276456e-06, "loss": 0.6095, "step": 3692 }, { "epoch": 2.5139550714771954, "grad_norm": 1.844321846961975, "learning_rate": 5.3677701939480275e-06, "loss": 0.5358, "step": 3693 }, { "epoch": 2.5146358066712047, "grad_norm": 1.930362582206726, "learning_rate": 5.365524863053946e-06, "loss": 0.4133, "step": 3694 }, { "epoch": 2.5153165418652144, "grad_norm": 1.71235990524292, "learning_rate": 5.363279458049454e-06, "loss": 0.4718, "step": 3695 }, { "epoch": 2.515997277059224, "grad_norm": 1.740886926651001, "learning_rate": 5.36103397938981e-06, "loss": 0.6278, "step": 3696 }, { "epoch": 2.5166780122532337, "grad_norm": 1.7821929454803467, "learning_rate": 5.358788427530286e-06, "loss": 0.5761, "step": 3697 }, { "epoch": 2.517358747447243, "grad_norm": 1.879231572151184, "learning_rate": 5.35654280292617e-06, "loss": 0.4569, "step": 3698 }, { "epoch": 2.5180394826412527, "grad_norm": 1.7703750133514404, "learning_rate": 5.354297106032762e-06, "loss": 0.6342, "step": 3699 }, { "epoch": 2.518720217835262, "grad_norm": 1.6627230644226074, "learning_rate": 5.35205133730538e-06, "loss": 0.6348, "step": 3700 }, { "epoch": 2.5194009530292716, "grad_norm": 1.7426528930664062, "learning_rate": 5.3498054971993525e-06, "loss": 0.5598, "step": 3701 }, { "epoch": 2.5200816882232813, "grad_norm": 1.4719955921173096, "learning_rate": 5.347559586170029e-06, "loss": 0.7588, "step": 3702 }, { "epoch": 2.5207624234172905, "grad_norm": 1.58771812915802, "learning_rate": 5.345313604672767e-06, "loss": 0.5579, "step": 3703 }, { "epoch": 2.5214431586113, "grad_norm": 1.6450424194335938, "learning_rate": 5.34306755316294e-06, "loss": 0.5606, "step": 3704 }, { "epoch": 2.52212389380531, "grad_norm": 1.6124475002288818, "learning_rate": 5.34082143209594e-06, "loss": 0.6752, "step": 3705 }, { "epoch": 2.522804628999319, "grad_norm": 1.8337560892105103, "learning_rate": 5.338575241927165e-06, "loss": 0.4964, "step": 3706 }, { "epoch": 2.523485364193329, "grad_norm": 1.7103341817855835, "learning_rate": 5.336328983112036e-06, "loss": 0.5611, "step": 3707 }, { "epoch": 2.5241660993873385, "grad_norm": 1.7818434238433838, "learning_rate": 5.334082656105979e-06, "loss": 0.6151, "step": 3708 }, { "epoch": 2.5248468345813477, "grad_norm": 1.615209698677063, "learning_rate": 5.3318362613644415e-06, "loss": 0.6472, "step": 3709 }, { "epoch": 2.5255275697753574, "grad_norm": 1.718634009361267, "learning_rate": 5.329589799342881e-06, "loss": 0.5443, "step": 3710 }, { "epoch": 2.5262083049693667, "grad_norm": 1.718174934387207, "learning_rate": 5.327343270496767e-06, "loss": 0.6248, "step": 3711 }, { "epoch": 2.5268890401633763, "grad_norm": 1.7471201419830322, "learning_rate": 5.32509667528159e-06, "loss": 0.527, "step": 3712 }, { "epoch": 2.527569775357386, "grad_norm": 1.7528228759765625, "learning_rate": 5.322850014152842e-06, "loss": 0.758, "step": 3713 }, { "epoch": 2.5282505105513957, "grad_norm": 1.792337417602539, "learning_rate": 5.320603287566039e-06, "loss": 0.5878, "step": 3714 }, { "epoch": 2.528931245745405, "grad_norm": 1.7922006845474243, "learning_rate": 5.318356495976706e-06, "loss": 0.6247, "step": 3715 }, { "epoch": 2.5296119809394146, "grad_norm": 1.6238957643508911, "learning_rate": 5.316109639840381e-06, "loss": 0.6196, "step": 3716 }, { "epoch": 2.530292716133424, "grad_norm": 1.6841360330581665, "learning_rate": 5.313862719612616e-06, "loss": 0.5545, "step": 3717 }, { "epoch": 2.5309734513274336, "grad_norm": 1.7040164470672607, "learning_rate": 5.311615735748974e-06, "loss": 0.5104, "step": 3718 }, { "epoch": 2.5316541865214433, "grad_norm": 1.7029920816421509, "learning_rate": 5.309368688705036e-06, "loss": 0.5171, "step": 3719 }, { "epoch": 2.532334921715453, "grad_norm": 1.4448987245559692, "learning_rate": 5.307121578936387e-06, "loss": 0.6709, "step": 3720 }, { "epoch": 2.533015656909462, "grad_norm": 1.7137458324432373, "learning_rate": 5.3048744068986345e-06, "loss": 0.5706, "step": 3721 }, { "epoch": 2.533696392103472, "grad_norm": 1.6974526643753052, "learning_rate": 5.302627173047392e-06, "loss": 0.5776, "step": 3722 }, { "epoch": 2.534377127297481, "grad_norm": 1.7268774509429932, "learning_rate": 5.300379877838289e-06, "loss": 0.671, "step": 3723 }, { "epoch": 2.535057862491491, "grad_norm": 1.6628003120422363, "learning_rate": 5.298132521726965e-06, "loss": 0.6894, "step": 3724 }, { "epoch": 2.5357385976855005, "grad_norm": 1.6289329528808594, "learning_rate": 5.295885105169072e-06, "loss": 0.6498, "step": 3725 }, { "epoch": 2.5364193328795097, "grad_norm": 1.6588263511657715, "learning_rate": 5.293637628620278e-06, "loss": 0.5137, "step": 3726 }, { "epoch": 2.5371000680735194, "grad_norm": 1.84761643409729, "learning_rate": 5.291390092536259e-06, "loss": 0.4858, "step": 3727 }, { "epoch": 2.537780803267529, "grad_norm": 1.7536122798919678, "learning_rate": 5.289142497372703e-06, "loss": 0.5183, "step": 3728 }, { "epoch": 2.5384615384615383, "grad_norm": 1.6913751363754272, "learning_rate": 5.286894843585315e-06, "loss": 0.6002, "step": 3729 }, { "epoch": 2.539142273655548, "grad_norm": 1.683878779411316, "learning_rate": 5.284647131629803e-06, "loss": 0.5305, "step": 3730 }, { "epoch": 2.5398230088495577, "grad_norm": 1.7403464317321777, "learning_rate": 5.282399361961898e-06, "loss": 0.6618, "step": 3731 }, { "epoch": 2.540503744043567, "grad_norm": 1.6170803308486938, "learning_rate": 5.280151535037334e-06, "loss": 0.6187, "step": 3732 }, { "epoch": 2.5411844792375766, "grad_norm": 1.8470221757888794, "learning_rate": 5.277903651311858e-06, "loss": 0.7126, "step": 3733 }, { "epoch": 2.541865214431586, "grad_norm": 1.6922719478607178, "learning_rate": 5.27565571124123e-06, "loss": 0.6204, "step": 3734 }, { "epoch": 2.5425459496255955, "grad_norm": 1.6968191862106323, "learning_rate": 5.273407715281224e-06, "loss": 0.5482, "step": 3735 }, { "epoch": 2.5432266848196052, "grad_norm": 1.7600852251052856, "learning_rate": 5.2711596638876214e-06, "loss": 0.4809, "step": 3736 }, { "epoch": 2.543907420013615, "grad_norm": 1.729547381401062, "learning_rate": 5.2689115575162166e-06, "loss": 0.6098, "step": 3737 }, { "epoch": 2.544588155207624, "grad_norm": 1.6426759958267212, "learning_rate": 5.266663396622813e-06, "loss": 0.6239, "step": 3738 }, { "epoch": 2.545268890401634, "grad_norm": 1.6746286153793335, "learning_rate": 5.264415181663228e-06, "loss": 0.4524, "step": 3739 }, { "epoch": 2.545949625595643, "grad_norm": 1.6496394872665405, "learning_rate": 5.26216691309329e-06, "loss": 0.4984, "step": 3740 }, { "epoch": 2.5466303607896528, "grad_norm": 1.7982332706451416, "learning_rate": 5.259918591368833e-06, "loss": 0.4186, "step": 3741 }, { "epoch": 2.5473110959836625, "grad_norm": 1.660520315170288, "learning_rate": 5.25767021694571e-06, "loss": 0.6662, "step": 3742 }, { "epoch": 2.547991831177672, "grad_norm": 1.6478878259658813, "learning_rate": 5.255421790279778e-06, "loss": 0.5017, "step": 3743 }, { "epoch": 2.5486725663716814, "grad_norm": 1.6638983488082886, "learning_rate": 5.253173311826907e-06, "loss": 0.5749, "step": 3744 }, { "epoch": 2.549353301565691, "grad_norm": 1.7702723741531372, "learning_rate": 5.250924782042979e-06, "loss": 0.5419, "step": 3745 }, { "epoch": 2.5500340367597003, "grad_norm": 1.663535475730896, "learning_rate": 5.248676201383884e-06, "loss": 0.5515, "step": 3746 }, { "epoch": 2.55071477195371, "grad_norm": 1.798378586769104, "learning_rate": 5.246427570305522e-06, "loss": 0.5916, "step": 3747 }, { "epoch": 2.5513955071477197, "grad_norm": 1.6040608882904053, "learning_rate": 5.244178889263806e-06, "loss": 0.5838, "step": 3748 }, { "epoch": 2.552076242341729, "grad_norm": 1.6591120958328247, "learning_rate": 5.241930158714658e-06, "loss": 0.6677, "step": 3749 }, { "epoch": 2.5527569775357386, "grad_norm": 1.6274300813674927, "learning_rate": 5.23968137911401e-06, "loss": 0.6315, "step": 3750 }, { "epoch": 2.5534377127297483, "grad_norm": 1.5610159635543823, "learning_rate": 5.237432550917799e-06, "loss": 0.6585, "step": 3751 }, { "epoch": 2.5541184479237575, "grad_norm": 1.6597546339035034, "learning_rate": 5.235183674581984e-06, "loss": 0.5103, "step": 3752 }, { "epoch": 2.554799183117767, "grad_norm": 1.6165599822998047, "learning_rate": 5.232934750562519e-06, "loss": 0.6789, "step": 3753 }, { "epoch": 2.555479918311777, "grad_norm": 1.6388723850250244, "learning_rate": 5.23068577931538e-06, "loss": 0.6086, "step": 3754 }, { "epoch": 2.556160653505786, "grad_norm": 1.8268043994903564, "learning_rate": 5.228436761296545e-06, "loss": 0.541, "step": 3755 }, { "epoch": 2.556841388699796, "grad_norm": 1.6392109394073486, "learning_rate": 5.226187696962002e-06, "loss": 0.5544, "step": 3756 }, { "epoch": 2.557522123893805, "grad_norm": 1.5854971408843994, "learning_rate": 5.223938586767755e-06, "loss": 0.6736, "step": 3757 }, { "epoch": 2.5582028590878148, "grad_norm": 1.5843344926834106, "learning_rate": 5.2216894311698096e-06, "loss": 0.7836, "step": 3758 }, { "epoch": 2.5588835942818244, "grad_norm": 1.8525503873825073, "learning_rate": 5.219440230624183e-06, "loss": 0.5049, "step": 3759 }, { "epoch": 2.559564329475834, "grad_norm": 1.7629386186599731, "learning_rate": 5.2171909855869045e-06, "loss": 0.569, "step": 3760 }, { "epoch": 2.5602450646698434, "grad_norm": 1.689492106437683, "learning_rate": 5.214941696514008e-06, "loss": 0.4886, "step": 3761 }, { "epoch": 2.560925799863853, "grad_norm": 1.7168753147125244, "learning_rate": 5.21269236386154e-06, "loss": 0.5475, "step": 3762 }, { "epoch": 2.5616065350578623, "grad_norm": 1.6751424074172974, "learning_rate": 5.210442988085553e-06, "loss": 0.6652, "step": 3763 }, { "epoch": 2.562287270251872, "grad_norm": 1.6478068828582764, "learning_rate": 5.208193569642108e-06, "loss": 0.6019, "step": 3764 }, { "epoch": 2.5629680054458817, "grad_norm": 1.7705786228179932, "learning_rate": 5.2059441089872795e-06, "loss": 0.5624, "step": 3765 }, { "epoch": 2.5636487406398913, "grad_norm": 1.7647080421447754, "learning_rate": 5.203694606577145e-06, "loss": 0.7078, "step": 3766 }, { "epoch": 2.5643294758339006, "grad_norm": 1.8972915410995483, "learning_rate": 5.201445062867794e-06, "loss": 0.5315, "step": 3767 }, { "epoch": 2.5650102110279103, "grad_norm": 1.6202623844146729, "learning_rate": 5.199195478315321e-06, "loss": 0.704, "step": 3768 }, { "epoch": 2.5656909462219195, "grad_norm": 1.7599965333938599, "learning_rate": 5.196945853375832e-06, "loss": 0.5258, "step": 3769 }, { "epoch": 2.566371681415929, "grad_norm": 1.731441855430603, "learning_rate": 5.19469618850544e-06, "loss": 0.5711, "step": 3770 }, { "epoch": 2.567052416609939, "grad_norm": 1.6255989074707031, "learning_rate": 5.192446484160267e-06, "loss": 0.6603, "step": 3771 }, { "epoch": 2.567733151803948, "grad_norm": 1.809689998626709, "learning_rate": 5.190196740796441e-06, "loss": 0.543, "step": 3772 }, { "epoch": 2.568413886997958, "grad_norm": 1.5359143018722534, "learning_rate": 5.187946958870097e-06, "loss": 0.6659, "step": 3773 }, { "epoch": 2.569094622191967, "grad_norm": 1.5287485122680664, "learning_rate": 5.1856971388373824e-06, "loss": 0.7158, "step": 3774 }, { "epoch": 2.5697753573859767, "grad_norm": 1.6561975479125977, "learning_rate": 5.183447281154449e-06, "loss": 0.5375, "step": 3775 }, { "epoch": 2.5704560925799864, "grad_norm": 1.832356333732605, "learning_rate": 5.181197386277458e-06, "loss": 0.4786, "step": 3776 }, { "epoch": 2.571136827773996, "grad_norm": 1.755573034286499, "learning_rate": 5.178947454662573e-06, "loss": 0.4285, "step": 3777 }, { "epoch": 2.5718175629680053, "grad_norm": 1.6847929954528809, "learning_rate": 5.176697486765976e-06, "loss": 0.574, "step": 3778 }, { "epoch": 2.572498298162015, "grad_norm": 1.7976229190826416, "learning_rate": 5.174447483043843e-06, "loss": 0.4498, "step": 3779 }, { "epoch": 2.5731790333560243, "grad_norm": 1.6915749311447144, "learning_rate": 5.172197443952367e-06, "loss": 0.5792, "step": 3780 }, { "epoch": 2.573859768550034, "grad_norm": 1.71116042137146, "learning_rate": 5.1699473699477465e-06, "loss": 0.6171, "step": 3781 }, { "epoch": 2.5745405037440436, "grad_norm": 1.8246840238571167, "learning_rate": 5.16769726148618e-06, "loss": 0.6029, "step": 3782 }, { "epoch": 2.5752212389380533, "grad_norm": 1.724709153175354, "learning_rate": 5.165447119023884e-06, "loss": 0.6009, "step": 3783 }, { "epoch": 2.5759019741320626, "grad_norm": 1.7204158306121826, "learning_rate": 5.163196943017074e-06, "loss": 0.5525, "step": 3784 }, { "epoch": 2.5765827093260723, "grad_norm": 1.5297001600265503, "learning_rate": 5.160946733921974e-06, "loss": 0.6209, "step": 3785 }, { "epoch": 2.5772634445200815, "grad_norm": 1.6555954217910767, "learning_rate": 5.1586964921948165e-06, "loss": 0.7158, "step": 3786 }, { "epoch": 2.577944179714091, "grad_norm": 1.8144835233688354, "learning_rate": 5.156446218291839e-06, "loss": 0.5324, "step": 3787 }, { "epoch": 2.578624914908101, "grad_norm": 1.7372273206710815, "learning_rate": 5.154195912669288e-06, "loss": 0.4944, "step": 3788 }, { "epoch": 2.5793056501021105, "grad_norm": 1.5244060754776, "learning_rate": 5.151945575783413e-06, "loss": 0.7394, "step": 3789 }, { "epoch": 2.57998638529612, "grad_norm": 1.9631112813949585, "learning_rate": 5.149695208090469e-06, "loss": 0.4913, "step": 3790 }, { "epoch": 2.5806671204901295, "grad_norm": 1.5188795328140259, "learning_rate": 5.147444810046722e-06, "loss": 0.6331, "step": 3791 }, { "epoch": 2.5813478556841387, "grad_norm": 1.7542359828948975, "learning_rate": 5.145194382108442e-06, "loss": 0.4093, "step": 3792 }, { "epoch": 2.5820285908781484, "grad_norm": 1.558131456375122, "learning_rate": 5.142943924731904e-06, "loss": 0.6357, "step": 3793 }, { "epoch": 2.582709326072158, "grad_norm": 1.789465308189392, "learning_rate": 5.14069343837339e-06, "loss": 0.5428, "step": 3794 }, { "epoch": 2.5833900612661673, "grad_norm": 1.6649106740951538, "learning_rate": 5.138442923489188e-06, "loss": 0.6166, "step": 3795 }, { "epoch": 2.584070796460177, "grad_norm": 1.7054330110549927, "learning_rate": 5.136192380535591e-06, "loss": 0.5431, "step": 3796 }, { "epoch": 2.5847515316541863, "grad_norm": 1.7223711013793945, "learning_rate": 5.133941809968897e-06, "loss": 0.4982, "step": 3797 }, { "epoch": 2.585432266848196, "grad_norm": 1.7428172826766968, "learning_rate": 5.131691212245414e-06, "loss": 0.5881, "step": 3798 }, { "epoch": 2.5861130020422056, "grad_norm": 1.7134242057800293, "learning_rate": 5.129440587821446e-06, "loss": 0.5721, "step": 3799 }, { "epoch": 2.5867937372362153, "grad_norm": 1.5758615732192993, "learning_rate": 5.1271899371533155e-06, "loss": 0.6818, "step": 3800 }, { "epoch": 2.5874744724302245, "grad_norm": 1.6370279788970947, "learning_rate": 5.124939260697338e-06, "loss": 0.6281, "step": 3801 }, { "epoch": 2.5881552076242342, "grad_norm": 1.59638249874115, "learning_rate": 5.122688558909844e-06, "loss": 0.6091, "step": 3802 }, { "epoch": 2.5888359428182435, "grad_norm": 1.7666176557540894, "learning_rate": 5.120437832247162e-06, "loss": 0.5357, "step": 3803 }, { "epoch": 2.589516678012253, "grad_norm": 1.7017161846160889, "learning_rate": 5.118187081165628e-06, "loss": 0.6733, "step": 3804 }, { "epoch": 2.590197413206263, "grad_norm": 1.7661941051483154, "learning_rate": 5.115936306121587e-06, "loss": 0.6244, "step": 3805 }, { "epoch": 2.5908781484002725, "grad_norm": 1.6292527914047241, "learning_rate": 5.113685507571381e-06, "loss": 0.6836, "step": 3806 }, { "epoch": 2.5915588835942818, "grad_norm": 1.5703858137130737, "learning_rate": 5.111434685971362e-06, "loss": 0.6151, "step": 3807 }, { "epoch": 2.5922396187882915, "grad_norm": 1.5851328372955322, "learning_rate": 5.109183841777887e-06, "loss": 0.6305, "step": 3808 }, { "epoch": 2.5929203539823007, "grad_norm": 1.4936355352401733, "learning_rate": 5.1069329754473165e-06, "loss": 0.7156, "step": 3809 }, { "epoch": 2.5936010891763104, "grad_norm": 1.7762417793273926, "learning_rate": 5.104682087436011e-06, "loss": 0.6252, "step": 3810 }, { "epoch": 2.59428182437032, "grad_norm": 1.7700684070587158, "learning_rate": 5.102431178200344e-06, "loss": 0.5666, "step": 3811 }, { "epoch": 2.5949625595643298, "grad_norm": 1.604324460029602, "learning_rate": 5.100180248196688e-06, "loss": 0.6159, "step": 3812 }, { "epoch": 2.595643294758339, "grad_norm": 1.793752670288086, "learning_rate": 5.097929297881419e-06, "loss": 0.6294, "step": 3813 }, { "epoch": 2.5963240299523487, "grad_norm": 1.6006618738174438, "learning_rate": 5.09567832771092e-06, "loss": 0.6881, "step": 3814 }, { "epoch": 2.597004765146358, "grad_norm": 1.6706746816635132, "learning_rate": 5.093427338141576e-06, "loss": 0.5437, "step": 3815 }, { "epoch": 2.5976855003403676, "grad_norm": 1.7557477951049805, "learning_rate": 5.091176329629775e-06, "loss": 0.4073, "step": 3816 }, { "epoch": 2.5983662355343773, "grad_norm": 1.613722324371338, "learning_rate": 5.088925302631914e-06, "loss": 0.5691, "step": 3817 }, { "epoch": 2.5990469707283865, "grad_norm": 1.7079614400863647, "learning_rate": 5.0866742576043885e-06, "loss": 0.6786, "step": 3818 }, { "epoch": 2.599727705922396, "grad_norm": 1.7723429203033447, "learning_rate": 5.0844231950036e-06, "loss": 0.5106, "step": 3819 }, { "epoch": 2.6004084411164055, "grad_norm": 1.7137336730957031, "learning_rate": 5.082172115285951e-06, "loss": 0.5159, "step": 3820 }, { "epoch": 2.601089176310415, "grad_norm": 1.653051733970642, "learning_rate": 5.079921018907852e-06, "loss": 0.6991, "step": 3821 }, { "epoch": 2.601769911504425, "grad_norm": 1.5787837505340576, "learning_rate": 5.0776699063257125e-06, "loss": 0.7407, "step": 3822 }, { "epoch": 2.6024506466984345, "grad_norm": 1.7534099817276, "learning_rate": 5.075418777995948e-06, "loss": 0.6838, "step": 3823 }, { "epoch": 2.6031313818924438, "grad_norm": 1.8310270309448242, "learning_rate": 5.073167634374977e-06, "loss": 0.6009, "step": 3824 }, { "epoch": 2.6038121170864534, "grad_norm": 1.567888617515564, "learning_rate": 5.070916475919217e-06, "loss": 0.6532, "step": 3825 }, { "epoch": 2.6044928522804627, "grad_norm": 1.8093918561935425, "learning_rate": 5.068665303085095e-06, "loss": 0.6237, "step": 3826 }, { "epoch": 2.6051735874744724, "grad_norm": 1.7333143949508667, "learning_rate": 5.066414116329038e-06, "loss": 0.4598, "step": 3827 }, { "epoch": 2.605854322668482, "grad_norm": 1.741994023323059, "learning_rate": 5.0641629161074735e-06, "loss": 0.4409, "step": 3828 }, { "epoch": 2.6065350578624917, "grad_norm": 1.779481053352356, "learning_rate": 5.061911702876835e-06, "loss": 0.5275, "step": 3829 }, { "epoch": 2.607215793056501, "grad_norm": 1.7925547361373901, "learning_rate": 5.059660477093558e-06, "loss": 0.5348, "step": 3830 }, { "epoch": 2.6078965282505107, "grad_norm": 1.7526503801345825, "learning_rate": 5.057409239214079e-06, "loss": 0.705, "step": 3831 }, { "epoch": 2.60857726344452, "grad_norm": 1.803096055984497, "learning_rate": 5.055157989694839e-06, "loss": 0.5578, "step": 3832 }, { "epoch": 2.6092579986385296, "grad_norm": 1.7625000476837158, "learning_rate": 5.05290672899228e-06, "loss": 0.4748, "step": 3833 }, { "epoch": 2.6099387338325393, "grad_norm": 1.6794730424880981, "learning_rate": 5.050655457562845e-06, "loss": 0.5875, "step": 3834 }, { "epoch": 2.6106194690265485, "grad_norm": 1.7570174932479858, "learning_rate": 5.048404175862983e-06, "loss": 0.5283, "step": 3835 }, { "epoch": 2.611300204220558, "grad_norm": 1.6958256959915161, "learning_rate": 5.046152884349143e-06, "loss": 0.5832, "step": 3836 }, { "epoch": 2.611980939414568, "grad_norm": 1.7319446802139282, "learning_rate": 5.043901583477774e-06, "loss": 0.5278, "step": 3837 }, { "epoch": 2.612661674608577, "grad_norm": 1.7313792705535889, "learning_rate": 5.0416502737053284e-06, "loss": 0.5731, "step": 3838 }, { "epoch": 2.613342409802587, "grad_norm": 1.9262514114379883, "learning_rate": 5.039398955488264e-06, "loss": 0.4842, "step": 3839 }, { "epoch": 2.6140231449965965, "grad_norm": 1.6944280862808228, "learning_rate": 5.037147629283035e-06, "loss": 0.6465, "step": 3840 }, { "epoch": 2.6147038801906057, "grad_norm": 1.5535814762115479, "learning_rate": 5.034896295546098e-06, "loss": 0.5981, "step": 3841 }, { "epoch": 2.6153846153846154, "grad_norm": 1.7888623476028442, "learning_rate": 5.032644954733913e-06, "loss": 0.5131, "step": 3842 }, { "epoch": 2.6160653505786247, "grad_norm": 1.706917405128479, "learning_rate": 5.030393607302945e-06, "loss": 0.6428, "step": 3843 }, { "epoch": 2.6167460857726343, "grad_norm": 1.7659331560134888, "learning_rate": 5.02814225370965e-06, "loss": 0.6119, "step": 3844 }, { "epoch": 2.617426820966644, "grad_norm": 1.8376963138580322, "learning_rate": 5.0258908944104965e-06, "loss": 0.5143, "step": 3845 }, { "epoch": 2.6181075561606537, "grad_norm": 1.5884881019592285, "learning_rate": 5.023639529861944e-06, "loss": 0.7348, "step": 3846 }, { "epoch": 2.618788291354663, "grad_norm": 1.5604289770126343, "learning_rate": 5.021388160520464e-06, "loss": 0.582, "step": 3847 }, { "epoch": 2.6194690265486726, "grad_norm": 1.6887869834899902, "learning_rate": 5.019136786842519e-06, "loss": 0.5267, "step": 3848 }, { "epoch": 2.620149761742682, "grad_norm": 1.6212528944015503, "learning_rate": 5.016885409284576e-06, "loss": 0.6106, "step": 3849 }, { "epoch": 2.6208304969366916, "grad_norm": 1.8258442878723145, "learning_rate": 5.014634028303108e-06, "loss": 0.4452, "step": 3850 }, { "epoch": 2.6215112321307013, "grad_norm": 1.854315161705017, "learning_rate": 5.012382644354579e-06, "loss": 0.508, "step": 3851 }, { "epoch": 2.622191967324711, "grad_norm": 1.8091349601745605, "learning_rate": 5.010131257895462e-06, "loss": 0.5549, "step": 3852 }, { "epoch": 2.62287270251872, "grad_norm": 1.8791149854660034, "learning_rate": 5.007879869382224e-06, "loss": 0.5368, "step": 3853 }, { "epoch": 2.62355343771273, "grad_norm": 1.6504672765731812, "learning_rate": 5.005628479271338e-06, "loss": 0.6352, "step": 3854 }, { "epoch": 2.624234172906739, "grad_norm": 1.6875332593917847, "learning_rate": 5.003377088019274e-06, "loss": 0.5447, "step": 3855 }, { "epoch": 2.624914908100749, "grad_norm": 1.726784110069275, "learning_rate": 5.001125696082504e-06, "loss": 0.6489, "step": 3856 }, { "epoch": 2.6255956432947585, "grad_norm": 1.6747872829437256, "learning_rate": 4.998874303917497e-06, "loss": 0.6541, "step": 3857 }, { "epoch": 2.6262763784887677, "grad_norm": 1.7601321935653687, "learning_rate": 4.996622911980726e-06, "loss": 0.5092, "step": 3858 }, { "epoch": 2.6269571136827774, "grad_norm": 1.7315289974212646, "learning_rate": 4.994371520728663e-06, "loss": 0.5645, "step": 3859 }, { "epoch": 2.627637848876787, "grad_norm": 1.7075157165527344, "learning_rate": 4.992120130617777e-06, "loss": 0.6369, "step": 3860 }, { "epoch": 2.6283185840707963, "grad_norm": 1.5417262315750122, "learning_rate": 4.98986874210454e-06, "loss": 0.7234, "step": 3861 }, { "epoch": 2.628999319264806, "grad_norm": 1.7281526327133179, "learning_rate": 4.987617355645423e-06, "loss": 0.6054, "step": 3862 }, { "epoch": 2.6296800544588157, "grad_norm": 1.794398546218872, "learning_rate": 4.985365971696895e-06, "loss": 0.5963, "step": 3863 }, { "epoch": 2.630360789652825, "grad_norm": 1.8271799087524414, "learning_rate": 4.983114590715425e-06, "loss": 0.5207, "step": 3864 }, { "epoch": 2.6310415248468346, "grad_norm": 1.864652395248413, "learning_rate": 4.980863213157484e-06, "loss": 0.4674, "step": 3865 }, { "epoch": 2.631722260040844, "grad_norm": 1.7294293642044067, "learning_rate": 4.978611839479537e-06, "loss": 0.5011, "step": 3866 }, { "epoch": 2.6324029952348535, "grad_norm": 1.67308509349823, "learning_rate": 4.9763604701380565e-06, "loss": 0.5719, "step": 3867 }, { "epoch": 2.6330837304288632, "grad_norm": 1.7134169340133667, "learning_rate": 4.974109105589505e-06, "loss": 0.5701, "step": 3868 }, { "epoch": 2.633764465622873, "grad_norm": 1.7136532068252563, "learning_rate": 4.9718577462903515e-06, "loss": 0.5708, "step": 3869 }, { "epoch": 2.634445200816882, "grad_norm": 1.5487067699432373, "learning_rate": 4.9696063926970574e-06, "loss": 0.6569, "step": 3870 }, { "epoch": 2.635125936010892, "grad_norm": 1.9308658838272095, "learning_rate": 4.967355045266088e-06, "loss": 0.5913, "step": 3871 }, { "epoch": 2.635806671204901, "grad_norm": 1.6160567998886108, "learning_rate": 4.965103704453904e-06, "loss": 0.7133, "step": 3872 }, { "epoch": 2.6364874063989108, "grad_norm": 1.524430751800537, "learning_rate": 4.962852370716968e-06, "loss": 0.7416, "step": 3873 }, { "epoch": 2.6371681415929205, "grad_norm": 1.7889617681503296, "learning_rate": 4.960601044511737e-06, "loss": 0.5502, "step": 3874 }, { "epoch": 2.63784887678693, "grad_norm": 1.6825861930847168, "learning_rate": 4.9583497262946715e-06, "loss": 0.595, "step": 3875 }, { "epoch": 2.6385296119809394, "grad_norm": 1.648514986038208, "learning_rate": 4.956098416522228e-06, "loss": 0.642, "step": 3876 }, { "epoch": 2.639210347174949, "grad_norm": 1.6648846864700317, "learning_rate": 4.953847115650859e-06, "loss": 0.6695, "step": 3877 }, { "epoch": 2.6398910823689583, "grad_norm": 1.6209392547607422, "learning_rate": 4.951595824137019e-06, "loss": 0.6726, "step": 3878 }, { "epoch": 2.640571817562968, "grad_norm": 1.6425635814666748, "learning_rate": 4.9493445424371564e-06, "loss": 0.5627, "step": 3879 }, { "epoch": 2.6412525527569777, "grad_norm": 1.6326857805252075, "learning_rate": 4.947093271007723e-06, "loss": 0.6733, "step": 3880 }, { "epoch": 2.641933287950987, "grad_norm": 1.7174943685531616, "learning_rate": 4.944842010305163e-06, "loss": 0.6296, "step": 3881 }, { "epoch": 2.6426140231449966, "grad_norm": 1.8318774700164795, "learning_rate": 4.942590760785923e-06, "loss": 0.5054, "step": 3882 }, { "epoch": 2.6432947583390063, "grad_norm": 1.7425953149795532, "learning_rate": 4.940339522906443e-06, "loss": 0.5265, "step": 3883 }, { "epoch": 2.6439754935330155, "grad_norm": 1.7096306085586548, "learning_rate": 4.938088297123165e-06, "loss": 0.5442, "step": 3884 }, { "epoch": 2.644656228727025, "grad_norm": 1.7911659479141235, "learning_rate": 4.935837083892528e-06, "loss": 0.4418, "step": 3885 }, { "epoch": 2.645336963921035, "grad_norm": 1.6003259420394897, "learning_rate": 4.933585883670964e-06, "loss": 0.5958, "step": 3886 }, { "epoch": 2.646017699115044, "grad_norm": 1.7188242673873901, "learning_rate": 4.931334696914906e-06, "loss": 0.6163, "step": 3887 }, { "epoch": 2.646698434309054, "grad_norm": 1.831223726272583, "learning_rate": 4.929083524080785e-06, "loss": 0.4784, "step": 3888 }, { "epoch": 2.647379169503063, "grad_norm": 1.6122623682022095, "learning_rate": 4.926832365625026e-06, "loss": 0.5602, "step": 3889 }, { "epoch": 2.6480599046970728, "grad_norm": 1.5942084789276123, "learning_rate": 4.924581222004053e-06, "loss": 0.5756, "step": 3890 }, { "epoch": 2.6487406398910824, "grad_norm": 1.6503676176071167, "learning_rate": 4.9223300936742874e-06, "loss": 0.6465, "step": 3891 }, { "epoch": 2.649421375085092, "grad_norm": 1.7266265153884888, "learning_rate": 4.920078981092149e-06, "loss": 0.5258, "step": 3892 }, { "epoch": 2.6501021102791014, "grad_norm": 1.6614508628845215, "learning_rate": 4.917827884714049e-06, "loss": 0.6252, "step": 3893 }, { "epoch": 2.650782845473111, "grad_norm": 1.6532089710235596, "learning_rate": 4.9155768049964006e-06, "loss": 0.6512, "step": 3894 }, { "epoch": 2.6514635806671203, "grad_norm": 1.6794333457946777, "learning_rate": 4.913325742395612e-06, "loss": 0.5444, "step": 3895 }, { "epoch": 2.65214431586113, "grad_norm": 1.6164195537567139, "learning_rate": 4.911074697368087e-06, "loss": 0.5982, "step": 3896 }, { "epoch": 2.6528250510551397, "grad_norm": 1.6749588251113892, "learning_rate": 4.908823670370225e-06, "loss": 0.6878, "step": 3897 }, { "epoch": 2.6535057862491493, "grad_norm": 1.7585844993591309, "learning_rate": 4.906572661858428e-06, "loss": 0.5726, "step": 3898 }, { "epoch": 2.6541865214431586, "grad_norm": 1.6098248958587646, "learning_rate": 4.904321672289083e-06, "loss": 0.6235, "step": 3899 }, { "epoch": 2.6548672566371683, "grad_norm": 1.7065726518630981, "learning_rate": 4.902070702118582e-06, "loss": 0.6418, "step": 3900 }, { "epoch": 2.6555479918311775, "grad_norm": 1.7469130754470825, "learning_rate": 4.899819751803313e-06, "loss": 0.4268, "step": 3901 }, { "epoch": 2.656228727025187, "grad_norm": 1.7076210975646973, "learning_rate": 4.897568821799657e-06, "loss": 0.5969, "step": 3902 }, { "epoch": 2.656909462219197, "grad_norm": 1.6600667238235474, "learning_rate": 4.895317912563989e-06, "loss": 0.6999, "step": 3903 }, { "epoch": 2.657590197413206, "grad_norm": 1.696614146232605, "learning_rate": 4.893067024552685e-06, "loss": 0.606, "step": 3904 }, { "epoch": 2.658270932607216, "grad_norm": 1.6494710445404053, "learning_rate": 4.890816158222114e-06, "loss": 0.6662, "step": 3905 }, { "epoch": 2.658951667801225, "grad_norm": 1.8116633892059326, "learning_rate": 4.8885653140286384e-06, "loss": 0.4827, "step": 3906 }, { "epoch": 2.6596324029952347, "grad_norm": 1.6795188188552856, "learning_rate": 4.886314492428621e-06, "loss": 0.5103, "step": 3907 }, { "epoch": 2.6603131381892444, "grad_norm": 1.5861074924468994, "learning_rate": 4.884063693878416e-06, "loss": 0.7136, "step": 3908 }, { "epoch": 2.660993873383254, "grad_norm": 1.8750360012054443, "learning_rate": 4.881812918834372e-06, "loss": 0.5046, "step": 3909 }, { "epoch": 2.6616746085772633, "grad_norm": 1.6551302671432495, "learning_rate": 4.879562167752839e-06, "loss": 0.6245, "step": 3910 }, { "epoch": 2.662355343771273, "grad_norm": 1.6522027254104614, "learning_rate": 4.877311441090157e-06, "loss": 0.5314, "step": 3911 }, { "epoch": 2.6630360789652823, "grad_norm": 1.6348062753677368, "learning_rate": 4.875060739302664e-06, "loss": 0.6939, "step": 3912 }, { "epoch": 2.663716814159292, "grad_norm": 1.6162123680114746, "learning_rate": 4.872810062846687e-06, "loss": 0.6266, "step": 3913 }, { "epoch": 2.6643975493533016, "grad_norm": 1.6554820537567139, "learning_rate": 4.870559412178556e-06, "loss": 0.5178, "step": 3914 }, { "epoch": 2.6650782845473113, "grad_norm": 1.7326204776763916, "learning_rate": 4.86830878775459e-06, "loss": 0.6055, "step": 3915 }, { "epoch": 2.6657590197413206, "grad_norm": 1.5215128660202026, "learning_rate": 4.866058190031104e-06, "loss": 0.6597, "step": 3916 }, { "epoch": 2.6664397549353303, "grad_norm": 1.6736525297164917, "learning_rate": 4.863807619464409e-06, "loss": 0.6524, "step": 3917 }, { "epoch": 2.6671204901293395, "grad_norm": 1.678259253501892, "learning_rate": 4.8615570765108124e-06, "loss": 0.5923, "step": 3918 }, { "epoch": 2.667801225323349, "grad_norm": 1.717858076095581, "learning_rate": 4.85930656162661e-06, "loss": 0.6865, "step": 3919 }, { "epoch": 2.668481960517359, "grad_norm": 1.754798173904419, "learning_rate": 4.8570560752680966e-06, "loss": 0.4837, "step": 3920 }, { "epoch": 2.6691626957113685, "grad_norm": 1.7670968770980835, "learning_rate": 4.854805617891559e-06, "loss": 0.6932, "step": 3921 }, { "epoch": 2.669843430905378, "grad_norm": 1.6954588890075684, "learning_rate": 4.852555189953279e-06, "loss": 0.6647, "step": 3922 }, { "epoch": 2.6705241660993875, "grad_norm": 1.6583397388458252, "learning_rate": 4.850304791909532e-06, "loss": 0.6224, "step": 3923 }, { "epoch": 2.6712049012933967, "grad_norm": 1.59857976436615, "learning_rate": 4.848054424216591e-06, "loss": 0.6351, "step": 3924 }, { "epoch": 2.6718856364874064, "grad_norm": 1.5351290702819824, "learning_rate": 4.845804087330714e-06, "loss": 0.7054, "step": 3925 }, { "epoch": 2.672566371681416, "grad_norm": 1.5864161252975464, "learning_rate": 4.843553781708161e-06, "loss": 0.7097, "step": 3926 }, { "epoch": 2.6732471068754253, "grad_norm": 1.6152215003967285, "learning_rate": 4.841303507805184e-06, "loss": 0.6159, "step": 3927 }, { "epoch": 2.673927842069435, "grad_norm": 1.651772379875183, "learning_rate": 4.8390532660780275e-06, "loss": 0.5271, "step": 3928 }, { "epoch": 2.6746085772634443, "grad_norm": 1.741681694984436, "learning_rate": 4.836803056982928e-06, "loss": 0.4966, "step": 3929 }, { "epoch": 2.675289312457454, "grad_norm": 1.7039567232131958, "learning_rate": 4.834552880976117e-06, "loss": 0.6131, "step": 3930 }, { "epoch": 2.6759700476514636, "grad_norm": 1.657138705253601, "learning_rate": 4.832302738513822e-06, "loss": 0.6123, "step": 3931 }, { "epoch": 2.6766507828454733, "grad_norm": 1.6567074060440063, "learning_rate": 4.830052630052257e-06, "loss": 0.6722, "step": 3932 }, { "epoch": 2.6773315180394825, "grad_norm": 1.5725886821746826, "learning_rate": 4.827802556047634e-06, "loss": 0.5909, "step": 3933 }, { "epoch": 2.6780122532334922, "grad_norm": 1.731770634651184, "learning_rate": 4.825552516956159e-06, "loss": 0.5496, "step": 3934 }, { "epoch": 2.6786929884275015, "grad_norm": 1.6026813983917236, "learning_rate": 4.823302513234026e-06, "loss": 0.6599, "step": 3935 }, { "epoch": 2.679373723621511, "grad_norm": 1.7906028032302856, "learning_rate": 4.821052545337427e-06, "loss": 0.4868, "step": 3936 }, { "epoch": 2.680054458815521, "grad_norm": 1.6290940046310425, "learning_rate": 4.818802613722544e-06, "loss": 0.6275, "step": 3937 }, { "epoch": 2.6807351940095305, "grad_norm": 1.8332734107971191, "learning_rate": 4.816552718845553e-06, "loss": 0.4562, "step": 3938 }, { "epoch": 2.6814159292035398, "grad_norm": 1.5743134021759033, "learning_rate": 4.814302861162619e-06, "loss": 0.7126, "step": 3939 }, { "epoch": 2.6820966643975495, "grad_norm": 1.734072208404541, "learning_rate": 4.812053041129905e-06, "loss": 0.5444, "step": 3940 }, { "epoch": 2.6827773995915587, "grad_norm": 1.8128198385238647, "learning_rate": 4.8098032592035625e-06, "loss": 0.4643, "step": 3941 }, { "epoch": 2.6834581347855684, "grad_norm": 1.8551961183547974, "learning_rate": 4.807553515839735e-06, "loss": 0.5487, "step": 3942 }, { "epoch": 2.684138869979578, "grad_norm": 1.5998462438583374, "learning_rate": 4.80530381149456e-06, "loss": 0.6077, "step": 3943 }, { "epoch": 2.6848196051735873, "grad_norm": 1.7671027183532715, "learning_rate": 4.803054146624168e-06, "loss": 0.5495, "step": 3944 }, { "epoch": 2.685500340367597, "grad_norm": 1.5450348854064941, "learning_rate": 4.80080452168468e-06, "loss": 0.5991, "step": 3945 }, { "epoch": 2.6861810755616067, "grad_norm": 1.7275352478027344, "learning_rate": 4.798554937132207e-06, "loss": 0.4366, "step": 3946 }, { "epoch": 2.686861810755616, "grad_norm": 1.7186994552612305, "learning_rate": 4.796305393422856e-06, "loss": 0.6462, "step": 3947 }, { "epoch": 2.6875425459496256, "grad_norm": 1.6859033107757568, "learning_rate": 4.794055891012722e-06, "loss": 0.6553, "step": 3948 }, { "epoch": 2.6882232811436353, "grad_norm": 1.6649428606033325, "learning_rate": 4.791806430357893e-06, "loss": 0.5727, "step": 3949 }, { "epoch": 2.6889040163376445, "grad_norm": 1.7252373695373535, "learning_rate": 4.78955701191445e-06, "loss": 0.5033, "step": 3950 }, { "epoch": 2.689584751531654, "grad_norm": 1.7942663431167603, "learning_rate": 4.787307636138463e-06, "loss": 0.5138, "step": 3951 }, { "epoch": 2.6902654867256635, "grad_norm": 1.6719579696655273, "learning_rate": 4.785058303485993e-06, "loss": 0.4456, "step": 3952 }, { "epoch": 2.690946221919673, "grad_norm": 1.7369303703308105, "learning_rate": 4.782809014413096e-06, "loss": 0.5283, "step": 3953 }, { "epoch": 2.691626957113683, "grad_norm": 1.618749976158142, "learning_rate": 4.780559769375818e-06, "loss": 0.6346, "step": 3954 }, { "epoch": 2.6923076923076925, "grad_norm": 1.792243242263794, "learning_rate": 4.778310568830192e-06, "loss": 0.6009, "step": 3955 }, { "epoch": 2.6929884275017018, "grad_norm": 1.7691644430160522, "learning_rate": 4.776061413232247e-06, "loss": 0.5401, "step": 3956 }, { "epoch": 2.6936691626957114, "grad_norm": 1.6386237144470215, "learning_rate": 4.773812303038e-06, "loss": 0.7062, "step": 3957 }, { "epoch": 2.6943498978897207, "grad_norm": 1.6294596195220947, "learning_rate": 4.7715632387034575e-06, "loss": 0.6597, "step": 3958 }, { "epoch": 2.6950306330837304, "grad_norm": 1.6275826692581177, "learning_rate": 4.769314220684622e-06, "loss": 0.6146, "step": 3959 }, { "epoch": 2.69571136827774, "grad_norm": 1.6199898719787598, "learning_rate": 4.767065249437481e-06, "loss": 0.6757, "step": 3960 }, { "epoch": 2.6963921034717497, "grad_norm": 1.4058763980865479, "learning_rate": 4.764816325418018e-06, "loss": 0.7617, "step": 3961 }, { "epoch": 2.697072838665759, "grad_norm": 1.6601990461349487, "learning_rate": 4.7625674490822e-06, "loss": 0.5918, "step": 3962 }, { "epoch": 2.6977535738597687, "grad_norm": 1.6157029867172241, "learning_rate": 4.7603186208859916e-06, "loss": 0.6385, "step": 3963 }, { "epoch": 2.698434309053778, "grad_norm": 1.668969988822937, "learning_rate": 4.758069841285343e-06, "loss": 0.5984, "step": 3964 }, { "epoch": 2.6991150442477876, "grad_norm": 1.7232261896133423, "learning_rate": 4.755821110736195e-06, "loss": 0.5988, "step": 3965 }, { "epoch": 2.6997957794417973, "grad_norm": 1.7995811700820923, "learning_rate": 4.753572429694479e-06, "loss": 0.5576, "step": 3966 }, { "epoch": 2.7004765146358065, "grad_norm": 1.6982344388961792, "learning_rate": 4.751323798616119e-06, "loss": 0.5601, "step": 3967 }, { "epoch": 2.701157249829816, "grad_norm": 1.6666439771652222, "learning_rate": 4.749075217957024e-06, "loss": 0.631, "step": 3968 }, { "epoch": 2.701837985023826, "grad_norm": 1.7492504119873047, "learning_rate": 4.746826688173094e-06, "loss": 0.5838, "step": 3969 }, { "epoch": 2.702518720217835, "grad_norm": 1.581679344177246, "learning_rate": 4.744578209720223e-06, "loss": 0.6766, "step": 3970 }, { "epoch": 2.703199455411845, "grad_norm": 1.7821907997131348, "learning_rate": 4.742329783054292e-06, "loss": 0.6121, "step": 3971 }, { "epoch": 2.7038801906058545, "grad_norm": 1.6990894079208374, "learning_rate": 4.740081408631168e-06, "loss": 0.634, "step": 3972 }, { "epoch": 2.7045609257998637, "grad_norm": 1.7018702030181885, "learning_rate": 4.737833086906712e-06, "loss": 0.588, "step": 3973 }, { "epoch": 2.7052416609938734, "grad_norm": 1.795185923576355, "learning_rate": 4.735584818336773e-06, "loss": 0.4876, "step": 3974 }, { "epoch": 2.7059223961878827, "grad_norm": 1.6676019430160522, "learning_rate": 4.733336603377189e-06, "loss": 0.6922, "step": 3975 }, { "epoch": 2.7066031313818923, "grad_norm": 1.7231409549713135, "learning_rate": 4.731088442483787e-06, "loss": 0.626, "step": 3976 }, { "epoch": 2.707283866575902, "grad_norm": 1.7156105041503906, "learning_rate": 4.728840336112381e-06, "loss": 0.6135, "step": 3977 }, { "epoch": 2.7079646017699117, "grad_norm": 1.9043540954589844, "learning_rate": 4.726592284718777e-06, "loss": 0.4744, "step": 3978 }, { "epoch": 2.708645336963921, "grad_norm": 1.7546768188476562, "learning_rate": 4.724344288758771e-06, "loss": 0.5519, "step": 3979 }, { "epoch": 2.7093260721579306, "grad_norm": 1.5777348279953003, "learning_rate": 4.722096348688143e-06, "loss": 0.6342, "step": 3980 }, { "epoch": 2.71000680735194, "grad_norm": 1.6663793325424194, "learning_rate": 4.719848464962669e-06, "loss": 0.5535, "step": 3981 }, { "epoch": 2.7106875425459496, "grad_norm": 1.6356279850006104, "learning_rate": 4.717600638038103e-06, "loss": 0.563, "step": 3982 }, { "epoch": 2.7113682777399593, "grad_norm": 1.7761951684951782, "learning_rate": 4.715352868370198e-06, "loss": 0.5958, "step": 3983 }, { "epoch": 2.712049012933969, "grad_norm": 1.5482287406921387, "learning_rate": 4.713105156414687e-06, "loss": 0.7396, "step": 3984 }, { "epoch": 2.712729748127978, "grad_norm": 1.5738664865493774, "learning_rate": 4.710857502627298e-06, "loss": 0.6637, "step": 3985 }, { "epoch": 2.713410483321988, "grad_norm": 1.6180814504623413, "learning_rate": 4.708609907463742e-06, "loss": 0.6359, "step": 3986 }, { "epoch": 2.714091218515997, "grad_norm": 1.81670343875885, "learning_rate": 4.706362371379722e-06, "loss": 0.6546, "step": 3987 }, { "epoch": 2.714771953710007, "grad_norm": 1.7720708847045898, "learning_rate": 4.704114894830929e-06, "loss": 0.5449, "step": 3988 }, { "epoch": 2.7154526889040165, "grad_norm": 1.5303291082382202, "learning_rate": 4.701867478273036e-06, "loss": 0.6729, "step": 3989 }, { "epoch": 2.7161334240980257, "grad_norm": 1.581149697303772, "learning_rate": 4.699620122161714e-06, "loss": 0.5837, "step": 3990 }, { "epoch": 2.7168141592920354, "grad_norm": 1.4527863264083862, "learning_rate": 4.697372826952609e-06, "loss": 0.7298, "step": 3991 }, { "epoch": 2.717494894486045, "grad_norm": 1.715571403503418, "learning_rate": 4.695125593101367e-06, "loss": 0.5522, "step": 3992 }, { "epoch": 2.7181756296800543, "grad_norm": 1.843894362449646, "learning_rate": 4.6928784210636155e-06, "loss": 0.4976, "step": 3993 }, { "epoch": 2.718856364874064, "grad_norm": 1.6830041408538818, "learning_rate": 4.690631311294967e-06, "loss": 0.5865, "step": 3994 }, { "epoch": 2.7195371000680737, "grad_norm": 1.6360055208206177, "learning_rate": 4.688384264251026e-06, "loss": 0.6594, "step": 3995 }, { "epoch": 2.720217835262083, "grad_norm": 1.615294098854065, "learning_rate": 4.686137280387384e-06, "loss": 0.5447, "step": 3996 }, { "epoch": 2.7208985704560926, "grad_norm": 1.612733006477356, "learning_rate": 4.68389036015962e-06, "loss": 0.6994, "step": 3997 }, { "epoch": 2.721579305650102, "grad_norm": 1.5511623620986938, "learning_rate": 4.681643504023295e-06, "loss": 0.6878, "step": 3998 }, { "epoch": 2.7222600408441116, "grad_norm": 1.6161097288131714, "learning_rate": 4.679396712433962e-06, "loss": 0.7626, "step": 3999 }, { "epoch": 2.7229407760381212, "grad_norm": 1.8604813814163208, "learning_rate": 4.67714998584716e-06, "loss": 0.5591, "step": 4000 }, { "epoch": 2.723621511232131, "grad_norm": 1.6663103103637695, "learning_rate": 4.674903324718412e-06, "loss": 0.5466, "step": 4001 }, { "epoch": 2.72430224642614, "grad_norm": 1.5742558240890503, "learning_rate": 4.6726567295032335e-06, "loss": 0.5942, "step": 4002 }, { "epoch": 2.72498298162015, "grad_norm": 1.6485278606414795, "learning_rate": 4.670410200657121e-06, "loss": 0.6829, "step": 4003 }, { "epoch": 2.725663716814159, "grad_norm": 1.829140543937683, "learning_rate": 4.668163738635559e-06, "loss": 0.5113, "step": 4004 }, { "epoch": 2.7263444520081688, "grad_norm": 1.5264889001846313, "learning_rate": 4.665917343894022e-06, "loss": 0.6965, "step": 4005 }, { "epoch": 2.7270251872021785, "grad_norm": 1.6287964582443237, "learning_rate": 4.663671016887966e-06, "loss": 0.7123, "step": 4006 }, { "epoch": 2.727705922396188, "grad_norm": 1.7864336967468262, "learning_rate": 4.661424758072836e-06, "loss": 0.5791, "step": 4007 }, { "epoch": 2.7283866575901974, "grad_norm": 1.7194581031799316, "learning_rate": 4.659178567904062e-06, "loss": 0.6352, "step": 4008 }, { "epoch": 2.729067392784207, "grad_norm": 1.7332868576049805, "learning_rate": 4.65693244683706e-06, "loss": 0.6501, "step": 4009 }, { "epoch": 2.7297481279782163, "grad_norm": 1.6282439231872559, "learning_rate": 4.6546863953272355e-06, "loss": 0.6118, "step": 4010 }, { "epoch": 2.730428863172226, "grad_norm": 1.5363969802856445, "learning_rate": 4.6524404138299725e-06, "loss": 0.6987, "step": 4011 }, { "epoch": 2.7311095983662357, "grad_norm": 1.644166350364685, "learning_rate": 4.6501945028006474e-06, "loss": 0.6449, "step": 4012 }, { "epoch": 2.731790333560245, "grad_norm": 1.760754942893982, "learning_rate": 4.647948662694621e-06, "loss": 0.5164, "step": 4013 }, { "epoch": 2.7324710687542546, "grad_norm": 1.8186084032058716, "learning_rate": 4.645702893967239e-06, "loss": 0.4596, "step": 4014 }, { "epoch": 2.7331518039482643, "grad_norm": 1.627831220626831, "learning_rate": 4.643457197073831e-06, "loss": 0.6221, "step": 4015 }, { "epoch": 2.7338325391422735, "grad_norm": 1.8395497798919678, "learning_rate": 4.641211572469715e-06, "loss": 0.6034, "step": 4016 }, { "epoch": 2.734513274336283, "grad_norm": 1.6824272871017456, "learning_rate": 4.6389660206101915e-06, "loss": 0.5961, "step": 4017 }, { "epoch": 2.735194009530293, "grad_norm": 1.6646666526794434, "learning_rate": 4.636720541950547e-06, "loss": 0.6081, "step": 4018 }, { "epoch": 2.735874744724302, "grad_norm": 1.4721564054489136, "learning_rate": 4.634475136946057e-06, "loss": 0.6441, "step": 4019 }, { "epoch": 2.736555479918312, "grad_norm": 1.541751742362976, "learning_rate": 4.632229806051976e-06, "loss": 0.68, "step": 4020 }, { "epoch": 2.737236215112321, "grad_norm": 1.702659249305725, "learning_rate": 4.629984549723545e-06, "loss": 0.5957, "step": 4021 }, { "epoch": 2.7379169503063308, "grad_norm": 1.7008583545684814, "learning_rate": 4.627739368415995e-06, "loss": 0.5517, "step": 4022 }, { "epoch": 2.7385976855003404, "grad_norm": 1.6101734638214111, "learning_rate": 4.625494262584538e-06, "loss": 0.71, "step": 4023 }, { "epoch": 2.73927842069435, "grad_norm": 1.878604531288147, "learning_rate": 4.623249232684368e-06, "loss": 0.5857, "step": 4024 }, { "epoch": 2.7399591558883594, "grad_norm": 1.638939619064331, "learning_rate": 4.621004279170667e-06, "loss": 0.6251, "step": 4025 }, { "epoch": 2.740639891082369, "grad_norm": 1.653367280960083, "learning_rate": 4.618759402498603e-06, "loss": 0.6388, "step": 4026 }, { "epoch": 2.7413206262763783, "grad_norm": 1.6669867038726807, "learning_rate": 4.616514603123322e-06, "loss": 0.5319, "step": 4027 }, { "epoch": 2.742001361470388, "grad_norm": 1.6055675745010376, "learning_rate": 4.614269881499962e-06, "loss": 0.6032, "step": 4028 }, { "epoch": 2.7426820966643977, "grad_norm": 1.7792236804962158, "learning_rate": 4.612025238083641e-06, "loss": 0.4999, "step": 4029 }, { "epoch": 2.7433628318584073, "grad_norm": 1.682168960571289, "learning_rate": 4.609780673329463e-06, "loss": 0.5257, "step": 4030 }, { "epoch": 2.7440435670524166, "grad_norm": 1.5169498920440674, "learning_rate": 4.6075361876925126e-06, "loss": 0.688, "step": 4031 }, { "epoch": 2.7447243022464263, "grad_norm": 1.7512868642807007, "learning_rate": 4.605291781627862e-06, "loss": 0.6083, "step": 4032 }, { "epoch": 2.7454050374404355, "grad_norm": 1.6740065813064575, "learning_rate": 4.603047455590567e-06, "loss": 0.5007, "step": 4033 }, { "epoch": 2.746085772634445, "grad_norm": 1.6894234418869019, "learning_rate": 4.600803210035664e-06, "loss": 0.5371, "step": 4034 }, { "epoch": 2.746766507828455, "grad_norm": 1.7284808158874512, "learning_rate": 4.5985590454181775e-06, "loss": 0.459, "step": 4035 }, { "epoch": 2.747447243022464, "grad_norm": 1.620449185371399, "learning_rate": 4.596314962193113e-06, "loss": 0.6065, "step": 4036 }, { "epoch": 2.748127978216474, "grad_norm": 1.646344780921936, "learning_rate": 4.5940709608154576e-06, "loss": 0.6293, "step": 4037 }, { "epoch": 2.748808713410483, "grad_norm": 1.5818921327590942, "learning_rate": 4.591827041740185e-06, "loss": 0.7356, "step": 4038 }, { "epoch": 2.7494894486044927, "grad_norm": 1.6983269453048706, "learning_rate": 4.5895832054222536e-06, "loss": 0.6283, "step": 4039 }, { "epoch": 2.7501701837985024, "grad_norm": 1.7342008352279663, "learning_rate": 4.587339452316601e-06, "loss": 0.546, "step": 4040 }, { "epoch": 2.750850918992512, "grad_norm": 1.6202359199523926, "learning_rate": 4.58509578287815e-06, "loss": 0.6064, "step": 4041 }, { "epoch": 2.7515316541865213, "grad_norm": 1.638934850692749, "learning_rate": 4.5828521975618055e-06, "loss": 0.5387, "step": 4042 }, { "epoch": 2.752212389380531, "grad_norm": 1.7174919843673706, "learning_rate": 4.580608696822457e-06, "loss": 0.6086, "step": 4043 }, { "epoch": 2.7528931245745403, "grad_norm": 1.8056598901748657, "learning_rate": 4.578365281114974e-06, "loss": 0.5666, "step": 4044 }, { "epoch": 2.75357385976855, "grad_norm": 1.646498441696167, "learning_rate": 4.576121950894213e-06, "loss": 0.5421, "step": 4045 }, { "epoch": 2.7542545949625596, "grad_norm": 1.8435676097869873, "learning_rate": 4.573878706615009e-06, "loss": 0.4791, "step": 4046 }, { "epoch": 2.7549353301565693, "grad_norm": 1.6587841510772705, "learning_rate": 4.571635548732179e-06, "loss": 0.5603, "step": 4047 }, { "epoch": 2.7556160653505786, "grad_norm": 1.7717081308364868, "learning_rate": 4.569392477700529e-06, "loss": 0.6057, "step": 4048 }, { "epoch": 2.7562968005445883, "grad_norm": 1.647875189781189, "learning_rate": 4.5671494939748415e-06, "loss": 0.6651, "step": 4049 }, { "epoch": 2.7569775357385975, "grad_norm": 1.6317071914672852, "learning_rate": 4.564906598009884e-06, "loss": 0.5447, "step": 4050 }, { "epoch": 2.757658270932607, "grad_norm": 1.6825973987579346, "learning_rate": 4.562663790260403e-06, "loss": 0.5735, "step": 4051 }, { "epoch": 2.758339006126617, "grad_norm": 1.5820389986038208, "learning_rate": 4.560421071181131e-06, "loss": 0.7573, "step": 4052 }, { "epoch": 2.7590197413206266, "grad_norm": 1.647619366645813, "learning_rate": 4.558178441226779e-06, "loss": 0.6894, "step": 4053 }, { "epoch": 2.759700476514636, "grad_norm": 1.6669203042984009, "learning_rate": 4.555935900852043e-06, "loss": 0.6348, "step": 4054 }, { "epoch": 2.7603812117086455, "grad_norm": 1.8145586252212524, "learning_rate": 4.5536934505115985e-06, "loss": 0.5851, "step": 4055 }, { "epoch": 2.7610619469026547, "grad_norm": 1.5403876304626465, "learning_rate": 4.551451090660106e-06, "loss": 0.6278, "step": 4056 }, { "epoch": 2.7617426820966644, "grad_norm": 1.6307717561721802, "learning_rate": 4.549208821752205e-06, "loss": 0.5518, "step": 4057 }, { "epoch": 2.762423417290674, "grad_norm": 1.5446561574935913, "learning_rate": 4.546966644242515e-06, "loss": 0.636, "step": 4058 }, { "epoch": 2.7631041524846833, "grad_norm": 1.7795424461364746, "learning_rate": 4.544724558585641e-06, "loss": 0.5188, "step": 4059 }, { "epoch": 2.763784887678693, "grad_norm": 1.5452122688293457, "learning_rate": 4.542482565236166e-06, "loss": 0.7544, "step": 4060 }, { "epoch": 2.7644656228727023, "grad_norm": 1.593476414680481, "learning_rate": 4.540240664648656e-06, "loss": 0.5524, "step": 4061 }, { "epoch": 2.765146358066712, "grad_norm": 1.6698253154754639, "learning_rate": 4.537998857277659e-06, "loss": 0.5603, "step": 4062 }, { "epoch": 2.7658270932607216, "grad_norm": 1.6549265384674072, "learning_rate": 4.535757143577701e-06, "loss": 0.5965, "step": 4063 }, { "epoch": 2.7665078284547313, "grad_norm": 1.578073263168335, "learning_rate": 4.533515524003291e-06, "loss": 0.717, "step": 4064 }, { "epoch": 2.7671885636487406, "grad_norm": 1.6235188245773315, "learning_rate": 4.53127399900892e-06, "loss": 0.6866, "step": 4065 }, { "epoch": 2.7678692988427502, "grad_norm": 1.6782848834991455, "learning_rate": 4.52903256904906e-06, "loss": 0.6058, "step": 4066 }, { "epoch": 2.7685500340367595, "grad_norm": 1.6815913915634155, "learning_rate": 4.52679123457816e-06, "loss": 0.5708, "step": 4067 }, { "epoch": 2.769230769230769, "grad_norm": 1.6892293691635132, "learning_rate": 4.524549996050652e-06, "loss": 0.5744, "step": 4068 }, { "epoch": 2.769911504424779, "grad_norm": 1.5594161748886108, "learning_rate": 4.522308853920952e-06, "loss": 0.7093, "step": 4069 }, { "epoch": 2.7705922396187885, "grad_norm": 1.8254562616348267, "learning_rate": 4.520067808643449e-06, "loss": 0.516, "step": 4070 }, { "epoch": 2.7712729748127978, "grad_norm": 1.6189051866531372, "learning_rate": 4.517826860672518e-06, "loss": 0.6978, "step": 4071 }, { "epoch": 2.7719537100068075, "grad_norm": 1.620514154434204, "learning_rate": 4.515586010462513e-06, "loss": 0.6267, "step": 4072 }, { "epoch": 2.7726344452008167, "grad_norm": 1.6804791688919067, "learning_rate": 4.5133452584677664e-06, "loss": 0.4673, "step": 4073 }, { "epoch": 2.7733151803948264, "grad_norm": 1.7179802656173706, "learning_rate": 4.511104605142594e-06, "loss": 0.5372, "step": 4074 }, { "epoch": 2.773995915588836, "grad_norm": 1.5440683364868164, "learning_rate": 4.50886405094129e-06, "loss": 0.6705, "step": 4075 }, { "epoch": 2.7746766507828453, "grad_norm": 1.6980831623077393, "learning_rate": 4.506623596318127e-06, "loss": 0.5763, "step": 4076 }, { "epoch": 2.775357385976855, "grad_norm": 1.7682377099990845, "learning_rate": 4.504383241727359e-06, "loss": 0.4773, "step": 4077 }, { "epoch": 2.7760381211708647, "grad_norm": 1.8571326732635498, "learning_rate": 4.5021429876232196e-06, "loss": 0.5057, "step": 4078 }, { "epoch": 2.776718856364874, "grad_norm": 1.851108193397522, "learning_rate": 4.499902834459922e-06, "loss": 0.5902, "step": 4079 }, { "epoch": 2.7773995915588836, "grad_norm": 1.8923958539962769, "learning_rate": 4.4976627826916575e-06, "loss": 0.512, "step": 4080 }, { "epoch": 2.7780803267528933, "grad_norm": 1.561834692955017, "learning_rate": 4.495422832772597e-06, "loss": 0.6584, "step": 4081 }, { "epoch": 2.7787610619469025, "grad_norm": 1.5834791660308838, "learning_rate": 4.493182985156897e-06, "loss": 0.6802, "step": 4082 }, { "epoch": 2.779441797140912, "grad_norm": 1.6983609199523926, "learning_rate": 4.490943240298684e-06, "loss": 0.6609, "step": 4083 }, { "epoch": 2.7801225323349215, "grad_norm": 1.7342445850372314, "learning_rate": 4.4887035986520684e-06, "loss": 0.5968, "step": 4084 }, { "epoch": 2.780803267528931, "grad_norm": 1.6897608041763306, "learning_rate": 4.48646406067114e-06, "loss": 0.5932, "step": 4085 }, { "epoch": 2.781484002722941, "grad_norm": 1.586773157119751, "learning_rate": 4.484224626809966e-06, "loss": 0.6782, "step": 4086 }, { "epoch": 2.7821647379169505, "grad_norm": 1.598613977432251, "learning_rate": 4.481985297522592e-06, "loss": 0.586, "step": 4087 }, { "epoch": 2.7828454731109598, "grad_norm": 1.6457606554031372, "learning_rate": 4.479746073263046e-06, "loss": 0.701, "step": 4088 }, { "epoch": 2.7835262083049694, "grad_norm": 1.6879770755767822, "learning_rate": 4.47750695448533e-06, "loss": 0.5202, "step": 4089 }, { "epoch": 2.7842069434989787, "grad_norm": 1.5508949756622314, "learning_rate": 4.475267941643426e-06, "loss": 0.5342, "step": 4090 }, { "epoch": 2.7848876786929884, "grad_norm": 1.7145158052444458, "learning_rate": 4.473029035191299e-06, "loss": 0.6366, "step": 4091 }, { "epoch": 2.785568413886998, "grad_norm": 1.7076950073242188, "learning_rate": 4.470790235582887e-06, "loss": 0.5124, "step": 4092 }, { "epoch": 2.7862491490810077, "grad_norm": 1.6043235063552856, "learning_rate": 4.468551543272107e-06, "loss": 0.5748, "step": 4093 }, { "epoch": 2.786929884275017, "grad_norm": 1.7153329849243164, "learning_rate": 4.466312958712856e-06, "loss": 0.5884, "step": 4094 }, { "epoch": 2.7876106194690267, "grad_norm": 1.6743465662002563, "learning_rate": 4.4640744823590096e-06, "loss": 0.5469, "step": 4095 }, { "epoch": 2.788291354663036, "grad_norm": 1.8424265384674072, "learning_rate": 4.461836114664418e-06, "loss": 0.4954, "step": 4096 }, { "epoch": 2.7889720898570456, "grad_norm": 1.6064196825027466, "learning_rate": 4.459597856082915e-06, "loss": 0.5493, "step": 4097 }, { "epoch": 2.7896528250510553, "grad_norm": 1.6378215551376343, "learning_rate": 4.457359707068304e-06, "loss": 0.5253, "step": 4098 }, { "epoch": 2.7903335602450645, "grad_norm": 1.694778561592102, "learning_rate": 4.455121668074378e-06, "loss": 0.5891, "step": 4099 }, { "epoch": 2.791014295439074, "grad_norm": 1.655332088470459, "learning_rate": 4.452883739554896e-06, "loss": 0.5965, "step": 4100 }, { "epoch": 2.791695030633084, "grad_norm": 1.6107633113861084, "learning_rate": 4.4506459219636e-06, "loss": 0.5392, "step": 4101 }, { "epoch": 2.792375765827093, "grad_norm": 1.4954547882080078, "learning_rate": 4.448408215754212e-06, "loss": 0.6833, "step": 4102 }, { "epoch": 2.793056501021103, "grad_norm": 1.697016954421997, "learning_rate": 4.446170621380425e-06, "loss": 0.6359, "step": 4103 }, { "epoch": 2.7937372362151125, "grad_norm": 1.663480281829834, "learning_rate": 4.443933139295914e-06, "loss": 0.5913, "step": 4104 }, { "epoch": 2.7944179714091217, "grad_norm": 1.5978676080703735, "learning_rate": 4.441695769954331e-06, "loss": 0.5827, "step": 4105 }, { "epoch": 2.7950987066031314, "grad_norm": 1.7508856058120728, "learning_rate": 4.439458513809302e-06, "loss": 0.495, "step": 4106 }, { "epoch": 2.7957794417971407, "grad_norm": 1.6533547639846802, "learning_rate": 4.437221371314432e-06, "loss": 0.5933, "step": 4107 }, { "epoch": 2.7964601769911503, "grad_norm": 1.661617398262024, "learning_rate": 4.434984342923307e-06, "loss": 0.5826, "step": 4108 }, { "epoch": 2.79714091218516, "grad_norm": 1.683596134185791, "learning_rate": 4.432747429089483e-06, "loss": 0.5907, "step": 4109 }, { "epoch": 2.7978216473791697, "grad_norm": 1.6859403848648071, "learning_rate": 4.430510630266496e-06, "loss": 0.7315, "step": 4110 }, { "epoch": 2.798502382573179, "grad_norm": 1.6292678117752075, "learning_rate": 4.428273946907858e-06, "loss": 0.6507, "step": 4111 }, { "epoch": 2.7991831177671886, "grad_norm": 1.6872590780258179, "learning_rate": 4.4260373794670606e-06, "loss": 0.547, "step": 4112 }, { "epoch": 2.799863852961198, "grad_norm": 1.6412677764892578, "learning_rate": 4.423800928397566e-06, "loss": 0.6901, "step": 4113 }, { "epoch": 2.8005445881552076, "grad_norm": 1.5309587717056274, "learning_rate": 4.4215645941528186e-06, "loss": 0.6322, "step": 4114 }, { "epoch": 2.8012253233492173, "grad_norm": 1.4485844373703003, "learning_rate": 4.419328377186235e-06, "loss": 0.742, "step": 4115 }, { "epoch": 2.801906058543227, "grad_norm": 1.8044445514678955, "learning_rate": 4.417092277951208e-06, "loss": 0.5641, "step": 4116 }, { "epoch": 2.802586793737236, "grad_norm": 1.6822553873062134, "learning_rate": 4.414856296901112e-06, "loss": 0.5553, "step": 4117 }, { "epoch": 2.803267528931246, "grad_norm": 1.6512576341629028, "learning_rate": 4.412620434489294e-06, "loss": 0.6158, "step": 4118 }, { "epoch": 2.803948264125255, "grad_norm": 1.7402968406677246, "learning_rate": 4.410384691169074e-06, "loss": 0.5641, "step": 4119 }, { "epoch": 2.804628999319265, "grad_norm": 1.674248456954956, "learning_rate": 4.408149067393751e-06, "loss": 0.7264, "step": 4120 }, { "epoch": 2.8053097345132745, "grad_norm": 1.6625549793243408, "learning_rate": 4.405913563616601e-06, "loss": 0.5795, "step": 4121 }, { "epoch": 2.8059904697072837, "grad_norm": 1.6865090131759644, "learning_rate": 4.403678180290871e-06, "loss": 0.5338, "step": 4122 }, { "epoch": 2.8066712049012934, "grad_norm": 1.619749665260315, "learning_rate": 4.401442917869787e-06, "loss": 0.749, "step": 4123 }, { "epoch": 2.807351940095303, "grad_norm": 1.6160694360733032, "learning_rate": 4.39920777680655e-06, "loss": 0.7378, "step": 4124 }, { "epoch": 2.8080326752893123, "grad_norm": 1.6940621137619019, "learning_rate": 4.396972757554339e-06, "loss": 0.6415, "step": 4125 }, { "epoch": 2.808713410483322, "grad_norm": 1.6720480918884277, "learning_rate": 4.3947378605663035e-06, "loss": 0.6332, "step": 4126 }, { "epoch": 2.8093941456773317, "grad_norm": 1.7060366868972778, "learning_rate": 4.392503086295571e-06, "loss": 0.556, "step": 4127 }, { "epoch": 2.810074880871341, "grad_norm": 1.6305758953094482, "learning_rate": 4.390268435195244e-06, "loss": 0.5084, "step": 4128 }, { "epoch": 2.8107556160653506, "grad_norm": 1.5589007139205933, "learning_rate": 4.388033907718396e-06, "loss": 0.6871, "step": 4129 }, { "epoch": 2.81143635125936, "grad_norm": 1.837954044342041, "learning_rate": 4.385799504318083e-06, "loss": 0.5888, "step": 4130 }, { "epoch": 2.8121170864533696, "grad_norm": 1.594229817390442, "learning_rate": 4.383565225447331e-06, "loss": 0.6394, "step": 4131 }, { "epoch": 2.8127978216473792, "grad_norm": 1.7275830507278442, "learning_rate": 4.3813310715591405e-06, "loss": 0.6325, "step": 4132 }, { "epoch": 2.813478556841389, "grad_norm": 1.8573518991470337, "learning_rate": 4.379097043106485e-06, "loss": 0.5405, "step": 4133 }, { "epoch": 2.814159292035398, "grad_norm": 1.6594206094741821, "learning_rate": 4.376863140542321e-06, "loss": 0.605, "step": 4134 }, { "epoch": 2.814840027229408, "grad_norm": 1.726850986480713, "learning_rate": 4.374629364319572e-06, "loss": 0.5935, "step": 4135 }, { "epoch": 2.815520762423417, "grad_norm": 1.8133381605148315, "learning_rate": 4.372395714891135e-06, "loss": 0.4955, "step": 4136 }, { "epoch": 2.8162014976174268, "grad_norm": 1.719880223274231, "learning_rate": 4.370162192709886e-06, "loss": 0.3984, "step": 4137 }, { "epoch": 2.8168822328114365, "grad_norm": 1.6768193244934082, "learning_rate": 4.367928798228673e-06, "loss": 0.6232, "step": 4138 }, { "epoch": 2.817562968005446, "grad_norm": 1.748571515083313, "learning_rate": 4.365695531900318e-06, "loss": 0.5457, "step": 4139 }, { "epoch": 2.8182437031994554, "grad_norm": 1.605738878250122, "learning_rate": 4.3634623941776164e-06, "loss": 0.7145, "step": 4140 }, { "epoch": 2.818924438393465, "grad_norm": 1.7737703323364258, "learning_rate": 4.36122938551334e-06, "loss": 0.5188, "step": 4141 }, { "epoch": 2.8196051735874743, "grad_norm": 1.6815091371536255, "learning_rate": 4.35899650636023e-06, "loss": 0.6068, "step": 4142 }, { "epoch": 2.820285908781484, "grad_norm": 1.6934690475463867, "learning_rate": 4.356763757171007e-06, "loss": 0.6254, "step": 4143 }, { "epoch": 2.8209666439754937, "grad_norm": 1.588112235069275, "learning_rate": 4.354531138398361e-06, "loss": 0.6406, "step": 4144 }, { "epoch": 2.821647379169503, "grad_norm": 1.6258726119995117, "learning_rate": 4.352298650494959e-06, "loss": 0.5409, "step": 4145 }, { "epoch": 2.8223281143635126, "grad_norm": 1.750741720199585, "learning_rate": 4.3500662939134375e-06, "loss": 0.604, "step": 4146 }, { "epoch": 2.823008849557522, "grad_norm": 1.4911130666732788, "learning_rate": 4.347834069106408e-06, "loss": 0.7053, "step": 4147 }, { "epoch": 2.8236895847515315, "grad_norm": 1.6801820993423462, "learning_rate": 4.345601976526456e-06, "loss": 0.6565, "step": 4148 }, { "epoch": 2.824370319945541, "grad_norm": 1.6342195272445679, "learning_rate": 4.34337001662614e-06, "loss": 0.6221, "step": 4149 }, { "epoch": 2.825051055139551, "grad_norm": 1.6785413026809692, "learning_rate": 4.34113818985799e-06, "loss": 0.6511, "step": 4150 }, { "epoch": 2.82573179033356, "grad_norm": 1.6811962127685547, "learning_rate": 4.338906496674513e-06, "loss": 0.5808, "step": 4151 }, { "epoch": 2.82641252552757, "grad_norm": 1.6268877983093262, "learning_rate": 4.3366749375281856e-06, "loss": 0.7133, "step": 4152 }, { "epoch": 2.827093260721579, "grad_norm": 1.6547690629959106, "learning_rate": 4.334443512871456e-06, "loss": 0.672, "step": 4153 }, { "epoch": 2.8277739959155888, "grad_norm": 1.7278146743774414, "learning_rate": 4.332212223156748e-06, "loss": 0.5265, "step": 4154 }, { "epoch": 2.8284547311095984, "grad_norm": 1.7195805311203003, "learning_rate": 4.329981068836458e-06, "loss": 0.537, "step": 4155 }, { "epoch": 2.829135466303608, "grad_norm": 1.6056175231933594, "learning_rate": 4.327750050362954e-06, "loss": 0.6755, "step": 4156 }, { "epoch": 2.8298162014976174, "grad_norm": 1.758358120918274, "learning_rate": 4.325519168188575e-06, "loss": 0.4675, "step": 4157 }, { "epoch": 2.830496936691627, "grad_norm": 1.6622048616409302, "learning_rate": 4.323288422765635e-06, "loss": 0.5299, "step": 4158 }, { "epoch": 2.8311776718856363, "grad_norm": 1.6256062984466553, "learning_rate": 4.321057814546418e-06, "loss": 0.6319, "step": 4159 }, { "epoch": 2.831858407079646, "grad_norm": 1.6558762788772583, "learning_rate": 4.3188273439831815e-06, "loss": 0.6106, "step": 4160 }, { "epoch": 2.8325391422736557, "grad_norm": 1.688070297241211, "learning_rate": 4.316597011528158e-06, "loss": 0.5808, "step": 4161 }, { "epoch": 2.8332198774676653, "grad_norm": 1.7779943943023682, "learning_rate": 4.314366817633545e-06, "loss": 0.5995, "step": 4162 }, { "epoch": 2.8339006126616746, "grad_norm": 1.7025463581085205, "learning_rate": 4.312136762751517e-06, "loss": 0.5716, "step": 4163 }, { "epoch": 2.8345813478556843, "grad_norm": 1.6700295209884644, "learning_rate": 4.3099068473342205e-06, "loss": 0.5815, "step": 4164 }, { "epoch": 2.8352620830496935, "grad_norm": 1.744056224822998, "learning_rate": 4.30767707183377e-06, "loss": 0.5336, "step": 4165 }, { "epoch": 2.835942818243703, "grad_norm": 1.8025377988815308, "learning_rate": 4.305447436702255e-06, "loss": 0.5576, "step": 4166 }, { "epoch": 2.836623553437713, "grad_norm": 1.775244116783142, "learning_rate": 4.303217942391735e-06, "loss": 0.5539, "step": 4167 }, { "epoch": 2.837304288631722, "grad_norm": 1.4965819120407104, "learning_rate": 4.3009885893542436e-06, "loss": 0.617, "step": 4168 }, { "epoch": 2.837985023825732, "grad_norm": 1.7286880016326904, "learning_rate": 4.298759378041782e-06, "loss": 0.5686, "step": 4169 }, { "epoch": 2.838665759019741, "grad_norm": 1.6480696201324463, "learning_rate": 4.296530308906323e-06, "loss": 0.6102, "step": 4170 }, { "epoch": 2.8393464942137507, "grad_norm": 1.5829291343688965, "learning_rate": 4.2943013823998155e-06, "loss": 0.5722, "step": 4171 }, { "epoch": 2.8400272294077604, "grad_norm": 1.7942852973937988, "learning_rate": 4.292072598974172e-06, "loss": 0.4289, "step": 4172 }, { "epoch": 2.84070796460177, "grad_norm": 1.6959000825881958, "learning_rate": 4.289843959081281e-06, "loss": 0.639, "step": 4173 }, { "epoch": 2.8413886997957793, "grad_norm": 1.5964025259017944, "learning_rate": 4.287615463173002e-06, "loss": 0.6625, "step": 4174 }, { "epoch": 2.842069434989789, "grad_norm": 1.7464685440063477, "learning_rate": 4.285387111701164e-06, "loss": 0.5585, "step": 4175 }, { "epoch": 2.8427501701837983, "grad_norm": 1.5305596590042114, "learning_rate": 4.283158905117563e-06, "loss": 0.7741, "step": 4176 }, { "epoch": 2.843430905377808, "grad_norm": 1.615135669708252, "learning_rate": 4.280930843873975e-06, "loss": 0.6503, "step": 4177 }, { "epoch": 2.8441116405718176, "grad_norm": 1.6682462692260742, "learning_rate": 4.278702928422137e-06, "loss": 0.5649, "step": 4178 }, { "epoch": 2.8447923757658273, "grad_norm": 1.7709174156188965, "learning_rate": 4.276475159213762e-06, "loss": 0.5497, "step": 4179 }, { "epoch": 2.8454731109598366, "grad_norm": 1.5403209924697876, "learning_rate": 4.274247536700531e-06, "loss": 0.6095, "step": 4180 }, { "epoch": 2.8461538461538463, "grad_norm": 1.6130897998809814, "learning_rate": 4.2720200613340975e-06, "loss": 0.6555, "step": 4181 }, { "epoch": 2.8468345813478555, "grad_norm": 1.7792319059371948, "learning_rate": 4.2697927335660806e-06, "loss": 0.5842, "step": 4182 }, { "epoch": 2.847515316541865, "grad_norm": 1.6157246828079224, "learning_rate": 4.267565553848075e-06, "loss": 0.6102, "step": 4183 }, { "epoch": 2.848196051735875, "grad_norm": 1.8649848699569702, "learning_rate": 4.265338522631642e-06, "loss": 0.4491, "step": 4184 }, { "epoch": 2.8488767869298846, "grad_norm": 1.6169899702072144, "learning_rate": 4.263111640368312e-06, "loss": 0.6123, "step": 4185 }, { "epoch": 2.849557522123894, "grad_norm": 1.726778507232666, "learning_rate": 4.26088490750959e-06, "loss": 0.5, "step": 4186 }, { "epoch": 2.8502382573179035, "grad_norm": 1.7844951152801514, "learning_rate": 4.258658324506945e-06, "loss": 0.5922, "step": 4187 }, { "epoch": 2.8509189925119127, "grad_norm": 1.6732053756713867, "learning_rate": 4.256431891811823e-06, "loss": 0.5739, "step": 4188 }, { "epoch": 2.8515997277059224, "grad_norm": 1.6826788187026978, "learning_rate": 4.254205609875628e-06, "loss": 0.5921, "step": 4189 }, { "epoch": 2.852280462899932, "grad_norm": 1.6622949838638306, "learning_rate": 4.251979479149744e-06, "loss": 0.5588, "step": 4190 }, { "epoch": 2.8529611980939413, "grad_norm": 1.7791508436203003, "learning_rate": 4.24975350008552e-06, "loss": 0.515, "step": 4191 }, { "epoch": 2.853641933287951, "grad_norm": 1.7758318185806274, "learning_rate": 4.247527673134274e-06, "loss": 0.5179, "step": 4192 }, { "epoch": 2.8543226684819603, "grad_norm": 1.7949141263961792, "learning_rate": 4.245301998747292e-06, "loss": 0.642, "step": 4193 }, { "epoch": 2.85500340367597, "grad_norm": 1.6227277517318726, "learning_rate": 4.243076477375836e-06, "loss": 0.6223, "step": 4194 }, { "epoch": 2.8556841388699796, "grad_norm": 1.739027738571167, "learning_rate": 4.24085110947113e-06, "loss": 0.5649, "step": 4195 }, { "epoch": 2.8563648740639893, "grad_norm": 1.6837464570999146, "learning_rate": 4.2386258954843664e-06, "loss": 0.6147, "step": 4196 }, { "epoch": 2.8570456092579986, "grad_norm": 1.6615235805511475, "learning_rate": 4.236400835866712e-06, "loss": 0.4749, "step": 4197 }, { "epoch": 2.8577263444520082, "grad_norm": 1.88108491897583, "learning_rate": 4.234175931069297e-06, "loss": 0.5484, "step": 4198 }, { "epoch": 2.8584070796460175, "grad_norm": 1.6267430782318115, "learning_rate": 4.231951181543221e-06, "loss": 0.5931, "step": 4199 }, { "epoch": 2.859087814840027, "grad_norm": 1.7114702463150024, "learning_rate": 4.229726587739559e-06, "loss": 0.4815, "step": 4200 }, { "epoch": 2.859768550034037, "grad_norm": 1.755232334136963, "learning_rate": 4.227502150109343e-06, "loss": 0.5129, "step": 4201 }, { "epoch": 2.8604492852280465, "grad_norm": 1.6066133975982666, "learning_rate": 4.22527786910358e-06, "loss": 0.6959, "step": 4202 }, { "epoch": 2.8611300204220558, "grad_norm": 1.7959764003753662, "learning_rate": 4.2230537451732476e-06, "loss": 0.56, "step": 4203 }, { "epoch": 2.8618107556160655, "grad_norm": 1.8958470821380615, "learning_rate": 4.220829778769288e-06, "loss": 0.5442, "step": 4204 }, { "epoch": 2.8624914908100747, "grad_norm": 1.6312259435653687, "learning_rate": 4.218605970342609e-06, "loss": 0.6664, "step": 4205 }, { "epoch": 2.8631722260040844, "grad_norm": 1.7184628248214722, "learning_rate": 4.216382320344091e-06, "loss": 0.5115, "step": 4206 }, { "epoch": 2.863852961198094, "grad_norm": 1.6886953115463257, "learning_rate": 4.214158829224582e-06, "loss": 0.4214, "step": 4207 }, { "epoch": 2.8645336963921033, "grad_norm": 1.6061033010482788, "learning_rate": 4.211935497434894e-06, "loss": 0.6258, "step": 4208 }, { "epoch": 2.865214431586113, "grad_norm": 1.6869227886199951, "learning_rate": 4.209712325425808e-06, "loss": 0.7071, "step": 4209 }, { "epoch": 2.8658951667801227, "grad_norm": 1.6903131008148193, "learning_rate": 4.207489313648078e-06, "loss": 0.632, "step": 4210 }, { "epoch": 2.866575901974132, "grad_norm": 1.696273922920227, "learning_rate": 4.205266462552415e-06, "loss": 0.601, "step": 4211 }, { "epoch": 2.8672566371681416, "grad_norm": 1.6103253364562988, "learning_rate": 4.203043772589509e-06, "loss": 0.561, "step": 4212 }, { "epoch": 2.8679373723621513, "grad_norm": 1.6756726503372192, "learning_rate": 4.20082124421001e-06, "loss": 0.6764, "step": 4213 }, { "epoch": 2.8686181075561605, "grad_norm": 1.6194112300872803, "learning_rate": 4.198598877864538e-06, "loss": 0.6215, "step": 4214 }, { "epoch": 2.86929884275017, "grad_norm": 1.6213743686676025, "learning_rate": 4.196376674003676e-06, "loss": 0.6286, "step": 4215 }, { "epoch": 2.8699795779441795, "grad_norm": 1.4316518306732178, "learning_rate": 4.194154633077981e-06, "loss": 0.7891, "step": 4216 }, { "epoch": 2.870660313138189, "grad_norm": 1.773652195930481, "learning_rate": 4.191932755537972e-06, "loss": 0.4715, "step": 4217 }, { "epoch": 2.871341048332199, "grad_norm": 1.5733938217163086, "learning_rate": 4.189711041834136e-06, "loss": 0.6747, "step": 4218 }, { "epoch": 2.8720217835262085, "grad_norm": 1.6174544095993042, "learning_rate": 4.187489492416924e-06, "loss": 0.6222, "step": 4219 }, { "epoch": 2.8727025187202178, "grad_norm": 1.5188251733779907, "learning_rate": 4.185268107736762e-06, "loss": 0.6942, "step": 4220 }, { "epoch": 2.8733832539142274, "grad_norm": 1.6394453048706055, "learning_rate": 4.1830468882440355e-06, "loss": 0.7044, "step": 4221 }, { "epoch": 2.8740639891082367, "grad_norm": 1.6223644018173218, "learning_rate": 4.180825834389096e-06, "loss": 0.6171, "step": 4222 }, { "epoch": 2.8747447243022464, "grad_norm": 1.7587043046951294, "learning_rate": 4.178604946622265e-06, "loss": 0.4865, "step": 4223 }, { "epoch": 2.875425459496256, "grad_norm": 1.763170838356018, "learning_rate": 4.1763842253938316e-06, "loss": 0.3947, "step": 4224 }, { "epoch": 2.8761061946902657, "grad_norm": 1.7327736616134644, "learning_rate": 4.174163671154044e-06, "loss": 0.6869, "step": 4225 }, { "epoch": 2.876786929884275, "grad_norm": 1.7600224018096924, "learning_rate": 4.171943284353124e-06, "loss": 0.556, "step": 4226 }, { "epoch": 2.8774676650782847, "grad_norm": 1.6546696424484253, "learning_rate": 4.169723065441255e-06, "loss": 0.5462, "step": 4227 }, { "epoch": 2.878148400272294, "grad_norm": 1.7235618829727173, "learning_rate": 4.167503014868588e-06, "loss": 0.5279, "step": 4228 }, { "epoch": 2.8788291354663036, "grad_norm": 1.7231123447418213, "learning_rate": 4.16528313308524e-06, "loss": 0.5461, "step": 4229 }, { "epoch": 2.8795098706603133, "grad_norm": 1.6624724864959717, "learning_rate": 4.163063420541294e-06, "loss": 0.5536, "step": 4230 }, { "epoch": 2.8801906058543225, "grad_norm": 1.6884911060333252, "learning_rate": 4.1608438776868e-06, "loss": 0.7142, "step": 4231 }, { "epoch": 2.880871341048332, "grad_norm": 1.7600433826446533, "learning_rate": 4.158624504971767e-06, "loss": 0.611, "step": 4232 }, { "epoch": 2.881552076242342, "grad_norm": 1.6640008687973022, "learning_rate": 4.15640530284618e-06, "loss": 0.5413, "step": 4233 }, { "epoch": 2.882232811436351, "grad_norm": 1.9250292778015137, "learning_rate": 4.154186271759978e-06, "loss": 0.5628, "step": 4234 }, { "epoch": 2.882913546630361, "grad_norm": 1.6980453729629517, "learning_rate": 4.1519674121630735e-06, "loss": 0.7331, "step": 4235 }, { "epoch": 2.8835942818243705, "grad_norm": 1.743981957435608, "learning_rate": 4.149748724505342e-06, "loss": 0.6036, "step": 4236 }, { "epoch": 2.8842750170183797, "grad_norm": 1.5902117490768433, "learning_rate": 4.147530209236626e-06, "loss": 0.5004, "step": 4237 }, { "epoch": 2.8849557522123894, "grad_norm": 1.6300930976867676, "learning_rate": 4.145311866806727e-06, "loss": 0.6232, "step": 4238 }, { "epoch": 2.8856364874063987, "grad_norm": 1.6031140089035034, "learning_rate": 4.143093697665418e-06, "loss": 0.6622, "step": 4239 }, { "epoch": 2.8863172226004083, "grad_norm": 1.6173810958862305, "learning_rate": 4.140875702262434e-06, "loss": 0.615, "step": 4240 }, { "epoch": 2.886997957794418, "grad_norm": 1.7451766729354858, "learning_rate": 4.138657881047475e-06, "loss": 0.5686, "step": 4241 }, { "epoch": 2.8876786929884277, "grad_norm": 1.6521053314208984, "learning_rate": 4.136440234470204e-06, "loss": 0.5721, "step": 4242 }, { "epoch": 2.888359428182437, "grad_norm": 1.7450356483459473, "learning_rate": 4.134222762980254e-06, "loss": 0.6617, "step": 4243 }, { "epoch": 2.8890401633764466, "grad_norm": 1.5792685747146606, "learning_rate": 4.1320054670272135e-06, "loss": 0.5478, "step": 4244 }, { "epoch": 2.889720898570456, "grad_norm": 1.6931829452514648, "learning_rate": 4.129788347060643e-06, "loss": 0.6092, "step": 4245 }, { "epoch": 2.8904016337644656, "grad_norm": 1.6929231882095337, "learning_rate": 4.127571403530068e-06, "loss": 0.5399, "step": 4246 }, { "epoch": 2.8910823689584753, "grad_norm": 1.6752010583877563, "learning_rate": 4.125354636884974e-06, "loss": 0.5809, "step": 4247 }, { "epoch": 2.891763104152485, "grad_norm": 1.5812695026397705, "learning_rate": 4.123138047574809e-06, "loss": 0.634, "step": 4248 }, { "epoch": 2.892443839346494, "grad_norm": 1.8909374475479126, "learning_rate": 4.12092163604899e-06, "loss": 0.496, "step": 4249 }, { "epoch": 2.893124574540504, "grad_norm": 1.7397863864898682, "learning_rate": 4.118705402756898e-06, "loss": 0.5418, "step": 4250 }, { "epoch": 2.893805309734513, "grad_norm": 1.9238168001174927, "learning_rate": 4.116489348147871e-06, "loss": 0.569, "step": 4251 }, { "epoch": 2.894486044928523, "grad_norm": 1.7486886978149414, "learning_rate": 4.1142734726712195e-06, "loss": 0.5144, "step": 4252 }, { "epoch": 2.8951667801225325, "grad_norm": 1.6190290451049805, "learning_rate": 4.112057776776213e-06, "loss": 0.5766, "step": 4253 }, { "epoch": 2.8958475153165417, "grad_norm": 1.735851764678955, "learning_rate": 4.109842260912082e-06, "loss": 0.6302, "step": 4254 }, { "epoch": 2.8965282505105514, "grad_norm": 1.7208244800567627, "learning_rate": 4.107626925528028e-06, "loss": 0.5897, "step": 4255 }, { "epoch": 2.897208985704561, "grad_norm": 1.8352538347244263, "learning_rate": 4.105411771073211e-06, "loss": 0.5666, "step": 4256 }, { "epoch": 2.8978897208985703, "grad_norm": 1.8367682695388794, "learning_rate": 4.1031967979967546e-06, "loss": 0.4716, "step": 4257 }, { "epoch": 2.89857045609258, "grad_norm": 1.7186530828475952, "learning_rate": 4.100982006747745e-06, "loss": 0.6119, "step": 4258 }, { "epoch": 2.8992511912865897, "grad_norm": 1.7669332027435303, "learning_rate": 4.098767397775234e-06, "loss": 0.5957, "step": 4259 }, { "epoch": 2.899931926480599, "grad_norm": 1.8458393812179565, "learning_rate": 4.096552971528234e-06, "loss": 0.4929, "step": 4260 }, { "epoch": 2.9006126616746086, "grad_norm": 1.7742903232574463, "learning_rate": 4.094338728455721e-06, "loss": 0.4667, "step": 4261 }, { "epoch": 2.901293396868618, "grad_norm": 1.6709035634994507, "learning_rate": 4.092124669006635e-06, "loss": 0.6347, "step": 4262 }, { "epoch": 2.9019741320626276, "grad_norm": 1.8244327306747437, "learning_rate": 4.089910793629878e-06, "loss": 0.5024, "step": 4263 }, { "epoch": 2.9026548672566372, "grad_norm": 1.6438498497009277, "learning_rate": 4.0876971027743166e-06, "loss": 0.5311, "step": 4264 }, { "epoch": 2.903335602450647, "grad_norm": 1.7664490938186646, "learning_rate": 4.085483596888776e-06, "loss": 0.5505, "step": 4265 }, { "epoch": 2.904016337644656, "grad_norm": 1.7234653234481812, "learning_rate": 4.083270276422046e-06, "loss": 0.5913, "step": 4266 }, { "epoch": 2.904697072838666, "grad_norm": 1.7422130107879639, "learning_rate": 4.081057141822879e-06, "loss": 0.5931, "step": 4267 }, { "epoch": 2.905377808032675, "grad_norm": 1.708388328552246, "learning_rate": 4.078844193539989e-06, "loss": 0.5081, "step": 4268 }, { "epoch": 2.9060585432266848, "grad_norm": 1.643471121788025, "learning_rate": 4.076631432022055e-06, "loss": 0.6638, "step": 4269 }, { "epoch": 2.9067392784206945, "grad_norm": 1.7410454750061035, "learning_rate": 4.074418857717713e-06, "loss": 0.4619, "step": 4270 }, { "epoch": 2.907420013614704, "grad_norm": 1.775148868560791, "learning_rate": 4.072206471075564e-06, "loss": 0.5198, "step": 4271 }, { "epoch": 2.9081007488087134, "grad_norm": 1.6362498998641968, "learning_rate": 4.069994272544173e-06, "loss": 0.6132, "step": 4272 }, { "epoch": 2.908781484002723, "grad_norm": 1.707338809967041, "learning_rate": 4.067782262572065e-06, "loss": 0.5918, "step": 4273 }, { "epoch": 2.9094622191967323, "grad_norm": 1.839748740196228, "learning_rate": 4.0655704416077244e-06, "loss": 0.4989, "step": 4274 }, { "epoch": 2.910142954390742, "grad_norm": 1.6595144271850586, "learning_rate": 4.0633588100996e-06, "loss": 0.5572, "step": 4275 }, { "epoch": 2.9108236895847517, "grad_norm": 1.6299272775650024, "learning_rate": 4.061147368496103e-06, "loss": 0.6481, "step": 4276 }, { "epoch": 2.911504424778761, "grad_norm": 1.6980576515197754, "learning_rate": 4.058936117245602e-06, "loss": 0.563, "step": 4277 }, { "epoch": 2.9121851599727706, "grad_norm": 1.707534670829773, "learning_rate": 4.056725056796432e-06, "loss": 0.5783, "step": 4278 }, { "epoch": 2.91286589516678, "grad_norm": 1.8550440073013306, "learning_rate": 4.054514187596886e-06, "loss": 0.5767, "step": 4279 }, { "epoch": 2.9135466303607895, "grad_norm": 1.798586368560791, "learning_rate": 4.052303510095218e-06, "loss": 0.5644, "step": 4280 }, { "epoch": 2.914227365554799, "grad_norm": 1.602774977684021, "learning_rate": 4.050093024739647e-06, "loss": 0.6993, "step": 4281 }, { "epoch": 2.914908100748809, "grad_norm": 1.6515319347381592, "learning_rate": 4.0478827319783484e-06, "loss": 0.6098, "step": 4282 }, { "epoch": 2.915588835942818, "grad_norm": 1.7365953922271729, "learning_rate": 4.045672632259462e-06, "loss": 0.471, "step": 4283 }, { "epoch": 2.916269571136828, "grad_norm": 1.7139993906021118, "learning_rate": 4.043462726031085e-06, "loss": 0.582, "step": 4284 }, { "epoch": 2.916950306330837, "grad_norm": 1.6175092458724976, "learning_rate": 4.0412530137412795e-06, "loss": 0.5949, "step": 4285 }, { "epoch": 2.9176310415248468, "grad_norm": 1.6171437501907349, "learning_rate": 4.039043495838066e-06, "loss": 0.5769, "step": 4286 }, { "epoch": 2.9183117767188564, "grad_norm": 1.7511563301086426, "learning_rate": 4.0368341727694235e-06, "loss": 0.487, "step": 4287 }, { "epoch": 2.918992511912866, "grad_norm": 1.5284733772277832, "learning_rate": 4.0346250449832935e-06, "loss": 0.6935, "step": 4288 }, { "epoch": 2.9196732471068754, "grad_norm": 1.6744747161865234, "learning_rate": 4.032416112927582e-06, "loss": 0.6369, "step": 4289 }, { "epoch": 2.920353982300885, "grad_norm": 1.7322643995285034, "learning_rate": 4.030207377050149e-06, "loss": 0.5474, "step": 4290 }, { "epoch": 2.9210347174948943, "grad_norm": 1.62079656124115, "learning_rate": 4.0279988377988174e-06, "loss": 0.6201, "step": 4291 }, { "epoch": 2.921715452688904, "grad_norm": 1.6884517669677734, "learning_rate": 4.025790495621369e-06, "loss": 0.5996, "step": 4292 }, { "epoch": 2.9223961878829137, "grad_norm": 1.7840888500213623, "learning_rate": 4.02358235096555e-06, "loss": 0.6804, "step": 4293 }, { "epoch": 2.9230769230769234, "grad_norm": 1.7741026878356934, "learning_rate": 4.021374404279059e-06, "loss": 0.466, "step": 4294 }, { "epoch": 2.9237576582709326, "grad_norm": 1.7578973770141602, "learning_rate": 4.019166656009562e-06, "loss": 0.5057, "step": 4295 }, { "epoch": 2.9244383934649423, "grad_norm": 1.5797039270401, "learning_rate": 4.0169591066046785e-06, "loss": 0.6787, "step": 4296 }, { "epoch": 2.9251191286589515, "grad_norm": 1.5836522579193115, "learning_rate": 4.014751756511991e-06, "loss": 0.6758, "step": 4297 }, { "epoch": 2.925799863852961, "grad_norm": 1.6070388555526733, "learning_rate": 4.012544606179043e-06, "loss": 0.6735, "step": 4298 }, { "epoch": 2.926480599046971, "grad_norm": 1.646520972251892, "learning_rate": 4.010337656053335e-06, "loss": 0.5139, "step": 4299 }, { "epoch": 2.92716133424098, "grad_norm": 1.7290939092636108, "learning_rate": 4.008130906582329e-06, "loss": 0.4689, "step": 4300 }, { "epoch": 2.92784206943499, "grad_norm": 1.7608850002288818, "learning_rate": 4.005924358213442e-06, "loss": 0.5318, "step": 4301 }, { "epoch": 2.928522804628999, "grad_norm": 1.6915165185928345, "learning_rate": 4.003718011394056e-06, "loss": 0.6282, "step": 4302 }, { "epoch": 2.9292035398230087, "grad_norm": 1.84963858127594, "learning_rate": 4.001511866571507e-06, "loss": 0.4326, "step": 4303 }, { "epoch": 2.9298842750170184, "grad_norm": 1.6510659456253052, "learning_rate": 3.999305924193093e-06, "loss": 0.6731, "step": 4304 }, { "epoch": 2.930565010211028, "grad_norm": 1.6208404302597046, "learning_rate": 3.99710018470607e-06, "loss": 0.6492, "step": 4305 }, { "epoch": 2.9312457454050374, "grad_norm": 1.607448697090149, "learning_rate": 3.994894648557655e-06, "loss": 0.5764, "step": 4306 }, { "epoch": 2.931926480599047, "grad_norm": 1.6414989233016968, "learning_rate": 3.9926893161950206e-06, "loss": 0.5812, "step": 4307 }, { "epoch": 2.9326072157930563, "grad_norm": 1.898536205291748, "learning_rate": 3.990484188065299e-06, "loss": 0.5026, "step": 4308 }, { "epoch": 2.933287950987066, "grad_norm": 1.7617088556289673, "learning_rate": 3.988279264615583e-06, "loss": 0.7029, "step": 4309 }, { "epoch": 2.9339686861810756, "grad_norm": 1.6309735774993896, "learning_rate": 3.98607454629292e-06, "loss": 0.5483, "step": 4310 }, { "epoch": 2.9346494213750853, "grad_norm": 1.800668478012085, "learning_rate": 3.983870033544319e-06, "loss": 0.6118, "step": 4311 }, { "epoch": 2.9353301565690946, "grad_norm": 1.762555480003357, "learning_rate": 3.981665726816747e-06, "loss": 0.5444, "step": 4312 }, { "epoch": 2.9360108917631043, "grad_norm": 1.7211817502975464, "learning_rate": 3.979461626557128e-06, "loss": 0.6206, "step": 4313 }, { "epoch": 2.9366916269571135, "grad_norm": 1.693216323852539, "learning_rate": 3.977257733212344e-06, "loss": 0.6356, "step": 4314 }, { "epoch": 2.937372362151123, "grad_norm": 1.7924734354019165, "learning_rate": 3.9750540472292394e-06, "loss": 0.5061, "step": 4315 }, { "epoch": 2.938053097345133, "grad_norm": 1.9754055738449097, "learning_rate": 3.97285056905461e-06, "loss": 0.4061, "step": 4316 }, { "epoch": 2.938733832539142, "grad_norm": 1.675451397895813, "learning_rate": 3.970647299135213e-06, "loss": 0.5348, "step": 4317 }, { "epoch": 2.939414567733152, "grad_norm": 1.7393406629562378, "learning_rate": 3.968444237917762e-06, "loss": 0.4713, "step": 4318 }, { "epoch": 2.9400953029271615, "grad_norm": 1.6308437585830688, "learning_rate": 3.966241385848932e-06, "loss": 0.6486, "step": 4319 }, { "epoch": 2.9407760381211707, "grad_norm": 1.676539659500122, "learning_rate": 3.964038743375349e-06, "loss": 0.5286, "step": 4320 }, { "epoch": 2.9414567733151804, "grad_norm": 1.7567574977874756, "learning_rate": 3.961836310943603e-06, "loss": 0.5877, "step": 4321 }, { "epoch": 2.94213750850919, "grad_norm": 1.849685788154602, "learning_rate": 3.959634089000238e-06, "loss": 0.4616, "step": 4322 }, { "epoch": 2.9428182437031993, "grad_norm": 1.8608171939849854, "learning_rate": 3.957432077991753e-06, "loss": 0.5387, "step": 4323 }, { "epoch": 2.943498978897209, "grad_norm": 1.6166499853134155, "learning_rate": 3.955230278364611e-06, "loss": 0.5924, "step": 4324 }, { "epoch": 2.9441797140912183, "grad_norm": 1.6279675960540771, "learning_rate": 3.953028690565228e-06, "loss": 0.6522, "step": 4325 }, { "epoch": 2.944860449285228, "grad_norm": 1.7176264524459839, "learning_rate": 3.950827315039977e-06, "loss": 0.5663, "step": 4326 }, { "epoch": 2.9455411844792376, "grad_norm": 1.9098420143127441, "learning_rate": 3.948626152235187e-06, "loss": 0.5201, "step": 4327 }, { "epoch": 2.9462219196732473, "grad_norm": 1.76890230178833, "learning_rate": 3.946425202597147e-06, "loss": 0.5408, "step": 4328 }, { "epoch": 2.9469026548672566, "grad_norm": 1.6751629114151, "learning_rate": 3.9442244665720995e-06, "loss": 0.5822, "step": 4329 }, { "epoch": 2.9475833900612662, "grad_norm": 1.825455904006958, "learning_rate": 3.942023944606246e-06, "loss": 0.5875, "step": 4330 }, { "epoch": 2.9482641252552755, "grad_norm": 1.6191755533218384, "learning_rate": 3.939823637145742e-06, "loss": 0.6002, "step": 4331 }, { "epoch": 2.948944860449285, "grad_norm": 1.5248162746429443, "learning_rate": 3.937623544636704e-06, "loss": 0.7647, "step": 4332 }, { "epoch": 2.949625595643295, "grad_norm": 1.7427974939346313, "learning_rate": 3.935423667525202e-06, "loss": 0.6084, "step": 4333 }, { "epoch": 2.9503063308373045, "grad_norm": 1.8283250331878662, "learning_rate": 3.933224006257262e-06, "loss": 0.5642, "step": 4334 }, { "epoch": 2.9509870660313138, "grad_norm": 1.8206475973129272, "learning_rate": 3.931024561278867e-06, "loss": 0.6283, "step": 4335 }, { "epoch": 2.9516678012253235, "grad_norm": 1.7753660678863525, "learning_rate": 3.928825333035953e-06, "loss": 0.6429, "step": 4336 }, { "epoch": 2.9523485364193327, "grad_norm": 1.732624888420105, "learning_rate": 3.926626321974419e-06, "loss": 0.5676, "step": 4337 }, { "epoch": 2.9530292716133424, "grad_norm": 1.7542706727981567, "learning_rate": 3.924427528540115e-06, "loss": 0.6234, "step": 4338 }, { "epoch": 2.953710006807352, "grad_norm": 1.620194911956787, "learning_rate": 3.922228953178846e-06, "loss": 0.565, "step": 4339 }, { "epoch": 2.9543907420013613, "grad_norm": 1.7087781429290771, "learning_rate": 3.920030596336375e-06, "loss": 0.6857, "step": 4340 }, { "epoch": 2.955071477195371, "grad_norm": 1.7073400020599365, "learning_rate": 3.9178324584584225e-06, "loss": 0.5349, "step": 4341 }, { "epoch": 2.9557522123893807, "grad_norm": 1.648305892944336, "learning_rate": 3.915634539990661e-06, "loss": 0.6327, "step": 4342 }, { "epoch": 2.95643294758339, "grad_norm": 1.665113925933838, "learning_rate": 3.91343684137872e-06, "loss": 0.6111, "step": 4343 }, { "epoch": 2.9571136827773996, "grad_norm": 1.5571858882904053, "learning_rate": 3.911239363068184e-06, "loss": 0.6018, "step": 4344 }, { "epoch": 2.9577944179714093, "grad_norm": 1.7943733930587769, "learning_rate": 3.909042105504596e-06, "loss": 0.458, "step": 4345 }, { "epoch": 2.9584751531654185, "grad_norm": 1.7026101350784302, "learning_rate": 3.906845069133446e-06, "loss": 0.465, "step": 4346 }, { "epoch": 2.959155888359428, "grad_norm": 1.6310861110687256, "learning_rate": 3.904648254400188e-06, "loss": 0.6757, "step": 4347 }, { "epoch": 2.9598366235534375, "grad_norm": 1.7491059303283691, "learning_rate": 3.902451661750229e-06, "loss": 0.5662, "step": 4348 }, { "epoch": 2.960517358747447, "grad_norm": 1.6391576528549194, "learning_rate": 3.900255291628926e-06, "loss": 0.56, "step": 4349 }, { "epoch": 2.961198093941457, "grad_norm": 1.6927229166030884, "learning_rate": 3.898059144481597e-06, "loss": 0.6039, "step": 4350 }, { "epoch": 2.9618788291354665, "grad_norm": 1.654606819152832, "learning_rate": 3.895863220753512e-06, "loss": 0.5999, "step": 4351 }, { "epoch": 2.9625595643294758, "grad_norm": 1.7202670574188232, "learning_rate": 3.893667520889896e-06, "loss": 0.5796, "step": 4352 }, { "epoch": 2.9632402995234854, "grad_norm": 1.6875314712524414, "learning_rate": 3.891472045335927e-06, "loss": 0.6275, "step": 4353 }, { "epoch": 2.9639210347174947, "grad_norm": 1.609521508216858, "learning_rate": 3.889276794536742e-06, "loss": 0.6357, "step": 4354 }, { "epoch": 2.9646017699115044, "grad_norm": 1.566368818283081, "learning_rate": 3.887081768937429e-06, "loss": 0.718, "step": 4355 }, { "epoch": 2.965282505105514, "grad_norm": 1.6311100721359253, "learning_rate": 3.8848869689830275e-06, "loss": 0.6701, "step": 4356 }, { "epoch": 2.9659632402995237, "grad_norm": 1.846177339553833, "learning_rate": 3.882692395118536e-06, "loss": 0.4719, "step": 4357 }, { "epoch": 2.966643975493533, "grad_norm": 1.6860666275024414, "learning_rate": 3.8804980477889084e-06, "loss": 0.5283, "step": 4358 }, { "epoch": 2.9673247106875427, "grad_norm": 1.6559057235717773, "learning_rate": 3.87830392743905e-06, "loss": 0.6969, "step": 4359 }, { "epoch": 2.968005445881552, "grad_norm": 1.6831809282302856, "learning_rate": 3.8761100345138185e-06, "loss": 0.5764, "step": 4360 }, { "epoch": 2.9686861810755616, "grad_norm": 1.7515552043914795, "learning_rate": 3.873916369458025e-06, "loss": 0.5901, "step": 4361 }, { "epoch": 2.9693669162695713, "grad_norm": 1.6348506212234497, "learning_rate": 3.871722932716442e-06, "loss": 0.5518, "step": 4362 }, { "epoch": 2.9700476514635805, "grad_norm": 1.6883928775787354, "learning_rate": 3.869529724733785e-06, "loss": 0.4785, "step": 4363 }, { "epoch": 2.97072838665759, "grad_norm": 1.7216908931732178, "learning_rate": 3.867336745954733e-06, "loss": 0.6059, "step": 4364 }, { "epoch": 2.9714091218516, "grad_norm": 1.75923490524292, "learning_rate": 3.865143996823909e-06, "loss": 0.4806, "step": 4365 }, { "epoch": 2.972089857045609, "grad_norm": 1.660428524017334, "learning_rate": 3.862951477785897e-06, "loss": 0.6664, "step": 4366 }, { "epoch": 2.972770592239619, "grad_norm": 1.7052165269851685, "learning_rate": 3.860759189285231e-06, "loss": 0.5145, "step": 4367 }, { "epoch": 2.9734513274336285, "grad_norm": 1.6371440887451172, "learning_rate": 3.858567131766401e-06, "loss": 0.6297, "step": 4368 }, { "epoch": 2.9741320626276377, "grad_norm": 1.6999043226242065, "learning_rate": 3.8563753056738475e-06, "loss": 0.6961, "step": 4369 }, { "epoch": 2.9748127978216474, "grad_norm": 1.852203607559204, "learning_rate": 3.854183711451962e-06, "loss": 0.4968, "step": 4370 }, { "epoch": 2.9754935330156567, "grad_norm": 1.7794162034988403, "learning_rate": 3.851992349545095e-06, "loss": 0.5742, "step": 4371 }, { "epoch": 2.9761742682096664, "grad_norm": 1.6365740299224854, "learning_rate": 3.8498012203975445e-06, "loss": 0.5301, "step": 4372 }, { "epoch": 2.976855003403676, "grad_norm": 1.5966618061065674, "learning_rate": 3.847610324453563e-06, "loss": 0.6804, "step": 4373 }, { "epoch": 2.9775357385976857, "grad_norm": 1.925984263420105, "learning_rate": 3.845419662157357e-06, "loss": 0.4396, "step": 4374 }, { "epoch": 2.978216473791695, "grad_norm": 1.7713760137557983, "learning_rate": 3.843229233953085e-06, "loss": 0.5357, "step": 4375 }, { "epoch": 2.9788972089857046, "grad_norm": 1.7273205518722534, "learning_rate": 3.84103904028486e-06, "loss": 0.4978, "step": 4376 }, { "epoch": 2.979577944179714, "grad_norm": 1.7940057516098022, "learning_rate": 3.838849081596741e-06, "loss": 0.4566, "step": 4377 }, { "epoch": 2.9802586793737236, "grad_norm": 1.6867094039916992, "learning_rate": 3.836659358332747e-06, "loss": 0.6382, "step": 4378 }, { "epoch": 2.9809394145677333, "grad_norm": 1.5967867374420166, "learning_rate": 3.834469870936842e-06, "loss": 0.618, "step": 4379 }, { "epoch": 2.981620149761743, "grad_norm": 1.7382681369781494, "learning_rate": 3.832280619852949e-06, "loss": 0.6207, "step": 4380 }, { "epoch": 2.982300884955752, "grad_norm": 1.599527359008789, "learning_rate": 3.830091605524942e-06, "loss": 0.6878, "step": 4381 }, { "epoch": 2.982981620149762, "grad_norm": 1.8487168550491333, "learning_rate": 3.82790282839664e-06, "loss": 0.5892, "step": 4382 }, { "epoch": 2.983662355343771, "grad_norm": 1.7210208177566528, "learning_rate": 3.825714288911821e-06, "loss": 0.6204, "step": 4383 }, { "epoch": 2.984343090537781, "grad_norm": 1.782758116722107, "learning_rate": 3.823525987514214e-06, "loss": 0.6021, "step": 4384 }, { "epoch": 2.9850238257317905, "grad_norm": 1.5974574089050293, "learning_rate": 3.821337924647499e-06, "loss": 0.6037, "step": 4385 }, { "epoch": 2.9857045609257997, "grad_norm": 1.5873825550079346, "learning_rate": 3.819150100755306e-06, "loss": 0.6211, "step": 4386 }, { "epoch": 2.9863852961198094, "grad_norm": 1.6777526140213013, "learning_rate": 3.816962516281217e-06, "loss": 0.6538, "step": 4387 }, { "epoch": 2.9870660313138186, "grad_norm": 1.6293636560440063, "learning_rate": 3.814775171668769e-06, "loss": 0.5853, "step": 4388 }, { "epoch": 2.9877467665078283, "grad_norm": 1.771385669708252, "learning_rate": 3.8125880673614435e-06, "loss": 0.5888, "step": 4389 }, { "epoch": 2.988427501701838, "grad_norm": 1.6246675252914429, "learning_rate": 3.8104012038026804e-06, "loss": 0.8168, "step": 4390 }, { "epoch": 2.9891082368958477, "grad_norm": 1.7536033391952515, "learning_rate": 3.8082145814358684e-06, "loss": 0.567, "step": 4391 }, { "epoch": 2.989788972089857, "grad_norm": 1.730421543121338, "learning_rate": 3.8060282007043425e-06, "loss": 0.5652, "step": 4392 }, { "epoch": 2.9904697072838666, "grad_norm": 1.7178356647491455, "learning_rate": 3.8038420620513973e-06, "loss": 0.5238, "step": 4393 }, { "epoch": 2.991150442477876, "grad_norm": 1.677446722984314, "learning_rate": 3.8016561659202717e-06, "loss": 0.5736, "step": 4394 }, { "epoch": 2.9918311776718856, "grad_norm": 1.53749418258667, "learning_rate": 3.799470512754159e-06, "loss": 0.7116, "step": 4395 }, { "epoch": 2.9925119128658952, "grad_norm": 1.6197023391723633, "learning_rate": 3.7972851029962e-06, "loss": 0.6047, "step": 4396 }, { "epoch": 2.993192648059905, "grad_norm": 1.7248207330703735, "learning_rate": 3.7950999370894886e-06, "loss": 0.6072, "step": 4397 }, { "epoch": 2.993873383253914, "grad_norm": 1.56929349899292, "learning_rate": 3.7929150154770706e-06, "loss": 0.6015, "step": 4398 }, { "epoch": 2.994554118447924, "grad_norm": 1.585854411125183, "learning_rate": 3.790730338601937e-06, "loss": 0.6617, "step": 4399 }, { "epoch": 2.995234853641933, "grad_norm": 1.9636054039001465, "learning_rate": 3.788545906907033e-06, "loss": 0.4032, "step": 4400 }, { "epoch": 2.9959155888359428, "grad_norm": 1.7246358394622803, "learning_rate": 3.786361720835256e-06, "loss": 0.5472, "step": 4401 }, { "epoch": 2.9965963240299525, "grad_norm": 1.572677731513977, "learning_rate": 3.7841777808294506e-06, "loss": 0.6859, "step": 4402 }, { "epoch": 2.997277059223962, "grad_norm": 1.62086820602417, "learning_rate": 3.7819940873324107e-06, "loss": 0.6544, "step": 4403 }, { "epoch": 2.9979577944179714, "grad_norm": 1.7379817962646484, "learning_rate": 3.779810640786883e-06, "loss": 0.7032, "step": 4404 }, { "epoch": 2.998638529611981, "grad_norm": 1.6431173086166382, "learning_rate": 3.7776274416355614e-06, "loss": 0.6403, "step": 4405 }, { "epoch": 2.9993192648059903, "grad_norm": 1.7476222515106201, "learning_rate": 3.775444490321091e-06, "loss": 0.5836, "step": 4406 }, { "epoch": 3.0, "grad_norm": 1.4875669479370117, "learning_rate": 3.7732617872860688e-06, "loss": 0.4728, "step": 4407 }, { "epoch": 3.0006807351940097, "grad_norm": 1.5430152416229248, "learning_rate": 3.7710793329730365e-06, "loss": 0.3948, "step": 4408 }, { "epoch": 3.001361470388019, "grad_norm": 1.3480087518692017, "learning_rate": 3.7688971278244878e-06, "loss": 0.4835, "step": 4409 }, { "epoch": 3.0020422055820286, "grad_norm": 1.6264997720718384, "learning_rate": 3.76671517228287e-06, "loss": 0.4316, "step": 4410 }, { "epoch": 3.0027229407760383, "grad_norm": 1.4547234773635864, "learning_rate": 3.764533466790574e-06, "loss": 0.4743, "step": 4411 }, { "epoch": 3.0034036759700475, "grad_norm": 1.5035886764526367, "learning_rate": 3.7623520117899415e-06, "loss": 0.4041, "step": 4412 }, { "epoch": 3.0040844111640572, "grad_norm": 1.5459223985671997, "learning_rate": 3.7601708077232646e-06, "loss": 0.6773, "step": 4413 }, { "epoch": 3.004765146358067, "grad_norm": 1.5171489715576172, "learning_rate": 3.7579898550327844e-06, "loss": 0.5317, "step": 4414 }, { "epoch": 3.005445881552076, "grad_norm": 1.8576068878173828, "learning_rate": 3.7558091541606886e-06, "loss": 0.5208, "step": 4415 }, { "epoch": 3.006126616746086, "grad_norm": 1.7508940696716309, "learning_rate": 3.7536287055491164e-06, "loss": 0.3734, "step": 4416 }, { "epoch": 3.0068073519400955, "grad_norm": 1.94747793674469, "learning_rate": 3.7514485096401577e-06, "loss": 0.352, "step": 4417 }, { "epoch": 3.0074880871341048, "grad_norm": 1.6354811191558838, "learning_rate": 3.7492685668758443e-06, "loss": 0.4423, "step": 4418 }, { "epoch": 3.0081688223281144, "grad_norm": 1.6151800155639648, "learning_rate": 3.747088877698165e-06, "loss": 0.4537, "step": 4419 }, { "epoch": 3.0088495575221237, "grad_norm": 1.664100170135498, "learning_rate": 3.744909442549051e-06, "loss": 0.4543, "step": 4420 }, { "epoch": 3.0095302927161334, "grad_norm": 1.5704041719436646, "learning_rate": 3.7427302618703866e-06, "loss": 0.4272, "step": 4421 }, { "epoch": 3.010211027910143, "grad_norm": 1.5820053815841675, "learning_rate": 3.740551336103999e-06, "loss": 0.5229, "step": 4422 }, { "epoch": 3.0108917631041523, "grad_norm": 1.5226658582687378, "learning_rate": 3.7383726656916684e-06, "loss": 0.4271, "step": 4423 }, { "epoch": 3.011572498298162, "grad_norm": 1.5480531454086304, "learning_rate": 3.7361942510751233e-06, "loss": 0.5351, "step": 4424 }, { "epoch": 3.0122532334921717, "grad_norm": 1.5789538621902466, "learning_rate": 3.7340160926960354e-06, "loss": 0.3904, "step": 4425 }, { "epoch": 3.012933968686181, "grad_norm": 1.4998507499694824, "learning_rate": 3.7318381909960284e-06, "loss": 0.5646, "step": 4426 }, { "epoch": 3.0136147038801906, "grad_norm": 1.6441293954849243, "learning_rate": 3.7296605464166753e-06, "loss": 0.3655, "step": 4427 }, { "epoch": 3.0142954390742003, "grad_norm": 1.6034784317016602, "learning_rate": 3.7274831593994954e-06, "loss": 0.4771, "step": 4428 }, { "epoch": 3.0149761742682095, "grad_norm": 1.7347166538238525, "learning_rate": 3.7253060303859525e-06, "loss": 0.4238, "step": 4429 }, { "epoch": 3.015656909462219, "grad_norm": 1.6023982763290405, "learning_rate": 3.723129159817462e-06, "loss": 0.4627, "step": 4430 }, { "epoch": 3.016337644656229, "grad_norm": 1.5017650127410889, "learning_rate": 3.7209525481353887e-06, "loss": 0.5223, "step": 4431 }, { "epoch": 3.017018379850238, "grad_norm": 1.6497231721878052, "learning_rate": 3.7187761957810377e-06, "loss": 0.6249, "step": 4432 }, { "epoch": 3.017699115044248, "grad_norm": 1.5897786617279053, "learning_rate": 3.7166001031956695e-06, "loss": 0.418, "step": 4433 }, { "epoch": 3.0183798502382575, "grad_norm": 1.708024501800537, "learning_rate": 3.7144242708204854e-06, "loss": 0.4733, "step": 4434 }, { "epoch": 3.0190605854322667, "grad_norm": 1.506633996963501, "learning_rate": 3.7122486990966367e-06, "loss": 0.4473, "step": 4435 }, { "epoch": 3.0197413206262764, "grad_norm": 1.6441913843154907, "learning_rate": 3.710073388465225e-06, "loss": 0.4926, "step": 4436 }, { "epoch": 3.020422055820286, "grad_norm": 1.5196385383605957, "learning_rate": 3.707898339367294e-06, "loss": 0.4706, "step": 4437 }, { "epoch": 3.0211027910142954, "grad_norm": 1.406876564025879, "learning_rate": 3.7057235522438386e-06, "loss": 0.3314, "step": 4438 }, { "epoch": 3.021783526208305, "grad_norm": 1.414210319519043, "learning_rate": 3.7035490275357948e-06, "loss": 0.382, "step": 4439 }, { "epoch": 3.0224642614023143, "grad_norm": 1.3792498111724854, "learning_rate": 3.7013747656840525e-06, "loss": 0.5671, "step": 4440 }, { "epoch": 3.023144996596324, "grad_norm": 1.5204017162322998, "learning_rate": 3.699200767129441e-06, "loss": 0.4636, "step": 4441 }, { "epoch": 3.0238257317903336, "grad_norm": 1.467887282371521, "learning_rate": 3.697027032312742e-06, "loss": 0.3959, "step": 4442 }, { "epoch": 3.024506466984343, "grad_norm": 1.6436675786972046, "learning_rate": 3.6948535616746816e-06, "loss": 0.2509, "step": 4443 }, { "epoch": 3.0251872021783526, "grad_norm": 1.5944626331329346, "learning_rate": 3.692680355655932e-06, "loss": 0.2657, "step": 4444 }, { "epoch": 3.0258679373723623, "grad_norm": 1.5006158351898193, "learning_rate": 3.690507414697115e-06, "loss": 0.5833, "step": 4445 }, { "epoch": 3.0265486725663715, "grad_norm": 1.4216243028640747, "learning_rate": 3.6883347392387915e-06, "loss": 0.5046, "step": 4446 }, { "epoch": 3.027229407760381, "grad_norm": 1.5749393701553345, "learning_rate": 3.686162329721476e-06, "loss": 0.4156, "step": 4447 }, { "epoch": 3.027910142954391, "grad_norm": 1.6779676675796509, "learning_rate": 3.6839901865856237e-06, "loss": 0.318, "step": 4448 }, { "epoch": 3.0285908781484, "grad_norm": 1.6095949411392212, "learning_rate": 3.6818183102716386e-06, "loss": 0.3652, "step": 4449 }, { "epoch": 3.02927161334241, "grad_norm": 1.737138032913208, "learning_rate": 3.6796467012198726e-06, "loss": 0.4472, "step": 4450 }, { "epoch": 3.0299523485364195, "grad_norm": 1.6663678884506226, "learning_rate": 3.6774753598706173e-06, "loss": 0.4561, "step": 4451 }, { "epoch": 3.0306330837304287, "grad_norm": 1.6719683408737183, "learning_rate": 3.6753042866641148e-06, "loss": 0.4095, "step": 4452 }, { "epoch": 3.0313138189244384, "grad_norm": 1.4045804738998413, "learning_rate": 3.6731334820405527e-06, "loss": 0.3415, "step": 4453 }, { "epoch": 3.031994554118448, "grad_norm": 1.496862769126892, "learning_rate": 3.670962946440063e-06, "loss": 0.4333, "step": 4454 }, { "epoch": 3.0326752893124573, "grad_norm": 1.5429418087005615, "learning_rate": 3.6687926803027224e-06, "loss": 0.397, "step": 4455 }, { "epoch": 3.033356024506467, "grad_norm": 1.48365318775177, "learning_rate": 3.6666226840685536e-06, "loss": 0.3992, "step": 4456 }, { "epoch": 3.0340367597004767, "grad_norm": 1.5472557544708252, "learning_rate": 3.6644529581775267e-06, "loss": 0.4524, "step": 4457 }, { "epoch": 3.034717494894486, "grad_norm": 1.6480714082717896, "learning_rate": 3.6622835030695525e-06, "loss": 0.2291, "step": 4458 }, { "epoch": 3.0353982300884956, "grad_norm": 1.508034348487854, "learning_rate": 3.66011431918449e-06, "loss": 0.4161, "step": 4459 }, { "epoch": 3.0360789652825053, "grad_norm": 1.5104691982269287, "learning_rate": 3.657945406962145e-06, "loss": 0.4386, "step": 4460 }, { "epoch": 3.0367597004765146, "grad_norm": 1.6299303770065308, "learning_rate": 3.6557767668422616e-06, "loss": 0.3646, "step": 4461 }, { "epoch": 3.0374404356705242, "grad_norm": 1.4915739297866821, "learning_rate": 3.653608399264538e-06, "loss": 0.433, "step": 4462 }, { "epoch": 3.0381211708645335, "grad_norm": 1.4177933931350708, "learning_rate": 3.651440304668608e-06, "loss": 0.6061, "step": 4463 }, { "epoch": 3.038801906058543, "grad_norm": 1.680215835571289, "learning_rate": 3.649272483494058e-06, "loss": 0.4432, "step": 4464 }, { "epoch": 3.039482641252553, "grad_norm": 1.4927233457565308, "learning_rate": 3.6471049361804124e-06, "loss": 0.45, "step": 4465 }, { "epoch": 3.040163376446562, "grad_norm": 1.5772682428359985, "learning_rate": 3.644937663167143e-06, "loss": 0.3094, "step": 4466 }, { "epoch": 3.040844111640572, "grad_norm": 1.6943843364715576, "learning_rate": 3.642770664893668e-06, "loss": 0.3231, "step": 4467 }, { "epoch": 3.0415248468345815, "grad_norm": 1.385547399520874, "learning_rate": 3.640603941799345e-06, "loss": 0.4831, "step": 4468 }, { "epoch": 3.0422055820285907, "grad_norm": 1.6527643203735352, "learning_rate": 3.638437494323479e-06, "loss": 0.4779, "step": 4469 }, { "epoch": 3.0428863172226004, "grad_norm": 1.6175744533538818, "learning_rate": 3.6362713229053204e-06, "loss": 0.4178, "step": 4470 }, { "epoch": 3.04356705241661, "grad_norm": 1.5137838125228882, "learning_rate": 3.6341054279840617e-06, "loss": 0.3771, "step": 4471 }, { "epoch": 3.0442477876106193, "grad_norm": 1.5531693696975708, "learning_rate": 3.631939809998838e-06, "loss": 0.3453, "step": 4472 }, { "epoch": 3.044928522804629, "grad_norm": 1.6488858461380005, "learning_rate": 3.629774469388731e-06, "loss": 0.4053, "step": 4473 }, { "epoch": 3.0456092579986387, "grad_norm": 1.585501790046692, "learning_rate": 3.6276094065927653e-06, "loss": 0.3832, "step": 4474 }, { "epoch": 3.046289993192648, "grad_norm": 1.5720338821411133, "learning_rate": 3.6254446220499075e-06, "loss": 0.4948, "step": 4475 }, { "epoch": 3.0469707283866576, "grad_norm": 1.686112403869629, "learning_rate": 3.6232801161990705e-06, "loss": 0.3824, "step": 4476 }, { "epoch": 3.0476514635806673, "grad_norm": 1.6691087484359741, "learning_rate": 3.621115889479107e-06, "loss": 0.4068, "step": 4477 }, { "epoch": 3.0483321987746765, "grad_norm": 1.6482411623001099, "learning_rate": 3.6189519423288164e-06, "loss": 0.3478, "step": 4478 }, { "epoch": 3.0490129339686862, "grad_norm": 1.7361384630203247, "learning_rate": 3.616788275186942e-06, "loss": 0.3608, "step": 4479 }, { "epoch": 3.049693669162696, "grad_norm": 1.4748765230178833, "learning_rate": 3.6146248884921687e-06, "loss": 0.4353, "step": 4480 }, { "epoch": 3.050374404356705, "grad_norm": 1.5066196918487549, "learning_rate": 3.612461782683122e-06, "loss": 0.4068, "step": 4481 }, { "epoch": 3.051055139550715, "grad_norm": 1.5180894136428833, "learning_rate": 3.6102989581983753e-06, "loss": 0.4424, "step": 4482 }, { "epoch": 3.0517358747447245, "grad_norm": 1.5795520544052124, "learning_rate": 3.608136415476443e-06, "loss": 0.575, "step": 4483 }, { "epoch": 3.0524166099387338, "grad_norm": 1.7228806018829346, "learning_rate": 3.6059741549557805e-06, "loss": 0.3889, "step": 4484 }, { "epoch": 3.0530973451327434, "grad_norm": 1.5302879810333252, "learning_rate": 3.603812177074788e-06, "loss": 0.2613, "step": 4485 }, { "epoch": 3.0537780803267527, "grad_norm": 1.6005959510803223, "learning_rate": 3.601650482271809e-06, "loss": 0.4127, "step": 4486 }, { "epoch": 3.0544588155207624, "grad_norm": 1.646520972251892, "learning_rate": 3.5994890709851255e-06, "loss": 0.3232, "step": 4487 }, { "epoch": 3.055139550714772, "grad_norm": 1.4378862380981445, "learning_rate": 3.5973279436529685e-06, "loss": 0.5479, "step": 4488 }, { "epoch": 3.0558202859087813, "grad_norm": 1.5826808214187622, "learning_rate": 3.595167100713507e-06, "loss": 0.3577, "step": 4489 }, { "epoch": 3.056501021102791, "grad_norm": 1.7435643672943115, "learning_rate": 3.5930065426048543e-06, "loss": 0.4916, "step": 4490 }, { "epoch": 3.0571817562968007, "grad_norm": 1.5946235656738281, "learning_rate": 3.5908462697650626e-06, "loss": 0.5299, "step": 4491 }, { "epoch": 3.05786249149081, "grad_norm": 1.4911514520645142, "learning_rate": 3.58868628263213e-06, "loss": 0.4579, "step": 4492 }, { "epoch": 3.0585432266848196, "grad_norm": 1.4316728115081787, "learning_rate": 3.586526581643996e-06, "loss": 0.4816, "step": 4493 }, { "epoch": 3.0592239618788293, "grad_norm": 1.6625465154647827, "learning_rate": 3.58436716723854e-06, "loss": 0.3633, "step": 4494 }, { "epoch": 3.0599046970728385, "grad_norm": 1.6194225549697876, "learning_rate": 3.582208039853584e-06, "loss": 0.3239, "step": 4495 }, { "epoch": 3.060585432266848, "grad_norm": 1.6305493116378784, "learning_rate": 3.580049199926895e-06, "loss": 0.3716, "step": 4496 }, { "epoch": 3.061266167460858, "grad_norm": 1.6537660360336304, "learning_rate": 3.5778906478961796e-06, "loss": 0.3427, "step": 4497 }, { "epoch": 3.061946902654867, "grad_norm": 1.596410870552063, "learning_rate": 3.5757323841990833e-06, "loss": 0.3603, "step": 4498 }, { "epoch": 3.062627637848877, "grad_norm": 1.5326229333877563, "learning_rate": 3.5735744092731963e-06, "loss": 0.4764, "step": 4499 }, { "epoch": 3.0633083730428865, "grad_norm": 1.783515453338623, "learning_rate": 3.57141672355605e-06, "loss": 0.3652, "step": 4500 }, { "epoch": 3.0639891082368957, "grad_norm": 1.732168197631836, "learning_rate": 3.569259327485115e-06, "loss": 0.3864, "step": 4501 }, { "epoch": 3.0646698434309054, "grad_norm": 1.5653384923934937, "learning_rate": 3.567102221497808e-06, "loss": 0.4003, "step": 4502 }, { "epoch": 3.065350578624915, "grad_norm": 1.654030680656433, "learning_rate": 3.56494540603148e-06, "loss": 0.3661, "step": 4503 }, { "epoch": 3.0660313138189244, "grad_norm": 1.648419737815857, "learning_rate": 3.562788881523428e-06, "loss": 0.5803, "step": 4504 }, { "epoch": 3.066712049012934, "grad_norm": 1.6198022365570068, "learning_rate": 3.5606326484108894e-06, "loss": 0.39, "step": 4505 }, { "epoch": 3.0673927842069437, "grad_norm": 1.4423540830612183, "learning_rate": 3.558476707131042e-06, "loss": 0.4938, "step": 4506 }, { "epoch": 3.068073519400953, "grad_norm": 1.544780969619751, "learning_rate": 3.556321058121005e-06, "loss": 0.3782, "step": 4507 }, { "epoch": 3.0687542545949626, "grad_norm": 1.5769296884536743, "learning_rate": 3.5541657018178368e-06, "loss": 0.2818, "step": 4508 }, { "epoch": 3.069434989788972, "grad_norm": 1.6847434043884277, "learning_rate": 3.5520106386585367e-06, "loss": 0.3808, "step": 4509 }, { "epoch": 3.0701157249829816, "grad_norm": 1.4819002151489258, "learning_rate": 3.5498558690800455e-06, "loss": 0.4895, "step": 4510 }, { "epoch": 3.0707964601769913, "grad_norm": 1.6469498872756958, "learning_rate": 3.547701393519244e-06, "loss": 0.4729, "step": 4511 }, { "epoch": 3.0714771953710005, "grad_norm": 1.40873122215271, "learning_rate": 3.5455472124129535e-06, "loss": 0.6423, "step": 4512 }, { "epoch": 3.07215793056501, "grad_norm": 1.5706651210784912, "learning_rate": 3.543393326197937e-06, "loss": 0.356, "step": 4513 }, { "epoch": 3.07283866575902, "grad_norm": 1.64425790309906, "learning_rate": 3.541239735310896e-06, "loss": 0.324, "step": 4514 }, { "epoch": 3.073519400953029, "grad_norm": 1.669142484664917, "learning_rate": 3.5390864401884717e-06, "loss": 0.4101, "step": 4515 }, { "epoch": 3.074200136147039, "grad_norm": 1.538252592086792, "learning_rate": 3.5369334412672474e-06, "loss": 0.4505, "step": 4516 }, { "epoch": 3.0748808713410485, "grad_norm": 1.4630156755447388, "learning_rate": 3.534780738983743e-06, "loss": 0.6488, "step": 4517 }, { "epoch": 3.0755616065350577, "grad_norm": 1.5939087867736816, "learning_rate": 3.5326283337744207e-06, "loss": 0.4539, "step": 4518 }, { "epoch": 3.0762423417290674, "grad_norm": 1.6095402240753174, "learning_rate": 3.5304762260756844e-06, "loss": 0.481, "step": 4519 }, { "epoch": 3.076923076923077, "grad_norm": 1.5840065479278564, "learning_rate": 3.5283244163238726e-06, "loss": 0.3265, "step": 4520 }, { "epoch": 3.0776038121170863, "grad_norm": 1.5086804628372192, "learning_rate": 3.526172904955266e-06, "loss": 0.3909, "step": 4521 }, { "epoch": 3.078284547311096, "grad_norm": 1.6877574920654297, "learning_rate": 3.5240216924060883e-06, "loss": 0.4152, "step": 4522 }, { "epoch": 3.0789652825051057, "grad_norm": 1.6327215433120728, "learning_rate": 3.5218707791124975e-06, "loss": 0.3846, "step": 4523 }, { "epoch": 3.079646017699115, "grad_norm": 1.4131226539611816, "learning_rate": 3.5197201655105916e-06, "loss": 0.5509, "step": 4524 }, { "epoch": 3.0803267528931246, "grad_norm": 1.7294020652770996, "learning_rate": 3.51756985203641e-06, "loss": 0.4237, "step": 4525 }, { "epoch": 3.0810074880871343, "grad_norm": 1.6457210779190063, "learning_rate": 3.515419839125931e-06, "loss": 0.4299, "step": 4526 }, { "epoch": 3.0816882232811436, "grad_norm": 1.6253806352615356, "learning_rate": 3.5132701272150693e-06, "loss": 0.5683, "step": 4527 }, { "epoch": 3.0823689584751532, "grad_norm": 1.6198650598526, "learning_rate": 3.511120716739681e-06, "loss": 0.5066, "step": 4528 }, { "epoch": 3.0830496936691625, "grad_norm": 1.4809799194335938, "learning_rate": 3.508971608135562e-06, "loss": 0.601, "step": 4529 }, { "epoch": 3.083730428863172, "grad_norm": 1.602094054222107, "learning_rate": 3.506822801838442e-06, "loss": 0.4363, "step": 4530 }, { "epoch": 3.084411164057182, "grad_norm": 1.528085708618164, "learning_rate": 3.5046742982839964e-06, "loss": 0.598, "step": 4531 }, { "epoch": 3.085091899251191, "grad_norm": 1.5585579872131348, "learning_rate": 3.5025260979078347e-06, "loss": 0.4259, "step": 4532 }, { "epoch": 3.085772634445201, "grad_norm": 1.6356980800628662, "learning_rate": 3.5003782011455063e-06, "loss": 0.4607, "step": 4533 }, { "epoch": 3.0864533696392105, "grad_norm": 1.5216459035873413, "learning_rate": 3.4982306084324974e-06, "loss": 0.5411, "step": 4534 }, { "epoch": 3.0871341048332197, "grad_norm": 1.4862756729125977, "learning_rate": 3.496083320204235e-06, "loss": 0.2983, "step": 4535 }, { "epoch": 3.0878148400272294, "grad_norm": 1.6775901317596436, "learning_rate": 3.493936336896083e-06, "loss": 0.5039, "step": 4536 }, { "epoch": 3.088495575221239, "grad_norm": 1.4988245964050293, "learning_rate": 3.4917896589433437e-06, "loss": 0.5027, "step": 4537 }, { "epoch": 3.0891763104152483, "grad_norm": 1.5117169618606567, "learning_rate": 3.489643286781256e-06, "loss": 0.47, "step": 4538 }, { "epoch": 3.089857045609258, "grad_norm": 1.6691620349884033, "learning_rate": 3.4874972208450002e-06, "loss": 0.3307, "step": 4539 }, { "epoch": 3.0905377808032677, "grad_norm": 1.702114224433899, "learning_rate": 3.4853514615696937e-06, "loss": 0.4328, "step": 4540 }, { "epoch": 3.091218515997277, "grad_norm": 1.5807828903198242, "learning_rate": 3.4832060093903874e-06, "loss": 0.4344, "step": 4541 }, { "epoch": 3.0918992511912866, "grad_norm": 1.6121294498443604, "learning_rate": 3.4810608647420753e-06, "loss": 0.5353, "step": 4542 }, { "epoch": 3.0925799863852963, "grad_norm": 1.6468688249588013, "learning_rate": 3.4789160280596866e-06, "loss": 0.3633, "step": 4543 }, { "epoch": 3.0932607215793055, "grad_norm": 1.4652040004730225, "learning_rate": 3.4767714997780882e-06, "loss": 0.5316, "step": 4544 }, { "epoch": 3.0939414567733152, "grad_norm": 1.529292106628418, "learning_rate": 3.4746272803320848e-06, "loss": 0.4326, "step": 4545 }, { "epoch": 3.094622191967325, "grad_norm": 1.6220009326934814, "learning_rate": 3.4724833701564176e-06, "loss": 0.4631, "step": 4546 }, { "epoch": 3.095302927161334, "grad_norm": 1.6232855319976807, "learning_rate": 3.4703397696857644e-06, "loss": 0.4385, "step": 4547 }, { "epoch": 3.095983662355344, "grad_norm": 1.671334147453308, "learning_rate": 3.468196479354745e-06, "loss": 0.3776, "step": 4548 }, { "epoch": 3.096664397549353, "grad_norm": 1.7536858320236206, "learning_rate": 3.4660534995979126e-06, "loss": 0.3874, "step": 4549 }, { "epoch": 3.0973451327433628, "grad_norm": 1.5123755931854248, "learning_rate": 3.463910830849756e-06, "loss": 0.5469, "step": 4550 }, { "epoch": 3.0980258679373724, "grad_norm": 1.5454744100570679, "learning_rate": 3.461768473544703e-06, "loss": 0.4473, "step": 4551 }, { "epoch": 3.0987066031313817, "grad_norm": 1.6132508516311646, "learning_rate": 3.459626428117119e-06, "loss": 0.3608, "step": 4552 }, { "epoch": 3.0993873383253914, "grad_norm": 1.6039785146713257, "learning_rate": 3.457484695001304e-06, "loss": 0.2524, "step": 4553 }, { "epoch": 3.100068073519401, "grad_norm": 1.4411380290985107, "learning_rate": 3.4553432746314963e-06, "loss": 0.4967, "step": 4554 }, { "epoch": 3.1007488087134103, "grad_norm": 1.666446566581726, "learning_rate": 3.453202167441871e-06, "loss": 0.3712, "step": 4555 }, { "epoch": 3.10142954390742, "grad_norm": 1.7172938585281372, "learning_rate": 3.451061373866536e-06, "loss": 0.4352, "step": 4556 }, { "epoch": 3.1021102791014297, "grad_norm": 1.6176002025604248, "learning_rate": 3.4489208943395424e-06, "loss": 0.3987, "step": 4557 }, { "epoch": 3.102791014295439, "grad_norm": 1.665503978729248, "learning_rate": 3.4467807292948726e-06, "loss": 0.4258, "step": 4558 }, { "epoch": 3.1034717494894486, "grad_norm": 1.5262633562088013, "learning_rate": 3.4446408791664464e-06, "loss": 0.3486, "step": 4559 }, { "epoch": 3.1041524846834583, "grad_norm": 1.698418378829956, "learning_rate": 3.4425013443881193e-06, "loss": 0.3572, "step": 4560 }, { "epoch": 3.1048332198774675, "grad_norm": 1.4927258491516113, "learning_rate": 3.4403621253936842e-06, "loss": 0.5308, "step": 4561 }, { "epoch": 3.105513955071477, "grad_norm": 1.4556269645690918, "learning_rate": 3.4382232226168692e-06, "loss": 0.5272, "step": 4562 }, { "epoch": 3.106194690265487, "grad_norm": 1.5678367614746094, "learning_rate": 3.4360846364913373e-06, "loss": 0.4057, "step": 4563 }, { "epoch": 3.106875425459496, "grad_norm": 1.573970079421997, "learning_rate": 3.433946367450688e-06, "loss": 0.4331, "step": 4564 }, { "epoch": 3.107556160653506, "grad_norm": 1.5204392671585083, "learning_rate": 3.4318084159284583e-06, "loss": 0.3893, "step": 4565 }, { "epoch": 3.1082368958475155, "grad_norm": 1.4751743078231812, "learning_rate": 3.4296707823581192e-06, "loss": 0.5502, "step": 4566 }, { "epoch": 3.1089176310415247, "grad_norm": 1.5013375282287598, "learning_rate": 3.4275334671730765e-06, "loss": 0.4583, "step": 4567 }, { "epoch": 3.1095983662355344, "grad_norm": 1.534684181213379, "learning_rate": 3.425396470806672e-06, "loss": 0.3232, "step": 4568 }, { "epoch": 3.110279101429544, "grad_norm": 1.5095270872116089, "learning_rate": 3.423259793692185e-06, "loss": 0.4593, "step": 4569 }, { "epoch": 3.1109598366235534, "grad_norm": 1.6300833225250244, "learning_rate": 3.4211234362628255e-06, "loss": 0.3869, "step": 4570 }, { "epoch": 3.111640571817563, "grad_norm": 1.6008182764053345, "learning_rate": 3.418987398951744e-06, "loss": 0.5383, "step": 4571 }, { "epoch": 3.1123213070115723, "grad_norm": 1.6195505857467651, "learning_rate": 3.4168516821920212e-06, "loss": 0.3727, "step": 4572 }, { "epoch": 3.113002042205582, "grad_norm": 1.6037793159484863, "learning_rate": 3.414716286416675e-06, "loss": 0.4216, "step": 4573 }, { "epoch": 3.1136827773995917, "grad_norm": 1.551699161529541, "learning_rate": 3.412581212058661e-06, "loss": 0.4896, "step": 4574 }, { "epoch": 3.114363512593601, "grad_norm": 1.553197979927063, "learning_rate": 3.4104464595508646e-06, "loss": 0.4701, "step": 4575 }, { "epoch": 3.1150442477876106, "grad_norm": 1.5992238521575928, "learning_rate": 3.4083120293261106e-06, "loss": 0.4763, "step": 4576 }, { "epoch": 3.1157249829816203, "grad_norm": 1.458720088005066, "learning_rate": 3.406177921817154e-06, "loss": 0.3921, "step": 4577 }, { "epoch": 3.1164057181756295, "grad_norm": 1.818159818649292, "learning_rate": 3.404044137456688e-06, "loss": 0.4074, "step": 4578 }, { "epoch": 3.117086453369639, "grad_norm": 1.5219892263412476, "learning_rate": 3.4019106766773364e-06, "loss": 0.3897, "step": 4579 }, { "epoch": 3.117767188563649, "grad_norm": 1.6644096374511719, "learning_rate": 3.3997775399116615e-06, "loss": 0.358, "step": 4580 }, { "epoch": 3.118447923757658, "grad_norm": 1.6030998229980469, "learning_rate": 3.3976447275921574e-06, "loss": 0.3879, "step": 4581 }, { "epoch": 3.119128658951668, "grad_norm": 1.4955025911331177, "learning_rate": 3.3955122401512554e-06, "loss": 0.481, "step": 4582 }, { "epoch": 3.1198093941456775, "grad_norm": 1.5439667701721191, "learning_rate": 3.3933800780213176e-06, "loss": 0.4503, "step": 4583 }, { "epoch": 3.1204901293396867, "grad_norm": 1.579814076423645, "learning_rate": 3.3912482416346405e-06, "loss": 0.4259, "step": 4584 }, { "epoch": 3.1211708645336964, "grad_norm": 1.4752837419509888, "learning_rate": 3.3891167314234564e-06, "loss": 0.5762, "step": 4585 }, { "epoch": 3.121851599727706, "grad_norm": 1.5603082180023193, "learning_rate": 3.386985547819929e-06, "loss": 0.3848, "step": 4586 }, { "epoch": 3.1225323349217153, "grad_norm": 1.5473378896713257, "learning_rate": 3.3848546912561576e-06, "loss": 0.5837, "step": 4587 }, { "epoch": 3.123213070115725, "grad_norm": 1.4484704732894897, "learning_rate": 3.382724162164176e-06, "loss": 0.5592, "step": 4588 }, { "epoch": 3.1238938053097347, "grad_norm": 1.774332046508789, "learning_rate": 3.3805939609759497e-06, "loss": 0.3468, "step": 4589 }, { "epoch": 3.124574540503744, "grad_norm": 1.588001012802124, "learning_rate": 3.3784640881233764e-06, "loss": 0.3685, "step": 4590 }, { "epoch": 3.1252552756977536, "grad_norm": 1.4659204483032227, "learning_rate": 3.3763345440382924e-06, "loss": 0.3853, "step": 4591 }, { "epoch": 3.1259360108917633, "grad_norm": 1.537217140197754, "learning_rate": 3.3742053291524646e-06, "loss": 0.5258, "step": 4592 }, { "epoch": 3.1266167460857726, "grad_norm": 1.6586277484893799, "learning_rate": 3.3720764438975895e-06, "loss": 0.5067, "step": 4593 }, { "epoch": 3.1272974812797822, "grad_norm": 1.6813262701034546, "learning_rate": 3.3699478887053017e-06, "loss": 0.2693, "step": 4594 }, { "epoch": 3.1279782164737915, "grad_norm": 1.6398216485977173, "learning_rate": 3.367819664007168e-06, "loss": 0.4524, "step": 4595 }, { "epoch": 3.128658951667801, "grad_norm": 1.7537381649017334, "learning_rate": 3.365691770234685e-06, "loss": 0.3206, "step": 4596 }, { "epoch": 3.129339686861811, "grad_norm": 1.5661178827285767, "learning_rate": 3.3635642078192864e-06, "loss": 0.4111, "step": 4597 }, { "epoch": 3.13002042205582, "grad_norm": 1.897739052772522, "learning_rate": 3.361436977192338e-06, "loss": 0.3975, "step": 4598 }, { "epoch": 3.13070115724983, "grad_norm": 1.785488486289978, "learning_rate": 3.3593100787851336e-06, "loss": 0.4484, "step": 4599 }, { "epoch": 3.1313818924438395, "grad_norm": 1.5909119844436646, "learning_rate": 3.357183513028906e-06, "loss": 0.3658, "step": 4600 }, { "epoch": 3.1320626276378487, "grad_norm": 1.6411538124084473, "learning_rate": 3.3550572803548178e-06, "loss": 0.3959, "step": 4601 }, { "epoch": 3.1327433628318584, "grad_norm": 1.5833402872085571, "learning_rate": 3.352931381193964e-06, "loss": 0.4563, "step": 4602 }, { "epoch": 3.133424098025868, "grad_norm": 1.4591940641403198, "learning_rate": 3.350805815977371e-06, "loss": 0.3443, "step": 4603 }, { "epoch": 3.1341048332198773, "grad_norm": 1.4566866159439087, "learning_rate": 3.348680585135999e-06, "loss": 0.4391, "step": 4604 }, { "epoch": 3.134785568413887, "grad_norm": 1.6116353273391724, "learning_rate": 3.346555689100741e-06, "loss": 0.3969, "step": 4605 }, { "epoch": 3.1354663036078967, "grad_norm": 1.5028833150863647, "learning_rate": 3.34443112830242e-06, "loss": 0.2569, "step": 4606 }, { "epoch": 3.136147038801906, "grad_norm": 1.6986339092254639, "learning_rate": 3.342306903171791e-06, "loss": 0.3899, "step": 4607 }, { "epoch": 3.1368277739959156, "grad_norm": 1.6154264211654663, "learning_rate": 3.3401830141395442e-06, "loss": 0.3288, "step": 4608 }, { "epoch": 3.1375085091899253, "grad_norm": 1.7067736387252808, "learning_rate": 3.3380594616362997e-06, "loss": 0.4419, "step": 4609 }, { "epoch": 3.1381892443839345, "grad_norm": 1.5505688190460205, "learning_rate": 3.3359362460926075e-06, "loss": 0.3797, "step": 4610 }, { "epoch": 3.1388699795779442, "grad_norm": 1.5025968551635742, "learning_rate": 3.3338133679389517e-06, "loss": 0.5802, "step": 4611 }, { "epoch": 3.139550714771954, "grad_norm": 1.5185377597808838, "learning_rate": 3.331690827605748e-06, "loss": 0.5835, "step": 4612 }, { "epoch": 3.140231449965963, "grad_norm": 1.5725088119506836, "learning_rate": 3.3295686255233417e-06, "loss": 0.3881, "step": 4613 }, { "epoch": 3.140912185159973, "grad_norm": 1.573215365409851, "learning_rate": 3.327446762122012e-06, "loss": 0.4346, "step": 4614 }, { "epoch": 3.1415929203539825, "grad_norm": 1.8133277893066406, "learning_rate": 3.3253252378319656e-06, "loss": 0.4696, "step": 4615 }, { "epoch": 3.1422736555479918, "grad_norm": 1.6729203462600708, "learning_rate": 3.3232040530833442e-06, "loss": 0.3371, "step": 4616 }, { "epoch": 3.1429543907420014, "grad_norm": 1.6735100746154785, "learning_rate": 3.3210832083062205e-06, "loss": 0.3348, "step": 4617 }, { "epoch": 3.1436351259360107, "grad_norm": 1.6739197969436646, "learning_rate": 3.3189627039305972e-06, "loss": 0.4099, "step": 4618 }, { "epoch": 3.1443158611300204, "grad_norm": 1.584848403930664, "learning_rate": 3.316842540386408e-06, "loss": 0.4769, "step": 4619 }, { "epoch": 3.14499659632403, "grad_norm": 1.6277437210083008, "learning_rate": 3.3147227181035153e-06, "loss": 0.3099, "step": 4620 }, { "epoch": 3.1456773315180393, "grad_norm": 1.5232527256011963, "learning_rate": 3.3126032375117172e-06, "loss": 0.4582, "step": 4621 }, { "epoch": 3.146358066712049, "grad_norm": 1.60452401638031, "learning_rate": 3.3104840990407375e-06, "loss": 0.5255, "step": 4622 }, { "epoch": 3.1470388019060587, "grad_norm": 1.643752098083496, "learning_rate": 3.3083653031202346e-06, "loss": 0.4199, "step": 4623 }, { "epoch": 3.147719537100068, "grad_norm": 1.4985616207122803, "learning_rate": 3.3062468501797956e-06, "loss": 0.3598, "step": 4624 }, { "epoch": 3.1484002722940776, "grad_norm": 1.5133966207504272, "learning_rate": 3.304128740648936e-06, "loss": 0.3811, "step": 4625 }, { "epoch": 3.1490810074880873, "grad_norm": 1.7084051370620728, "learning_rate": 3.3020109749571067e-06, "loss": 0.4011, "step": 4626 }, { "epoch": 3.1497617426820965, "grad_norm": 1.5714938640594482, "learning_rate": 3.299893553533685e-06, "loss": 0.4808, "step": 4627 }, { "epoch": 3.150442477876106, "grad_norm": 1.6749247312545776, "learning_rate": 3.297776476807981e-06, "loss": 0.3879, "step": 4628 }, { "epoch": 3.151123213070116, "grad_norm": 1.561482310295105, "learning_rate": 3.2956597452092307e-06, "loss": 0.4932, "step": 4629 }, { "epoch": 3.151803948264125, "grad_norm": 1.6000407934188843, "learning_rate": 3.2935433591666048e-06, "loss": 0.4392, "step": 4630 }, { "epoch": 3.152484683458135, "grad_norm": 1.575522541999817, "learning_rate": 3.291427319109203e-06, "loss": 0.3318, "step": 4631 }, { "epoch": 3.1531654186521445, "grad_norm": 1.5539509057998657, "learning_rate": 3.2893116254660495e-06, "loss": 0.4737, "step": 4632 }, { "epoch": 3.1538461538461537, "grad_norm": 1.6301568746566772, "learning_rate": 3.2871962786661053e-06, "loss": 0.3304, "step": 4633 }, { "epoch": 3.1545268890401634, "grad_norm": 1.619600772857666, "learning_rate": 3.2850812791382596e-06, "loss": 0.4848, "step": 4634 }, { "epoch": 3.155207624234173, "grad_norm": 1.5849775075912476, "learning_rate": 3.28296662731133e-06, "loss": 0.4572, "step": 4635 }, { "epoch": 3.1558883594281824, "grad_norm": 1.5336277484893799, "learning_rate": 3.280852323614061e-06, "loss": 0.4413, "step": 4636 }, { "epoch": 3.156569094622192, "grad_norm": 1.4732177257537842, "learning_rate": 3.27873836847513e-06, "loss": 0.3745, "step": 4637 }, { "epoch": 3.1572498298162017, "grad_norm": 1.5769116878509521, "learning_rate": 3.276624762323145e-06, "loss": 0.4357, "step": 4638 }, { "epoch": 3.157930565010211, "grad_norm": 1.5651605129241943, "learning_rate": 3.274511505586638e-06, "loss": 0.3042, "step": 4639 }, { "epoch": 3.1586113002042207, "grad_norm": 1.5544216632843018, "learning_rate": 3.2723985986940745e-06, "loss": 0.4065, "step": 4640 }, { "epoch": 3.15929203539823, "grad_norm": 1.5578185319900513, "learning_rate": 3.2702860420738484e-06, "loss": 0.4395, "step": 4641 }, { "epoch": 3.1599727705922396, "grad_norm": 1.6785131692886353, "learning_rate": 3.26817383615428e-06, "loss": 0.4326, "step": 4642 }, { "epoch": 3.1606535057862493, "grad_norm": 1.6724106073379517, "learning_rate": 3.2660619813636228e-06, "loss": 0.3221, "step": 4643 }, { "epoch": 3.1613342409802585, "grad_norm": 1.6600748300552368, "learning_rate": 3.2639504781300557e-06, "loss": 0.4122, "step": 4644 }, { "epoch": 3.162014976174268, "grad_norm": 1.5222244262695312, "learning_rate": 3.2618393268816884e-06, "loss": 0.4709, "step": 4645 }, { "epoch": 3.162695711368278, "grad_norm": 1.573683261871338, "learning_rate": 3.2597285280465564e-06, "loss": 0.5904, "step": 4646 }, { "epoch": 3.163376446562287, "grad_norm": 1.5017510652542114, "learning_rate": 3.257618082052627e-06, "loss": 0.4697, "step": 4647 }, { "epoch": 3.164057181756297, "grad_norm": 2.51200795173645, "learning_rate": 3.255507989327794e-06, "loss": 0.3714, "step": 4648 }, { "epoch": 3.1647379169503065, "grad_norm": 1.728188395500183, "learning_rate": 3.2533982502998796e-06, "loss": 0.3303, "step": 4649 }, { "epoch": 3.1654186521443157, "grad_norm": 1.489426612854004, "learning_rate": 3.2512888653966345e-06, "loss": 0.5716, "step": 4650 }, { "epoch": 3.1660993873383254, "grad_norm": 1.5981043577194214, "learning_rate": 3.2491798350457393e-06, "loss": 0.3429, "step": 4651 }, { "epoch": 3.166780122532335, "grad_norm": 1.6367381811141968, "learning_rate": 3.2470711596748022e-06, "loss": 0.4287, "step": 4652 }, { "epoch": 3.1674608577263443, "grad_norm": 1.5113779306411743, "learning_rate": 3.244962839711356e-06, "loss": 0.3862, "step": 4653 }, { "epoch": 3.168141592920354, "grad_norm": 1.679813265800476, "learning_rate": 3.242854875582866e-06, "loss": 0.4627, "step": 4654 }, { "epoch": 3.1688223281143637, "grad_norm": 1.6411521434783936, "learning_rate": 3.2407472677167214e-06, "loss": 0.3687, "step": 4655 }, { "epoch": 3.169503063308373, "grad_norm": 1.5165804624557495, "learning_rate": 3.238640016540242e-06, "loss": 0.5859, "step": 4656 }, { "epoch": 3.1701837985023826, "grad_norm": 1.643786072731018, "learning_rate": 3.236533122480675e-06, "loss": 0.4075, "step": 4657 }, { "epoch": 3.170864533696392, "grad_norm": 1.6289231777191162, "learning_rate": 3.234426585965194e-06, "loss": 0.5185, "step": 4658 }, { "epoch": 3.1715452688904016, "grad_norm": 1.588173270225525, "learning_rate": 3.232320407420899e-06, "loss": 0.5826, "step": 4659 }, { "epoch": 3.1722260040844112, "grad_norm": 1.7084901332855225, "learning_rate": 3.2302145872748225e-06, "loss": 0.4041, "step": 4660 }, { "epoch": 3.172906739278421, "grad_norm": 1.6340105533599854, "learning_rate": 3.22810912595392e-06, "loss": 0.4149, "step": 4661 }, { "epoch": 3.17358747447243, "grad_norm": 1.6404345035552979, "learning_rate": 3.226004023885073e-06, "loss": 0.3809, "step": 4662 }, { "epoch": 3.17426820966644, "grad_norm": 1.6672072410583496, "learning_rate": 3.223899281495094e-06, "loss": 0.3222, "step": 4663 }, { "epoch": 3.174948944860449, "grad_norm": 1.761712908744812, "learning_rate": 3.2217948992107224e-06, "loss": 0.3107, "step": 4664 }, { "epoch": 3.175629680054459, "grad_norm": 1.7331287860870361, "learning_rate": 3.21969087745862e-06, "loss": 0.334, "step": 4665 }, { "epoch": 3.1763104152484685, "grad_norm": 1.4800307750701904, "learning_rate": 3.21758721666538e-06, "loss": 0.5916, "step": 4666 }, { "epoch": 3.1769911504424777, "grad_norm": 1.5006489753723145, "learning_rate": 3.2154839172575225e-06, "loss": 0.3232, "step": 4667 }, { "epoch": 3.1776718856364874, "grad_norm": 1.4753614664077759, "learning_rate": 3.213380979661489e-06, "loss": 0.4472, "step": 4668 }, { "epoch": 3.178352620830497, "grad_norm": 1.7127025127410889, "learning_rate": 3.211278404303655e-06, "loss": 0.4169, "step": 4669 }, { "epoch": 3.1790333560245063, "grad_norm": 1.5441365242004395, "learning_rate": 3.209176191610318e-06, "loss": 0.41, "step": 4670 }, { "epoch": 3.179714091218516, "grad_norm": 1.731389045715332, "learning_rate": 3.207074342007704e-06, "loss": 0.3794, "step": 4671 }, { "epoch": 3.1803948264125257, "grad_norm": 1.5173084735870361, "learning_rate": 3.2049728559219625e-06, "loss": 0.3284, "step": 4672 }, { "epoch": 3.181075561606535, "grad_norm": 1.5691272020339966, "learning_rate": 3.202871733779172e-06, "loss": 0.5443, "step": 4673 }, { "epoch": 3.1817562968005446, "grad_norm": 1.657367467880249, "learning_rate": 3.200770976005338e-06, "loss": 0.3472, "step": 4674 }, { "epoch": 3.1824370319945543, "grad_norm": 1.4883168935775757, "learning_rate": 3.1986705830263877e-06, "loss": 0.4848, "step": 4675 }, { "epoch": 3.1831177671885635, "grad_norm": 1.612311840057373, "learning_rate": 3.196570555268178e-06, "loss": 0.4491, "step": 4676 }, { "epoch": 3.1837985023825732, "grad_norm": 1.7067179679870605, "learning_rate": 3.1944708931564917e-06, "loss": 0.4035, "step": 4677 }, { "epoch": 3.184479237576583, "grad_norm": 1.5770633220672607, "learning_rate": 3.1923715971170383e-06, "loss": 0.2925, "step": 4678 }, { "epoch": 3.185159972770592, "grad_norm": 1.646535038948059, "learning_rate": 3.190272667575448e-06, "loss": 0.3047, "step": 4679 }, { "epoch": 3.185840707964602, "grad_norm": 1.4994499683380127, "learning_rate": 3.1881741049572822e-06, "loss": 0.3931, "step": 4680 }, { "epoch": 3.186521443158611, "grad_norm": 1.7465687990188599, "learning_rate": 3.186075909688027e-06, "loss": 0.43, "step": 4681 }, { "epoch": 3.1872021783526208, "grad_norm": 1.6518372297286987, "learning_rate": 3.18397808219309e-06, "loss": 0.3497, "step": 4682 }, { "epoch": 3.1878829135466304, "grad_norm": 1.6715868711471558, "learning_rate": 3.1818806228978094e-06, "loss": 0.4582, "step": 4683 }, { "epoch": 3.1885636487406397, "grad_norm": 1.5859774351119995, "learning_rate": 3.179783532227445e-06, "loss": 0.2667, "step": 4684 }, { "epoch": 3.1892443839346494, "grad_norm": 1.5705782175064087, "learning_rate": 3.1776868106071823e-06, "loss": 0.5346, "step": 4685 }, { "epoch": 3.189925119128659, "grad_norm": 1.7566888332366943, "learning_rate": 3.1755904584621366e-06, "loss": 0.4303, "step": 4686 }, { "epoch": 3.1906058543226683, "grad_norm": 1.8241126537322998, "learning_rate": 3.173494476217342e-06, "loss": 0.4073, "step": 4687 }, { "epoch": 3.191286589516678, "grad_norm": 1.6023132801055908, "learning_rate": 3.171398864297762e-06, "loss": 0.3526, "step": 4688 }, { "epoch": 3.1919673247106877, "grad_norm": 1.5335739850997925, "learning_rate": 3.1693036231282814e-06, "loss": 0.4778, "step": 4689 }, { "epoch": 3.192648059904697, "grad_norm": 1.6179556846618652, "learning_rate": 3.167208753133714e-06, "loss": 0.4074, "step": 4690 }, { "epoch": 3.1933287950987066, "grad_norm": 1.5303105115890503, "learning_rate": 3.165114254738794e-06, "loss": 0.383, "step": 4691 }, { "epoch": 3.1940095302927163, "grad_norm": 1.6892861127853394, "learning_rate": 3.1630201283681826e-06, "loss": 0.37, "step": 4692 }, { "epoch": 3.1946902654867255, "grad_norm": 1.479319453239441, "learning_rate": 3.160926374446467e-06, "loss": 0.513, "step": 4693 }, { "epoch": 3.195371000680735, "grad_norm": 1.5776958465576172, "learning_rate": 3.158832993398153e-06, "loss": 0.3884, "step": 4694 }, { "epoch": 3.196051735874745, "grad_norm": 1.769166350364685, "learning_rate": 3.156739985647681e-06, "loss": 0.3841, "step": 4695 }, { "epoch": 3.196732471068754, "grad_norm": 1.5601528882980347, "learning_rate": 3.1546473516194066e-06, "loss": 0.2896, "step": 4696 }, { "epoch": 3.197413206262764, "grad_norm": 1.5106815099716187, "learning_rate": 3.152555091737613e-06, "loss": 0.3386, "step": 4697 }, { "epoch": 3.1980939414567735, "grad_norm": 1.5631014108657837, "learning_rate": 3.150463206426506e-06, "loss": 0.509, "step": 4698 }, { "epoch": 3.1987746766507827, "grad_norm": 1.5555224418640137, "learning_rate": 3.148371696110219e-06, "loss": 0.529, "step": 4699 }, { "epoch": 3.1994554118447924, "grad_norm": 1.6252193450927734, "learning_rate": 3.146280561212807e-06, "loss": 0.3454, "step": 4700 }, { "epoch": 3.200136147038802, "grad_norm": 1.617736577987671, "learning_rate": 3.144189802158246e-06, "loss": 0.4562, "step": 4701 }, { "epoch": 3.2008168822328114, "grad_norm": 1.4711304903030396, "learning_rate": 3.14209941937044e-06, "loss": 0.3883, "step": 4702 }, { "epoch": 3.201497617426821, "grad_norm": 1.5410609245300293, "learning_rate": 3.1400094132732183e-06, "loss": 0.4612, "step": 4703 }, { "epoch": 3.2021783526208303, "grad_norm": 1.7024781703948975, "learning_rate": 3.1379197842903297e-06, "loss": 0.2724, "step": 4704 }, { "epoch": 3.20285908781484, "grad_norm": 1.7459293603897095, "learning_rate": 3.1358305328454464e-06, "loss": 0.3908, "step": 4705 }, { "epoch": 3.2035398230088497, "grad_norm": 1.5525552034378052, "learning_rate": 3.133741659362166e-06, "loss": 0.5019, "step": 4706 }, { "epoch": 3.204220558202859, "grad_norm": 1.5649992227554321, "learning_rate": 3.1316531642640104e-06, "loss": 0.4222, "step": 4707 }, { "epoch": 3.2049012933968686, "grad_norm": 1.6799336671829224, "learning_rate": 3.1295650479744212e-06, "loss": 0.283, "step": 4708 }, { "epoch": 3.2055820285908783, "grad_norm": 1.6074475049972534, "learning_rate": 3.127477310916767e-06, "loss": 0.4005, "step": 4709 }, { "epoch": 3.2062627637848875, "grad_norm": 1.670879602432251, "learning_rate": 3.1253899535143384e-06, "loss": 0.4662, "step": 4710 }, { "epoch": 3.206943498978897, "grad_norm": 1.6428664922714233, "learning_rate": 3.123302976190345e-06, "loss": 0.446, "step": 4711 }, { "epoch": 3.207624234172907, "grad_norm": 1.5074162483215332, "learning_rate": 3.1212163793679266e-06, "loss": 0.4574, "step": 4712 }, { "epoch": 3.208304969366916, "grad_norm": 1.616796851158142, "learning_rate": 3.119130163470141e-06, "loss": 0.3954, "step": 4713 }, { "epoch": 3.208985704560926, "grad_norm": 1.6779484748840332, "learning_rate": 3.117044328919971e-06, "loss": 0.2793, "step": 4714 }, { "epoch": 3.2096664397549355, "grad_norm": 1.5008904933929443, "learning_rate": 3.1149588761403184e-06, "loss": 0.4474, "step": 4715 }, { "epoch": 3.2103471749489447, "grad_norm": 1.6099320650100708, "learning_rate": 3.112873805554012e-06, "loss": 0.4434, "step": 4716 }, { "epoch": 3.2110279101429544, "grad_norm": 1.6771901845932007, "learning_rate": 3.1107891175838003e-06, "loss": 0.3702, "step": 4717 }, { "epoch": 3.211708645336964, "grad_norm": 1.5899920463562012, "learning_rate": 3.108704812652355e-06, "loss": 0.3862, "step": 4718 }, { "epoch": 3.2123893805309733, "grad_norm": 1.5470311641693115, "learning_rate": 3.1066208911822706e-06, "loss": 0.412, "step": 4719 }, { "epoch": 3.213070115724983, "grad_norm": 1.6528085470199585, "learning_rate": 3.104537353596064e-06, "loss": 0.418, "step": 4720 }, { "epoch": 3.2137508509189927, "grad_norm": 1.691900610923767, "learning_rate": 3.102454200316175e-06, "loss": 0.321, "step": 4721 }, { "epoch": 3.214431586113002, "grad_norm": 1.627554178237915, "learning_rate": 3.100371431764961e-06, "loss": 0.34, "step": 4722 }, { "epoch": 3.2151123213070116, "grad_norm": 1.5624109506607056, "learning_rate": 3.0982890483647092e-06, "loss": 0.6907, "step": 4723 }, { "epoch": 3.2157930565010213, "grad_norm": 1.5290271043777466, "learning_rate": 3.0962070505376208e-06, "loss": 0.5789, "step": 4724 }, { "epoch": 3.2164737916950306, "grad_norm": 1.651705026626587, "learning_rate": 3.094125438705822e-06, "loss": 0.405, "step": 4725 }, { "epoch": 3.2171545268890402, "grad_norm": 1.794769287109375, "learning_rate": 3.092044213291364e-06, "loss": 0.4293, "step": 4726 }, { "epoch": 3.2178352620830495, "grad_norm": 1.6085149049758911, "learning_rate": 3.0899633747162133e-06, "loss": 0.4681, "step": 4727 }, { "epoch": 3.218515997277059, "grad_norm": 1.6623270511627197, "learning_rate": 3.0878829234022624e-06, "loss": 0.3794, "step": 4728 }, { "epoch": 3.219196732471069, "grad_norm": 1.7824887037277222, "learning_rate": 3.0858028597713248e-06, "loss": 0.4271, "step": 4729 }, { "epoch": 3.219877467665078, "grad_norm": 1.6480766534805298, "learning_rate": 3.083723184245136e-06, "loss": 0.3657, "step": 4730 }, { "epoch": 3.220558202859088, "grad_norm": 1.5662243366241455, "learning_rate": 3.0816438972453488e-06, "loss": 0.4145, "step": 4731 }, { "epoch": 3.2212389380530975, "grad_norm": 1.8510093688964844, "learning_rate": 3.079564999193542e-06, "loss": 0.4515, "step": 4732 }, { "epoch": 3.2219196732471067, "grad_norm": 1.5144169330596924, "learning_rate": 3.0774864905112133e-06, "loss": 0.3058, "step": 4733 }, { "epoch": 3.2226004084411164, "grad_norm": 1.5622494220733643, "learning_rate": 3.0754083716197814e-06, "loss": 0.3348, "step": 4734 }, { "epoch": 3.223281143635126, "grad_norm": 1.4450401067733765, "learning_rate": 3.073330642940586e-06, "loss": 0.4551, "step": 4735 }, { "epoch": 3.2239618788291353, "grad_norm": 1.5117415189743042, "learning_rate": 3.0712533048948888e-06, "loss": 0.5054, "step": 4736 }, { "epoch": 3.224642614023145, "grad_norm": 1.6965103149414062, "learning_rate": 3.069176357903869e-06, "loss": 0.3038, "step": 4737 }, { "epoch": 3.2253233492171547, "grad_norm": 1.5086853504180908, "learning_rate": 3.0670998023886324e-06, "loss": 0.387, "step": 4738 }, { "epoch": 3.226004084411164, "grad_norm": 1.5346637964248657, "learning_rate": 3.0650236387701997e-06, "loss": 0.5161, "step": 4739 }, { "epoch": 3.2266848196051736, "grad_norm": 1.613553524017334, "learning_rate": 3.0629478674695167e-06, "loss": 0.3621, "step": 4740 }, { "epoch": 3.2273655547991833, "grad_norm": 1.6209825277328491, "learning_rate": 3.0608724889074454e-06, "loss": 0.3898, "step": 4741 }, { "epoch": 3.2280462899931925, "grad_norm": 1.6142284870147705, "learning_rate": 3.05879750350477e-06, "loss": 0.3735, "step": 4742 }, { "epoch": 3.2287270251872022, "grad_norm": 1.508851170539856, "learning_rate": 3.0567229116821983e-06, "loss": 0.4036, "step": 4743 }, { "epoch": 3.229407760381212, "grad_norm": 1.7414816617965698, "learning_rate": 3.0546487138603508e-06, "loss": 0.5467, "step": 4744 }, { "epoch": 3.230088495575221, "grad_norm": 1.6066148281097412, "learning_rate": 3.0525749104597733e-06, "loss": 0.4372, "step": 4745 }, { "epoch": 3.230769230769231, "grad_norm": 1.4165889024734497, "learning_rate": 3.0505015019009344e-06, "loss": 0.4892, "step": 4746 }, { "epoch": 3.2314499659632405, "grad_norm": 1.543990135192871, "learning_rate": 3.048428488604217e-06, "loss": 0.511, "step": 4747 }, { "epoch": 3.2321307011572498, "grad_norm": 1.5997422933578491, "learning_rate": 3.0463558709899244e-06, "loss": 0.4698, "step": 4748 }, { "epoch": 3.2328114363512594, "grad_norm": 1.7128727436065674, "learning_rate": 3.0442836494782823e-06, "loss": 0.4008, "step": 4749 }, { "epoch": 3.2334921715452687, "grad_norm": 1.592016339302063, "learning_rate": 3.042211824489437e-06, "loss": 0.516, "step": 4750 }, { "epoch": 3.2341729067392784, "grad_norm": 1.761135220527649, "learning_rate": 3.0401403964434496e-06, "loss": 0.4147, "step": 4751 }, { "epoch": 3.234853641933288, "grad_norm": 1.6745420694351196, "learning_rate": 3.0380693657603043e-06, "loss": 0.4249, "step": 4752 }, { "epoch": 3.2355343771272973, "grad_norm": 1.6531308889389038, "learning_rate": 3.035998732859904e-06, "loss": 0.5457, "step": 4753 }, { "epoch": 3.236215112321307, "grad_norm": 1.56361985206604, "learning_rate": 3.0339284981620697e-06, "loss": 0.3928, "step": 4754 }, { "epoch": 3.2368958475153167, "grad_norm": 1.6191054582595825, "learning_rate": 3.031858662086546e-06, "loss": 0.3421, "step": 4755 }, { "epoch": 3.237576582709326, "grad_norm": 1.693732738494873, "learning_rate": 3.0297892250529916e-06, "loss": 0.4718, "step": 4756 }, { "epoch": 3.2382573179033356, "grad_norm": 1.539604663848877, "learning_rate": 3.027720187480987e-06, "loss": 0.622, "step": 4757 }, { "epoch": 3.2389380530973453, "grad_norm": 1.657769799232483, "learning_rate": 3.025651549790029e-06, "loss": 0.3108, "step": 4758 }, { "epoch": 3.2396187882913545, "grad_norm": 1.7729889154434204, "learning_rate": 3.023583312399538e-06, "loss": 0.3076, "step": 4759 }, { "epoch": 3.240299523485364, "grad_norm": 1.5085484981536865, "learning_rate": 3.021515475728848e-06, "loss": 0.3547, "step": 4760 }, { "epoch": 3.240980258679374, "grad_norm": 1.4032933712005615, "learning_rate": 3.019448040197216e-06, "loss": 0.509, "step": 4761 }, { "epoch": 3.241660993873383, "grad_norm": 1.5780125856399536, "learning_rate": 3.017381006223815e-06, "loss": 0.3861, "step": 4762 }, { "epoch": 3.242341729067393, "grad_norm": 1.677006483078003, "learning_rate": 3.015314374227736e-06, "loss": 0.3874, "step": 4763 }, { "epoch": 3.2430224642614025, "grad_norm": 1.615035891532898, "learning_rate": 3.013248144627995e-06, "loss": 0.3457, "step": 4764 }, { "epoch": 3.2437031994554117, "grad_norm": 1.7097437381744385, "learning_rate": 3.0111823178435163e-06, "loss": 0.3738, "step": 4765 }, { "epoch": 3.2443839346494214, "grad_norm": 1.637548804283142, "learning_rate": 3.0091168942931515e-06, "loss": 0.4469, "step": 4766 }, { "epoch": 3.2450646698434307, "grad_norm": 1.5984218120574951, "learning_rate": 3.007051874395663e-06, "loss": 0.6322, "step": 4767 }, { "epoch": 3.2457454050374404, "grad_norm": 1.55582594871521, "learning_rate": 3.004987258569737e-06, "loss": 0.5454, "step": 4768 }, { "epoch": 3.24642614023145, "grad_norm": 1.6174782514572144, "learning_rate": 3.0029230472339767e-06, "loss": 0.3908, "step": 4769 }, { "epoch": 3.2471068754254597, "grad_norm": 1.541741967201233, "learning_rate": 3.0008592408069004e-06, "loss": 0.3462, "step": 4770 }, { "epoch": 3.247787610619469, "grad_norm": 1.6644294261932373, "learning_rate": 2.998795839706945e-06, "loss": 0.4085, "step": 4771 }, { "epoch": 3.2484683458134787, "grad_norm": 1.6526256799697876, "learning_rate": 2.99673284435247e-06, "loss": 0.3615, "step": 4772 }, { "epoch": 3.249149081007488, "grad_norm": 1.555286169052124, "learning_rate": 2.994670255161748e-06, "loss": 0.6266, "step": 4773 }, { "epoch": 3.2498298162014976, "grad_norm": 1.6216650009155273, "learning_rate": 2.9926080725529694e-06, "loss": 0.4811, "step": 4774 }, { "epoch": 3.2505105513955073, "grad_norm": 1.5820143222808838, "learning_rate": 2.9905462969442433e-06, "loss": 0.483, "step": 4775 }, { "epoch": 3.2511912865895165, "grad_norm": 2.0575852394104004, "learning_rate": 2.9884849287535966e-06, "loss": 0.4637, "step": 4776 }, { "epoch": 3.251872021783526, "grad_norm": 1.69528067111969, "learning_rate": 2.9864239683989724e-06, "loss": 0.3478, "step": 4777 }, { "epoch": 3.252552756977536, "grad_norm": 1.6013802289962769, "learning_rate": 2.9843634162982314e-06, "loss": 0.3939, "step": 4778 }, { "epoch": 3.253233492171545, "grad_norm": 1.6321216821670532, "learning_rate": 2.9823032728691547e-06, "loss": 0.441, "step": 4779 }, { "epoch": 3.253914227365555, "grad_norm": 1.467504858970642, "learning_rate": 2.9802435385294325e-06, "loss": 0.5282, "step": 4780 }, { "epoch": 3.2545949625595645, "grad_norm": 1.517300009727478, "learning_rate": 2.978184213696683e-06, "loss": 0.4568, "step": 4781 }, { "epoch": 3.2552756977535737, "grad_norm": 1.5036659240722656, "learning_rate": 2.976125298788432e-06, "loss": 0.4751, "step": 4782 }, { "epoch": 3.2559564329475834, "grad_norm": 1.4695242643356323, "learning_rate": 2.974066794222128e-06, "loss": 0.5606, "step": 4783 }, { "epoch": 3.256637168141593, "grad_norm": 1.5817137956619263, "learning_rate": 2.9720087004151333e-06, "loss": 0.3457, "step": 4784 }, { "epoch": 3.2573179033356023, "grad_norm": 1.6519380807876587, "learning_rate": 2.969951017784727e-06, "loss": 0.3512, "step": 4785 }, { "epoch": 3.257998638529612, "grad_norm": 1.552895188331604, "learning_rate": 2.967893746748107e-06, "loss": 0.4385, "step": 4786 }, { "epoch": 3.2586793737236217, "grad_norm": 1.557338833808899, "learning_rate": 2.9658368877223854e-06, "loss": 0.5832, "step": 4787 }, { "epoch": 3.259360108917631, "grad_norm": 1.509393572807312, "learning_rate": 2.96378044112459e-06, "loss": 0.3323, "step": 4788 }, { "epoch": 3.2600408441116406, "grad_norm": 1.4505747556686401, "learning_rate": 2.961724407371671e-06, "loss": 0.499, "step": 4789 }, { "epoch": 3.26072157930565, "grad_norm": 1.616719365119934, "learning_rate": 2.9596687868804885e-06, "loss": 0.6128, "step": 4790 }, { "epoch": 3.2614023144996596, "grad_norm": 1.6165916919708252, "learning_rate": 2.9576135800678206e-06, "loss": 0.4476, "step": 4791 }, { "epoch": 3.2620830496936692, "grad_norm": 1.7931042909622192, "learning_rate": 2.9555587873503623e-06, "loss": 0.3596, "step": 4792 }, { "epoch": 3.262763784887679, "grad_norm": 1.516666293144226, "learning_rate": 2.9535044091447236e-06, "loss": 0.3876, "step": 4793 }, { "epoch": 3.263444520081688, "grad_norm": 1.569517731666565, "learning_rate": 2.951450445867431e-06, "loss": 0.4102, "step": 4794 }, { "epoch": 3.264125255275698, "grad_norm": 1.5234431028366089, "learning_rate": 2.9493968979349284e-06, "loss": 0.4612, "step": 4795 }, { "epoch": 3.264805990469707, "grad_norm": 1.610998272895813, "learning_rate": 2.9473437657635726e-06, "loss": 0.3495, "step": 4796 }, { "epoch": 3.265486725663717, "grad_norm": 1.6898976564407349, "learning_rate": 2.945291049769636e-06, "loss": 0.4184, "step": 4797 }, { "epoch": 3.2661674608577265, "grad_norm": 1.5503225326538086, "learning_rate": 2.9432387503693117e-06, "loss": 0.4537, "step": 4798 }, { "epoch": 3.2668481960517357, "grad_norm": 1.7207763195037842, "learning_rate": 2.9411868679787036e-06, "loss": 0.352, "step": 4799 }, { "epoch": 3.2675289312457454, "grad_norm": 1.7032283544540405, "learning_rate": 2.939135403013831e-06, "loss": 0.3815, "step": 4800 }, { "epoch": 3.268209666439755, "grad_norm": 1.5929365158081055, "learning_rate": 2.9370843558906305e-06, "loss": 0.4495, "step": 4801 }, { "epoch": 3.2688904016337643, "grad_norm": 1.5450739860534668, "learning_rate": 2.935033727024954e-06, "loss": 0.487, "step": 4802 }, { "epoch": 3.269571136827774, "grad_norm": 1.5959571599960327, "learning_rate": 2.932983516832566e-06, "loss": 0.4898, "step": 4803 }, { "epoch": 3.2702518720217837, "grad_norm": 1.5975887775421143, "learning_rate": 2.9309337257291497e-06, "loss": 0.3984, "step": 4804 }, { "epoch": 3.270932607215793, "grad_norm": 1.6333850622177124, "learning_rate": 2.928884354130302e-06, "loss": 0.3919, "step": 4805 }, { "epoch": 3.2716133424098026, "grad_norm": 1.6074672937393188, "learning_rate": 2.926835402451531e-06, "loss": 0.3704, "step": 4806 }, { "epoch": 3.2722940776038123, "grad_norm": 1.755189061164856, "learning_rate": 2.9247868711082668e-06, "loss": 0.3222, "step": 4807 }, { "epoch": 3.2729748127978215, "grad_norm": 1.5617974996566772, "learning_rate": 2.9227387605158486e-06, "loss": 0.4563, "step": 4808 }, { "epoch": 3.2736555479918312, "grad_norm": 1.7021021842956543, "learning_rate": 2.920691071089534e-06, "loss": 0.3659, "step": 4809 }, { "epoch": 3.274336283185841, "grad_norm": 1.4846676588058472, "learning_rate": 2.9186438032444915e-06, "loss": 0.4232, "step": 4810 }, { "epoch": 3.27501701837985, "grad_norm": 1.5989874601364136, "learning_rate": 2.916596957395807e-06, "loss": 0.3702, "step": 4811 }, { "epoch": 3.27569775357386, "grad_norm": 1.5384483337402344, "learning_rate": 2.914550533958478e-06, "loss": 0.4126, "step": 4812 }, { "epoch": 3.276378488767869, "grad_norm": 1.6335783004760742, "learning_rate": 2.912504533347421e-06, "loss": 0.5003, "step": 4813 }, { "epoch": 3.2770592239618788, "grad_norm": 1.560427188873291, "learning_rate": 2.9104589559774614e-06, "loss": 0.4375, "step": 4814 }, { "epoch": 3.2777399591558884, "grad_norm": 1.5598340034484863, "learning_rate": 2.9084138022633445e-06, "loss": 0.4096, "step": 4815 }, { "epoch": 3.278420694349898, "grad_norm": 1.5507352352142334, "learning_rate": 2.9063690726197226e-06, "loss": 0.5642, "step": 4816 }, { "epoch": 3.2791014295439074, "grad_norm": 1.5808494091033936, "learning_rate": 2.90432476746117e-06, "loss": 0.487, "step": 4817 }, { "epoch": 3.279782164737917, "grad_norm": 1.538938045501709, "learning_rate": 2.9022808872021698e-06, "loss": 0.5318, "step": 4818 }, { "epoch": 3.2804628999319263, "grad_norm": 1.589477777481079, "learning_rate": 2.900237432257117e-06, "loss": 0.4353, "step": 4819 }, { "epoch": 3.281143635125936, "grad_norm": 1.6554828882217407, "learning_rate": 2.8981944030403274e-06, "loss": 0.2945, "step": 4820 }, { "epoch": 3.2818243703199457, "grad_norm": 1.6877590417861938, "learning_rate": 2.896151799966025e-06, "loss": 0.3238, "step": 4821 }, { "epoch": 3.282505105513955, "grad_norm": 1.5054188966751099, "learning_rate": 2.8941096234483475e-06, "loss": 0.4884, "step": 4822 }, { "epoch": 3.2831858407079646, "grad_norm": 1.6705142259597778, "learning_rate": 2.8920678739013486e-06, "loss": 0.4097, "step": 4823 }, { "epoch": 3.2838665759019743, "grad_norm": 1.5695651769638062, "learning_rate": 2.890026551738997e-06, "loss": 0.4306, "step": 4824 }, { "epoch": 3.2845473110959835, "grad_norm": 1.6561145782470703, "learning_rate": 2.8879856573751696e-06, "loss": 0.3875, "step": 4825 }, { "epoch": 3.285228046289993, "grad_norm": 1.5687557458877563, "learning_rate": 2.885945191223657e-06, "loss": 0.3867, "step": 4826 }, { "epoch": 3.285908781484003, "grad_norm": 1.5285362005233765, "learning_rate": 2.88390515369817e-06, "loss": 0.3877, "step": 4827 }, { "epoch": 3.286589516678012, "grad_norm": 1.5857861042022705, "learning_rate": 2.8818655452123246e-06, "loss": 0.4515, "step": 4828 }, { "epoch": 3.287270251872022, "grad_norm": 1.5850975513458252, "learning_rate": 2.8798263661796515e-06, "loss": 0.4597, "step": 4829 }, { "epoch": 3.2879509870660315, "grad_norm": 1.456229329109192, "learning_rate": 2.877787617013599e-06, "loss": 0.5151, "step": 4830 }, { "epoch": 3.2886317222600407, "grad_norm": 1.5931423902511597, "learning_rate": 2.8757492981275235e-06, "loss": 0.5136, "step": 4831 }, { "epoch": 3.2893124574540504, "grad_norm": 1.5735206604003906, "learning_rate": 2.873711409934693e-06, "loss": 0.3903, "step": 4832 }, { "epoch": 3.28999319264806, "grad_norm": 1.6584807634353638, "learning_rate": 2.871673952848293e-06, "loss": 0.3032, "step": 4833 }, { "epoch": 3.2906739278420694, "grad_norm": 1.9674781560897827, "learning_rate": 2.8696369272814206e-06, "loss": 0.3441, "step": 4834 }, { "epoch": 3.291354663036079, "grad_norm": 1.4714058637619019, "learning_rate": 2.8676003336470837e-06, "loss": 0.5714, "step": 4835 }, { "epoch": 3.2920353982300883, "grad_norm": 1.5660028457641602, "learning_rate": 2.865564172358199e-06, "loss": 0.655, "step": 4836 }, { "epoch": 3.292716133424098, "grad_norm": 1.4831839799880981, "learning_rate": 2.8635284438276047e-06, "loss": 0.591, "step": 4837 }, { "epoch": 3.2933968686181077, "grad_norm": 1.6348785161972046, "learning_rate": 2.8614931484680443e-06, "loss": 0.2908, "step": 4838 }, { "epoch": 3.294077603812117, "grad_norm": 1.5892884731292725, "learning_rate": 2.8594582866921746e-06, "loss": 0.4052, "step": 4839 }, { "epoch": 3.2947583390061266, "grad_norm": 1.625551462173462, "learning_rate": 2.8574238589125614e-06, "loss": 0.3074, "step": 4840 }, { "epoch": 3.2954390742001363, "grad_norm": 1.627447247505188, "learning_rate": 2.8553898655416947e-06, "loss": 0.4291, "step": 4841 }, { "epoch": 3.2961198093941455, "grad_norm": 1.667262315750122, "learning_rate": 2.8533563069919634e-06, "loss": 0.4402, "step": 4842 }, { "epoch": 3.296800544588155, "grad_norm": 1.6537529230117798, "learning_rate": 2.8513231836756706e-06, "loss": 0.3486, "step": 4843 }, { "epoch": 3.297481279782165, "grad_norm": 1.4484683275222778, "learning_rate": 2.849290496005039e-06, "loss": 0.4299, "step": 4844 }, { "epoch": 3.298162014976174, "grad_norm": 1.534539818763733, "learning_rate": 2.8472582443921936e-06, "loss": 0.3402, "step": 4845 }, { "epoch": 3.298842750170184, "grad_norm": 1.5324252843856812, "learning_rate": 2.845226429249176e-06, "loss": 0.4415, "step": 4846 }, { "epoch": 3.2995234853641935, "grad_norm": 1.6408069133758545, "learning_rate": 2.843195050987936e-06, "loss": 0.4324, "step": 4847 }, { "epoch": 3.3002042205582027, "grad_norm": 1.6778665781021118, "learning_rate": 2.84116411002034e-06, "loss": 0.3383, "step": 4848 }, { "epoch": 3.3008849557522124, "grad_norm": 1.5548288822174072, "learning_rate": 2.8391336067581586e-06, "loss": 0.4464, "step": 4849 }, { "epoch": 3.301565690946222, "grad_norm": 1.5855586528778076, "learning_rate": 2.837103541613081e-06, "loss": 0.4265, "step": 4850 }, { "epoch": 3.3022464261402313, "grad_norm": 1.4854257106781006, "learning_rate": 2.8350739149967047e-06, "loss": 0.5558, "step": 4851 }, { "epoch": 3.302927161334241, "grad_norm": 1.6023613214492798, "learning_rate": 2.8330447273205376e-06, "loss": 0.4096, "step": 4852 }, { "epoch": 3.3036078965282503, "grad_norm": 1.6246087551116943, "learning_rate": 2.8310159789959966e-06, "loss": 0.4133, "step": 4853 }, { "epoch": 3.30428863172226, "grad_norm": 1.5937334299087524, "learning_rate": 2.828987670434412e-06, "loss": 0.3898, "step": 4854 }, { "epoch": 3.3049693669162696, "grad_norm": 1.3924931287765503, "learning_rate": 2.826959802047028e-06, "loss": 0.6759, "step": 4855 }, { "epoch": 3.3056501021102793, "grad_norm": 1.6625263690948486, "learning_rate": 2.8249323742449934e-06, "loss": 0.4501, "step": 4856 }, { "epoch": 3.3063308373042886, "grad_norm": 1.6857242584228516, "learning_rate": 2.82290538743937e-06, "loss": 0.3887, "step": 4857 }, { "epoch": 3.3070115724982982, "grad_norm": 1.5330636501312256, "learning_rate": 2.8208788420411307e-06, "loss": 0.428, "step": 4858 }, { "epoch": 3.3076923076923075, "grad_norm": 1.6117970943450928, "learning_rate": 2.818852738461163e-06, "loss": 0.4866, "step": 4859 }, { "epoch": 3.308373042886317, "grad_norm": 1.508569359779358, "learning_rate": 2.816827077110258e-06, "loss": 0.4328, "step": 4860 }, { "epoch": 3.309053778080327, "grad_norm": 1.6132844686508179, "learning_rate": 2.814801858399117e-06, "loss": 0.483, "step": 4861 }, { "epoch": 3.309734513274336, "grad_norm": 1.647530436515808, "learning_rate": 2.81277708273836e-06, "loss": 0.3562, "step": 4862 }, { "epoch": 3.310415248468346, "grad_norm": 1.557396411895752, "learning_rate": 2.8107527505385086e-06, "loss": 0.3466, "step": 4863 }, { "epoch": 3.3110959836623555, "grad_norm": 1.5427416563034058, "learning_rate": 2.8087288622099958e-06, "loss": 0.5673, "step": 4864 }, { "epoch": 3.3117767188563647, "grad_norm": 1.6617398262023926, "learning_rate": 2.8067054181631704e-06, "loss": 0.3464, "step": 4865 }, { "epoch": 3.3124574540503744, "grad_norm": 1.4655126333236694, "learning_rate": 2.804682418808282e-06, "loss": 0.438, "step": 4866 }, { "epoch": 3.313138189244384, "grad_norm": 1.6476497650146484, "learning_rate": 2.8026598645555007e-06, "loss": 0.4353, "step": 4867 }, { "epoch": 3.3138189244383933, "grad_norm": 1.6213574409484863, "learning_rate": 2.8006377558148963e-06, "loss": 0.4031, "step": 4868 }, { "epoch": 3.314499659632403, "grad_norm": 1.5189952850341797, "learning_rate": 2.7986160929964556e-06, "loss": 0.3952, "step": 4869 }, { "epoch": 3.3151803948264127, "grad_norm": 1.7142997980117798, "learning_rate": 2.7965948765100715e-06, "loss": 0.4771, "step": 4870 }, { "epoch": 3.315861130020422, "grad_norm": 1.4955953359603882, "learning_rate": 2.794574106765544e-06, "loss": 0.3949, "step": 4871 }, { "epoch": 3.3165418652144316, "grad_norm": 1.538008689880371, "learning_rate": 2.7925537841725904e-06, "loss": 0.5397, "step": 4872 }, { "epoch": 3.3172226004084413, "grad_norm": 1.641131043434143, "learning_rate": 2.790533909140829e-06, "loss": 0.3809, "step": 4873 }, { "epoch": 3.3179033356024505, "grad_norm": 1.633562445640564, "learning_rate": 2.7885144820797904e-06, "loss": 0.5428, "step": 4874 }, { "epoch": 3.3185840707964602, "grad_norm": 1.4520963430404663, "learning_rate": 2.786495503398916e-06, "loss": 0.3542, "step": 4875 }, { "epoch": 3.3192648059904695, "grad_norm": 1.633175253868103, "learning_rate": 2.7844769735075574e-06, "loss": 0.3166, "step": 4876 }, { "epoch": 3.319945541184479, "grad_norm": 1.4110912084579468, "learning_rate": 2.7824588928149698e-06, "loss": 0.6102, "step": 4877 }, { "epoch": 3.320626276378489, "grad_norm": 1.5567479133605957, "learning_rate": 2.78044126173032e-06, "loss": 0.3359, "step": 4878 }, { "epoch": 3.3213070115724985, "grad_norm": 1.6254962682724, "learning_rate": 2.7784240806626873e-06, "loss": 0.4584, "step": 4879 }, { "epoch": 3.3219877467665078, "grad_norm": 1.5931396484375, "learning_rate": 2.7764073500210543e-06, "loss": 0.5155, "step": 4880 }, { "epoch": 3.3226684819605175, "grad_norm": 1.6931061744689941, "learning_rate": 2.774391070214313e-06, "loss": 0.3844, "step": 4881 }, { "epoch": 3.3233492171545267, "grad_norm": 1.5618972778320312, "learning_rate": 2.7723752416512694e-06, "loss": 0.4647, "step": 4882 }, { "epoch": 3.3240299523485364, "grad_norm": 1.4448338747024536, "learning_rate": 2.7703598647406294e-06, "loss": 0.5359, "step": 4883 }, { "epoch": 3.324710687542546, "grad_norm": 1.616424322128296, "learning_rate": 2.7683449398910167e-06, "loss": 0.5376, "step": 4884 }, { "epoch": 3.3253914227365553, "grad_norm": 1.5391018390655518, "learning_rate": 2.7663304675109546e-06, "loss": 0.3755, "step": 4885 }, { "epoch": 3.326072157930565, "grad_norm": 1.474831223487854, "learning_rate": 2.7643164480088823e-06, "loss": 0.4379, "step": 4886 }, { "epoch": 3.3267528931245747, "grad_norm": 1.5585070848464966, "learning_rate": 2.7623028817931417e-06, "loss": 0.3235, "step": 4887 }, { "epoch": 3.327433628318584, "grad_norm": 1.6737596988677979, "learning_rate": 2.7602897692719832e-06, "loss": 0.4216, "step": 4888 }, { "epoch": 3.3281143635125936, "grad_norm": 1.5764122009277344, "learning_rate": 2.7582771108535705e-06, "loss": 0.3862, "step": 4889 }, { "epoch": 3.3287950987066033, "grad_norm": 1.5560364723205566, "learning_rate": 2.756264906945969e-06, "loss": 0.3832, "step": 4890 }, { "epoch": 3.3294758339006125, "grad_norm": 1.726378321647644, "learning_rate": 2.754253157957153e-06, "loss": 0.4, "step": 4891 }, { "epoch": 3.330156569094622, "grad_norm": 1.464539885520935, "learning_rate": 2.752241864295008e-06, "loss": 0.4737, "step": 4892 }, { "epoch": 3.330837304288632, "grad_norm": 1.5692906379699707, "learning_rate": 2.750231026367326e-06, "loss": 0.3751, "step": 4893 }, { "epoch": 3.331518039482641, "grad_norm": 1.531556248664856, "learning_rate": 2.748220644581805e-06, "loss": 0.4888, "step": 4894 }, { "epoch": 3.332198774676651, "grad_norm": 1.6811118125915527, "learning_rate": 2.746210719346048e-06, "loss": 0.4115, "step": 4895 }, { "epoch": 3.3328795098706605, "grad_norm": 1.5127928256988525, "learning_rate": 2.744201251067574e-06, "loss": 0.4538, "step": 4896 }, { "epoch": 3.3335602450646697, "grad_norm": 1.6554707288742065, "learning_rate": 2.7421922401538013e-06, "loss": 0.3908, "step": 4897 }, { "epoch": 3.3342409802586794, "grad_norm": 1.650029182434082, "learning_rate": 2.740183687012057e-06, "loss": 0.3778, "step": 4898 }, { "epoch": 3.3349217154526887, "grad_norm": 1.6867283582687378, "learning_rate": 2.738175592049579e-06, "loss": 0.4011, "step": 4899 }, { "epoch": 3.3356024506466984, "grad_norm": 1.6662920713424683, "learning_rate": 2.7361679556735094e-06, "loss": 0.5062, "step": 4900 }, { "epoch": 3.336283185840708, "grad_norm": 1.5838878154754639, "learning_rate": 2.734160778290895e-06, "loss": 0.4178, "step": 4901 }, { "epoch": 3.3369639210347177, "grad_norm": 1.5264406204223633, "learning_rate": 2.732154060308695e-06, "loss": 0.3624, "step": 4902 }, { "epoch": 3.337644656228727, "grad_norm": 1.4624738693237305, "learning_rate": 2.7301478021337748e-06, "loss": 0.5001, "step": 4903 }, { "epoch": 3.3383253914227367, "grad_norm": 1.6307439804077148, "learning_rate": 2.728142004172901e-06, "loss": 0.5069, "step": 4904 }, { "epoch": 3.339006126616746, "grad_norm": 1.687216877937317, "learning_rate": 2.7261366668327505e-06, "loss": 0.5059, "step": 4905 }, { "epoch": 3.3396868618107556, "grad_norm": 1.5204907655715942, "learning_rate": 2.72413179051991e-06, "loss": 0.4143, "step": 4906 }, { "epoch": 3.3403675970047653, "grad_norm": 1.6012506484985352, "learning_rate": 2.7221273756408673e-06, "loss": 0.3803, "step": 4907 }, { "epoch": 3.3410483321987745, "grad_norm": 1.5511151552200317, "learning_rate": 2.720123422602019e-06, "loss": 0.5265, "step": 4908 }, { "epoch": 3.341729067392784, "grad_norm": 1.5172858238220215, "learning_rate": 2.7181199318096642e-06, "loss": 0.5493, "step": 4909 }, { "epoch": 3.342409802586794, "grad_norm": 1.6156455278396606, "learning_rate": 2.7161169036700197e-06, "loss": 0.517, "step": 4910 }, { "epoch": 3.343090537780803, "grad_norm": 1.5502431392669678, "learning_rate": 2.7141143385891967e-06, "loss": 0.3028, "step": 4911 }, { "epoch": 3.343771272974813, "grad_norm": 1.50873863697052, "learning_rate": 2.712112236973215e-06, "loss": 0.4891, "step": 4912 }, { "epoch": 3.3444520081688225, "grad_norm": 1.653540015220642, "learning_rate": 2.7101105992280053e-06, "loss": 0.4507, "step": 4913 }, { "epoch": 3.3451327433628317, "grad_norm": 1.5159283876419067, "learning_rate": 2.7081094257594e-06, "loss": 0.4953, "step": 4914 }, { "epoch": 3.3458134785568414, "grad_norm": 1.5894290208816528, "learning_rate": 2.7061087169731376e-06, "loss": 0.387, "step": 4915 }, { "epoch": 3.346494213750851, "grad_norm": 1.497572660446167, "learning_rate": 2.704108473274861e-06, "loss": 0.4531, "step": 4916 }, { "epoch": 3.3471749489448603, "grad_norm": 1.613830804824829, "learning_rate": 2.7021086950701257e-06, "loss": 0.3872, "step": 4917 }, { "epoch": 3.34785568413887, "grad_norm": 1.6692067384719849, "learning_rate": 2.7001093827643843e-06, "loss": 0.3296, "step": 4918 }, { "epoch": 3.3485364193328797, "grad_norm": 1.4418712854385376, "learning_rate": 2.6981105367629994e-06, "loss": 0.457, "step": 4919 }, { "epoch": 3.349217154526889, "grad_norm": 1.7120120525360107, "learning_rate": 2.696112157471241e-06, "loss": 0.5013, "step": 4920 }, { "epoch": 3.3498978897208986, "grad_norm": 1.675227403640747, "learning_rate": 2.6941142452942805e-06, "loss": 0.4103, "step": 4921 }, { "epoch": 3.350578624914908, "grad_norm": 1.6804325580596924, "learning_rate": 2.6921168006371945e-06, "loss": 0.4795, "step": 4922 }, { "epoch": 3.3512593601089176, "grad_norm": 1.5635432004928589, "learning_rate": 2.6901198239049652e-06, "loss": 0.4981, "step": 4923 }, { "epoch": 3.3519400953029272, "grad_norm": 1.6025323867797852, "learning_rate": 2.6881233155024855e-06, "loss": 0.4969, "step": 4924 }, { "epoch": 3.352620830496937, "grad_norm": 1.6386189460754395, "learning_rate": 2.686127275834546e-06, "loss": 0.3899, "step": 4925 }, { "epoch": 3.353301565690946, "grad_norm": 1.4640427827835083, "learning_rate": 2.6841317053058424e-06, "loss": 0.5564, "step": 4926 }, { "epoch": 3.353982300884956, "grad_norm": 1.675901174545288, "learning_rate": 2.6821366043209814e-06, "loss": 0.4906, "step": 4927 }, { "epoch": 3.354663036078965, "grad_norm": 1.74039888381958, "learning_rate": 2.6801419732844723e-06, "loss": 0.4295, "step": 4928 }, { "epoch": 3.355343771272975, "grad_norm": 1.5121408700942993, "learning_rate": 2.6781478126007255e-06, "loss": 0.4559, "step": 4929 }, { "epoch": 3.3560245064669845, "grad_norm": 1.6377331018447876, "learning_rate": 2.676154122674057e-06, "loss": 0.3745, "step": 4930 }, { "epoch": 3.3567052416609937, "grad_norm": 1.4157967567443848, "learning_rate": 2.6741609039086935e-06, "loss": 0.5481, "step": 4931 }, { "epoch": 3.3573859768550034, "grad_norm": 1.5857183933258057, "learning_rate": 2.6721681567087583e-06, "loss": 0.3219, "step": 4932 }, { "epoch": 3.358066712049013, "grad_norm": 1.5181187391281128, "learning_rate": 2.6701758814782796e-06, "loss": 0.4692, "step": 4933 }, { "epoch": 3.3587474472430223, "grad_norm": 1.425191879272461, "learning_rate": 2.668184078621199e-06, "loss": 0.3299, "step": 4934 }, { "epoch": 3.359428182437032, "grad_norm": 1.5096791982650757, "learning_rate": 2.6661927485413493e-06, "loss": 0.3758, "step": 4935 }, { "epoch": 3.3601089176310417, "grad_norm": 1.518530249595642, "learning_rate": 2.6642018916424788e-06, "loss": 0.49, "step": 4936 }, { "epoch": 3.360789652825051, "grad_norm": 1.65382981300354, "learning_rate": 2.662211508328232e-06, "loss": 0.4055, "step": 4937 }, { "epoch": 3.3614703880190606, "grad_norm": 1.7218341827392578, "learning_rate": 2.660221599002163e-06, "loss": 0.5232, "step": 4938 }, { "epoch": 3.3621511232130703, "grad_norm": 1.706391453742981, "learning_rate": 2.6582321640677267e-06, "loss": 0.3978, "step": 4939 }, { "epoch": 3.3628318584070795, "grad_norm": 1.5341198444366455, "learning_rate": 2.6562432039282793e-06, "loss": 0.3929, "step": 4940 }, { "epoch": 3.3635125936010892, "grad_norm": 1.473871111869812, "learning_rate": 2.6542547189870877e-06, "loss": 0.4459, "step": 4941 }, { "epoch": 3.364193328795099, "grad_norm": 1.55819571018219, "learning_rate": 2.6522667096473177e-06, "loss": 0.3853, "step": 4942 }, { "epoch": 3.364874063989108, "grad_norm": 1.602988362312317, "learning_rate": 2.650279176312036e-06, "loss": 0.4851, "step": 4943 }, { "epoch": 3.365554799183118, "grad_norm": 1.5076946020126343, "learning_rate": 2.6482921193842192e-06, "loss": 0.511, "step": 4944 }, { "epoch": 3.366235534377127, "grad_norm": 1.5413857698440552, "learning_rate": 2.646305539266747e-06, "loss": 0.4014, "step": 4945 }, { "epoch": 3.3669162695711368, "grad_norm": 1.5182349681854248, "learning_rate": 2.644319436362397e-06, "loss": 0.4886, "step": 4946 }, { "epoch": 3.3675970047651465, "grad_norm": 1.4889905452728271, "learning_rate": 2.6423338110738507e-06, "loss": 0.3976, "step": 4947 }, { "epoch": 3.368277739959156, "grad_norm": 1.441140055656433, "learning_rate": 2.640348663803699e-06, "loss": 0.6167, "step": 4948 }, { "epoch": 3.3689584751531654, "grad_norm": 1.6041696071624756, "learning_rate": 2.6383639949544304e-06, "loss": 0.3068, "step": 4949 }, { "epoch": 3.369639210347175, "grad_norm": 1.5411080121994019, "learning_rate": 2.636379804928436e-06, "loss": 0.5303, "step": 4950 }, { "epoch": 3.3703199455411843, "grad_norm": 1.432687759399414, "learning_rate": 2.634396094128015e-06, "loss": 0.5148, "step": 4951 }, { "epoch": 3.371000680735194, "grad_norm": 1.6267114877700806, "learning_rate": 2.6324128629553624e-06, "loss": 0.4318, "step": 4952 }, { "epoch": 3.3716814159292037, "grad_norm": 1.589056134223938, "learning_rate": 2.630430111812583e-06, "loss": 0.4313, "step": 4953 }, { "epoch": 3.372362151123213, "grad_norm": 1.5554271936416626, "learning_rate": 2.6284478411016786e-06, "loss": 0.4454, "step": 4954 }, { "epoch": 3.3730428863172226, "grad_norm": 1.7013901472091675, "learning_rate": 2.6264660512245577e-06, "loss": 0.4653, "step": 4955 }, { "epoch": 3.3737236215112323, "grad_norm": 1.4709498882293701, "learning_rate": 2.624484742583029e-06, "loss": 0.6571, "step": 4956 }, { "epoch": 3.3744043567052415, "grad_norm": 1.5842269659042358, "learning_rate": 2.622503915578802e-06, "loss": 0.3324, "step": 4957 }, { "epoch": 3.375085091899251, "grad_norm": 1.5089129209518433, "learning_rate": 2.6205235706134936e-06, "loss": 0.4226, "step": 4958 }, { "epoch": 3.375765827093261, "grad_norm": 1.502286672592163, "learning_rate": 2.61854370808862e-06, "loss": 0.4473, "step": 4959 }, { "epoch": 3.37644656228727, "grad_norm": 1.523582100868225, "learning_rate": 2.6165643284055956e-06, "loss": 0.578, "step": 4960 }, { "epoch": 3.37712729748128, "grad_norm": 1.6135978698730469, "learning_rate": 2.6145854319657443e-06, "loss": 0.3578, "step": 4961 }, { "epoch": 3.377808032675289, "grad_norm": 1.582434892654419, "learning_rate": 2.61260701917029e-06, "loss": 0.4724, "step": 4962 }, { "epoch": 3.3784887678692987, "grad_norm": 1.690863847732544, "learning_rate": 2.610629090420356e-06, "loss": 0.316, "step": 4963 }, { "epoch": 3.3791695030633084, "grad_norm": 1.3733946084976196, "learning_rate": 2.6086516461169666e-06, "loss": 0.507, "step": 4964 }, { "epoch": 3.379850238257318, "grad_norm": 1.611213207244873, "learning_rate": 2.6066746866610527e-06, "loss": 0.4024, "step": 4965 }, { "epoch": 3.3805309734513274, "grad_norm": 1.605758786201477, "learning_rate": 2.6046982124534427e-06, "loss": 0.3614, "step": 4966 }, { "epoch": 3.381211708645337, "grad_norm": 1.4632959365844727, "learning_rate": 2.602722223894867e-06, "loss": 0.5289, "step": 4967 }, { "epoch": 3.3818924438393463, "grad_norm": 1.612674355506897, "learning_rate": 2.6007467213859615e-06, "loss": 0.3365, "step": 4968 }, { "epoch": 3.382573179033356, "grad_norm": 1.6318325996398926, "learning_rate": 2.5987717053272594e-06, "loss": 0.4388, "step": 4969 }, { "epoch": 3.3832539142273657, "grad_norm": 1.5370086431503296, "learning_rate": 2.5967971761191947e-06, "loss": 0.4526, "step": 4970 }, { "epoch": 3.383934649421375, "grad_norm": 1.568179965019226, "learning_rate": 2.594823134162105e-06, "loss": 0.3864, "step": 4971 }, { "epoch": 3.3846153846153846, "grad_norm": 1.6300824880599976, "learning_rate": 2.592849579856232e-06, "loss": 0.4985, "step": 4972 }, { "epoch": 3.3852961198093943, "grad_norm": 1.6828793287277222, "learning_rate": 2.5908765136017126e-06, "loss": 0.4796, "step": 4973 }, { "epoch": 3.3859768550034035, "grad_norm": 1.5487850904464722, "learning_rate": 2.5889039357985855e-06, "loss": 0.4476, "step": 4974 }, { "epoch": 3.386657590197413, "grad_norm": 1.6974785327911377, "learning_rate": 2.586931846846797e-06, "loss": 0.4871, "step": 4975 }, { "epoch": 3.387338325391423, "grad_norm": 1.6052078008651733, "learning_rate": 2.584960247146186e-06, "loss": 0.3849, "step": 4976 }, { "epoch": 3.388019060585432, "grad_norm": 1.482481837272644, "learning_rate": 2.5829891370964965e-06, "loss": 0.5421, "step": 4977 }, { "epoch": 3.388699795779442, "grad_norm": 1.440332055091858, "learning_rate": 2.581018517097368e-06, "loss": 0.4669, "step": 4978 }, { "epoch": 3.3893805309734515, "grad_norm": 1.5321193933486938, "learning_rate": 2.579048387548353e-06, "loss": 0.3593, "step": 4979 }, { "epoch": 3.3900612661674607, "grad_norm": 1.4983923435211182, "learning_rate": 2.5770787488488925e-06, "loss": 0.3986, "step": 4980 }, { "epoch": 3.3907420013614704, "grad_norm": 1.6150641441345215, "learning_rate": 2.5751096013983313e-06, "loss": 0.3635, "step": 4981 }, { "epoch": 3.39142273655548, "grad_norm": 1.6676768064498901, "learning_rate": 2.573140945595917e-06, "loss": 0.2859, "step": 4982 }, { "epoch": 3.3921034717494893, "grad_norm": 1.6442794799804688, "learning_rate": 2.571172781840796e-06, "loss": 0.284, "step": 4983 }, { "epoch": 3.392784206943499, "grad_norm": 1.7375608682632446, "learning_rate": 2.569205110532014e-06, "loss": 0.4563, "step": 4984 }, { "epoch": 3.3934649421375083, "grad_norm": 1.5339391231536865, "learning_rate": 2.5672379320685158e-06, "loss": 0.3563, "step": 4985 }, { "epoch": 3.394145677331518, "grad_norm": 1.567495346069336, "learning_rate": 2.5652712468491526e-06, "loss": 0.4329, "step": 4986 }, { "epoch": 3.3948264125255276, "grad_norm": 1.5493972301483154, "learning_rate": 2.5633050552726657e-06, "loss": 0.3395, "step": 4987 }, { "epoch": 3.3955071477195373, "grad_norm": 1.7180806398391724, "learning_rate": 2.5613393577377054e-06, "loss": 0.4257, "step": 4988 }, { "epoch": 3.3961878829135466, "grad_norm": 1.623645305633545, "learning_rate": 2.5593741546428184e-06, "loss": 0.4304, "step": 4989 }, { "epoch": 3.3968686181075562, "grad_norm": 1.508030891418457, "learning_rate": 2.5574094463864508e-06, "loss": 0.4256, "step": 4990 }, { "epoch": 3.3975493533015655, "grad_norm": 1.4985620975494385, "learning_rate": 2.5554452333669476e-06, "loss": 0.3591, "step": 4991 }, { "epoch": 3.398230088495575, "grad_norm": 1.581169843673706, "learning_rate": 2.553481515982553e-06, "loss": 0.4027, "step": 4992 }, { "epoch": 3.398910823689585, "grad_norm": 1.6886590719223022, "learning_rate": 2.5515182946314145e-06, "loss": 0.4619, "step": 4993 }, { "epoch": 3.399591558883594, "grad_norm": 1.734742522239685, "learning_rate": 2.549555569711576e-06, "loss": 0.44, "step": 4994 }, { "epoch": 3.400272294077604, "grad_norm": 1.599130630493164, "learning_rate": 2.5475933416209762e-06, "loss": 0.5323, "step": 4995 }, { "epoch": 3.4009530292716135, "grad_norm": 1.5812691450119019, "learning_rate": 2.5456316107574676e-06, "loss": 0.2979, "step": 4996 }, { "epoch": 3.4016337644656227, "grad_norm": 1.6137727499008179, "learning_rate": 2.5436703775187864e-06, "loss": 0.4757, "step": 4997 }, { "epoch": 3.4023144996596324, "grad_norm": 1.7460299730300903, "learning_rate": 2.5417096423025756e-06, "loss": 0.342, "step": 4998 }, { "epoch": 3.402995234853642, "grad_norm": 1.6979153156280518, "learning_rate": 2.539749405506373e-06, "loss": 0.4749, "step": 4999 }, { "epoch": 3.4036759700476513, "grad_norm": 1.6151174306869507, "learning_rate": 2.5377896675276227e-06, "loss": 0.2847, "step": 5000 }, { "epoch": 3.404356705241661, "grad_norm": 1.70828378200531, "learning_rate": 2.5358304287636593e-06, "loss": 0.3586, "step": 5001 }, { "epoch": 3.4050374404356707, "grad_norm": 1.6512742042541504, "learning_rate": 2.533871689611719e-06, "loss": 0.3017, "step": 5002 }, { "epoch": 3.40571817562968, "grad_norm": 1.7735320329666138, "learning_rate": 2.53191345046894e-06, "loss": 0.4417, "step": 5003 }, { "epoch": 3.4063989108236896, "grad_norm": 1.6055506467819214, "learning_rate": 2.529955711732355e-06, "loss": 0.4803, "step": 5004 }, { "epoch": 3.4070796460176993, "grad_norm": 1.6918797492980957, "learning_rate": 2.5279984737988995e-06, "loss": 0.3036, "step": 5005 }, { "epoch": 3.4077603812117085, "grad_norm": 1.6284854412078857, "learning_rate": 2.5260417370654e-06, "loss": 0.3651, "step": 5006 }, { "epoch": 3.4084411164057182, "grad_norm": 1.478975772857666, "learning_rate": 2.5240855019285916e-06, "loss": 0.4618, "step": 5007 }, { "epoch": 3.4091218515997275, "grad_norm": 1.4566479921340942, "learning_rate": 2.5221297687851e-06, "loss": 0.3411, "step": 5008 }, { "epoch": 3.409802586793737, "grad_norm": 1.6370433568954468, "learning_rate": 2.5201745380314484e-06, "loss": 0.483, "step": 5009 }, { "epoch": 3.410483321987747, "grad_norm": 1.4797000885009766, "learning_rate": 2.5182198100640657e-06, "loss": 0.4069, "step": 5010 }, { "epoch": 3.4111640571817565, "grad_norm": 1.626810073852539, "learning_rate": 2.5162655852792726e-06, "loss": 0.3628, "step": 5011 }, { "epoch": 3.4118447923757658, "grad_norm": 1.5720230340957642, "learning_rate": 2.514311864073288e-06, "loss": 0.3942, "step": 5012 }, { "epoch": 3.4125255275697755, "grad_norm": 1.5434982776641846, "learning_rate": 2.512358646842231e-06, "loss": 0.5086, "step": 5013 }, { "epoch": 3.4132062627637847, "grad_norm": 1.569581151008606, "learning_rate": 2.5104059339821203e-06, "loss": 0.3725, "step": 5014 }, { "epoch": 3.4138869979577944, "grad_norm": 1.585508942604065, "learning_rate": 2.5084537258888674e-06, "loss": 0.498, "step": 5015 }, { "epoch": 3.414567733151804, "grad_norm": 1.5249005556106567, "learning_rate": 2.5065020229582814e-06, "loss": 0.4749, "step": 5016 }, { "epoch": 3.4152484683458133, "grad_norm": 1.5826103687286377, "learning_rate": 2.5045508255860773e-06, "loss": 0.4507, "step": 5017 }, { "epoch": 3.415929203539823, "grad_norm": 1.7118864059448242, "learning_rate": 2.5026001341678576e-06, "loss": 0.4499, "step": 5018 }, { "epoch": 3.4166099387338327, "grad_norm": 6.123201847076416, "learning_rate": 2.500649949099125e-06, "loss": 0.5058, "step": 5019 }, { "epoch": 3.417290673927842, "grad_norm": 1.6942729949951172, "learning_rate": 2.4987002707752844e-06, "loss": 0.4123, "step": 5020 }, { "epoch": 3.4179714091218516, "grad_norm": 1.8658102750778198, "learning_rate": 2.4967510995916315e-06, "loss": 0.4187, "step": 5021 }, { "epoch": 3.4186521443158613, "grad_norm": 1.609350562095642, "learning_rate": 2.4948024359433654e-06, "loss": 0.5096, "step": 5022 }, { "epoch": 3.4193328795098705, "grad_norm": 1.566203236579895, "learning_rate": 2.492854280225575e-06, "loss": 0.478, "step": 5023 }, { "epoch": 3.42001361470388, "grad_norm": 1.8162345886230469, "learning_rate": 2.490906632833254e-06, "loss": 0.3835, "step": 5024 }, { "epoch": 3.42069434989789, "grad_norm": 1.6267231702804565, "learning_rate": 2.488959494161287e-06, "loss": 0.3623, "step": 5025 }, { "epoch": 3.421375085091899, "grad_norm": 1.5521535873413086, "learning_rate": 2.487012864604456e-06, "loss": 0.4189, "step": 5026 }, { "epoch": 3.422055820285909, "grad_norm": 1.4897499084472656, "learning_rate": 2.4850667445574457e-06, "loss": 0.6003, "step": 5027 }, { "epoch": 3.4227365554799185, "grad_norm": 1.6638518571853638, "learning_rate": 2.4831211344148308e-06, "loss": 0.367, "step": 5028 }, { "epoch": 3.4234172906739277, "grad_norm": 1.7097302675247192, "learning_rate": 2.4811760345710833e-06, "loss": 0.4545, "step": 5029 }, { "epoch": 3.4240980258679374, "grad_norm": 1.7224197387695312, "learning_rate": 2.4792314454205755e-06, "loss": 0.3039, "step": 5030 }, { "epoch": 3.4247787610619467, "grad_norm": 1.532978892326355, "learning_rate": 2.4772873673575767e-06, "loss": 0.3801, "step": 5031 }, { "epoch": 3.4254594962559564, "grad_norm": 1.6241726875305176, "learning_rate": 2.4753438007762463e-06, "loss": 0.4226, "step": 5032 }, { "epoch": 3.426140231449966, "grad_norm": 1.5620626211166382, "learning_rate": 2.4734007460706432e-06, "loss": 0.4819, "step": 5033 }, { "epoch": 3.4268209666439757, "grad_norm": 1.6696604490280151, "learning_rate": 2.4714582036347268e-06, "loss": 0.3565, "step": 5034 }, { "epoch": 3.427501701837985, "grad_norm": 1.6365866661071777, "learning_rate": 2.4695161738623464e-06, "loss": 0.473, "step": 5035 }, { "epoch": 3.4281824370319947, "grad_norm": 1.4951270818710327, "learning_rate": 2.467574657147249e-06, "loss": 0.5658, "step": 5036 }, { "epoch": 3.428863172226004, "grad_norm": 1.7446197271347046, "learning_rate": 2.4656336538830807e-06, "loss": 0.3691, "step": 5037 }, { "epoch": 3.4295439074200136, "grad_norm": 1.5568495988845825, "learning_rate": 2.46369316446338e-06, "loss": 0.4851, "step": 5038 }, { "epoch": 3.4302246426140233, "grad_norm": 1.5941650867462158, "learning_rate": 2.4617531892815807e-06, "loss": 0.4022, "step": 5039 }, { "epoch": 3.4309053778080325, "grad_norm": 1.5708155632019043, "learning_rate": 2.4598137287310157e-06, "loss": 0.4267, "step": 5040 }, { "epoch": 3.431586113002042, "grad_norm": 1.5314393043518066, "learning_rate": 2.4578747832049145e-06, "loss": 0.4609, "step": 5041 }, { "epoch": 3.432266848196052, "grad_norm": 1.4818469285964966, "learning_rate": 2.455936353096397e-06, "loss": 0.5482, "step": 5042 }, { "epoch": 3.432947583390061, "grad_norm": 1.613992691040039, "learning_rate": 2.4539984387984795e-06, "loss": 0.3842, "step": 5043 }, { "epoch": 3.433628318584071, "grad_norm": 1.5618996620178223, "learning_rate": 2.452061040704079e-06, "loss": 0.4471, "step": 5044 }, { "epoch": 3.4343090537780805, "grad_norm": 1.6357208490371704, "learning_rate": 2.4501241592060028e-06, "loss": 0.3454, "step": 5045 }, { "epoch": 3.4349897889720897, "grad_norm": 1.765052318572998, "learning_rate": 2.4481877946969545e-06, "loss": 0.4586, "step": 5046 }, { "epoch": 3.4356705241660994, "grad_norm": 1.4965600967407227, "learning_rate": 2.4462519475695298e-06, "loss": 0.3563, "step": 5047 }, { "epoch": 3.436351259360109, "grad_norm": 1.4856655597686768, "learning_rate": 2.4443166182162305e-06, "loss": 0.3667, "step": 5048 }, { "epoch": 3.4370319945541183, "grad_norm": 1.6358373165130615, "learning_rate": 2.4423818070294415e-06, "loss": 0.3893, "step": 5049 }, { "epoch": 3.437712729748128, "grad_norm": 1.716525673866272, "learning_rate": 2.440447514401446e-06, "loss": 0.3122, "step": 5050 }, { "epoch": 3.4383934649421377, "grad_norm": 1.5249722003936768, "learning_rate": 2.438513740724427e-06, "loss": 0.5297, "step": 5051 }, { "epoch": 3.439074200136147, "grad_norm": 1.6522098779678345, "learning_rate": 2.4365804863904558e-06, "loss": 0.3977, "step": 5052 }, { "epoch": 3.4397549353301566, "grad_norm": 1.6728111505508423, "learning_rate": 2.434647751791502e-06, "loss": 0.268, "step": 5053 }, { "epoch": 3.440435670524166, "grad_norm": 1.5311121940612793, "learning_rate": 2.4327155373194254e-06, "loss": 0.517, "step": 5054 }, { "epoch": 3.4411164057181756, "grad_norm": 1.5484635829925537, "learning_rate": 2.430783843365989e-06, "loss": 0.5226, "step": 5055 }, { "epoch": 3.4417971409121852, "grad_norm": 1.4151487350463867, "learning_rate": 2.4288526703228405e-06, "loss": 0.5022, "step": 5056 }, { "epoch": 3.442477876106195, "grad_norm": 1.599673867225647, "learning_rate": 2.426922018581529e-06, "loss": 0.4159, "step": 5057 }, { "epoch": 3.443158611300204, "grad_norm": 1.5511350631713867, "learning_rate": 2.424991888533497e-06, "loss": 0.4441, "step": 5058 }, { "epoch": 3.443839346494214, "grad_norm": 1.6234644651412964, "learning_rate": 2.423062280570077e-06, "loss": 0.4841, "step": 5059 }, { "epoch": 3.444520081688223, "grad_norm": 1.5784658193588257, "learning_rate": 2.4211331950824996e-06, "loss": 0.478, "step": 5060 }, { "epoch": 3.445200816882233, "grad_norm": 1.5460858345031738, "learning_rate": 2.419204632461885e-06, "loss": 0.5246, "step": 5061 }, { "epoch": 3.4458815520762425, "grad_norm": 1.5597515106201172, "learning_rate": 2.417276593099256e-06, "loss": 0.3487, "step": 5062 }, { "epoch": 3.4465622872702517, "grad_norm": 1.6357920169830322, "learning_rate": 2.4153490773855203e-06, "loss": 0.3839, "step": 5063 }, { "epoch": 3.4472430224642614, "grad_norm": 1.394656777381897, "learning_rate": 2.4134220857114806e-06, "loss": 0.5152, "step": 5064 }, { "epoch": 3.447923757658271, "grad_norm": 1.552450180053711, "learning_rate": 2.411495618467842e-06, "loss": 0.4882, "step": 5065 }, { "epoch": 3.4486044928522803, "grad_norm": 1.6541200876235962, "learning_rate": 2.4095696760451937e-06, "loss": 0.2918, "step": 5066 }, { "epoch": 3.44928522804629, "grad_norm": 1.7824294567108154, "learning_rate": 2.407644258834023e-06, "loss": 0.2738, "step": 5067 }, { "epoch": 3.4499659632402997, "grad_norm": 1.6160271167755127, "learning_rate": 2.4057193672247053e-06, "loss": 0.4371, "step": 5068 }, { "epoch": 3.450646698434309, "grad_norm": 1.669309139251709, "learning_rate": 2.4037950016075197e-06, "loss": 0.4249, "step": 5069 }, { "epoch": 3.4513274336283186, "grad_norm": 1.5127722024917603, "learning_rate": 2.4018711623726293e-06, "loss": 0.4078, "step": 5070 }, { "epoch": 3.4520081688223283, "grad_norm": 1.688049077987671, "learning_rate": 2.3999478499100924e-06, "loss": 0.3832, "step": 5071 }, { "epoch": 3.4526889040163375, "grad_norm": 1.3567008972167969, "learning_rate": 2.3980250646098663e-06, "loss": 0.6287, "step": 5072 }, { "epoch": 3.4533696392103472, "grad_norm": 1.5786473751068115, "learning_rate": 2.396102806861792e-06, "loss": 0.4565, "step": 5073 }, { "epoch": 3.454050374404357, "grad_norm": 1.5868602991104126, "learning_rate": 2.394181077055613e-06, "loss": 0.3552, "step": 5074 }, { "epoch": 3.454731109598366, "grad_norm": 1.5235539674758911, "learning_rate": 2.3922598755809572e-06, "loss": 0.426, "step": 5075 }, { "epoch": 3.455411844792376, "grad_norm": 1.7139570713043213, "learning_rate": 2.3903392028273537e-06, "loss": 0.3792, "step": 5076 }, { "epoch": 3.456092579986385, "grad_norm": 1.6878594160079956, "learning_rate": 2.3884190591842183e-06, "loss": 0.3349, "step": 5077 }, { "epoch": 3.4567733151803948, "grad_norm": 1.5718564987182617, "learning_rate": 2.386499445040859e-06, "loss": 0.3248, "step": 5078 }, { "epoch": 3.4574540503744045, "grad_norm": 1.7874958515167236, "learning_rate": 2.3845803607864827e-06, "loss": 0.5416, "step": 5079 }, { "epoch": 3.4581347855684137, "grad_norm": 1.5986050367355347, "learning_rate": 2.382661806810184e-06, "loss": 0.3816, "step": 5080 }, { "epoch": 3.4588155207624234, "grad_norm": 1.598156213760376, "learning_rate": 2.380743783500948e-06, "loss": 0.4402, "step": 5081 }, { "epoch": 3.459496255956433, "grad_norm": 1.6178972721099854, "learning_rate": 2.3788262912476574e-06, "loss": 0.4995, "step": 5082 }, { "epoch": 3.4601769911504423, "grad_norm": 1.4960075616836548, "learning_rate": 2.376909330439087e-06, "loss": 0.5104, "step": 5083 }, { "epoch": 3.460857726344452, "grad_norm": 1.5950883626937866, "learning_rate": 2.3749929014639e-06, "loss": 0.4897, "step": 5084 }, { "epoch": 3.4615384615384617, "grad_norm": 1.4600279331207275, "learning_rate": 2.373077004710652e-06, "loss": 0.4759, "step": 5085 }, { "epoch": 3.462219196732471, "grad_norm": 1.6165547370910645, "learning_rate": 2.3711616405677963e-06, "loss": 0.6142, "step": 5086 }, { "epoch": 3.4628999319264806, "grad_norm": 1.6095305681228638, "learning_rate": 2.3692468094236715e-06, "loss": 0.412, "step": 5087 }, { "epoch": 3.4635806671204903, "grad_norm": 1.7830541133880615, "learning_rate": 2.3673325116665095e-06, "loss": 0.3361, "step": 5088 }, { "epoch": 3.4642614023144995, "grad_norm": 1.5448683500289917, "learning_rate": 2.3654187476844392e-06, "loss": 0.4593, "step": 5089 }, { "epoch": 3.464942137508509, "grad_norm": 1.576899528503418, "learning_rate": 2.363505517865474e-06, "loss": 0.3215, "step": 5090 }, { "epoch": 3.465622872702519, "grad_norm": 1.59589421749115, "learning_rate": 2.3615928225975267e-06, "loss": 0.3555, "step": 5091 }, { "epoch": 3.466303607896528, "grad_norm": 1.5268909931182861, "learning_rate": 2.359680662268392e-06, "loss": 0.4063, "step": 5092 }, { "epoch": 3.466984343090538, "grad_norm": 1.6233112812042236, "learning_rate": 2.3577690372657675e-06, "loss": 0.3303, "step": 5093 }, { "epoch": 3.467665078284547, "grad_norm": 1.5112425088882446, "learning_rate": 2.355857947977233e-06, "loss": 0.4308, "step": 5094 }, { "epoch": 3.4683458134785567, "grad_norm": 1.5165234804153442, "learning_rate": 2.353947394790262e-06, "loss": 0.4067, "step": 5095 }, { "epoch": 3.4690265486725664, "grad_norm": 1.5451267957687378, "learning_rate": 2.3520373780922245e-06, "loss": 0.4128, "step": 5096 }, { "epoch": 3.469707283866576, "grad_norm": 1.5714541673660278, "learning_rate": 2.350127898270375e-06, "loss": 0.3991, "step": 5097 }, { "epoch": 3.4703880190605854, "grad_norm": 1.6158900260925293, "learning_rate": 2.3482189557118605e-06, "loss": 0.4133, "step": 5098 }, { "epoch": 3.471068754254595, "grad_norm": 1.506306529045105, "learning_rate": 2.346310550803722e-06, "loss": 0.4619, "step": 5099 }, { "epoch": 3.4717494894486043, "grad_norm": 1.639209270477295, "learning_rate": 2.344402683932892e-06, "loss": 0.5335, "step": 5100 }, { "epoch": 3.472430224642614, "grad_norm": 1.600551724433899, "learning_rate": 2.3424953554861895e-06, "loss": 0.5435, "step": 5101 }, { "epoch": 3.4731109598366237, "grad_norm": 1.760439157485962, "learning_rate": 2.3405885658503254e-06, "loss": 0.4585, "step": 5102 }, { "epoch": 3.473791695030633, "grad_norm": 1.6415692567825317, "learning_rate": 2.338682315411905e-06, "loss": 0.5473, "step": 5103 }, { "epoch": 3.4744724302246426, "grad_norm": 1.584563136100769, "learning_rate": 2.3367766045574214e-06, "loss": 0.3508, "step": 5104 }, { "epoch": 3.4751531654186523, "grad_norm": 1.493723750114441, "learning_rate": 2.3348714336732564e-06, "loss": 0.3595, "step": 5105 }, { "epoch": 3.4758339006126615, "grad_norm": 1.5435172319412231, "learning_rate": 2.3329668031456883e-06, "loss": 0.4234, "step": 5106 }, { "epoch": 3.476514635806671, "grad_norm": 1.662900447845459, "learning_rate": 2.33106271336088e-06, "loss": 0.3905, "step": 5107 }, { "epoch": 3.477195371000681, "grad_norm": 1.5729970932006836, "learning_rate": 2.3291591647048857e-06, "loss": 0.2927, "step": 5108 }, { "epoch": 3.47787610619469, "grad_norm": 1.7111400365829468, "learning_rate": 2.3272561575636532e-06, "loss": 0.4137, "step": 5109 }, { "epoch": 3.4785568413887, "grad_norm": 1.5922993421554565, "learning_rate": 2.3253536923230198e-06, "loss": 0.4823, "step": 5110 }, { "epoch": 3.4792375765827095, "grad_norm": 1.5146763324737549, "learning_rate": 2.3234517693687093e-06, "loss": 0.4634, "step": 5111 }, { "epoch": 3.4799183117767187, "grad_norm": 1.6199661493301392, "learning_rate": 2.3215503890863367e-06, "loss": 0.4988, "step": 5112 }, { "epoch": 3.4805990469707284, "grad_norm": 1.5794318914413452, "learning_rate": 2.3196495518614122e-06, "loss": 0.4616, "step": 5113 }, { "epoch": 3.481279782164738, "grad_norm": 1.63529372215271, "learning_rate": 2.3177492580793293e-06, "loss": 0.5332, "step": 5114 }, { "epoch": 3.4819605173587473, "grad_norm": 1.5187269449234009, "learning_rate": 2.315849508125374e-06, "loss": 0.416, "step": 5115 }, { "epoch": 3.482641252552757, "grad_norm": 1.707808256149292, "learning_rate": 2.313950302384718e-06, "loss": 0.3497, "step": 5116 }, { "epoch": 3.4833219877467663, "grad_norm": 1.5890270471572876, "learning_rate": 2.3120516412424345e-06, "loss": 0.3862, "step": 5117 }, { "epoch": 3.484002722940776, "grad_norm": 1.615065574645996, "learning_rate": 2.310153525083473e-06, "loss": 0.4152, "step": 5118 }, { "epoch": 3.4846834581347856, "grad_norm": 1.6911265850067139, "learning_rate": 2.3082559542926788e-06, "loss": 0.302, "step": 5119 }, { "epoch": 3.4853641933287953, "grad_norm": 1.5653530359268188, "learning_rate": 2.3063589292547867e-06, "loss": 0.5011, "step": 5120 }, { "epoch": 3.4860449285228046, "grad_norm": 1.5899884700775146, "learning_rate": 2.3044624503544194e-06, "loss": 0.4976, "step": 5121 }, { "epoch": 3.4867256637168142, "grad_norm": 1.6233854293823242, "learning_rate": 2.3025665179760888e-06, "loss": 0.3914, "step": 5122 }, { "epoch": 3.4874063989108235, "grad_norm": 1.6572041511535645, "learning_rate": 2.300671132504194e-06, "loss": 0.4322, "step": 5123 }, { "epoch": 3.488087134104833, "grad_norm": 1.6169722080230713, "learning_rate": 2.298776294323031e-06, "loss": 0.3496, "step": 5124 }, { "epoch": 3.488767869298843, "grad_norm": 1.6542017459869385, "learning_rate": 2.296882003816774e-06, "loss": 0.4499, "step": 5125 }, { "epoch": 3.489448604492852, "grad_norm": 1.695926308631897, "learning_rate": 2.2949882613694945e-06, "loss": 0.4419, "step": 5126 }, { "epoch": 3.490129339686862, "grad_norm": 1.916569709777832, "learning_rate": 2.2930950673651518e-06, "loss": 0.4612, "step": 5127 }, { "epoch": 3.4908100748808715, "grad_norm": 1.702920913696289, "learning_rate": 2.29120242218759e-06, "loss": 0.4415, "step": 5128 }, { "epoch": 3.4914908100748807, "grad_norm": 1.6274011135101318, "learning_rate": 2.289310326220545e-06, "loss": 0.3969, "step": 5129 }, { "epoch": 3.4921715452688904, "grad_norm": 1.5914475917816162, "learning_rate": 2.2874187798476377e-06, "loss": 0.4945, "step": 5130 }, { "epoch": 3.4928522804629, "grad_norm": 1.4749881029129028, "learning_rate": 2.2855277834523855e-06, "loss": 0.4246, "step": 5131 }, { "epoch": 3.4935330156569093, "grad_norm": 1.5559223890304565, "learning_rate": 2.283637337418185e-06, "loss": 0.3818, "step": 5132 }, { "epoch": 3.494213750850919, "grad_norm": 1.5348840951919556, "learning_rate": 2.2817474421283243e-06, "loss": 0.3183, "step": 5133 }, { "epoch": 3.4948944860449287, "grad_norm": 1.622178316116333, "learning_rate": 2.2798580979659864e-06, "loss": 0.4861, "step": 5134 }, { "epoch": 3.495575221238938, "grad_norm": 1.539644718170166, "learning_rate": 2.277969305314234e-06, "loss": 0.456, "step": 5135 }, { "epoch": 3.4962559564329476, "grad_norm": 1.5534641742706299, "learning_rate": 2.2760810645560217e-06, "loss": 0.572, "step": 5136 }, { "epoch": 3.4969366916269573, "grad_norm": 1.4863758087158203, "learning_rate": 2.2741933760741885e-06, "loss": 0.5239, "step": 5137 }, { "epoch": 3.4976174268209665, "grad_norm": 1.6104776859283447, "learning_rate": 2.2723062402514697e-06, "loss": 0.4467, "step": 5138 }, { "epoch": 3.4982981620149762, "grad_norm": 1.6904969215393066, "learning_rate": 2.2704196574704797e-06, "loss": 0.3769, "step": 5139 }, { "epoch": 3.4989788972089855, "grad_norm": 1.6432764530181885, "learning_rate": 2.2685336281137243e-06, "loss": 0.3313, "step": 5140 }, { "epoch": 3.499659632402995, "grad_norm": 1.7114033699035645, "learning_rate": 2.2666481525636e-06, "loss": 0.3186, "step": 5141 }, { "epoch": 3.500340367597005, "grad_norm": 1.5437685251235962, "learning_rate": 2.264763231202384e-06, "loss": 0.4969, "step": 5142 }, { "epoch": 3.5010211027910145, "grad_norm": 1.6830562353134155, "learning_rate": 2.262878864412251e-06, "loss": 0.3817, "step": 5143 }, { "epoch": 3.5017018379850238, "grad_norm": 1.6640266180038452, "learning_rate": 2.2609950525752517e-06, "loss": 0.3626, "step": 5144 }, { "epoch": 3.5023825731790335, "grad_norm": 1.7050731182098389, "learning_rate": 2.259111796073335e-06, "loss": 0.5829, "step": 5145 }, { "epoch": 3.5030633083730427, "grad_norm": 1.6834584474563599, "learning_rate": 2.257229095288331e-06, "loss": 0.3649, "step": 5146 }, { "epoch": 3.5037440435670524, "grad_norm": 1.5227100849151611, "learning_rate": 2.2553469506019555e-06, "loss": 0.3998, "step": 5147 }, { "epoch": 3.504424778761062, "grad_norm": 1.489605188369751, "learning_rate": 2.253465362395819e-06, "loss": 0.3839, "step": 5148 }, { "epoch": 3.5051055139550717, "grad_norm": 1.5412005186080933, "learning_rate": 2.251584331051413e-06, "loss": 0.4469, "step": 5149 }, { "epoch": 3.505786249149081, "grad_norm": 1.5989006757736206, "learning_rate": 2.2497038569501163e-06, "loss": 0.3486, "step": 5150 }, { "epoch": 3.5064669843430907, "grad_norm": 1.5642863512039185, "learning_rate": 2.247823940473197e-06, "loss": 0.3383, "step": 5151 }, { "epoch": 3.5071477195371, "grad_norm": 1.7925333976745605, "learning_rate": 2.2459445820018123e-06, "loss": 0.4331, "step": 5152 }, { "epoch": 3.5078284547311096, "grad_norm": 1.6540316343307495, "learning_rate": 2.244065781917001e-06, "loss": 0.4005, "step": 5153 }, { "epoch": 3.5085091899251193, "grad_norm": 1.549128770828247, "learning_rate": 2.242187540599689e-06, "loss": 0.4333, "step": 5154 }, { "epoch": 3.5091899251191285, "grad_norm": 1.6188182830810547, "learning_rate": 2.240309858430696e-06, "loss": 0.4864, "step": 5155 }, { "epoch": 3.509870660313138, "grad_norm": 1.554645299911499, "learning_rate": 2.23843273579072e-06, "loss": 0.5103, "step": 5156 }, { "epoch": 3.5105513955071475, "grad_norm": 1.6229218244552612, "learning_rate": 2.2365561730603473e-06, "loss": 0.3425, "step": 5157 }, { "epoch": 3.511232130701157, "grad_norm": 1.6305170059204102, "learning_rate": 2.2346801706200567e-06, "loss": 0.407, "step": 5158 }, { "epoch": 3.511912865895167, "grad_norm": 1.5246400833129883, "learning_rate": 2.232804728850205e-06, "loss": 0.44, "step": 5159 }, { "epoch": 3.5125936010891765, "grad_norm": 1.4023396968841553, "learning_rate": 2.2309298481310415e-06, "loss": 0.5732, "step": 5160 }, { "epoch": 3.5132743362831858, "grad_norm": 1.476477861404419, "learning_rate": 2.229055528842698e-06, "loss": 0.3723, "step": 5161 }, { "epoch": 3.5139550714771954, "grad_norm": 1.5142490863800049, "learning_rate": 2.2271817713651966e-06, "loss": 0.3975, "step": 5162 }, { "epoch": 3.5146358066712047, "grad_norm": 1.5919008255004883, "learning_rate": 2.225308576078441e-06, "loss": 0.4279, "step": 5163 }, { "epoch": 3.5153165418652144, "grad_norm": 1.6878936290740967, "learning_rate": 2.2234359433622216e-06, "loss": 0.5209, "step": 5164 }, { "epoch": 3.515997277059224, "grad_norm": 1.5415754318237305, "learning_rate": 2.2215638735962186e-06, "loss": 0.4188, "step": 5165 }, { "epoch": 3.5166780122532337, "grad_norm": 1.6696900129318237, "learning_rate": 2.219692367159994e-06, "loss": 0.4092, "step": 5166 }, { "epoch": 3.517358747447243, "grad_norm": 1.6286303997039795, "learning_rate": 2.2178214244329956e-06, "loss": 0.3561, "step": 5167 }, { "epoch": 3.5180394826412527, "grad_norm": 1.5337247848510742, "learning_rate": 2.2159510457945595e-06, "loss": 0.4649, "step": 5168 }, { "epoch": 3.518720217835262, "grad_norm": 1.5990934371948242, "learning_rate": 2.214081231623908e-06, "loss": 0.3641, "step": 5169 }, { "epoch": 3.5194009530292716, "grad_norm": 1.4700136184692383, "learning_rate": 2.2122119823001464e-06, "loss": 0.5226, "step": 5170 }, { "epoch": 3.5200816882232813, "grad_norm": 1.4842727184295654, "learning_rate": 2.2103432982022634e-06, "loss": 0.5899, "step": 5171 }, { "epoch": 3.5207624234172905, "grad_norm": 1.4827746152877808, "learning_rate": 2.2084751797091393e-06, "loss": 0.3975, "step": 5172 }, { "epoch": 3.5214431586113, "grad_norm": 1.4734246730804443, "learning_rate": 2.206607627199535e-06, "loss": 0.5285, "step": 5173 }, { "epoch": 3.52212389380531, "grad_norm": 1.7037216424942017, "learning_rate": 2.2047406410520974e-06, "loss": 0.3314, "step": 5174 }, { "epoch": 3.522804628999319, "grad_norm": 1.6677701473236084, "learning_rate": 2.202874221645361e-06, "loss": 0.3695, "step": 5175 }, { "epoch": 3.523485364193329, "grad_norm": 1.5699353218078613, "learning_rate": 2.201008369357742e-06, "loss": 0.5008, "step": 5176 }, { "epoch": 3.5241660993873385, "grad_norm": 1.621606707572937, "learning_rate": 2.199143084567543e-06, "loss": 0.3823, "step": 5177 }, { "epoch": 3.5248468345813477, "grad_norm": 1.5836741924285889, "learning_rate": 2.197278367652951e-06, "loss": 0.472, "step": 5178 }, { "epoch": 3.5255275697753574, "grad_norm": 1.6319756507873535, "learning_rate": 2.195414218992043e-06, "loss": 0.4925, "step": 5179 }, { "epoch": 3.5262083049693667, "grad_norm": 1.6549999713897705, "learning_rate": 2.193550638962773e-06, "loss": 0.472, "step": 5180 }, { "epoch": 3.5268890401633763, "grad_norm": 1.618089199066162, "learning_rate": 2.1916876279429817e-06, "loss": 0.382, "step": 5181 }, { "epoch": 3.527569775357386, "grad_norm": 1.5431162118911743, "learning_rate": 2.1898251863103993e-06, "loss": 0.4843, "step": 5182 }, { "epoch": 3.5282505105513957, "grad_norm": 1.5551046133041382, "learning_rate": 2.1879633144426355e-06, "loss": 0.3876, "step": 5183 }, { "epoch": 3.528931245745405, "grad_norm": 1.5716873407363892, "learning_rate": 2.1861020127171843e-06, "loss": 0.5136, "step": 5184 }, { "epoch": 3.5296119809394146, "grad_norm": 1.5301278829574585, "learning_rate": 2.1842412815114274e-06, "loss": 0.3497, "step": 5185 }, { "epoch": 3.530292716133424, "grad_norm": 1.426539421081543, "learning_rate": 2.1823811212026315e-06, "loss": 0.4815, "step": 5186 }, { "epoch": 3.5309734513274336, "grad_norm": 1.575968861579895, "learning_rate": 2.180521532167943e-06, "loss": 0.4422, "step": 5187 }, { "epoch": 3.5316541865214433, "grad_norm": 1.703922152519226, "learning_rate": 2.1786625147843933e-06, "loss": 0.4092, "step": 5188 }, { "epoch": 3.532334921715453, "grad_norm": 1.416918158531189, "learning_rate": 2.1768040694289027e-06, "loss": 0.4472, "step": 5189 }, { "epoch": 3.533015656909462, "grad_norm": 1.593665361404419, "learning_rate": 2.174946196478271e-06, "loss": 0.4476, "step": 5190 }, { "epoch": 3.533696392103472, "grad_norm": 1.4834339618682861, "learning_rate": 2.1730888963091826e-06, "loss": 0.435, "step": 5191 }, { "epoch": 3.534377127297481, "grad_norm": 1.572421908378601, "learning_rate": 2.1712321692982053e-06, "loss": 0.4042, "step": 5192 }, { "epoch": 3.535057862491491, "grad_norm": 1.5750198364257812, "learning_rate": 2.1693760158217946e-06, "loss": 0.3993, "step": 5193 }, { "epoch": 3.5357385976855005, "grad_norm": 1.6391395330429077, "learning_rate": 2.1675204362562842e-06, "loss": 0.4755, "step": 5194 }, { "epoch": 3.5364193328795097, "grad_norm": 1.7081128358840942, "learning_rate": 2.1656654309778945e-06, "loss": 0.3053, "step": 5195 }, { "epoch": 3.5371000680735194, "grad_norm": 1.5838180780410767, "learning_rate": 2.1638110003627317e-06, "loss": 0.5256, "step": 5196 }, { "epoch": 3.537780803267529, "grad_norm": 1.5523320436477661, "learning_rate": 2.1619571447867816e-06, "loss": 0.6094, "step": 5197 }, { "epoch": 3.5384615384615383, "grad_norm": 1.4712332487106323, "learning_rate": 2.160103864625913e-06, "loss": 0.441, "step": 5198 }, { "epoch": 3.539142273655548, "grad_norm": 1.5902233123779297, "learning_rate": 2.1582511602558798e-06, "loss": 0.3586, "step": 5199 }, { "epoch": 3.5398230088495577, "grad_norm": 1.5414718389511108, "learning_rate": 2.1563990320523213e-06, "loss": 0.4415, "step": 5200 }, { "epoch": 3.540503744043567, "grad_norm": 1.610166311264038, "learning_rate": 2.1545474803907564e-06, "loss": 0.3121, "step": 5201 }, { "epoch": 3.5411844792375766, "grad_norm": 1.7111377716064453, "learning_rate": 2.152696505646585e-06, "loss": 0.3969, "step": 5202 }, { "epoch": 3.541865214431586, "grad_norm": 1.6402699947357178, "learning_rate": 2.1508461081951003e-06, "loss": 0.475, "step": 5203 }, { "epoch": 3.5425459496255955, "grad_norm": 1.4457069635391235, "learning_rate": 2.1489962884114685e-06, "loss": 0.5869, "step": 5204 }, { "epoch": 3.5432266848196052, "grad_norm": 1.560994029045105, "learning_rate": 2.147147046670741e-06, "loss": 0.4649, "step": 5205 }, { "epoch": 3.543907420013615, "grad_norm": 1.5418291091918945, "learning_rate": 2.145298383347852e-06, "loss": 0.5453, "step": 5206 }, { "epoch": 3.544588155207624, "grad_norm": 1.4732890129089355, "learning_rate": 2.143450298817622e-06, "loss": 0.4003, "step": 5207 }, { "epoch": 3.545268890401634, "grad_norm": 1.6955031156539917, "learning_rate": 2.1416027934547494e-06, "loss": 0.3363, "step": 5208 }, { "epoch": 3.545949625595643, "grad_norm": 1.6110574007034302, "learning_rate": 2.1397558676338165e-06, "loss": 0.4244, "step": 5209 }, { "epoch": 3.5466303607896528, "grad_norm": 1.6484321355819702, "learning_rate": 2.1379095217292916e-06, "loss": 0.3955, "step": 5210 }, { "epoch": 3.5473110959836625, "grad_norm": 1.7395703792572021, "learning_rate": 2.1360637561155188e-06, "loss": 0.3277, "step": 5211 }, { "epoch": 3.547991831177672, "grad_norm": 1.639398455619812, "learning_rate": 2.1342185711667325e-06, "loss": 0.5136, "step": 5212 }, { "epoch": 3.5486725663716814, "grad_norm": 1.6412971019744873, "learning_rate": 2.132373967257041e-06, "loss": 0.3103, "step": 5213 }, { "epoch": 3.549353301565691, "grad_norm": 1.8156331777572632, "learning_rate": 2.130529944760443e-06, "loss": 0.4271, "step": 5214 }, { "epoch": 3.5500340367597003, "grad_norm": 1.6771724224090576, "learning_rate": 2.1286865040508125e-06, "loss": 0.4552, "step": 5215 }, { "epoch": 3.55071477195371, "grad_norm": 1.7126003503799438, "learning_rate": 2.126843645501908e-06, "loss": 0.3637, "step": 5216 }, { "epoch": 3.5513955071477197, "grad_norm": 1.593693733215332, "learning_rate": 2.125001369487373e-06, "loss": 0.4826, "step": 5217 }, { "epoch": 3.552076242341729, "grad_norm": 1.7088154554367065, "learning_rate": 2.1231596763807287e-06, "loss": 0.4332, "step": 5218 }, { "epoch": 3.5527569775357386, "grad_norm": 1.6407934427261353, "learning_rate": 2.121318566555378e-06, "loss": 0.4511, "step": 5219 }, { "epoch": 3.5534377127297483, "grad_norm": 1.5070618391036987, "learning_rate": 2.119478040384608e-06, "loss": 0.4146, "step": 5220 }, { "epoch": 3.5541184479237575, "grad_norm": 1.6962300539016724, "learning_rate": 2.11763809824159e-06, "loss": 0.2104, "step": 5221 }, { "epoch": 3.554799183117767, "grad_norm": 1.6738420724868774, "learning_rate": 2.11579874049937e-06, "loss": 0.3356, "step": 5222 }, { "epoch": 3.555479918311777, "grad_norm": 1.6033371686935425, "learning_rate": 2.1139599675308785e-06, "loss": 0.3456, "step": 5223 }, { "epoch": 3.556160653505786, "grad_norm": 1.665568232536316, "learning_rate": 2.1121217797089306e-06, "loss": 0.3749, "step": 5224 }, { "epoch": 3.556841388699796, "grad_norm": 1.6542103290557861, "learning_rate": 2.110284177406219e-06, "loss": 0.3048, "step": 5225 }, { "epoch": 3.557522123893805, "grad_norm": 1.5321253538131714, "learning_rate": 2.1084471609953166e-06, "loss": 0.4857, "step": 5226 }, { "epoch": 3.5582028590878148, "grad_norm": 1.6550092697143555, "learning_rate": 2.1066107308486834e-06, "loss": 0.3901, "step": 5227 }, { "epoch": 3.5588835942818244, "grad_norm": 1.6535030603408813, "learning_rate": 2.104774887338653e-06, "loss": 0.4673, "step": 5228 }, { "epoch": 3.559564329475834, "grad_norm": 1.59015953540802, "learning_rate": 2.1029396308374485e-06, "loss": 0.5335, "step": 5229 }, { "epoch": 3.5602450646698434, "grad_norm": 1.6991515159606934, "learning_rate": 2.1011049617171644e-06, "loss": 0.4597, "step": 5230 }, { "epoch": 3.560925799863853, "grad_norm": 1.5351214408874512, "learning_rate": 2.099270880349786e-06, "loss": 0.5362, "step": 5231 }, { "epoch": 3.5616065350578623, "grad_norm": 1.6447384357452393, "learning_rate": 2.097437387107171e-06, "loss": 0.3345, "step": 5232 }, { "epoch": 3.562287270251872, "grad_norm": 1.625736951828003, "learning_rate": 2.095604482361061e-06, "loss": 0.4261, "step": 5233 }, { "epoch": 3.5629680054458817, "grad_norm": 1.4121105670928955, "learning_rate": 2.093772166483082e-06, "loss": 0.3944, "step": 5234 }, { "epoch": 3.5636487406398913, "grad_norm": 1.7630912065505981, "learning_rate": 2.091940439844736e-06, "loss": 0.3761, "step": 5235 }, { "epoch": 3.5643294758339006, "grad_norm": 1.6112743616104126, "learning_rate": 2.0901093028174035e-06, "loss": 0.4572, "step": 5236 }, { "epoch": 3.5650102110279103, "grad_norm": 1.6037620306015015, "learning_rate": 2.0882787557723514e-06, "loss": 0.3933, "step": 5237 }, { "epoch": 3.5656909462219195, "grad_norm": 1.639485239982605, "learning_rate": 2.086448799080727e-06, "loss": 0.5182, "step": 5238 }, { "epoch": 3.566371681415929, "grad_norm": 1.5562962293624878, "learning_rate": 2.0846194331135524e-06, "loss": 0.4938, "step": 5239 }, { "epoch": 3.567052416609939, "grad_norm": 1.5233185291290283, "learning_rate": 2.08279065824173e-06, "loss": 0.4862, "step": 5240 }, { "epoch": 3.567733151803948, "grad_norm": 1.6106059551239014, "learning_rate": 2.0809624748360513e-06, "loss": 0.3935, "step": 5241 }, { "epoch": 3.568413886997958, "grad_norm": 1.6596555709838867, "learning_rate": 2.0791348832671776e-06, "loss": 0.419, "step": 5242 }, { "epoch": 3.569094622191967, "grad_norm": 1.742287278175354, "learning_rate": 2.077307883905654e-06, "loss": 0.4768, "step": 5243 }, { "epoch": 3.5697753573859767, "grad_norm": 1.5834217071533203, "learning_rate": 2.0754814771219083e-06, "loss": 0.5184, "step": 5244 }, { "epoch": 3.5704560925799864, "grad_norm": 1.536407709121704, "learning_rate": 2.073655663286244e-06, "loss": 0.4184, "step": 5245 }, { "epoch": 3.571136827773996, "grad_norm": 1.6336674690246582, "learning_rate": 2.0718304427688458e-06, "loss": 0.3824, "step": 5246 }, { "epoch": 3.5718175629680053, "grad_norm": 1.5008693933486938, "learning_rate": 2.0700058159397785e-06, "loss": 0.5444, "step": 5247 }, { "epoch": 3.572498298162015, "grad_norm": 1.4094699621200562, "learning_rate": 2.0681817831689886e-06, "loss": 0.5095, "step": 5248 }, { "epoch": 3.5731790333560243, "grad_norm": 1.5516529083251953, "learning_rate": 2.066358344826299e-06, "loss": 0.3259, "step": 5249 }, { "epoch": 3.573859768550034, "grad_norm": 1.6253347396850586, "learning_rate": 2.06453550128141e-06, "loss": 0.5424, "step": 5250 }, { "epoch": 3.5745405037440436, "grad_norm": 1.5912615060806274, "learning_rate": 2.062713252903909e-06, "loss": 0.4019, "step": 5251 }, { "epoch": 3.5752212389380533, "grad_norm": 1.5505424737930298, "learning_rate": 2.0608916000632556e-06, "loss": 0.4002, "step": 5252 }, { "epoch": 3.5759019741320626, "grad_norm": 1.6802948713302612, "learning_rate": 2.0590705431287895e-06, "loss": 0.5017, "step": 5253 }, { "epoch": 3.5765827093260723, "grad_norm": 1.5410513877868652, "learning_rate": 2.057250082469733e-06, "loss": 0.5285, "step": 5254 }, { "epoch": 3.5772634445200815, "grad_norm": 1.5444673299789429, "learning_rate": 2.055430218455188e-06, "loss": 0.3662, "step": 5255 }, { "epoch": 3.577944179714091, "grad_norm": 1.584820032119751, "learning_rate": 2.05361095145413e-06, "loss": 0.3223, "step": 5256 }, { "epoch": 3.578624914908101, "grad_norm": 1.4209933280944824, "learning_rate": 2.051792281835416e-06, "loss": 0.4377, "step": 5257 }, { "epoch": 3.5793056501021105, "grad_norm": 1.5223950147628784, "learning_rate": 2.0499742099677866e-06, "loss": 0.4927, "step": 5258 }, { "epoch": 3.57998638529612, "grad_norm": 1.7012964487075806, "learning_rate": 2.0481567362198537e-06, "loss": 0.3942, "step": 5259 }, { "epoch": 3.5806671204901295, "grad_norm": 1.7137887477874756, "learning_rate": 2.0463398609601122e-06, "loss": 0.3202, "step": 5260 }, { "epoch": 3.5813478556841387, "grad_norm": 1.4852310419082642, "learning_rate": 2.0445235845569327e-06, "loss": 0.5356, "step": 5261 }, { "epoch": 3.5820285908781484, "grad_norm": 1.6787058115005493, "learning_rate": 2.0427079073785703e-06, "loss": 0.4444, "step": 5262 }, { "epoch": 3.582709326072158, "grad_norm": 1.7055877447128296, "learning_rate": 2.0408928297931514e-06, "loss": 0.4276, "step": 5263 }, { "epoch": 3.5833900612661673, "grad_norm": 1.6973884105682373, "learning_rate": 2.0390783521686853e-06, "loss": 0.3716, "step": 5264 }, { "epoch": 3.584070796460177, "grad_norm": 1.5294699668884277, "learning_rate": 2.0372644748730606e-06, "loss": 0.4789, "step": 5265 }, { "epoch": 3.5847515316541863, "grad_norm": 1.645727515220642, "learning_rate": 2.035451198274041e-06, "loss": 0.4122, "step": 5266 }, { "epoch": 3.585432266848196, "grad_norm": 1.4945064783096313, "learning_rate": 2.0336385227392684e-06, "loss": 0.4983, "step": 5267 }, { "epoch": 3.5861130020422056, "grad_norm": 1.8015402555465698, "learning_rate": 2.031826448636263e-06, "loss": 0.3646, "step": 5268 }, { "epoch": 3.5867937372362153, "grad_norm": 1.7353118658065796, "learning_rate": 2.030014976332427e-06, "loss": 0.3806, "step": 5269 }, { "epoch": 3.5874744724302245, "grad_norm": 1.5628232955932617, "learning_rate": 2.0282041061950367e-06, "loss": 0.4424, "step": 5270 }, { "epoch": 3.5881552076242342, "grad_norm": 1.5053112506866455, "learning_rate": 2.026393838591243e-06, "loss": 0.3894, "step": 5271 }, { "epoch": 3.5888359428182435, "grad_norm": 1.4953571557998657, "learning_rate": 2.0245841738880862e-06, "loss": 0.4496, "step": 5272 }, { "epoch": 3.589516678012253, "grad_norm": 1.6878573894500732, "learning_rate": 2.0227751124524734e-06, "loss": 0.3133, "step": 5273 }, { "epoch": 3.590197413206263, "grad_norm": 1.6954628229141235, "learning_rate": 2.020966654651193e-06, "loss": 0.3539, "step": 5274 }, { "epoch": 3.5908781484002725, "grad_norm": 1.5641614198684692, "learning_rate": 2.0191588008509095e-06, "loss": 0.3609, "step": 5275 }, { "epoch": 3.5915588835942818, "grad_norm": 1.421125054359436, "learning_rate": 2.0173515514181696e-06, "loss": 0.548, "step": 5276 }, { "epoch": 3.5922396187882915, "grad_norm": 1.6267647743225098, "learning_rate": 2.0155449067193928e-06, "loss": 0.286, "step": 5277 }, { "epoch": 3.5929203539823007, "grad_norm": 1.6319745779037476, "learning_rate": 2.0137388671208754e-06, "loss": 0.4161, "step": 5278 }, { "epoch": 3.5936010891763104, "grad_norm": 1.6559948921203613, "learning_rate": 2.0119334329887974e-06, "loss": 0.3906, "step": 5279 }, { "epoch": 3.59428182437032, "grad_norm": 1.5670452117919922, "learning_rate": 2.010128604689208e-06, "loss": 0.5172, "step": 5280 }, { "epoch": 3.5949625595643298, "grad_norm": 1.686745047569275, "learning_rate": 2.00832438258804e-06, "loss": 0.4201, "step": 5281 }, { "epoch": 3.595643294758339, "grad_norm": 1.6432793140411377, "learning_rate": 2.006520767051098e-06, "loss": 0.4848, "step": 5282 }, { "epoch": 3.5963240299523487, "grad_norm": 1.513301968574524, "learning_rate": 2.0047177584440696e-06, "loss": 0.574, "step": 5283 }, { "epoch": 3.597004765146358, "grad_norm": 1.7572921514511108, "learning_rate": 2.0029153571325145e-06, "loss": 0.3135, "step": 5284 }, { "epoch": 3.5976855003403676, "grad_norm": 1.6889029741287231, "learning_rate": 2.001113563481868e-06, "loss": 0.2711, "step": 5285 }, { "epoch": 3.5983662355343773, "grad_norm": 1.598389983177185, "learning_rate": 1.9993123778574497e-06, "loss": 0.4689, "step": 5286 }, { "epoch": 3.5990469707283865, "grad_norm": 1.5065011978149414, "learning_rate": 1.9975118006244487e-06, "loss": 0.4425, "step": 5287 }, { "epoch": 3.599727705922396, "grad_norm": 1.6012877225875854, "learning_rate": 1.9957118321479326e-06, "loss": 0.4628, "step": 5288 }, { "epoch": 3.6004084411164055, "grad_norm": 1.5674887895584106, "learning_rate": 1.993912472792846e-06, "loss": 0.306, "step": 5289 }, { "epoch": 3.601089176310415, "grad_norm": 1.5316611528396606, "learning_rate": 1.9921137229240134e-06, "loss": 0.4102, "step": 5290 }, { "epoch": 3.601769911504425, "grad_norm": 1.5932090282440186, "learning_rate": 1.9903155829061307e-06, "loss": 0.451, "step": 5291 }, { "epoch": 3.6024506466984345, "grad_norm": 1.575447678565979, "learning_rate": 1.9885180531037694e-06, "loss": 0.4113, "step": 5292 }, { "epoch": 3.6031313818924438, "grad_norm": 1.6983734369277954, "learning_rate": 1.9867211338813842e-06, "loss": 0.4221, "step": 5293 }, { "epoch": 3.6038121170864534, "grad_norm": 1.5679399967193604, "learning_rate": 1.9849248256033e-06, "loss": 0.4057, "step": 5294 }, { "epoch": 3.6044928522804627, "grad_norm": 1.5533409118652344, "learning_rate": 1.983129128633717e-06, "loss": 0.5062, "step": 5295 }, { "epoch": 3.6051735874744724, "grad_norm": 1.6946887969970703, "learning_rate": 1.981334043336718e-06, "loss": 0.374, "step": 5296 }, { "epoch": 3.605854322668482, "grad_norm": 1.5726784467697144, "learning_rate": 1.9795395700762537e-06, "loss": 0.3328, "step": 5297 }, { "epoch": 3.6065350578624917, "grad_norm": 1.688740611076355, "learning_rate": 1.977745709216159e-06, "loss": 0.3584, "step": 5298 }, { "epoch": 3.607215793056501, "grad_norm": 1.679294228553772, "learning_rate": 1.9759524611201364e-06, "loss": 0.4095, "step": 5299 }, { "epoch": 3.6078965282505107, "grad_norm": 1.4872421026229858, "learning_rate": 1.974159826151772e-06, "loss": 0.4507, "step": 5300 }, { "epoch": 3.60857726344452, "grad_norm": 1.5712082386016846, "learning_rate": 1.9723678046745216e-06, "loss": 0.402, "step": 5301 }, { "epoch": 3.6092579986385296, "grad_norm": 1.6691639423370361, "learning_rate": 1.9705763970517174e-06, "loss": 0.3804, "step": 5302 }, { "epoch": 3.6099387338325393, "grad_norm": 1.752562403678894, "learning_rate": 1.9687856036465712e-06, "loss": 0.2894, "step": 5303 }, { "epoch": 3.6106194690265485, "grad_norm": 1.549456238746643, "learning_rate": 1.966995424822167e-06, "loss": 0.4324, "step": 5304 }, { "epoch": 3.611300204220558, "grad_norm": 1.558354139328003, "learning_rate": 1.9652058609414614e-06, "loss": 0.3402, "step": 5305 }, { "epoch": 3.611980939414568, "grad_norm": 1.7385140657424927, "learning_rate": 1.9634169123672935e-06, "loss": 0.3254, "step": 5306 }, { "epoch": 3.612661674608577, "grad_norm": 1.4717814922332764, "learning_rate": 1.961628579462374e-06, "loss": 0.5844, "step": 5307 }, { "epoch": 3.613342409802587, "grad_norm": 1.5011403560638428, "learning_rate": 1.9598408625892872e-06, "loss": 0.5828, "step": 5308 }, { "epoch": 3.6140231449965965, "grad_norm": 1.57346510887146, "learning_rate": 1.958053762110492e-06, "loss": 0.3935, "step": 5309 }, { "epoch": 3.6147038801906057, "grad_norm": 1.6527612209320068, "learning_rate": 1.9562672783883285e-06, "loss": 0.4453, "step": 5310 }, { "epoch": 3.6153846153846154, "grad_norm": 1.5530493259429932, "learning_rate": 1.9544814117850053e-06, "loss": 0.3901, "step": 5311 }, { "epoch": 3.6160653505786247, "grad_norm": 1.5679010152816772, "learning_rate": 1.9526961626626057e-06, "loss": 0.4794, "step": 5312 }, { "epoch": 3.6167460857726343, "grad_norm": 1.540840744972229, "learning_rate": 1.9509115313830946e-06, "loss": 0.4088, "step": 5313 }, { "epoch": 3.617426820966644, "grad_norm": 1.6025649309158325, "learning_rate": 1.9491275183083024e-06, "loss": 0.3228, "step": 5314 }, { "epoch": 3.6181075561606537, "grad_norm": 1.561646819114685, "learning_rate": 1.9473441237999435e-06, "loss": 0.3926, "step": 5315 }, { "epoch": 3.618788291354663, "grad_norm": 1.7331963777542114, "learning_rate": 1.945561348219599e-06, "loss": 0.2869, "step": 5316 }, { "epoch": 3.6194690265486726, "grad_norm": 1.646304726600647, "learning_rate": 1.943779191928731e-06, "loss": 0.3695, "step": 5317 }, { "epoch": 3.620149761742682, "grad_norm": 1.7323968410491943, "learning_rate": 1.9419976552886704e-06, "loss": 0.3509, "step": 5318 }, { "epoch": 3.6208304969366916, "grad_norm": 1.5875152349472046, "learning_rate": 1.9402167386606243e-06, "loss": 0.5034, "step": 5319 }, { "epoch": 3.6215112321307013, "grad_norm": 1.6639294624328613, "learning_rate": 1.9384364424056773e-06, "loss": 0.4506, "step": 5320 }, { "epoch": 3.622191967324711, "grad_norm": 1.5450042486190796, "learning_rate": 1.9366567668847845e-06, "loss": 0.3771, "step": 5321 }, { "epoch": 3.62287270251872, "grad_norm": 1.6684391498565674, "learning_rate": 1.934877712458774e-06, "loss": 0.422, "step": 5322 }, { "epoch": 3.62355343771273, "grad_norm": 1.5710184574127197, "learning_rate": 1.9330992794883537e-06, "loss": 0.3928, "step": 5323 }, { "epoch": 3.624234172906739, "grad_norm": 1.5842928886413574, "learning_rate": 1.9313214683341015e-06, "loss": 0.3991, "step": 5324 }, { "epoch": 3.624914908100749, "grad_norm": 1.635651707649231, "learning_rate": 1.9295442793564693e-06, "loss": 0.4392, "step": 5325 }, { "epoch": 3.6255956432947585, "grad_norm": 1.8026987314224243, "learning_rate": 1.9277677129157813e-06, "loss": 0.3293, "step": 5326 }, { "epoch": 3.6262763784887677, "grad_norm": 1.5308096408843994, "learning_rate": 1.9259917693722413e-06, "loss": 0.3297, "step": 5327 }, { "epoch": 3.6269571136827774, "grad_norm": 1.455229640007019, "learning_rate": 1.924216449085921e-06, "loss": 0.4711, "step": 5328 }, { "epoch": 3.627637848876787, "grad_norm": 1.5866676568984985, "learning_rate": 1.9224417524167667e-06, "loss": 0.334, "step": 5329 }, { "epoch": 3.6283185840707963, "grad_norm": 1.4772334098815918, "learning_rate": 1.9206676797246014e-06, "loss": 0.2948, "step": 5330 }, { "epoch": 3.628999319264806, "grad_norm": 1.59116530418396, "learning_rate": 1.9188942313691195e-06, "loss": 0.5045, "step": 5331 }, { "epoch": 3.6296800544588157, "grad_norm": 1.700776219367981, "learning_rate": 1.9171214077098853e-06, "loss": 0.4447, "step": 5332 }, { "epoch": 3.630360789652825, "grad_norm": 1.6798175573349, "learning_rate": 1.9153492091063435e-06, "loss": 0.4925, "step": 5333 }, { "epoch": 3.6310415248468346, "grad_norm": 1.6344925165176392, "learning_rate": 1.9135776359178094e-06, "loss": 0.4471, "step": 5334 }, { "epoch": 3.631722260040844, "grad_norm": 1.5861424207687378, "learning_rate": 1.9118066885034685e-06, "loss": 0.5051, "step": 5335 }, { "epoch": 3.6324029952348535, "grad_norm": 1.6791325807571411, "learning_rate": 1.910036367222382e-06, "loss": 0.5655, "step": 5336 }, { "epoch": 3.6330837304288632, "grad_norm": 1.7225656509399414, "learning_rate": 1.9082666724334825e-06, "loss": 0.2879, "step": 5337 }, { "epoch": 3.633764465622873, "grad_norm": 1.69704270362854, "learning_rate": 1.9064976044955796e-06, "loss": 0.306, "step": 5338 }, { "epoch": 3.634445200816882, "grad_norm": 1.6343153715133667, "learning_rate": 1.904729163767351e-06, "loss": 0.3415, "step": 5339 }, { "epoch": 3.635125936010892, "grad_norm": 1.4051470756530762, "learning_rate": 1.9029613506073468e-06, "loss": 0.5638, "step": 5340 }, { "epoch": 3.635806671204901, "grad_norm": 1.616947889328003, "learning_rate": 1.9011941653739985e-06, "loss": 0.4903, "step": 5341 }, { "epoch": 3.6364874063989108, "grad_norm": 1.5886285305023193, "learning_rate": 1.8994276084256008e-06, "loss": 0.4706, "step": 5342 }, { "epoch": 3.6371681415929205, "grad_norm": 1.5110666751861572, "learning_rate": 1.897661680120324e-06, "loss": 0.5483, "step": 5343 }, { "epoch": 3.63784887678693, "grad_norm": 1.468281626701355, "learning_rate": 1.89589638081621e-06, "loss": 0.4387, "step": 5344 }, { "epoch": 3.6385296119809394, "grad_norm": 1.651434063911438, "learning_rate": 1.8941317108711777e-06, "loss": 0.366, "step": 5345 }, { "epoch": 3.639210347174949, "grad_norm": 1.6143548488616943, "learning_rate": 1.8923676706430133e-06, "loss": 0.4217, "step": 5346 }, { "epoch": 3.6398910823689583, "grad_norm": 1.6017200946807861, "learning_rate": 1.8906042604893753e-06, "loss": 0.4018, "step": 5347 }, { "epoch": 3.640571817562968, "grad_norm": 1.5705554485321045, "learning_rate": 1.8888414807678001e-06, "loss": 0.4081, "step": 5348 }, { "epoch": 3.6412525527569777, "grad_norm": 1.465320348739624, "learning_rate": 1.8870793318356888e-06, "loss": 0.5666, "step": 5349 }, { "epoch": 3.641933287950987, "grad_norm": 1.9390804767608643, "learning_rate": 1.8853178140503214e-06, "loss": 0.3081, "step": 5350 }, { "epoch": 3.6426140231449966, "grad_norm": 1.5526063442230225, "learning_rate": 1.8835569277688442e-06, "loss": 0.3543, "step": 5351 }, { "epoch": 3.6432947583390063, "grad_norm": 1.5211488008499146, "learning_rate": 1.8817966733482807e-06, "loss": 0.4754, "step": 5352 }, { "epoch": 3.6439754935330155, "grad_norm": 1.5514118671417236, "learning_rate": 1.8800370511455218e-06, "loss": 0.3587, "step": 5353 }, { "epoch": 3.644656228727025, "grad_norm": 1.705822467803955, "learning_rate": 1.8782780615173313e-06, "loss": 0.3574, "step": 5354 }, { "epoch": 3.645336963921035, "grad_norm": 1.7081936597824097, "learning_rate": 1.876519704820348e-06, "loss": 0.4479, "step": 5355 }, { "epoch": 3.646017699115044, "grad_norm": 1.4590588808059692, "learning_rate": 1.8747619814110796e-06, "loss": 0.4996, "step": 5356 }, { "epoch": 3.646698434309054, "grad_norm": 1.5742655992507935, "learning_rate": 1.873004891645902e-06, "loss": 0.561, "step": 5357 }, { "epoch": 3.647379169503063, "grad_norm": 1.4571812152862549, "learning_rate": 1.8712484358810695e-06, "loss": 0.5455, "step": 5358 }, { "epoch": 3.6480599046970728, "grad_norm": 1.5729808807373047, "learning_rate": 1.8694926144727055e-06, "loss": 0.4934, "step": 5359 }, { "epoch": 3.6487406398910824, "grad_norm": 1.5525481700897217, "learning_rate": 1.8677374277768034e-06, "loss": 0.3848, "step": 5360 }, { "epoch": 3.649421375085092, "grad_norm": 1.6168595552444458, "learning_rate": 1.8659828761492255e-06, "loss": 0.4089, "step": 5361 }, { "epoch": 3.6501021102791014, "grad_norm": 1.5605652332305908, "learning_rate": 1.8642289599457125e-06, "loss": 0.4062, "step": 5362 }, { "epoch": 3.650782845473111, "grad_norm": 1.5721566677093506, "learning_rate": 1.8624756795218696e-06, "loss": 0.5097, "step": 5363 }, { "epoch": 3.6514635806671203, "grad_norm": 1.511357069015503, "learning_rate": 1.860723035233175e-06, "loss": 0.4585, "step": 5364 }, { "epoch": 3.65214431586113, "grad_norm": 1.5689672231674194, "learning_rate": 1.858971027434981e-06, "loss": 0.3995, "step": 5365 }, { "epoch": 3.6528250510551397, "grad_norm": 1.5373328924179077, "learning_rate": 1.8572196564825057e-06, "loss": 0.4831, "step": 5366 }, { "epoch": 3.6535057862491493, "grad_norm": 1.6023731231689453, "learning_rate": 1.855468922730843e-06, "loss": 0.3869, "step": 5367 }, { "epoch": 3.6541865214431586, "grad_norm": 1.5383864641189575, "learning_rate": 1.8537188265349532e-06, "loss": 0.3082, "step": 5368 }, { "epoch": 3.6548672566371683, "grad_norm": 1.5418579578399658, "learning_rate": 1.851969368249672e-06, "loss": 0.5471, "step": 5369 }, { "epoch": 3.6555479918311775, "grad_norm": 1.5782740116119385, "learning_rate": 1.850220548229702e-06, "loss": 0.4266, "step": 5370 }, { "epoch": 3.656228727025187, "grad_norm": 1.516654372215271, "learning_rate": 1.8484723668296157e-06, "loss": 0.391, "step": 5371 }, { "epoch": 3.656909462219197, "grad_norm": 1.7793058156967163, "learning_rate": 1.8467248244038616e-06, "loss": 0.4927, "step": 5372 }, { "epoch": 3.657590197413206, "grad_norm": 1.51936936378479, "learning_rate": 1.8449779213067532e-06, "loss": 0.2939, "step": 5373 }, { "epoch": 3.658270932607216, "grad_norm": 1.4624414443969727, "learning_rate": 1.8432316578924747e-06, "loss": 0.4315, "step": 5374 }, { "epoch": 3.658951667801225, "grad_norm": 1.4792183637619019, "learning_rate": 1.8414860345150837e-06, "loss": 0.4664, "step": 5375 }, { "epoch": 3.6596324029952347, "grad_norm": 1.5933881998062134, "learning_rate": 1.8397410515285086e-06, "loss": 0.3664, "step": 5376 }, { "epoch": 3.6603131381892444, "grad_norm": 1.432860255241394, "learning_rate": 1.8379967092865437e-06, "loss": 0.5059, "step": 5377 }, { "epoch": 3.660993873383254, "grad_norm": 1.5541726350784302, "learning_rate": 1.8362530081428543e-06, "loss": 0.5405, "step": 5378 }, { "epoch": 3.6616746085772633, "grad_norm": 1.4122061729431152, "learning_rate": 1.8345099484509799e-06, "loss": 0.5588, "step": 5379 }, { "epoch": 3.662355343771273, "grad_norm": 1.5288699865341187, "learning_rate": 1.8327675305643255e-06, "loss": 0.5125, "step": 5380 }, { "epoch": 3.6630360789652823, "grad_norm": 1.487505316734314, "learning_rate": 1.8310257548361659e-06, "loss": 0.6055, "step": 5381 }, { "epoch": 3.663716814159292, "grad_norm": 1.469752550125122, "learning_rate": 1.8292846216196508e-06, "loss": 0.4289, "step": 5382 }, { "epoch": 3.6643975493533016, "grad_norm": 1.6023657321929932, "learning_rate": 1.8275441312677928e-06, "loss": 0.3984, "step": 5383 }, { "epoch": 3.6650782845473113, "grad_norm": 1.5651829242706299, "learning_rate": 1.82580428413348e-06, "loss": 0.4991, "step": 5384 }, { "epoch": 3.6657590197413206, "grad_norm": 1.5714284181594849, "learning_rate": 1.8240650805694653e-06, "loss": 0.332, "step": 5385 }, { "epoch": 3.6664397549353303, "grad_norm": 1.5603537559509277, "learning_rate": 1.8223265209283758e-06, "loss": 0.4383, "step": 5386 }, { "epoch": 3.6671204901293395, "grad_norm": 1.5286577939987183, "learning_rate": 1.8205886055627048e-06, "loss": 0.5648, "step": 5387 }, { "epoch": 3.667801225323349, "grad_norm": 1.5555005073547363, "learning_rate": 1.8188513348248137e-06, "loss": 0.3868, "step": 5388 }, { "epoch": 3.668481960517359, "grad_norm": 1.5643128156661987, "learning_rate": 1.8171147090669383e-06, "loss": 0.4711, "step": 5389 }, { "epoch": 3.6691626957113685, "grad_norm": 1.587084412574768, "learning_rate": 1.81537872864118e-06, "loss": 0.4884, "step": 5390 }, { "epoch": 3.669843430905378, "grad_norm": 1.533158540725708, "learning_rate": 1.813643393899507e-06, "loss": 0.4857, "step": 5391 }, { "epoch": 3.6705241660993875, "grad_norm": 1.5877182483673096, "learning_rate": 1.8119087051937623e-06, "loss": 0.4503, "step": 5392 }, { "epoch": 3.6712049012933967, "grad_norm": 1.5094071626663208, "learning_rate": 1.8101746628756561e-06, "loss": 0.4593, "step": 5393 }, { "epoch": 3.6718856364874064, "grad_norm": 1.739619493484497, "learning_rate": 1.8084412672967654e-06, "loss": 0.4045, "step": 5394 }, { "epoch": 3.672566371681416, "grad_norm": 1.546623706817627, "learning_rate": 1.806708518808536e-06, "loss": 0.4683, "step": 5395 }, { "epoch": 3.6732471068754253, "grad_norm": 1.6640329360961914, "learning_rate": 1.8049764177622864e-06, "loss": 0.3859, "step": 5396 }, { "epoch": 3.673927842069435, "grad_norm": 1.6138410568237305, "learning_rate": 1.8032449645092004e-06, "loss": 0.5121, "step": 5397 }, { "epoch": 3.6746085772634443, "grad_norm": 1.4374672174453735, "learning_rate": 1.8015141594003284e-06, "loss": 0.4756, "step": 5398 }, { "epoch": 3.675289312457454, "grad_norm": 1.6418243646621704, "learning_rate": 1.799784002786597e-06, "loss": 0.3546, "step": 5399 }, { "epoch": 3.6759700476514636, "grad_norm": 1.6307452917099, "learning_rate": 1.798054495018794e-06, "loss": 0.4535, "step": 5400 }, { "epoch": 3.6766507828454733, "grad_norm": 1.6141371726989746, "learning_rate": 1.7963256364475768e-06, "loss": 0.4532, "step": 5401 }, { "epoch": 3.6773315180394825, "grad_norm": 1.6050959825515747, "learning_rate": 1.7945974274234746e-06, "loss": 0.4668, "step": 5402 }, { "epoch": 3.6780122532334922, "grad_norm": 1.541200041770935, "learning_rate": 1.7928698682968842e-06, "loss": 0.5242, "step": 5403 }, { "epoch": 3.6786929884275015, "grad_norm": 1.5034931898117065, "learning_rate": 1.791142959418068e-06, "loss": 0.2961, "step": 5404 }, { "epoch": 3.679373723621511, "grad_norm": 1.6155110597610474, "learning_rate": 1.7894167011371561e-06, "loss": 0.4678, "step": 5405 }, { "epoch": 3.680054458815521, "grad_norm": 1.5843058824539185, "learning_rate": 1.7876910938041513e-06, "loss": 0.3316, "step": 5406 }, { "epoch": 3.6807351940095305, "grad_norm": 1.6923195123672485, "learning_rate": 1.7859661377689213e-06, "loss": 0.4191, "step": 5407 }, { "epoch": 3.6814159292035398, "grad_norm": 1.5404475927352905, "learning_rate": 1.784241833381201e-06, "loss": 0.4794, "step": 5408 }, { "epoch": 3.6820966643975495, "grad_norm": 1.6098166704177856, "learning_rate": 1.7825181809905917e-06, "loss": 0.4118, "step": 5409 }, { "epoch": 3.6827773995915587, "grad_norm": 1.67472505569458, "learning_rate": 1.7807951809465712e-06, "loss": 0.5285, "step": 5410 }, { "epoch": 3.6834581347855684, "grad_norm": 1.669730544090271, "learning_rate": 1.7790728335984759e-06, "loss": 0.3465, "step": 5411 }, { "epoch": 3.684138869979578, "grad_norm": 1.6469992399215698, "learning_rate": 1.7773511392955122e-06, "loss": 0.4653, "step": 5412 }, { "epoch": 3.6848196051735873, "grad_norm": 1.4176839590072632, "learning_rate": 1.7756300983867537e-06, "loss": 0.4525, "step": 5413 }, { "epoch": 3.685500340367597, "grad_norm": 1.621830701828003, "learning_rate": 1.7739097112211457e-06, "loss": 0.3616, "step": 5414 }, { "epoch": 3.6861810755616067, "grad_norm": 1.5178197622299194, "learning_rate": 1.7721899781474966e-06, "loss": 0.3348, "step": 5415 }, { "epoch": 3.686861810755616, "grad_norm": 1.6197723150253296, "learning_rate": 1.7704708995144815e-06, "loss": 0.4978, "step": 5416 }, { "epoch": 3.6875425459496256, "grad_norm": 1.7370328903198242, "learning_rate": 1.7687524756706476e-06, "loss": 0.3942, "step": 5417 }, { "epoch": 3.6882232811436353, "grad_norm": 1.500624418258667, "learning_rate": 1.7670347069644039e-06, "loss": 0.4729, "step": 5418 }, { "epoch": 3.6889040163376445, "grad_norm": 1.6743860244750977, "learning_rate": 1.765317593744032e-06, "loss": 0.3939, "step": 5419 }, { "epoch": 3.689584751531654, "grad_norm": 1.553340196609497, "learning_rate": 1.7636011363576743e-06, "loss": 0.5338, "step": 5420 }, { "epoch": 3.6902654867256635, "grad_norm": 1.6343690156936646, "learning_rate": 1.7618853351533478e-06, "loss": 0.4616, "step": 5421 }, { "epoch": 3.690946221919673, "grad_norm": 1.6917732954025269, "learning_rate": 1.7601701904789298e-06, "loss": 0.2867, "step": 5422 }, { "epoch": 3.691626957113683, "grad_norm": 1.4016813039779663, "learning_rate": 1.7584557026821658e-06, "loss": 0.4091, "step": 5423 }, { "epoch": 3.6923076923076925, "grad_norm": 1.5655165910720825, "learning_rate": 1.7567418721106728e-06, "loss": 0.3774, "step": 5424 }, { "epoch": 3.6929884275017018, "grad_norm": 1.620959758758545, "learning_rate": 1.7550286991119292e-06, "loss": 0.4549, "step": 5425 }, { "epoch": 3.6936691626957114, "grad_norm": 1.648102045059204, "learning_rate": 1.7533161840332796e-06, "loss": 0.4048, "step": 5426 }, { "epoch": 3.6943498978897207, "grad_norm": 1.5504037141799927, "learning_rate": 1.7516043272219406e-06, "loss": 0.3646, "step": 5427 }, { "epoch": 3.6950306330837304, "grad_norm": 1.5960729122161865, "learning_rate": 1.7498931290249931e-06, "loss": 0.3721, "step": 5428 }, { "epoch": 3.69571136827774, "grad_norm": 1.6335673332214355, "learning_rate": 1.748182589789383e-06, "loss": 0.4237, "step": 5429 }, { "epoch": 3.6963921034717497, "grad_norm": 1.5515987873077393, "learning_rate": 1.74647270986192e-06, "loss": 0.5177, "step": 5430 }, { "epoch": 3.697072838665759, "grad_norm": 1.3851287364959717, "learning_rate": 1.7447634895892878e-06, "loss": 0.3735, "step": 5431 }, { "epoch": 3.6977535738597687, "grad_norm": 1.5975085496902466, "learning_rate": 1.74305492931803e-06, "loss": 0.3655, "step": 5432 }, { "epoch": 3.698434309053778, "grad_norm": 1.5002977848052979, "learning_rate": 1.7413470293945566e-06, "loss": 0.4167, "step": 5433 }, { "epoch": 3.6991150442477876, "grad_norm": 1.5238348245620728, "learning_rate": 1.7396397901651496e-06, "loss": 0.578, "step": 5434 }, { "epoch": 3.6997957794417973, "grad_norm": 1.6928985118865967, "learning_rate": 1.7379332119759484e-06, "loss": 0.3471, "step": 5435 }, { "epoch": 3.7004765146358065, "grad_norm": 1.6404438018798828, "learning_rate": 1.7362272951729665e-06, "loss": 0.3843, "step": 5436 }, { "epoch": 3.701157249829816, "grad_norm": 1.6272152662277222, "learning_rate": 1.7345220401020768e-06, "loss": 0.3948, "step": 5437 }, { "epoch": 3.701837985023826, "grad_norm": 1.4240461587905884, "learning_rate": 1.7328174471090238e-06, "loss": 0.6292, "step": 5438 }, { "epoch": 3.702518720217835, "grad_norm": 1.8053863048553467, "learning_rate": 1.7311135165394134e-06, "loss": 0.357, "step": 5439 }, { "epoch": 3.703199455411845, "grad_norm": 1.5364480018615723, "learning_rate": 1.7294102487387172e-06, "loss": 0.4808, "step": 5440 }, { "epoch": 3.7038801906058545, "grad_norm": 1.541285514831543, "learning_rate": 1.7277076440522767e-06, "loss": 0.3561, "step": 5441 }, { "epoch": 3.7045609257998637, "grad_norm": 1.5350018739700317, "learning_rate": 1.7260057028252947e-06, "loss": 0.4324, "step": 5442 }, { "epoch": 3.7052416609938734, "grad_norm": 1.450377345085144, "learning_rate": 1.7243044254028391e-06, "loss": 0.4239, "step": 5443 }, { "epoch": 3.7059223961878827, "grad_norm": 1.534784197807312, "learning_rate": 1.722603812129847e-06, "loss": 0.5196, "step": 5444 }, { "epoch": 3.7066031313818923, "grad_norm": 1.5181701183319092, "learning_rate": 1.7209038633511204e-06, "loss": 0.4546, "step": 5445 }, { "epoch": 3.707283866575902, "grad_norm": 1.6815671920776367, "learning_rate": 1.7192045794113238e-06, "loss": 0.3449, "step": 5446 }, { "epoch": 3.7079646017699117, "grad_norm": 1.6104207038879395, "learning_rate": 1.7175059606549855e-06, "loss": 0.4116, "step": 5447 }, { "epoch": 3.708645336963921, "grad_norm": 1.4440513849258423, "learning_rate": 1.715808007426506e-06, "loss": 0.4752, "step": 5448 }, { "epoch": 3.7093260721579306, "grad_norm": 1.9097981452941895, "learning_rate": 1.714110720070144e-06, "loss": 0.4135, "step": 5449 }, { "epoch": 3.71000680735194, "grad_norm": 1.566699743270874, "learning_rate": 1.7124140989300242e-06, "loss": 0.4499, "step": 5450 }, { "epoch": 3.7106875425459496, "grad_norm": 1.5881147384643555, "learning_rate": 1.7107181443501413e-06, "loss": 0.5885, "step": 5451 }, { "epoch": 3.7113682777399593, "grad_norm": 1.6246998310089111, "learning_rate": 1.7090228566743477e-06, "loss": 0.445, "step": 5452 }, { "epoch": 3.712049012933969, "grad_norm": 1.5089199542999268, "learning_rate": 1.7073282362463672e-06, "loss": 0.5939, "step": 5453 }, { "epoch": 3.712729748127978, "grad_norm": 1.6448816061019897, "learning_rate": 1.7056342834097822e-06, "loss": 0.3369, "step": 5454 }, { "epoch": 3.713410483321988, "grad_norm": 1.859992504119873, "learning_rate": 1.7039409985080463e-06, "loss": 0.3549, "step": 5455 }, { "epoch": 3.714091218515997, "grad_norm": 1.6184147596359253, "learning_rate": 1.7022483818844714e-06, "loss": 0.4846, "step": 5456 }, { "epoch": 3.714771953710007, "grad_norm": 1.635494351387024, "learning_rate": 1.7005564338822366e-06, "loss": 0.4063, "step": 5457 }, { "epoch": 3.7154526889040165, "grad_norm": 1.6172726154327393, "learning_rate": 1.6988651548443874e-06, "loss": 0.396, "step": 5458 }, { "epoch": 3.7161334240980257, "grad_norm": 1.6306716203689575, "learning_rate": 1.697174545113831e-06, "loss": 0.3929, "step": 5459 }, { "epoch": 3.7168141592920354, "grad_norm": 1.472985863685608, "learning_rate": 1.6954846050333374e-06, "loss": 0.516, "step": 5460 }, { "epoch": 3.717494894486045, "grad_norm": 1.591349720954895, "learning_rate": 1.693795334945545e-06, "loss": 0.4317, "step": 5461 }, { "epoch": 3.7181756296800543, "grad_norm": 1.6317367553710938, "learning_rate": 1.692106735192956e-06, "loss": 0.3477, "step": 5462 }, { "epoch": 3.718856364874064, "grad_norm": 1.6328487396240234, "learning_rate": 1.6904188061179339e-06, "loss": 0.4704, "step": 5463 }, { "epoch": 3.7195371000680737, "grad_norm": 1.590378761291504, "learning_rate": 1.6887315480627053e-06, "loss": 0.3949, "step": 5464 }, { "epoch": 3.720217835262083, "grad_norm": 1.6031780242919922, "learning_rate": 1.6870449613693663e-06, "loss": 0.4279, "step": 5465 }, { "epoch": 3.7208985704560926, "grad_norm": 1.4857969284057617, "learning_rate": 1.685359046379872e-06, "loss": 0.5274, "step": 5466 }, { "epoch": 3.721579305650102, "grad_norm": 1.5338125228881836, "learning_rate": 1.6836738034360405e-06, "loss": 0.3376, "step": 5467 }, { "epoch": 3.7222600408441116, "grad_norm": 1.7939566373825073, "learning_rate": 1.6819892328795601e-06, "loss": 0.3842, "step": 5468 }, { "epoch": 3.7229407760381212, "grad_norm": 1.585963487625122, "learning_rate": 1.6803053350519771e-06, "loss": 0.4639, "step": 5469 }, { "epoch": 3.723621511232131, "grad_norm": 1.5962284803390503, "learning_rate": 1.6786221102946998e-06, "loss": 0.4452, "step": 5470 }, { "epoch": 3.72430224642614, "grad_norm": 1.3949700593948364, "learning_rate": 1.676939558949006e-06, "loss": 0.5636, "step": 5471 }, { "epoch": 3.72498298162015, "grad_norm": 1.6388733386993408, "learning_rate": 1.675257681356036e-06, "loss": 0.387, "step": 5472 }, { "epoch": 3.725663716814159, "grad_norm": 1.5810661315917969, "learning_rate": 1.6735764778567891e-06, "loss": 0.5263, "step": 5473 }, { "epoch": 3.7263444520081688, "grad_norm": 1.6961897611618042, "learning_rate": 1.6718959487921293e-06, "loss": 0.3537, "step": 5474 }, { "epoch": 3.7270251872021785, "grad_norm": 1.562825083732605, "learning_rate": 1.6702160945027884e-06, "loss": 0.5782, "step": 5475 }, { "epoch": 3.727705922396188, "grad_norm": 1.5340776443481445, "learning_rate": 1.6685369153293556e-06, "loss": 0.4561, "step": 5476 }, { "epoch": 3.7283866575901974, "grad_norm": 1.6003155708312988, "learning_rate": 1.666858411612286e-06, "loss": 0.4525, "step": 5477 }, { "epoch": 3.729067392784207, "grad_norm": 1.5920538902282715, "learning_rate": 1.6651805836918943e-06, "loss": 0.368, "step": 5478 }, { "epoch": 3.7297481279782163, "grad_norm": 1.509690761566162, "learning_rate": 1.6635034319083675e-06, "loss": 0.4991, "step": 5479 }, { "epoch": 3.730428863172226, "grad_norm": 1.5102062225341797, "learning_rate": 1.6618269566017465e-06, "loss": 0.442, "step": 5480 }, { "epoch": 3.7311095983662357, "grad_norm": 1.6738406419754028, "learning_rate": 1.6601511581119374e-06, "loss": 0.348, "step": 5481 }, { "epoch": 3.731790333560245, "grad_norm": 1.7300063371658325, "learning_rate": 1.6584760367787072e-06, "loss": 0.4376, "step": 5482 }, { "epoch": 3.7324710687542546, "grad_norm": 1.5805152654647827, "learning_rate": 1.6568015929416919e-06, "loss": 0.359, "step": 5483 }, { "epoch": 3.7331518039482643, "grad_norm": 1.4932985305786133, "learning_rate": 1.6551278269403842e-06, "loss": 0.5785, "step": 5484 }, { "epoch": 3.7338325391422735, "grad_norm": 1.6791325807571411, "learning_rate": 1.6534547391141398e-06, "loss": 0.33, "step": 5485 }, { "epoch": 3.734513274336283, "grad_norm": 1.6141570806503296, "learning_rate": 1.6517823298021819e-06, "loss": 0.2977, "step": 5486 }, { "epoch": 3.735194009530293, "grad_norm": 1.6461677551269531, "learning_rate": 1.650110599343589e-06, "loss": 0.4026, "step": 5487 }, { "epoch": 3.735874744724302, "grad_norm": 1.6789249181747437, "learning_rate": 1.6484395480773085e-06, "loss": 0.4074, "step": 5488 }, { "epoch": 3.736555479918312, "grad_norm": 1.529081106185913, "learning_rate": 1.6467691763421446e-06, "loss": 0.3804, "step": 5489 }, { "epoch": 3.737236215112321, "grad_norm": 1.673166275024414, "learning_rate": 1.6450994844767688e-06, "loss": 0.3502, "step": 5490 }, { "epoch": 3.7379169503063308, "grad_norm": 1.5690516233444214, "learning_rate": 1.6434304728197109e-06, "loss": 0.4229, "step": 5491 }, { "epoch": 3.7385976855003404, "grad_norm": 1.5899382829666138, "learning_rate": 1.6417621417093633e-06, "loss": 0.4263, "step": 5492 }, { "epoch": 3.73927842069435, "grad_norm": 1.7250070571899414, "learning_rate": 1.6400944914839834e-06, "loss": 0.474, "step": 5493 }, { "epoch": 3.7399591558883594, "grad_norm": 1.6934716701507568, "learning_rate": 1.638427522481687e-06, "loss": 0.3994, "step": 5494 }, { "epoch": 3.740639891082369, "grad_norm": 1.5858591794967651, "learning_rate": 1.6367612350404522e-06, "loss": 0.3535, "step": 5495 }, { "epoch": 3.7413206262763783, "grad_norm": 1.6174994707107544, "learning_rate": 1.6350956294981201e-06, "loss": 0.4668, "step": 5496 }, { "epoch": 3.742001361470388, "grad_norm": 1.7076221704483032, "learning_rate": 1.633430706192397e-06, "loss": 0.5536, "step": 5497 }, { "epoch": 3.7426820966643977, "grad_norm": 1.543128252029419, "learning_rate": 1.631766465460844e-06, "loss": 0.4738, "step": 5498 }, { "epoch": 3.7433628318584073, "grad_norm": 1.6928832530975342, "learning_rate": 1.6301029076408858e-06, "loss": 0.3947, "step": 5499 }, { "epoch": 3.7440435670524166, "grad_norm": 1.5488662719726562, "learning_rate": 1.628440033069813e-06, "loss": 0.4416, "step": 5500 }, { "epoch": 3.7447243022464263, "grad_norm": 1.5392470359802246, "learning_rate": 1.6267778420847735e-06, "loss": 0.4293, "step": 5501 }, { "epoch": 3.7454050374404355, "grad_norm": 1.5044403076171875, "learning_rate": 1.6251163350227756e-06, "loss": 0.3642, "step": 5502 }, { "epoch": 3.746085772634445, "grad_norm": 1.729562759399414, "learning_rate": 1.623455512220694e-06, "loss": 0.3738, "step": 5503 }, { "epoch": 3.746766507828455, "grad_norm": 1.6119188070297241, "learning_rate": 1.6217953740152587e-06, "loss": 0.4298, "step": 5504 }, { "epoch": 3.747447243022464, "grad_norm": 1.6738125085830688, "learning_rate": 1.6201359207430667e-06, "loss": 0.5236, "step": 5505 }, { "epoch": 3.748127978216474, "grad_norm": 1.5603406429290771, "learning_rate": 1.6184771527405706e-06, "loss": 0.3893, "step": 5506 }, { "epoch": 3.748808713410483, "grad_norm": 1.650310754776001, "learning_rate": 1.6168190703440894e-06, "loss": 0.3972, "step": 5507 }, { "epoch": 3.7494894486044927, "grad_norm": 1.5955867767333984, "learning_rate": 1.615161673889799e-06, "loss": 0.3648, "step": 5508 }, { "epoch": 3.7501701837985024, "grad_norm": 1.5996532440185547, "learning_rate": 1.6135049637137362e-06, "loss": 0.4091, "step": 5509 }, { "epoch": 3.750850918992512, "grad_norm": 1.6604400873184204, "learning_rate": 1.6118489401518034e-06, "loss": 0.3996, "step": 5510 }, { "epoch": 3.7515316541865213, "grad_norm": 1.6128894090652466, "learning_rate": 1.6101936035397587e-06, "loss": 0.2908, "step": 5511 }, { "epoch": 3.752212389380531, "grad_norm": 1.6651396751403809, "learning_rate": 1.6085389542132206e-06, "loss": 0.549, "step": 5512 }, { "epoch": 3.7528931245745403, "grad_norm": 1.5954418182373047, "learning_rate": 1.6068849925076724e-06, "loss": 0.395, "step": 5513 }, { "epoch": 3.75357385976855, "grad_norm": 1.5532026290893555, "learning_rate": 1.6052317187584576e-06, "loss": 0.5024, "step": 5514 }, { "epoch": 3.7542545949625596, "grad_norm": 1.6567808389663696, "learning_rate": 1.6035791333007772e-06, "loss": 0.4387, "step": 5515 }, { "epoch": 3.7549353301565693, "grad_norm": 1.7662502527236938, "learning_rate": 1.601927236469692e-06, "loss": 0.4075, "step": 5516 }, { "epoch": 3.7556160653505786, "grad_norm": 1.7224570512771606, "learning_rate": 1.6002760286001284e-06, "loss": 0.4337, "step": 5517 }, { "epoch": 3.7562968005445883, "grad_norm": 1.6120833158493042, "learning_rate": 1.5986255100268683e-06, "loss": 0.5734, "step": 5518 }, { "epoch": 3.7569775357385975, "grad_norm": 1.5448640584945679, "learning_rate": 1.5969756810845543e-06, "loss": 0.36, "step": 5519 }, { "epoch": 3.757658270932607, "grad_norm": 1.7849633693695068, "learning_rate": 1.595326542107693e-06, "loss": 0.518, "step": 5520 }, { "epoch": 3.758339006126617, "grad_norm": 1.680207371711731, "learning_rate": 1.593678093430645e-06, "loss": 0.2532, "step": 5521 }, { "epoch": 3.7590197413206266, "grad_norm": 1.6291921138763428, "learning_rate": 1.5920303353876383e-06, "loss": 0.4422, "step": 5522 }, { "epoch": 3.759700476514636, "grad_norm": 1.6372250318527222, "learning_rate": 1.5903832683127535e-06, "loss": 0.3085, "step": 5523 }, { "epoch": 3.7603812117086455, "grad_norm": 1.4696451425552368, "learning_rate": 1.588736892539937e-06, "loss": 0.4072, "step": 5524 }, { "epoch": 3.7610619469026547, "grad_norm": 1.4574381113052368, "learning_rate": 1.5870912084029921e-06, "loss": 0.3766, "step": 5525 }, { "epoch": 3.7617426820966644, "grad_norm": 1.413285732269287, "learning_rate": 1.5854462162355804e-06, "loss": 0.4473, "step": 5526 }, { "epoch": 3.762423417290674, "grad_norm": 1.5934125185012817, "learning_rate": 1.5838019163712282e-06, "loss": 0.4767, "step": 5527 }, { "epoch": 3.7631041524846833, "grad_norm": 1.3967703580856323, "learning_rate": 1.5821583091433168e-06, "loss": 0.387, "step": 5528 }, { "epoch": 3.763784887678693, "grad_norm": 1.5702637434005737, "learning_rate": 1.5805153948850866e-06, "loss": 0.5138, "step": 5529 }, { "epoch": 3.7644656228727023, "grad_norm": 1.5573182106018066, "learning_rate": 1.578873173929642e-06, "loss": 0.5963, "step": 5530 }, { "epoch": 3.765146358066712, "grad_norm": 1.598612666130066, "learning_rate": 1.5772316466099458e-06, "loss": 0.3938, "step": 5531 }, { "epoch": 3.7658270932607216, "grad_norm": 1.4772127866744995, "learning_rate": 1.575590813258817e-06, "loss": 0.5772, "step": 5532 }, { "epoch": 3.7665078284547313, "grad_norm": 1.595928430557251, "learning_rate": 1.573950674208934e-06, "loss": 0.5286, "step": 5533 }, { "epoch": 3.7671885636487406, "grad_norm": 1.5329411029815674, "learning_rate": 1.5723112297928395e-06, "loss": 0.4996, "step": 5534 }, { "epoch": 3.7678692988427502, "grad_norm": 1.647096037864685, "learning_rate": 1.5706724803429297e-06, "loss": 0.3991, "step": 5535 }, { "epoch": 3.7685500340367595, "grad_norm": 1.5957965850830078, "learning_rate": 1.5690344261914615e-06, "loss": 0.3258, "step": 5536 }, { "epoch": 3.769230769230769, "grad_norm": 1.5883785486221313, "learning_rate": 1.5673970676705541e-06, "loss": 0.3783, "step": 5537 }, { "epoch": 3.769911504424779, "grad_norm": 1.6136754751205444, "learning_rate": 1.5657604051121816e-06, "loss": 0.4411, "step": 5538 }, { "epoch": 3.7705922396187885, "grad_norm": 1.5248966217041016, "learning_rate": 1.564124438848177e-06, "loss": 0.5089, "step": 5539 }, { "epoch": 3.7712729748127978, "grad_norm": 1.4981645345687866, "learning_rate": 1.5624891692102345e-06, "loss": 0.6376, "step": 5540 }, { "epoch": 3.7719537100068075, "grad_norm": 1.4020355939865112, "learning_rate": 1.5608545965299083e-06, "loss": 0.3571, "step": 5541 }, { "epoch": 3.7726344452008167, "grad_norm": 1.571535587310791, "learning_rate": 1.5592207211386073e-06, "loss": 0.4697, "step": 5542 }, { "epoch": 3.7733151803948264, "grad_norm": 1.6369205713272095, "learning_rate": 1.5575875433675991e-06, "loss": 0.4677, "step": 5543 }, { "epoch": 3.773995915588836, "grad_norm": 1.4389573335647583, "learning_rate": 1.5559550635480152e-06, "loss": 0.5025, "step": 5544 }, { "epoch": 3.7746766507828453, "grad_norm": 1.5282232761383057, "learning_rate": 1.55432328201084e-06, "loss": 0.4652, "step": 5545 }, { "epoch": 3.775357385976855, "grad_norm": 1.625497817993164, "learning_rate": 1.5526921990869187e-06, "loss": 0.4359, "step": 5546 }, { "epoch": 3.7760381211708647, "grad_norm": 1.631852626800537, "learning_rate": 1.5510618151069505e-06, "loss": 0.2521, "step": 5547 }, { "epoch": 3.776718856364874, "grad_norm": 1.479536771774292, "learning_rate": 1.5494321304015042e-06, "loss": 0.3538, "step": 5548 }, { "epoch": 3.7773995915588836, "grad_norm": 1.580919861793518, "learning_rate": 1.5478031453009956e-06, "loss": 0.518, "step": 5549 }, { "epoch": 3.7780803267528933, "grad_norm": 1.639326572418213, "learning_rate": 1.5461748601357008e-06, "loss": 0.3337, "step": 5550 }, { "epoch": 3.7787610619469025, "grad_norm": 1.762918472290039, "learning_rate": 1.5445472752357594e-06, "loss": 0.3848, "step": 5551 }, { "epoch": 3.779441797140912, "grad_norm": 1.6451756954193115, "learning_rate": 1.5429203909311636e-06, "loss": 0.4531, "step": 5552 }, { "epoch": 3.7801225323349215, "grad_norm": 1.5602176189422607, "learning_rate": 1.5412942075517651e-06, "loss": 0.3657, "step": 5553 }, { "epoch": 3.780803267528931, "grad_norm": 1.6820963621139526, "learning_rate": 1.5396687254272724e-06, "loss": 0.4744, "step": 5554 }, { "epoch": 3.781484002722941, "grad_norm": 1.6592633724212646, "learning_rate": 1.5380439448872559e-06, "loss": 0.498, "step": 5555 }, { "epoch": 3.7821647379169505, "grad_norm": 1.699789047241211, "learning_rate": 1.5364198662611374e-06, "loss": 0.3435, "step": 5556 }, { "epoch": 3.7828454731109598, "grad_norm": 1.4488508701324463, "learning_rate": 1.5347964898782042e-06, "loss": 0.4636, "step": 5557 }, { "epoch": 3.7835262083049694, "grad_norm": 1.395763874053955, "learning_rate": 1.5331738160675929e-06, "loss": 0.563, "step": 5558 }, { "epoch": 3.7842069434989787, "grad_norm": 1.621833324432373, "learning_rate": 1.531551845158305e-06, "loss": 0.3709, "step": 5559 }, { "epoch": 3.7848876786929884, "grad_norm": 1.4959981441497803, "learning_rate": 1.5299305774791951e-06, "loss": 0.6154, "step": 5560 }, { "epoch": 3.785568413886998, "grad_norm": 1.4572869539260864, "learning_rate": 1.5283100133589745e-06, "loss": 0.517, "step": 5561 }, { "epoch": 3.7862491490810077, "grad_norm": 1.5228334665298462, "learning_rate": 1.5266901531262163e-06, "loss": 0.5187, "step": 5562 }, { "epoch": 3.786929884275017, "grad_norm": 1.7664905786514282, "learning_rate": 1.5250709971093468e-06, "loss": 0.3271, "step": 5563 }, { "epoch": 3.7876106194690267, "grad_norm": 1.6809865236282349, "learning_rate": 1.52345254563665e-06, "loss": 0.3669, "step": 5564 }, { "epoch": 3.788291354663036, "grad_norm": 1.7525345087051392, "learning_rate": 1.5218347990362692e-06, "loss": 0.4278, "step": 5565 }, { "epoch": 3.7889720898570456, "grad_norm": 1.4279078245162964, "learning_rate": 1.5202177576362054e-06, "loss": 0.6145, "step": 5566 }, { "epoch": 3.7896528250510553, "grad_norm": 1.692140817642212, "learning_rate": 1.5186014217643125e-06, "loss": 0.3636, "step": 5567 }, { "epoch": 3.7903335602450645, "grad_norm": 1.5202587842941284, "learning_rate": 1.5169857917483026e-06, "loss": 0.3132, "step": 5568 }, { "epoch": 3.791014295439074, "grad_norm": 1.5321459770202637, "learning_rate": 1.5153708679157498e-06, "loss": 0.5152, "step": 5569 }, { "epoch": 3.791695030633084, "grad_norm": 1.5492362976074219, "learning_rate": 1.513756650594078e-06, "loss": 0.5216, "step": 5570 }, { "epoch": 3.792375765827093, "grad_norm": 1.6893513202667236, "learning_rate": 1.5121431401105697e-06, "loss": 0.3473, "step": 5571 }, { "epoch": 3.793056501021103, "grad_norm": 1.57593834400177, "learning_rate": 1.5105303367923685e-06, "loss": 0.5341, "step": 5572 }, { "epoch": 3.7937372362151125, "grad_norm": 1.4128226041793823, "learning_rate": 1.5089182409664688e-06, "loss": 0.4211, "step": 5573 }, { "epoch": 3.7944179714091217, "grad_norm": 1.596916675567627, "learning_rate": 1.5073068529597263e-06, "loss": 0.4798, "step": 5574 }, { "epoch": 3.7950987066031314, "grad_norm": 1.4840905666351318, "learning_rate": 1.505696173098848e-06, "loss": 0.6037, "step": 5575 }, { "epoch": 3.7957794417971407, "grad_norm": 1.6844141483306885, "learning_rate": 1.5040862017104035e-06, "loss": 0.3862, "step": 5576 }, { "epoch": 3.7964601769911503, "grad_norm": 1.542459487915039, "learning_rate": 1.502476939120815e-06, "loss": 0.5178, "step": 5577 }, { "epoch": 3.79714091218516, "grad_norm": 1.4620155096054077, "learning_rate": 1.5008683856563583e-06, "loss": 0.2799, "step": 5578 }, { "epoch": 3.7978216473791697, "grad_norm": 1.5536688566207886, "learning_rate": 1.4992605416431732e-06, "loss": 0.4369, "step": 5579 }, { "epoch": 3.798502382573179, "grad_norm": 1.6659634113311768, "learning_rate": 1.4976534074072485e-06, "loss": 0.3991, "step": 5580 }, { "epoch": 3.7991831177671886, "grad_norm": 1.7587738037109375, "learning_rate": 1.4960469832744313e-06, "loss": 0.3064, "step": 5581 }, { "epoch": 3.799863852961198, "grad_norm": 1.5287318229675293, "learning_rate": 1.4944412695704264e-06, "loss": 0.3522, "step": 5582 }, { "epoch": 3.8005445881552076, "grad_norm": 1.6324338912963867, "learning_rate": 1.492836266620794e-06, "loss": 0.3745, "step": 5583 }, { "epoch": 3.8012253233492173, "grad_norm": 1.5814194679260254, "learning_rate": 1.491231974750949e-06, "loss": 0.3881, "step": 5584 }, { "epoch": 3.801906058543227, "grad_norm": 1.5091065168380737, "learning_rate": 1.4896283942861605e-06, "loss": 0.4238, "step": 5585 }, { "epoch": 3.802586793737236, "grad_norm": 1.5589215755462646, "learning_rate": 1.4880255255515592e-06, "loss": 0.3818, "step": 5586 }, { "epoch": 3.803267528931246, "grad_norm": 1.5698521137237549, "learning_rate": 1.4864233688721252e-06, "loss": 0.3006, "step": 5587 }, { "epoch": 3.803948264125255, "grad_norm": 1.7263320684432983, "learning_rate": 1.4848219245726964e-06, "loss": 0.3514, "step": 5588 }, { "epoch": 3.804628999319265, "grad_norm": 1.5836312770843506, "learning_rate": 1.4832211929779695e-06, "loss": 0.4602, "step": 5589 }, { "epoch": 3.8053097345132745, "grad_norm": 1.7521358728408813, "learning_rate": 1.4816211744124903e-06, "loss": 0.41, "step": 5590 }, { "epoch": 3.8059904697072837, "grad_norm": 1.6025662422180176, "learning_rate": 1.4800218692006674e-06, "loss": 0.5448, "step": 5591 }, { "epoch": 3.8066712049012934, "grad_norm": 1.4388691186904907, "learning_rate": 1.4784232776667574e-06, "loss": 0.3081, "step": 5592 }, { "epoch": 3.807351940095303, "grad_norm": 1.6116608381271362, "learning_rate": 1.4768254001348792e-06, "loss": 0.3472, "step": 5593 }, { "epoch": 3.8080326752893123, "grad_norm": 1.509874939918518, "learning_rate": 1.4752282369290016e-06, "loss": 0.482, "step": 5594 }, { "epoch": 3.808713410483322, "grad_norm": 1.55414879322052, "learning_rate": 1.4736317883729495e-06, "loss": 0.4683, "step": 5595 }, { "epoch": 3.8093941456773317, "grad_norm": 1.5079766511917114, "learning_rate": 1.4720360547904066e-06, "loss": 0.3954, "step": 5596 }, { "epoch": 3.810074880871341, "grad_norm": 1.6085968017578125, "learning_rate": 1.4704410365049076e-06, "loss": 0.4577, "step": 5597 }, { "epoch": 3.8107556160653506, "grad_norm": 1.5014857053756714, "learning_rate": 1.468846733839842e-06, "loss": 0.5327, "step": 5598 }, { "epoch": 3.81143635125936, "grad_norm": 1.6141489744186401, "learning_rate": 1.467253147118457e-06, "loss": 0.6306, "step": 5599 }, { "epoch": 3.8121170864533696, "grad_norm": 1.5266824960708618, "learning_rate": 1.4656602766638556e-06, "loss": 0.52, "step": 5600 }, { "epoch": 3.8127978216473792, "grad_norm": 1.56050443649292, "learning_rate": 1.4640681227989906e-06, "loss": 0.598, "step": 5601 }, { "epoch": 3.813478556841389, "grad_norm": 1.3995331525802612, "learning_rate": 1.4624766858466715e-06, "loss": 0.6103, "step": 5602 }, { "epoch": 3.814159292035398, "grad_norm": 1.31912100315094, "learning_rate": 1.4608859661295672e-06, "loss": 0.647, "step": 5603 }, { "epoch": 3.814840027229408, "grad_norm": 1.5232133865356445, "learning_rate": 1.4592959639701937e-06, "loss": 0.4418, "step": 5604 }, { "epoch": 3.815520762423417, "grad_norm": 1.599210262298584, "learning_rate": 1.4577066796909246e-06, "loss": 0.3949, "step": 5605 }, { "epoch": 3.8162014976174268, "grad_norm": 1.5852444171905518, "learning_rate": 1.4561181136139912e-06, "loss": 0.451, "step": 5606 }, { "epoch": 3.8168822328114365, "grad_norm": 1.5398645401000977, "learning_rate": 1.4545302660614752e-06, "loss": 0.3714, "step": 5607 }, { "epoch": 3.817562968005446, "grad_norm": 1.533097267150879, "learning_rate": 1.4529431373553115e-06, "loss": 0.3952, "step": 5608 }, { "epoch": 3.8182437031994554, "grad_norm": 1.566859483718872, "learning_rate": 1.4513567278172934e-06, "loss": 0.4755, "step": 5609 }, { "epoch": 3.818924438393465, "grad_norm": 1.4659918546676636, "learning_rate": 1.4497710377690682e-06, "loss": 0.4301, "step": 5610 }, { "epoch": 3.8196051735874743, "grad_norm": 1.4557323455810547, "learning_rate": 1.4481860675321335e-06, "loss": 0.5519, "step": 5611 }, { "epoch": 3.820285908781484, "grad_norm": 1.4334800243377686, "learning_rate": 1.4466018174278422e-06, "loss": 0.3848, "step": 5612 }, { "epoch": 3.8209666439754937, "grad_norm": 1.5230976343154907, "learning_rate": 1.445018287777405e-06, "loss": 0.5109, "step": 5613 }, { "epoch": 3.821647379169503, "grad_norm": 1.6216410398483276, "learning_rate": 1.4434354789018817e-06, "loss": 0.417, "step": 5614 }, { "epoch": 3.8223281143635126, "grad_norm": 1.5029720067977905, "learning_rate": 1.4418533911221882e-06, "loss": 0.4638, "step": 5615 }, { "epoch": 3.823008849557522, "grad_norm": 1.535821795463562, "learning_rate": 1.4402720247590908e-06, "loss": 0.377, "step": 5616 }, { "epoch": 3.8236895847515315, "grad_norm": 1.627465844154358, "learning_rate": 1.4386913801332185e-06, "loss": 0.4444, "step": 5617 }, { "epoch": 3.824370319945541, "grad_norm": 1.706484317779541, "learning_rate": 1.4371114575650452e-06, "loss": 0.337, "step": 5618 }, { "epoch": 3.825051055139551, "grad_norm": 1.55258309841156, "learning_rate": 1.4355322573748992e-06, "loss": 0.4084, "step": 5619 }, { "epoch": 3.82573179033356, "grad_norm": 1.8030462265014648, "learning_rate": 1.4339537798829684e-06, "loss": 0.3322, "step": 5620 }, { "epoch": 3.82641252552757, "grad_norm": 1.6051281690597534, "learning_rate": 1.4323760254092878e-06, "loss": 0.3168, "step": 5621 }, { "epoch": 3.827093260721579, "grad_norm": 1.4251703023910522, "learning_rate": 1.4307989942737482e-06, "loss": 0.4523, "step": 5622 }, { "epoch": 3.8277739959155888, "grad_norm": 1.6924681663513184, "learning_rate": 1.429222686796093e-06, "loss": 0.3354, "step": 5623 }, { "epoch": 3.8284547311095984, "grad_norm": 1.753313422203064, "learning_rate": 1.4276471032959215e-06, "loss": 0.3168, "step": 5624 }, { "epoch": 3.829135466303608, "grad_norm": 1.5137100219726562, "learning_rate": 1.4260722440926822e-06, "loss": 0.3845, "step": 5625 }, { "epoch": 3.8298162014976174, "grad_norm": 1.5384228229522705, "learning_rate": 1.4244981095056798e-06, "loss": 0.4501, "step": 5626 }, { "epoch": 3.830496936691627, "grad_norm": 1.5329842567443848, "learning_rate": 1.4229246998540736e-06, "loss": 0.4567, "step": 5627 }, { "epoch": 3.8311776718856363, "grad_norm": 1.6774678230285645, "learning_rate": 1.4213520154568705e-06, "loss": 0.4062, "step": 5628 }, { "epoch": 3.831858407079646, "grad_norm": 1.5309982299804688, "learning_rate": 1.419780056632934e-06, "loss": 0.6227, "step": 5629 }, { "epoch": 3.8325391422736557, "grad_norm": 1.6157892942428589, "learning_rate": 1.4182088237009788e-06, "loss": 0.4691, "step": 5630 }, { "epoch": 3.8332198774676653, "grad_norm": 1.769173502922058, "learning_rate": 1.4166383169795756e-06, "loss": 0.3667, "step": 5631 }, { "epoch": 3.8339006126616746, "grad_norm": 1.51475989818573, "learning_rate": 1.4150685367871448e-06, "loss": 0.4718, "step": 5632 }, { "epoch": 3.8345813478556843, "grad_norm": 1.5462909936904907, "learning_rate": 1.4134994834419585e-06, "loss": 0.4847, "step": 5633 }, { "epoch": 3.8352620830496935, "grad_norm": 1.5074741840362549, "learning_rate": 1.4119311572621447e-06, "loss": 0.5247, "step": 5634 }, { "epoch": 3.835942818243703, "grad_norm": 1.6866295337677002, "learning_rate": 1.4103635585656844e-06, "loss": 0.4441, "step": 5635 }, { "epoch": 3.836623553437713, "grad_norm": 1.6955863237380981, "learning_rate": 1.4087966876704079e-06, "loss": 0.3088, "step": 5636 }, { "epoch": 3.837304288631722, "grad_norm": 1.6607882976531982, "learning_rate": 1.4072305448939976e-06, "loss": 0.339, "step": 5637 }, { "epoch": 3.837985023825732, "grad_norm": 1.6893432140350342, "learning_rate": 1.405665130553993e-06, "loss": 0.2667, "step": 5638 }, { "epoch": 3.838665759019741, "grad_norm": 1.6514333486557007, "learning_rate": 1.4041004449677815e-06, "loss": 0.4082, "step": 5639 }, { "epoch": 3.8393464942137507, "grad_norm": 1.6071182489395142, "learning_rate": 1.4025364884526022e-06, "loss": 0.4086, "step": 5640 }, { "epoch": 3.8400272294077604, "grad_norm": 1.458848237991333, "learning_rate": 1.4009732613255521e-06, "loss": 0.5861, "step": 5641 }, { "epoch": 3.84070796460177, "grad_norm": 1.5370357036590576, "learning_rate": 1.3994107639035725e-06, "loss": 0.4926, "step": 5642 }, { "epoch": 3.8413886997957793, "grad_norm": 1.5110366344451904, "learning_rate": 1.3978489965034652e-06, "loss": 0.4723, "step": 5643 }, { "epoch": 3.842069434989789, "grad_norm": 1.4854284524917603, "learning_rate": 1.3962879594418754e-06, "loss": 0.3701, "step": 5644 }, { "epoch": 3.8427501701837983, "grad_norm": 1.4802695512771606, "learning_rate": 1.3947276530353082e-06, "loss": 0.5025, "step": 5645 }, { "epoch": 3.843430905377808, "grad_norm": 1.5341248512268066, "learning_rate": 1.3931680776001144e-06, "loss": 0.3116, "step": 5646 }, { "epoch": 3.8441116405718176, "grad_norm": 1.7470875978469849, "learning_rate": 1.3916092334524984e-06, "loss": 0.3963, "step": 5647 }, { "epoch": 3.8447923757658273, "grad_norm": 1.6516584157943726, "learning_rate": 1.3900511209085188e-06, "loss": 0.5417, "step": 5648 }, { "epoch": 3.8454731109598366, "grad_norm": 1.6607087850570679, "learning_rate": 1.3884937402840837e-06, "loss": 0.4344, "step": 5649 }, { "epoch": 3.8461538461538463, "grad_norm": 1.7610814571380615, "learning_rate": 1.3869370918949504e-06, "loss": 0.5179, "step": 5650 }, { "epoch": 3.8468345813478555, "grad_norm": 1.6189712285995483, "learning_rate": 1.3853811760567326e-06, "loss": 0.4187, "step": 5651 }, { "epoch": 3.847515316541865, "grad_norm": 1.590910792350769, "learning_rate": 1.383825993084895e-06, "loss": 0.5351, "step": 5652 }, { "epoch": 3.848196051735875, "grad_norm": 1.4932910203933716, "learning_rate": 1.3822715432947498e-06, "loss": 0.284, "step": 5653 }, { "epoch": 3.8488767869298846, "grad_norm": 1.5861506462097168, "learning_rate": 1.3807178270014615e-06, "loss": 0.5312, "step": 5654 }, { "epoch": 3.849557522123894, "grad_norm": 1.5379533767700195, "learning_rate": 1.3791648445200507e-06, "loss": 0.3321, "step": 5655 }, { "epoch": 3.8502382573179035, "grad_norm": 1.6468498706817627, "learning_rate": 1.377612596165383e-06, "loss": 0.4681, "step": 5656 }, { "epoch": 3.8509189925119127, "grad_norm": 1.4742375612258911, "learning_rate": 1.3760610822521769e-06, "loss": 0.4791, "step": 5657 }, { "epoch": 3.8515997277059224, "grad_norm": 1.4756135940551758, "learning_rate": 1.3745103030950057e-06, "loss": 0.4835, "step": 5658 }, { "epoch": 3.852280462899932, "grad_norm": 1.6021686792373657, "learning_rate": 1.3729602590082886e-06, "loss": 0.4767, "step": 5659 }, { "epoch": 3.8529611980939413, "grad_norm": 1.823219656944275, "learning_rate": 1.3714109503063e-06, "loss": 0.4884, "step": 5660 }, { "epoch": 3.853641933287951, "grad_norm": 1.5319708585739136, "learning_rate": 1.3698623773031611e-06, "loss": 0.4479, "step": 5661 }, { "epoch": 3.8543226684819603, "grad_norm": 1.6627256870269775, "learning_rate": 1.368314540312849e-06, "loss": 0.3751, "step": 5662 }, { "epoch": 3.85500340367597, "grad_norm": 1.4046112298965454, "learning_rate": 1.366767439649187e-06, "loss": 0.4112, "step": 5663 }, { "epoch": 3.8556841388699796, "grad_norm": 1.7113900184631348, "learning_rate": 1.365221075625849e-06, "loss": 0.2203, "step": 5664 }, { "epoch": 3.8563648740639893, "grad_norm": 1.6689825057983398, "learning_rate": 1.3636754485563647e-06, "loss": 0.3895, "step": 5665 }, { "epoch": 3.8570456092579986, "grad_norm": 1.6915470361709595, "learning_rate": 1.3621305587541096e-06, "loss": 0.4383, "step": 5666 }, { "epoch": 3.8577263444520082, "grad_norm": 1.59848153591156, "learning_rate": 1.3605864065323094e-06, "loss": 0.3587, "step": 5667 }, { "epoch": 3.8584070796460175, "grad_norm": 1.582568645477295, "learning_rate": 1.3590429922040438e-06, "loss": 0.5895, "step": 5668 }, { "epoch": 3.859087814840027, "grad_norm": 1.6065031290054321, "learning_rate": 1.357500316082242e-06, "loss": 0.327, "step": 5669 }, { "epoch": 3.859768550034037, "grad_norm": 1.6372560262680054, "learning_rate": 1.355958378479682e-06, "loss": 0.374, "step": 5670 }, { "epoch": 3.8604492852280465, "grad_norm": 1.464050054550171, "learning_rate": 1.35441717970899e-06, "loss": 0.5087, "step": 5671 }, { "epoch": 3.8611300204220558, "grad_norm": 1.512752652168274, "learning_rate": 1.3528767200826493e-06, "loss": 0.4876, "step": 5672 }, { "epoch": 3.8618107556160655, "grad_norm": 1.676684021949768, "learning_rate": 1.3513369999129867e-06, "loss": 0.5239, "step": 5673 }, { "epoch": 3.8624914908100747, "grad_norm": 1.7154970169067383, "learning_rate": 1.3497980195121808e-06, "loss": 0.2978, "step": 5674 }, { "epoch": 3.8631722260040844, "grad_norm": 1.5927245616912842, "learning_rate": 1.3482597791922631e-06, "loss": 0.4099, "step": 5675 }, { "epoch": 3.863852961198094, "grad_norm": 1.6086511611938477, "learning_rate": 1.3467222792651113e-06, "loss": 0.3398, "step": 5676 }, { "epoch": 3.8645336963921033, "grad_norm": 1.7111274003982544, "learning_rate": 1.345185520042454e-06, "loss": 0.3678, "step": 5677 }, { "epoch": 3.865214431586113, "grad_norm": 1.4670610427856445, "learning_rate": 1.3436495018358708e-06, "loss": 0.5147, "step": 5678 }, { "epoch": 3.8658951667801227, "grad_norm": 1.5351173877716064, "learning_rate": 1.3421142249567925e-06, "loss": 0.3543, "step": 5679 }, { "epoch": 3.866575901974132, "grad_norm": 1.5266364812850952, "learning_rate": 1.3405796897164957e-06, "loss": 0.5131, "step": 5680 }, { "epoch": 3.8672566371681416, "grad_norm": 1.6468786001205444, "learning_rate": 1.3390458964261072e-06, "loss": 0.2696, "step": 5681 }, { "epoch": 3.8679373723621513, "grad_norm": 1.445619821548462, "learning_rate": 1.3375128453966069e-06, "loss": 0.6108, "step": 5682 }, { "epoch": 3.8686181075561605, "grad_norm": 1.507248044013977, "learning_rate": 1.3359805369388212e-06, "loss": 0.5045, "step": 5683 }, { "epoch": 3.86929884275017, "grad_norm": 1.607847809791565, "learning_rate": 1.3344489713634262e-06, "loss": 0.4775, "step": 5684 }, { "epoch": 3.8699795779441795, "grad_norm": 1.6313420534133911, "learning_rate": 1.332918148980945e-06, "loss": 0.3997, "step": 5685 }, { "epoch": 3.870660313138189, "grad_norm": 1.493451476097107, "learning_rate": 1.331388070101759e-06, "loss": 0.396, "step": 5686 }, { "epoch": 3.871341048332199, "grad_norm": 1.7889200448989868, "learning_rate": 1.3298587350360892e-06, "loss": 0.5034, "step": 5687 }, { "epoch": 3.8720217835262085, "grad_norm": 1.471442699432373, "learning_rate": 1.328330144094009e-06, "loss": 0.502, "step": 5688 }, { "epoch": 3.8727025187202178, "grad_norm": 1.5328351259231567, "learning_rate": 1.3268022975854428e-06, "loss": 0.4389, "step": 5689 }, { "epoch": 3.8733832539142274, "grad_norm": 1.543601632118225, "learning_rate": 1.3252751958201614e-06, "loss": 0.4413, "step": 5690 }, { "epoch": 3.8740639891082367, "grad_norm": 1.5726304054260254, "learning_rate": 1.3237488391077864e-06, "loss": 0.5196, "step": 5691 }, { "epoch": 3.8747447243022464, "grad_norm": 1.6251896619796753, "learning_rate": 1.3222232277577852e-06, "loss": 0.6049, "step": 5692 }, { "epoch": 3.875425459496256, "grad_norm": 1.552522897720337, "learning_rate": 1.3206983620794806e-06, "loss": 0.4549, "step": 5693 }, { "epoch": 3.8761061946902657, "grad_norm": 1.7827273607254028, "learning_rate": 1.3191742423820357e-06, "loss": 0.5028, "step": 5694 }, { "epoch": 3.876786929884275, "grad_norm": 1.4196075201034546, "learning_rate": 1.3176508689744699e-06, "loss": 0.4411, "step": 5695 }, { "epoch": 3.8774676650782847, "grad_norm": 1.6333889961242676, "learning_rate": 1.3161282421656485e-06, "loss": 0.3978, "step": 5696 }, { "epoch": 3.878148400272294, "grad_norm": 1.505117416381836, "learning_rate": 1.314606362264284e-06, "loss": 0.4772, "step": 5697 }, { "epoch": 3.8788291354663036, "grad_norm": 1.4513366222381592, "learning_rate": 1.3130852295789386e-06, "loss": 0.3384, "step": 5698 }, { "epoch": 3.8795098706603133, "grad_norm": 1.4525336027145386, "learning_rate": 1.3115648444180218e-06, "loss": 0.5895, "step": 5699 }, { "epoch": 3.8801906058543225, "grad_norm": 1.6204782724380493, "learning_rate": 1.3100452070897945e-06, "loss": 0.511, "step": 5700 }, { "epoch": 3.880871341048332, "grad_norm": 1.6463109254837036, "learning_rate": 1.3085263179023638e-06, "loss": 0.4633, "step": 5701 }, { "epoch": 3.881552076242342, "grad_norm": 1.6856330633163452, "learning_rate": 1.3070081771636839e-06, "loss": 0.3214, "step": 5702 }, { "epoch": 3.882232811436351, "grad_norm": 1.491265892982483, "learning_rate": 1.3054907851815602e-06, "loss": 0.5417, "step": 5703 }, { "epoch": 3.882913546630361, "grad_norm": 1.632689356803894, "learning_rate": 1.3039741422636466e-06, "loss": 0.4432, "step": 5704 }, { "epoch": 3.8835942818243705, "grad_norm": 1.5961114168167114, "learning_rate": 1.302458248717441e-06, "loss": 0.4311, "step": 5705 }, { "epoch": 3.8842750170183797, "grad_norm": 1.4129718542099, "learning_rate": 1.3009431048502913e-06, "loss": 0.4286, "step": 5706 }, { "epoch": 3.8849557522123894, "grad_norm": 1.6238518953323364, "learning_rate": 1.299428710969397e-06, "loss": 0.596, "step": 5707 }, { "epoch": 3.8856364874063987, "grad_norm": 1.611619472503662, "learning_rate": 1.2979150673818003e-06, "loss": 0.3993, "step": 5708 }, { "epoch": 3.8863172226004083, "grad_norm": 1.548728585243225, "learning_rate": 1.2964021743943928e-06, "loss": 0.4289, "step": 5709 }, { "epoch": 3.886997957794418, "grad_norm": 1.523529052734375, "learning_rate": 1.294890032313917e-06, "loss": 0.4694, "step": 5710 }, { "epoch": 3.8876786929884277, "grad_norm": 1.7354166507720947, "learning_rate": 1.2933786414469574e-06, "loss": 0.293, "step": 5711 }, { "epoch": 3.888359428182437, "grad_norm": 1.379937767982483, "learning_rate": 1.291868002099953e-06, "loss": 0.489, "step": 5712 }, { "epoch": 3.8890401633764466, "grad_norm": 1.4875129461288452, "learning_rate": 1.2903581145791838e-06, "loss": 0.4509, "step": 5713 }, { "epoch": 3.889720898570456, "grad_norm": 1.647935390472412, "learning_rate": 1.2888489791907837e-06, "loss": 0.2506, "step": 5714 }, { "epoch": 3.8904016337644656, "grad_norm": 1.5802338123321533, "learning_rate": 1.2873405962407293e-06, "loss": 0.4525, "step": 5715 }, { "epoch": 3.8910823689584753, "grad_norm": 1.6483434438705444, "learning_rate": 1.2858329660348446e-06, "loss": 0.5385, "step": 5716 }, { "epoch": 3.891763104152485, "grad_norm": 1.5399140119552612, "learning_rate": 1.2843260888788057e-06, "loss": 0.4017, "step": 5717 }, { "epoch": 3.892443839346494, "grad_norm": 1.5381884574890137, "learning_rate": 1.2828199650781315e-06, "loss": 0.4746, "step": 5718 }, { "epoch": 3.893124574540504, "grad_norm": 1.6683777570724487, "learning_rate": 1.2813145949381884e-06, "loss": 0.4046, "step": 5719 }, { "epoch": 3.893805309734513, "grad_norm": 1.4771355390548706, "learning_rate": 1.2798099787641916e-06, "loss": 0.4422, "step": 5720 }, { "epoch": 3.894486044928523, "grad_norm": 1.5885554552078247, "learning_rate": 1.278306116861206e-06, "loss": 0.4636, "step": 5721 }, { "epoch": 3.8951667801225325, "grad_norm": 1.5708625316619873, "learning_rate": 1.2768030095341383e-06, "loss": 0.4384, "step": 5722 }, { "epoch": 3.8958475153165417, "grad_norm": 1.6155141592025757, "learning_rate": 1.275300657087743e-06, "loss": 0.4395, "step": 5723 }, { "epoch": 3.8965282505105514, "grad_norm": 1.7952638864517212, "learning_rate": 1.2737990598266259e-06, "loss": 0.3232, "step": 5724 }, { "epoch": 3.897208985704561, "grad_norm": 1.6678274869918823, "learning_rate": 1.2722982180552363e-06, "loss": 0.3062, "step": 5725 }, { "epoch": 3.8978897208985703, "grad_norm": 1.54680335521698, "learning_rate": 1.2707981320778683e-06, "loss": 0.5518, "step": 5726 }, { "epoch": 3.89857045609258, "grad_norm": 1.51484215259552, "learning_rate": 1.2692988021986686e-06, "loss": 0.4353, "step": 5727 }, { "epoch": 3.8992511912865897, "grad_norm": 1.586779236793518, "learning_rate": 1.2678002287216246e-06, "loss": 0.6056, "step": 5728 }, { "epoch": 3.899931926480599, "grad_norm": 1.7096896171569824, "learning_rate": 1.2663024119505762e-06, "loss": 0.3686, "step": 5729 }, { "epoch": 3.9006126616746086, "grad_norm": 1.7132381200790405, "learning_rate": 1.2648053521892033e-06, "loss": 0.3375, "step": 5730 }, { "epoch": 3.901293396868618, "grad_norm": 1.520348072052002, "learning_rate": 1.2633090497410388e-06, "loss": 0.477, "step": 5731 }, { "epoch": 3.9019741320626276, "grad_norm": 1.6440229415893555, "learning_rate": 1.2618135049094582e-06, "loss": 0.3236, "step": 5732 }, { "epoch": 3.9026548672566372, "grad_norm": 1.5544992685317993, "learning_rate": 1.2603187179976816e-06, "loss": 0.4823, "step": 5733 }, { "epoch": 3.903335602450647, "grad_norm": 1.5138144493103027, "learning_rate": 1.2588246893087818e-06, "loss": 0.6773, "step": 5734 }, { "epoch": 3.904016337644656, "grad_norm": 1.6010606288909912, "learning_rate": 1.2573314191456727e-06, "loss": 0.5333, "step": 5735 }, { "epoch": 3.904697072838666, "grad_norm": 1.638343334197998, "learning_rate": 1.2558389078111138e-06, "loss": 0.2557, "step": 5736 }, { "epoch": 3.905377808032675, "grad_norm": 1.5937042236328125, "learning_rate": 1.2543471556077146e-06, "loss": 0.44, "step": 5737 }, { "epoch": 3.9060585432266848, "grad_norm": 1.6694756746292114, "learning_rate": 1.252856162837931e-06, "loss": 0.4326, "step": 5738 }, { "epoch": 3.9067392784206945, "grad_norm": 1.5715069770812988, "learning_rate": 1.2513659298040598e-06, "loss": 0.4875, "step": 5739 }, { "epoch": 3.907420013614704, "grad_norm": 1.660544991493225, "learning_rate": 1.249876456808246e-06, "loss": 0.4529, "step": 5740 }, { "epoch": 3.9081007488087134, "grad_norm": 1.6611666679382324, "learning_rate": 1.2483877441524843e-06, "loss": 0.3428, "step": 5741 }, { "epoch": 3.908781484002723, "grad_norm": 1.6006951332092285, "learning_rate": 1.2468997921386112e-06, "loss": 0.4524, "step": 5742 }, { "epoch": 3.9094622191967323, "grad_norm": 1.4762413501739502, "learning_rate": 1.2454126010683071e-06, "loss": 0.4134, "step": 5743 }, { "epoch": 3.910142954390742, "grad_norm": 1.5835591554641724, "learning_rate": 1.2439261712431055e-06, "loss": 0.3817, "step": 5744 }, { "epoch": 3.9108236895847517, "grad_norm": 1.6197946071624756, "learning_rate": 1.2424405029643783e-06, "loss": 0.3258, "step": 5745 }, { "epoch": 3.911504424778761, "grad_norm": 1.5758930444717407, "learning_rate": 1.2409555965333448e-06, "loss": 0.5915, "step": 5746 }, { "epoch": 3.9121851599727706, "grad_norm": 1.4244270324707031, "learning_rate": 1.2394714522510726e-06, "loss": 0.4345, "step": 5747 }, { "epoch": 3.91286589516678, "grad_norm": 1.565852403640747, "learning_rate": 1.2379880704184738e-06, "loss": 0.4268, "step": 5748 }, { "epoch": 3.9135466303607895, "grad_norm": 1.5312893390655518, "learning_rate": 1.236505451336304e-06, "loss": 0.4419, "step": 5749 }, { "epoch": 3.914227365554799, "grad_norm": 1.5089482069015503, "learning_rate": 1.2350235953051637e-06, "loss": 0.5186, "step": 5750 }, { "epoch": 3.914908100748809, "grad_norm": 1.5061867237091064, "learning_rate": 1.2335425026255026e-06, "loss": 0.4036, "step": 5751 }, { "epoch": 3.915588835942818, "grad_norm": 1.4761799573898315, "learning_rate": 1.2320621735976124e-06, "loss": 0.4992, "step": 5752 }, { "epoch": 3.916269571136828, "grad_norm": 1.5541291236877441, "learning_rate": 1.2305826085216306e-06, "loss": 0.4152, "step": 5753 }, { "epoch": 3.916950306330837, "grad_norm": 1.8191183805465698, "learning_rate": 1.229103807697537e-06, "loss": 0.286, "step": 5754 }, { "epoch": 3.9176310415248468, "grad_norm": 1.6091078519821167, "learning_rate": 1.2276257714251655e-06, "loss": 0.4337, "step": 5755 }, { "epoch": 3.9183117767188564, "grad_norm": 1.5685288906097412, "learning_rate": 1.2261485000041856e-06, "loss": 0.4185, "step": 5756 }, { "epoch": 3.918992511912866, "grad_norm": 1.6677664518356323, "learning_rate": 1.224671993734114e-06, "loss": 0.3854, "step": 5757 }, { "epoch": 3.9196732471068754, "grad_norm": 1.5381731986999512, "learning_rate": 1.223196252914316e-06, "loss": 0.3925, "step": 5758 }, { "epoch": 3.920353982300885, "grad_norm": 1.7447482347488403, "learning_rate": 1.221721277843998e-06, "loss": 0.3139, "step": 5759 }, { "epoch": 3.9210347174948943, "grad_norm": 1.5766887664794922, "learning_rate": 1.2202470688222113e-06, "loss": 0.4586, "step": 5760 }, { "epoch": 3.921715452688904, "grad_norm": 1.4476380348205566, "learning_rate": 1.2187736261478523e-06, "loss": 0.5549, "step": 5761 }, { "epoch": 3.9223961878829137, "grad_norm": 1.4463893175125122, "learning_rate": 1.2173009501196652e-06, "loss": 0.4284, "step": 5762 }, { "epoch": 3.9230769230769234, "grad_norm": 1.513960361480713, "learning_rate": 1.2158290410362328e-06, "loss": 0.3475, "step": 5763 }, { "epoch": 3.9237576582709326, "grad_norm": 2.02331280708313, "learning_rate": 1.214357899195987e-06, "loss": 0.4201, "step": 5764 }, { "epoch": 3.9244383934649423, "grad_norm": 1.5205296277999878, "learning_rate": 1.2128875248972043e-06, "loss": 0.5188, "step": 5765 }, { "epoch": 3.9251191286589515, "grad_norm": 1.6323477029800415, "learning_rate": 1.2114179184380032e-06, "loss": 0.4636, "step": 5766 }, { "epoch": 3.925799863852961, "grad_norm": 1.5510364770889282, "learning_rate": 1.2099490801163472e-06, "loss": 0.4683, "step": 5767 }, { "epoch": 3.926480599046971, "grad_norm": 1.5150796175003052, "learning_rate": 1.208481010230042e-06, "loss": 0.5036, "step": 5768 }, { "epoch": 3.92716133424098, "grad_norm": 1.4547382593154907, "learning_rate": 1.2070137090767436e-06, "loss": 0.501, "step": 5769 }, { "epoch": 3.92784206943499, "grad_norm": 1.4275089502334595, "learning_rate": 1.2055471769539463e-06, "loss": 0.498, "step": 5770 }, { "epoch": 3.928522804628999, "grad_norm": 1.535644769668579, "learning_rate": 1.204081414158989e-06, "loss": 0.5955, "step": 5771 }, { "epoch": 3.9292035398230087, "grad_norm": 1.5425410270690918, "learning_rate": 1.2026164209890583e-06, "loss": 0.4595, "step": 5772 }, { "epoch": 3.9298842750170184, "grad_norm": 1.69145667552948, "learning_rate": 1.2011521977411828e-06, "loss": 0.45, "step": 5773 }, { "epoch": 3.930565010211028, "grad_norm": 1.6061259508132935, "learning_rate": 1.199688744712234e-06, "loss": 0.3871, "step": 5774 }, { "epoch": 3.9312457454050374, "grad_norm": 1.6683769226074219, "learning_rate": 1.1982260621989267e-06, "loss": 0.5836, "step": 5775 }, { "epoch": 3.931926480599047, "grad_norm": 1.451797366142273, "learning_rate": 1.1967641504978234e-06, "loss": 0.4583, "step": 5776 }, { "epoch": 3.9326072157930563, "grad_norm": 1.3299469947814941, "learning_rate": 1.1953030099053258e-06, "loss": 0.4985, "step": 5777 }, { "epoch": 3.933287950987066, "grad_norm": 1.564886450767517, "learning_rate": 1.1938426407176806e-06, "loss": 0.4158, "step": 5778 }, { "epoch": 3.9339686861810756, "grad_norm": 1.6268093585968018, "learning_rate": 1.1923830432309808e-06, "loss": 0.5153, "step": 5779 }, { "epoch": 3.9346494213750853, "grad_norm": 1.491732120513916, "learning_rate": 1.1909242177411578e-06, "loss": 0.3806, "step": 5780 }, { "epoch": 3.9353301565690946, "grad_norm": 1.5947130918502808, "learning_rate": 1.189466164543993e-06, "loss": 0.4486, "step": 5781 }, { "epoch": 3.9360108917631043, "grad_norm": 1.552761197090149, "learning_rate": 1.1880088839351039e-06, "loss": 0.4646, "step": 5782 }, { "epoch": 3.9366916269571135, "grad_norm": 1.538016438484192, "learning_rate": 1.186552376209959e-06, "loss": 0.3071, "step": 5783 }, { "epoch": 3.937372362151123, "grad_norm": 1.4368151426315308, "learning_rate": 1.1850966416638643e-06, "loss": 0.5475, "step": 5784 }, { "epoch": 3.938053097345133, "grad_norm": 1.665891408920288, "learning_rate": 1.1836416805919687e-06, "loss": 0.389, "step": 5785 }, { "epoch": 3.938733832539142, "grad_norm": 1.5915883779525757, "learning_rate": 1.1821874932892708e-06, "loss": 0.5362, "step": 5786 }, { "epoch": 3.939414567733152, "grad_norm": 1.6139895915985107, "learning_rate": 1.1807340800506061e-06, "loss": 0.3585, "step": 5787 }, { "epoch": 3.9400953029271615, "grad_norm": 1.4956836700439453, "learning_rate": 1.1792814411706532e-06, "loss": 0.5284, "step": 5788 }, { "epoch": 3.9407760381211707, "grad_norm": 1.5539802312850952, "learning_rate": 1.1778295769439374e-06, "loss": 0.3817, "step": 5789 }, { "epoch": 3.9414567733151804, "grad_norm": 1.536879062652588, "learning_rate": 1.1763784876648271e-06, "loss": 0.6403, "step": 5790 }, { "epoch": 3.94213750850919, "grad_norm": 1.642353892326355, "learning_rate": 1.1749281736275292e-06, "loss": 0.4065, "step": 5791 }, { "epoch": 3.9428182437031993, "grad_norm": 1.543868899345398, "learning_rate": 1.1734786351260946e-06, "loss": 0.5114, "step": 5792 }, { "epoch": 3.943498978897209, "grad_norm": 1.593308448791504, "learning_rate": 1.1720298724544216e-06, "loss": 0.3033, "step": 5793 }, { "epoch": 3.9441797140912183, "grad_norm": 1.5901484489440918, "learning_rate": 1.1705818859062462e-06, "loss": 0.4756, "step": 5794 }, { "epoch": 3.944860449285228, "grad_norm": 1.5225974321365356, "learning_rate": 1.169134675775147e-06, "loss": 0.3223, "step": 5795 }, { "epoch": 3.9455411844792376, "grad_norm": 1.5014692544937134, "learning_rate": 1.1676882423545493e-06, "loss": 0.3491, "step": 5796 }, { "epoch": 3.9462219196732473, "grad_norm": 1.6588321924209595, "learning_rate": 1.1662425859377169e-06, "loss": 0.3936, "step": 5797 }, { "epoch": 3.9469026548672566, "grad_norm": 1.6909089088439941, "learning_rate": 1.164797706817759e-06, "loss": 0.4377, "step": 5798 }, { "epoch": 3.9475833900612662, "grad_norm": 1.46688973903656, "learning_rate": 1.1633536052876237e-06, "loss": 0.3863, "step": 5799 }, { "epoch": 3.9482641252552755, "grad_norm": 1.5643806457519531, "learning_rate": 1.1619102816401062e-06, "loss": 0.4417, "step": 5800 }, { "epoch": 3.948944860449285, "grad_norm": 1.6758421659469604, "learning_rate": 1.1604677361678402e-06, "loss": 0.4752, "step": 5801 }, { "epoch": 3.949625595643295, "grad_norm": 1.6197761297225952, "learning_rate": 1.159025969163301e-06, "loss": 0.3733, "step": 5802 }, { "epoch": 3.9503063308373045, "grad_norm": 1.630338430404663, "learning_rate": 1.1575849809188106e-06, "loss": 0.4814, "step": 5803 }, { "epoch": 3.9509870660313138, "grad_norm": 1.5959173440933228, "learning_rate": 1.1561447717265295e-06, "loss": 0.4626, "step": 5804 }, { "epoch": 3.9516678012253235, "grad_norm": 1.7051968574523926, "learning_rate": 1.1547053418784586e-06, "loss": 0.4002, "step": 5805 }, { "epoch": 3.9523485364193327, "grad_norm": 1.672739863395691, "learning_rate": 1.153266691666446e-06, "loss": 0.2897, "step": 5806 }, { "epoch": 3.9530292716133424, "grad_norm": 1.5827293395996094, "learning_rate": 1.1518288213821793e-06, "loss": 0.3595, "step": 5807 }, { "epoch": 3.953710006807352, "grad_norm": 1.6777064800262451, "learning_rate": 1.150391731317187e-06, "loss": 0.3599, "step": 5808 }, { "epoch": 3.9543907420013613, "grad_norm": 1.45070219039917, "learning_rate": 1.148955421762838e-06, "loss": 0.5867, "step": 5809 }, { "epoch": 3.955071477195371, "grad_norm": 1.4600670337677002, "learning_rate": 1.147519893010348e-06, "loss": 0.4003, "step": 5810 }, { "epoch": 3.9557522123893807, "grad_norm": 1.6187609434127808, "learning_rate": 1.1460851453507705e-06, "loss": 0.4284, "step": 5811 }, { "epoch": 3.95643294758339, "grad_norm": 1.6025441884994507, "learning_rate": 1.1446511790749997e-06, "loss": 0.2847, "step": 5812 }, { "epoch": 3.9571136827773996, "grad_norm": 1.596501350402832, "learning_rate": 1.1432179944737764e-06, "loss": 0.4815, "step": 5813 }, { "epoch": 3.9577944179714093, "grad_norm": 1.5794414281845093, "learning_rate": 1.141785591837678e-06, "loss": 0.3775, "step": 5814 }, { "epoch": 3.9584751531654185, "grad_norm": 1.5093940496444702, "learning_rate": 1.1403539714571233e-06, "loss": 0.3737, "step": 5815 }, { "epoch": 3.959155888359428, "grad_norm": 1.6043487787246704, "learning_rate": 1.1389231336223767e-06, "loss": 0.4745, "step": 5816 }, { "epoch": 3.9598366235534375, "grad_norm": 1.4614918231964111, "learning_rate": 1.1374930786235416e-06, "loss": 0.4333, "step": 5817 }, { "epoch": 3.960517358747447, "grad_norm": 1.5366590023040771, "learning_rate": 1.1360638067505631e-06, "loss": 0.5191, "step": 5818 }, { "epoch": 3.961198093941457, "grad_norm": 1.3710248470306396, "learning_rate": 1.1346353182932234e-06, "loss": 0.6364, "step": 5819 }, { "epoch": 3.9618788291354665, "grad_norm": 1.5773435831069946, "learning_rate": 1.133207613541154e-06, "loss": 0.5402, "step": 5820 }, { "epoch": 3.9625595643294758, "grad_norm": 1.453628420829773, "learning_rate": 1.131780692783821e-06, "loss": 0.4303, "step": 5821 }, { "epoch": 3.9632402995234854, "grad_norm": 1.5978280305862427, "learning_rate": 1.1303545563105334e-06, "loss": 0.4842, "step": 5822 }, { "epoch": 3.9639210347174947, "grad_norm": 1.717642068862915, "learning_rate": 1.1289292044104382e-06, "loss": 0.2865, "step": 5823 }, { "epoch": 3.9646017699115044, "grad_norm": 1.571840524673462, "learning_rate": 1.127504637372533e-06, "loss": 0.4641, "step": 5824 }, { "epoch": 3.965282505105514, "grad_norm": 1.599753975868225, "learning_rate": 1.1260808554856461e-06, "loss": 0.4642, "step": 5825 }, { "epoch": 3.9659632402995237, "grad_norm": 1.4336751699447632, "learning_rate": 1.1246578590384493e-06, "loss": 0.4628, "step": 5826 }, { "epoch": 3.966643975493533, "grad_norm": 1.5095126628875732, "learning_rate": 1.123235648319459e-06, "loss": 0.4183, "step": 5827 }, { "epoch": 3.9673247106875427, "grad_norm": 1.593733787536621, "learning_rate": 1.1218142236170275e-06, "loss": 0.3109, "step": 5828 }, { "epoch": 3.968005445881552, "grad_norm": 1.7078516483306885, "learning_rate": 1.1203935852193498e-06, "loss": 0.4498, "step": 5829 }, { "epoch": 3.9686861810755616, "grad_norm": 1.6206306219100952, "learning_rate": 1.1189737334144597e-06, "loss": 0.3683, "step": 5830 }, { "epoch": 3.9693669162695713, "grad_norm": 1.4521750211715698, "learning_rate": 1.117554668490236e-06, "loss": 0.3317, "step": 5831 }, { "epoch": 3.9700476514635805, "grad_norm": 1.5516167879104614, "learning_rate": 1.1161363907343925e-06, "loss": 0.4591, "step": 5832 }, { "epoch": 3.97072838665759, "grad_norm": 1.5365468263626099, "learning_rate": 1.114718900434487e-06, "loss": 0.3519, "step": 5833 }, { "epoch": 3.9714091218516, "grad_norm": 1.6515822410583496, "learning_rate": 1.1133021978779185e-06, "loss": 0.4256, "step": 5834 }, { "epoch": 3.972089857045609, "grad_norm": 1.5213615894317627, "learning_rate": 1.111886283351923e-06, "loss": 0.4173, "step": 5835 }, { "epoch": 3.972770592239619, "grad_norm": 1.6401358842849731, "learning_rate": 1.1104711571435766e-06, "loss": 0.4682, "step": 5836 }, { "epoch": 3.9734513274336285, "grad_norm": 1.6410925388336182, "learning_rate": 1.1090568195397976e-06, "loss": 0.3905, "step": 5837 }, { "epoch": 3.9741320626276377, "grad_norm": 1.6110495328903198, "learning_rate": 1.1076432708273467e-06, "loss": 0.4766, "step": 5838 }, { "epoch": 3.9748127978216474, "grad_norm": 1.5155832767486572, "learning_rate": 1.1062305112928184e-06, "loss": 0.4033, "step": 5839 }, { "epoch": 3.9754935330156567, "grad_norm": 1.5689473152160645, "learning_rate": 1.1048185412226515e-06, "loss": 0.5134, "step": 5840 }, { "epoch": 3.9761742682096664, "grad_norm": 1.5388107299804688, "learning_rate": 1.1034073609031242e-06, "loss": 0.4601, "step": 5841 }, { "epoch": 3.976855003403676, "grad_norm": 1.7457200288772583, "learning_rate": 1.1019969706203553e-06, "loss": 0.4152, "step": 5842 }, { "epoch": 3.9775357385976857, "grad_norm": 1.7631280422210693, "learning_rate": 1.100587370660302e-06, "loss": 0.3704, "step": 5843 }, { "epoch": 3.978216473791695, "grad_norm": 1.5309748649597168, "learning_rate": 1.099178561308759e-06, "loss": 0.4323, "step": 5844 }, { "epoch": 3.9788972089857046, "grad_norm": 1.6344020366668701, "learning_rate": 1.0977705428513674e-06, "loss": 0.4896, "step": 5845 }, { "epoch": 3.979577944179714, "grad_norm": 1.5591726303100586, "learning_rate": 1.096363315573602e-06, "loss": 0.4872, "step": 5846 }, { "epoch": 3.9802586793737236, "grad_norm": 1.6437770128250122, "learning_rate": 1.0949568797607773e-06, "loss": 0.4415, "step": 5847 }, { "epoch": 3.9809394145677333, "grad_norm": 1.6600996255874634, "learning_rate": 1.0935512356980522e-06, "loss": 0.5109, "step": 5848 }, { "epoch": 3.981620149761743, "grad_norm": 1.6468844413757324, "learning_rate": 1.0921463836704193e-06, "loss": 0.3906, "step": 5849 }, { "epoch": 3.982300884955752, "grad_norm": 1.7424046993255615, "learning_rate": 1.0907423239627157e-06, "loss": 0.3471, "step": 5850 }, { "epoch": 3.982981620149762, "grad_norm": 1.5443880558013916, "learning_rate": 1.089339056859613e-06, "loss": 0.5041, "step": 5851 }, { "epoch": 3.983662355343771, "grad_norm": 1.5534427165985107, "learning_rate": 1.0879365826456273e-06, "loss": 0.383, "step": 5852 }, { "epoch": 3.984343090537781, "grad_norm": 1.648184061050415, "learning_rate": 1.0865349016051097e-06, "loss": 0.4329, "step": 5853 }, { "epoch": 3.9850238257317905, "grad_norm": 1.5369871854782104, "learning_rate": 1.0851340140222504e-06, "loss": 0.4321, "step": 5854 }, { "epoch": 3.9857045609257997, "grad_norm": 1.5238970518112183, "learning_rate": 1.083733920181083e-06, "loss": 0.3946, "step": 5855 }, { "epoch": 3.9863852961198094, "grad_norm": 1.549943447113037, "learning_rate": 1.0823346203654771e-06, "loss": 0.4526, "step": 5856 }, { "epoch": 3.9870660313138186, "grad_norm": 1.5820884704589844, "learning_rate": 1.080936114859139e-06, "loss": 0.4415, "step": 5857 }, { "epoch": 3.9877467665078283, "grad_norm": 1.6594265699386597, "learning_rate": 1.0795384039456186e-06, "loss": 0.4053, "step": 5858 }, { "epoch": 3.988427501701838, "grad_norm": 1.6298496723175049, "learning_rate": 1.078141487908304e-06, "loss": 0.3857, "step": 5859 }, { "epoch": 3.9891082368958477, "grad_norm": 1.582137942314148, "learning_rate": 1.0767453670304196e-06, "loss": 0.4257, "step": 5860 }, { "epoch": 3.989788972089857, "grad_norm": 1.411338210105896, "learning_rate": 1.075350041595028e-06, "loss": 0.5466, "step": 5861 }, { "epoch": 3.9904697072838666, "grad_norm": 1.672114372253418, "learning_rate": 1.0739555118850358e-06, "loss": 0.4911, "step": 5862 }, { "epoch": 3.991150442477876, "grad_norm": 1.6198362112045288, "learning_rate": 1.0725617781831827e-06, "loss": 0.4238, "step": 5863 }, { "epoch": 3.9918311776718856, "grad_norm": 1.4822361469268799, "learning_rate": 1.0711688407720488e-06, "loss": 0.5204, "step": 5864 }, { "epoch": 3.9925119128658952, "grad_norm": 1.5881156921386719, "learning_rate": 1.0697766999340548e-06, "loss": 0.5094, "step": 5865 }, { "epoch": 3.993192648059905, "grad_norm": 1.6575676202774048, "learning_rate": 1.0683853559514562e-06, "loss": 0.3862, "step": 5866 }, { "epoch": 3.993873383253914, "grad_norm": 1.4770352840423584, "learning_rate": 1.066994809106351e-06, "loss": 0.4915, "step": 5867 }, { "epoch": 3.994554118447924, "grad_norm": 1.6258682012557983, "learning_rate": 1.065605059680671e-06, "loss": 0.3694, "step": 5868 }, { "epoch": 3.995234853641933, "grad_norm": 1.6859891414642334, "learning_rate": 1.064216107956192e-06, "loss": 0.2711, "step": 5869 }, { "epoch": 3.9959155888359428, "grad_norm": 1.8214305639266968, "learning_rate": 1.0628279542145231e-06, "loss": 0.382, "step": 5870 }, { "epoch": 3.9965963240299525, "grad_norm": 1.4722397327423096, "learning_rate": 1.0614405987371118e-06, "loss": 0.5379, "step": 5871 }, { "epoch": 3.997277059223962, "grad_norm": 1.475598692893982, "learning_rate": 1.0600540418052485e-06, "loss": 0.4709, "step": 5872 }, { "epoch": 3.9979577944179714, "grad_norm": 1.6351927518844604, "learning_rate": 1.0586682837000572e-06, "loss": 0.3677, "step": 5873 }, { "epoch": 3.998638529611981, "grad_norm": 1.6496649980545044, "learning_rate": 1.0572833247025e-06, "loss": 0.3916, "step": 5874 }, { "epoch": 3.9993192648059903, "grad_norm": 1.5332103967666626, "learning_rate": 1.0558991650933792e-06, "loss": 0.5704, "step": 5875 }, { "epoch": 4.0, "grad_norm": 1.3132648468017578, "learning_rate": 1.0545158051533355e-06, "loss": 0.3629, "step": 5876 }, { "epoch": 4.00068073519401, "grad_norm": 1.362047553062439, "learning_rate": 1.0531332451628452e-06, "loss": 0.3861, "step": 5877 }, { "epoch": 4.001361470388019, "grad_norm": 1.430801510810852, "learning_rate": 1.051751485402221e-06, "loss": 0.3626, "step": 5878 }, { "epoch": 4.002042205582028, "grad_norm": 1.3636401891708374, "learning_rate": 1.0503705261516188e-06, "loss": 0.3303, "step": 5879 }, { "epoch": 4.002722940776038, "grad_norm": 1.1548664569854736, "learning_rate": 1.0489903676910285e-06, "loss": 0.3797, "step": 5880 }, { "epoch": 4.0034036759700475, "grad_norm": 1.5123035907745361, "learning_rate": 1.0476110103002758e-06, "loss": 0.4583, "step": 5881 }, { "epoch": 4.004084411164057, "grad_norm": 1.4076956510543823, "learning_rate": 1.0462324542590286e-06, "loss": 0.3835, "step": 5882 }, { "epoch": 4.004765146358067, "grad_norm": 1.2623622417449951, "learning_rate": 1.0448546998467895e-06, "loss": 0.3695, "step": 5883 }, { "epoch": 4.005445881552077, "grad_norm": 1.2215988636016846, "learning_rate": 1.043477747342898e-06, "loss": 0.3437, "step": 5884 }, { "epoch": 4.006126616746085, "grad_norm": 1.372942566871643, "learning_rate": 1.042101597026533e-06, "loss": 0.4559, "step": 5885 }, { "epoch": 4.006807351940095, "grad_norm": 1.2623298168182373, "learning_rate": 1.0407262491767117e-06, "loss": 0.2578, "step": 5886 }, { "epoch": 4.007488087134105, "grad_norm": 1.1545777320861816, "learning_rate": 1.0393517040722844e-06, "loss": 0.2853, "step": 5887 }, { "epoch": 4.0081688223281144, "grad_norm": 1.3921458721160889, "learning_rate": 1.037977961991941e-06, "loss": 0.4409, "step": 5888 }, { "epoch": 4.008849557522124, "grad_norm": 1.4770166873931885, "learning_rate": 1.0366050232142105e-06, "loss": 0.3269, "step": 5889 }, { "epoch": 4.009530292716134, "grad_norm": 1.4129573106765747, "learning_rate": 1.0352328880174562e-06, "loss": 0.1864, "step": 5890 }, { "epoch": 4.010211027910143, "grad_norm": 1.4329901933670044, "learning_rate": 1.0338615566798793e-06, "loss": 0.2458, "step": 5891 }, { "epoch": 4.010891763104152, "grad_norm": 1.4574018716812134, "learning_rate": 1.032491029479515e-06, "loss": 0.2856, "step": 5892 }, { "epoch": 4.011572498298162, "grad_norm": 1.487835168838501, "learning_rate": 1.0311213066942445e-06, "loss": 0.3648, "step": 5893 }, { "epoch": 4.012253233492172, "grad_norm": 1.586383581161499, "learning_rate": 1.0297523886017775e-06, "loss": 0.2783, "step": 5894 }, { "epoch": 4.012933968686181, "grad_norm": 1.5653231143951416, "learning_rate": 1.0283842754796608e-06, "loss": 0.3349, "step": 5895 }, { "epoch": 4.013614703880191, "grad_norm": 1.3602277040481567, "learning_rate": 1.0270169676052843e-06, "loss": 0.3469, "step": 5896 }, { "epoch": 4.0142954390742, "grad_norm": 1.3447822332382202, "learning_rate": 1.025650465255868e-06, "loss": 0.295, "step": 5897 }, { "epoch": 4.0149761742682095, "grad_norm": 1.3592112064361572, "learning_rate": 1.0242847687084718e-06, "loss": 0.4037, "step": 5898 }, { "epoch": 4.015656909462219, "grad_norm": 1.5394247770309448, "learning_rate": 1.0229198782399901e-06, "loss": 0.4027, "step": 5899 }, { "epoch": 4.016337644656229, "grad_norm": 1.3353924751281738, "learning_rate": 1.0215557941271581e-06, "loss": 0.3867, "step": 5900 }, { "epoch": 4.017018379850239, "grad_norm": 1.5923540592193604, "learning_rate": 1.020192516646542e-06, "loss": 0.3796, "step": 5901 }, { "epoch": 4.017699115044247, "grad_norm": 1.4109975099563599, "learning_rate": 1.0188300460745487e-06, "loss": 0.399, "step": 5902 }, { "epoch": 4.018379850238257, "grad_norm": 1.3859978914260864, "learning_rate": 1.0174683826874209e-06, "loss": 0.203, "step": 5903 }, { "epoch": 4.019060585432267, "grad_norm": 1.3286138772964478, "learning_rate": 1.0161075267612358e-06, "loss": 0.2613, "step": 5904 }, { "epoch": 4.019741320626276, "grad_norm": 1.4744223356246948, "learning_rate": 1.014747478571908e-06, "loss": 0.1911, "step": 5905 }, { "epoch": 4.020422055820286, "grad_norm": 1.3794221878051758, "learning_rate": 1.013388238395187e-06, "loss": 0.3708, "step": 5906 }, { "epoch": 4.021102791014296, "grad_norm": 1.4299238920211792, "learning_rate": 1.0120298065066614e-06, "loss": 0.2456, "step": 5907 }, { "epoch": 4.021783526208305, "grad_norm": 1.3040871620178223, "learning_rate": 1.0106721831817535e-06, "loss": 0.2983, "step": 5908 }, { "epoch": 4.022464261402314, "grad_norm": 1.3250058889389038, "learning_rate": 1.0093153686957214e-06, "loss": 0.2469, "step": 5909 }, { "epoch": 4.023144996596324, "grad_norm": 1.380445122718811, "learning_rate": 1.00795936332366e-06, "loss": 0.3066, "step": 5910 }, { "epoch": 4.023825731790334, "grad_norm": 1.2257667779922485, "learning_rate": 1.0066041673405036e-06, "loss": 0.4393, "step": 5911 }, { "epoch": 4.024506466984343, "grad_norm": 1.4384117126464844, "learning_rate": 1.005249781021016e-06, "loss": 0.3821, "step": 5912 }, { "epoch": 4.025187202178353, "grad_norm": 1.3916667699813843, "learning_rate": 1.0038962046397998e-06, "loss": 0.2393, "step": 5913 }, { "epoch": 4.025867937372362, "grad_norm": 1.3559374809265137, "learning_rate": 1.0025434384712957e-06, "loss": 0.2325, "step": 5914 }, { "epoch": 4.0265486725663715, "grad_norm": 1.362642765045166, "learning_rate": 1.0011914827897768e-06, "loss": 0.3405, "step": 5915 }, { "epoch": 4.027229407760381, "grad_norm": 1.2848490476608276, "learning_rate": 9.998403378693517e-07, "loss": 0.2679, "step": 5916 }, { "epoch": 4.027910142954391, "grad_norm": 1.3788095712661743, "learning_rate": 9.984900039839678e-07, "loss": 0.3485, "step": 5917 }, { "epoch": 4.0285908781484006, "grad_norm": 1.5717113018035889, "learning_rate": 9.971404814074044e-07, "loss": 0.4527, "step": 5918 }, { "epoch": 4.02927161334241, "grad_norm": 1.4275857210159302, "learning_rate": 9.957917704132808e-07, "loss": 0.4151, "step": 5919 }, { "epoch": 4.029952348536419, "grad_norm": 1.5299503803253174, "learning_rate": 9.94443871275046e-07, "loss": 0.3049, "step": 5920 }, { "epoch": 4.030633083730429, "grad_norm": 1.4649512767791748, "learning_rate": 9.9309678426599e-07, "loss": 0.4891, "step": 5921 }, { "epoch": 4.031313818924438, "grad_norm": 1.405683159828186, "learning_rate": 9.91750509659235e-07, "loss": 0.3169, "step": 5922 }, { "epoch": 4.031994554118448, "grad_norm": 1.2908226251602173, "learning_rate": 9.904050477277367e-07, "loss": 0.3391, "step": 5923 }, { "epoch": 4.032675289312458, "grad_norm": 1.3080158233642578, "learning_rate": 9.89060398744292e-07, "loss": 0.2506, "step": 5924 }, { "epoch": 4.033356024506467, "grad_norm": 1.2624722719192505, "learning_rate": 9.877165629815272e-07, "loss": 0.1565, "step": 5925 }, { "epoch": 4.034036759700476, "grad_norm": 1.3028050661087036, "learning_rate": 9.863735407119057e-07, "loss": 0.4027, "step": 5926 }, { "epoch": 4.034717494894486, "grad_norm": 1.520576000213623, "learning_rate": 9.850313322077266e-07, "loss": 0.4554, "step": 5927 }, { "epoch": 4.035398230088496, "grad_norm": 1.5702346563339233, "learning_rate": 9.836899377411246e-07, "loss": 0.3243, "step": 5928 }, { "epoch": 4.036078965282505, "grad_norm": 1.419851541519165, "learning_rate": 9.82349357584068e-07, "loss": 0.4383, "step": 5929 }, { "epoch": 4.036759700476515, "grad_norm": 1.4040887355804443, "learning_rate": 9.810095920083584e-07, "loss": 0.5564, "step": 5930 }, { "epoch": 4.037440435670524, "grad_norm": 1.323257327079773, "learning_rate": 9.796706412856371e-07, "loss": 0.3347, "step": 5931 }, { "epoch": 4.0381211708645335, "grad_norm": 1.3061654567718506, "learning_rate": 9.783325056873756e-07, "loss": 0.2711, "step": 5932 }, { "epoch": 4.038801906058543, "grad_norm": 1.446191668510437, "learning_rate": 9.769951854848803e-07, "loss": 0.3576, "step": 5933 }, { "epoch": 4.039482641252553, "grad_norm": 1.1576403379440308, "learning_rate": 9.756586809492968e-07, "loss": 0.2595, "step": 5934 }, { "epoch": 4.0401633764465625, "grad_norm": 1.4524606466293335, "learning_rate": 9.743229923516e-07, "loss": 0.3023, "step": 5935 }, { "epoch": 4.040844111640572, "grad_norm": 1.33367121219635, "learning_rate": 9.729881199626034e-07, "loss": 0.3482, "step": 5936 }, { "epoch": 4.041524846834581, "grad_norm": 1.4384055137634277, "learning_rate": 9.716540640529514e-07, "loss": 0.3756, "step": 5937 }, { "epoch": 4.042205582028591, "grad_norm": 1.354131817817688, "learning_rate": 9.703208248931272e-07, "loss": 0.4316, "step": 5938 }, { "epoch": 4.0428863172226, "grad_norm": 1.2838112115859985, "learning_rate": 9.68988402753444e-07, "loss": 0.2642, "step": 5939 }, { "epoch": 4.04356705241661, "grad_norm": 1.361420750617981, "learning_rate": 9.676567979040507e-07, "loss": 0.4298, "step": 5940 }, { "epoch": 4.04424778761062, "grad_norm": 1.2538856267929077, "learning_rate": 9.66326010614933e-07, "loss": 0.1895, "step": 5941 }, { "epoch": 4.044928522804629, "grad_norm": 1.4808367490768433, "learning_rate": 9.649960411559085e-07, "loss": 0.46, "step": 5942 }, { "epoch": 4.045609257998638, "grad_norm": 1.5896501541137695, "learning_rate": 9.636668897966278e-07, "loss": 0.2234, "step": 5943 }, { "epoch": 4.046289993192648, "grad_norm": 1.2662605047225952, "learning_rate": 9.62338556806578e-07, "loss": 0.3773, "step": 5944 }, { "epoch": 4.046970728386658, "grad_norm": 1.4021157026290894, "learning_rate": 9.610110424550812e-07, "loss": 0.3906, "step": 5945 }, { "epoch": 4.047651463580667, "grad_norm": 1.2777799367904663, "learning_rate": 9.596843470112905e-07, "loss": 0.3485, "step": 5946 }, { "epoch": 4.048332198774677, "grad_norm": 1.3495292663574219, "learning_rate": 9.58358470744193e-07, "loss": 0.2722, "step": 5947 }, { "epoch": 4.049012933968686, "grad_norm": 1.386696457862854, "learning_rate": 9.57033413922613e-07, "loss": 0.2627, "step": 5948 }, { "epoch": 4.0496936691626955, "grad_norm": 1.5161550045013428, "learning_rate": 9.557091768152066e-07, "loss": 0.3537, "step": 5949 }, { "epoch": 4.050374404356705, "grad_norm": 1.42876398563385, "learning_rate": 9.54385759690461e-07, "loss": 0.313, "step": 5950 }, { "epoch": 4.051055139550715, "grad_norm": 1.2254011631011963, "learning_rate": 9.530631628167037e-07, "loss": 0.3143, "step": 5951 }, { "epoch": 4.0517358747447245, "grad_norm": 1.3017922639846802, "learning_rate": 9.517413864620906e-07, "loss": 0.28, "step": 5952 }, { "epoch": 4.052416609938734, "grad_norm": 1.445936918258667, "learning_rate": 9.504204308946108e-07, "loss": 0.5797, "step": 5953 }, { "epoch": 4.053097345132743, "grad_norm": 1.3485608100891113, "learning_rate": 9.491002963820911e-07, "loss": 0.3542, "step": 5954 }, { "epoch": 4.053778080326753, "grad_norm": 1.4658209085464478, "learning_rate": 9.477809831921903e-07, "loss": 0.2803, "step": 5955 }, { "epoch": 4.054458815520762, "grad_norm": 1.3785545825958252, "learning_rate": 9.464624915923987e-07, "loss": 0.3399, "step": 5956 }, { "epoch": 4.055139550714772, "grad_norm": 1.4062657356262207, "learning_rate": 9.451448218500403e-07, "loss": 0.3878, "step": 5957 }, { "epoch": 4.055820285908782, "grad_norm": 1.3587098121643066, "learning_rate": 9.438279742322759e-07, "loss": 0.2624, "step": 5958 }, { "epoch": 4.056501021102791, "grad_norm": 1.2609069347381592, "learning_rate": 9.425119490060963e-07, "loss": 0.2883, "step": 5959 }, { "epoch": 4.0571817562968, "grad_norm": 1.3972071409225464, "learning_rate": 9.411967464383254e-07, "loss": 0.1846, "step": 5960 }, { "epoch": 4.05786249149081, "grad_norm": 1.4387034177780151, "learning_rate": 9.398823667956219e-07, "loss": 0.2815, "step": 5961 }, { "epoch": 4.05854322668482, "grad_norm": 1.4199265241622925, "learning_rate": 9.385688103444784e-07, "loss": 0.4778, "step": 5962 }, { "epoch": 4.059223961878829, "grad_norm": 1.3575632572174072, "learning_rate": 9.372560773512191e-07, "loss": 0.2755, "step": 5963 }, { "epoch": 4.059904697072839, "grad_norm": 1.2826465368270874, "learning_rate": 9.359441680819997e-07, "loss": 0.5172, "step": 5964 }, { "epoch": 4.060585432266848, "grad_norm": 1.2200206518173218, "learning_rate": 9.346330828028127e-07, "loss": 0.2653, "step": 5965 }, { "epoch": 4.0612661674608574, "grad_norm": 1.486478328704834, "learning_rate": 9.33322821779481e-07, "loss": 0.4307, "step": 5966 }, { "epoch": 4.061946902654867, "grad_norm": 1.3596023321151733, "learning_rate": 9.320133852776602e-07, "loss": 0.4031, "step": 5967 }, { "epoch": 4.062627637848877, "grad_norm": 1.3842675685882568, "learning_rate": 9.307047735628389e-07, "loss": 0.3105, "step": 5968 }, { "epoch": 4.0633083730428865, "grad_norm": 1.5454100370407104, "learning_rate": 9.293969869003411e-07, "loss": 0.4051, "step": 5969 }, { "epoch": 4.063989108236896, "grad_norm": 1.4163250923156738, "learning_rate": 9.280900255553193e-07, "loss": 0.2976, "step": 5970 }, { "epoch": 4.064669843430905, "grad_norm": 1.5526926517486572, "learning_rate": 9.267838897927611e-07, "loss": 0.3397, "step": 5971 }, { "epoch": 4.065350578624915, "grad_norm": 1.4283196926116943, "learning_rate": 9.254785798774884e-07, "loss": 0.364, "step": 5972 }, { "epoch": 4.066031313818924, "grad_norm": 1.3361594676971436, "learning_rate": 9.241740960741525e-07, "loss": 0.4054, "step": 5973 }, { "epoch": 4.066712049012934, "grad_norm": 1.3448926210403442, "learning_rate": 9.228704386472381e-07, "loss": 0.3144, "step": 5974 }, { "epoch": 4.067392784206944, "grad_norm": 1.394106149673462, "learning_rate": 9.215676078610608e-07, "loss": 0.1654, "step": 5975 }, { "epoch": 4.068073519400953, "grad_norm": 1.379352331161499, "learning_rate": 9.202656039797742e-07, "loss": 0.3641, "step": 5976 }, { "epoch": 4.068754254594962, "grad_norm": 1.4732847213745117, "learning_rate": 9.189644272673582e-07, "loss": 0.3322, "step": 5977 }, { "epoch": 4.069434989788972, "grad_norm": 1.280667781829834, "learning_rate": 9.176640779876262e-07, "loss": 0.232, "step": 5978 }, { "epoch": 4.070115724982982, "grad_norm": 1.465011715888977, "learning_rate": 9.163645564042268e-07, "loss": 0.4562, "step": 5979 }, { "epoch": 4.070796460176991, "grad_norm": 1.3086817264556885, "learning_rate": 9.150658627806386e-07, "loss": 0.355, "step": 5980 }, { "epoch": 4.071477195371001, "grad_norm": 1.4167956113815308, "learning_rate": 9.137679973801727e-07, "loss": 0.3145, "step": 5981 }, { "epoch": 4.072157930565011, "grad_norm": 1.4041305780410767, "learning_rate": 9.12470960465971e-07, "loss": 0.2488, "step": 5982 }, { "epoch": 4.072838665759019, "grad_norm": 1.3921384811401367, "learning_rate": 9.1117475230101e-07, "loss": 0.3857, "step": 5983 }, { "epoch": 4.073519400953029, "grad_norm": 1.5978151559829712, "learning_rate": 9.098793731480965e-07, "loss": 0.3683, "step": 5984 }, { "epoch": 4.074200136147039, "grad_norm": 1.6477681398391724, "learning_rate": 9.08584823269868e-07, "loss": 0.3446, "step": 5985 }, { "epoch": 4.0748808713410485, "grad_norm": 1.3825746774673462, "learning_rate": 9.072911029287972e-07, "loss": 0.4353, "step": 5986 }, { "epoch": 4.075561606535058, "grad_norm": 1.2673472166061401, "learning_rate": 9.059982123871852e-07, "loss": 0.3201, "step": 5987 }, { "epoch": 4.076242341729067, "grad_norm": 1.2321897745132446, "learning_rate": 9.047061519071681e-07, "loss": 0.3268, "step": 5988 }, { "epoch": 4.076923076923077, "grad_norm": 1.4527815580368042, "learning_rate": 9.034149217507104e-07, "loss": 0.3723, "step": 5989 }, { "epoch": 4.077603812117086, "grad_norm": 1.4477421045303345, "learning_rate": 9.021245221796121e-07, "loss": 0.3628, "step": 5990 }, { "epoch": 4.078284547311096, "grad_norm": 1.3194109201431274, "learning_rate": 9.008349534555005e-07, "loss": 0.4714, "step": 5991 }, { "epoch": 4.078965282505106, "grad_norm": 1.3449153900146484, "learning_rate": 8.99546215839836e-07, "loss": 0.4874, "step": 5992 }, { "epoch": 4.079646017699115, "grad_norm": 1.383469820022583, "learning_rate": 8.98258309593914e-07, "loss": 0.4247, "step": 5993 }, { "epoch": 4.080326752893124, "grad_norm": 1.3030840158462524, "learning_rate": 8.969712349788561e-07, "loss": 0.2965, "step": 5994 }, { "epoch": 4.081007488087134, "grad_norm": 1.419052243232727, "learning_rate": 8.956849922556166e-07, "loss": 0.3671, "step": 5995 }, { "epoch": 4.081688223281144, "grad_norm": 1.2938064336776733, "learning_rate": 8.943995816849837e-07, "loss": 0.4031, "step": 5996 }, { "epoch": 4.082368958475153, "grad_norm": 1.3506766557693481, "learning_rate": 8.931150035275759e-07, "loss": 0.4199, "step": 5997 }, { "epoch": 4.083049693669163, "grad_norm": 1.3948132991790771, "learning_rate": 8.918312580438415e-07, "loss": 0.4897, "step": 5998 }, { "epoch": 4.083730428863173, "grad_norm": 1.4386589527130127, "learning_rate": 8.905483454940594e-07, "loss": 0.3551, "step": 5999 }, { "epoch": 4.084411164057181, "grad_norm": 1.5488640069961548, "learning_rate": 8.892662661383433e-07, "loss": 0.2602, "step": 6000 }, { "epoch": 4.085091899251191, "grad_norm": 1.3251605033874512, "learning_rate": 8.879850202366347e-07, "loss": 0.2966, "step": 6001 }, { "epoch": 4.085772634445201, "grad_norm": 1.390177845954895, "learning_rate": 8.867046080487058e-07, "loss": 0.4014, "step": 6002 }, { "epoch": 4.0864533696392105, "grad_norm": 1.2524737119674683, "learning_rate": 8.854250298341632e-07, "loss": 0.4451, "step": 6003 }, { "epoch": 4.08713410483322, "grad_norm": 1.4466789960861206, "learning_rate": 8.841462858524397e-07, "loss": 0.3524, "step": 6004 }, { "epoch": 4.08781484002723, "grad_norm": 1.4131733179092407, "learning_rate": 8.828683763628049e-07, "loss": 0.3799, "step": 6005 }, { "epoch": 4.088495575221239, "grad_norm": 1.432506799697876, "learning_rate": 8.815913016243515e-07, "loss": 0.3192, "step": 6006 }, { "epoch": 4.089176310415248, "grad_norm": 1.2214068174362183, "learning_rate": 8.803150618960115e-07, "loss": 0.3518, "step": 6007 }, { "epoch": 4.089857045609258, "grad_norm": 1.5073292255401611, "learning_rate": 8.790396574365418e-07, "loss": 0.2413, "step": 6008 }, { "epoch": 4.090537780803268, "grad_norm": 1.472546100616455, "learning_rate": 8.777650885045291e-07, "loss": 0.3008, "step": 6009 }, { "epoch": 4.091218515997277, "grad_norm": 1.3203413486480713, "learning_rate": 8.76491355358397e-07, "loss": 0.356, "step": 6010 }, { "epoch": 4.091899251191286, "grad_norm": 1.4747741222381592, "learning_rate": 8.752184582563938e-07, "loss": 0.3009, "step": 6011 }, { "epoch": 4.092579986385296, "grad_norm": 1.3038607835769653, "learning_rate": 8.739463974565992e-07, "loss": 0.1864, "step": 6012 }, { "epoch": 4.0932607215793055, "grad_norm": 1.476629614830017, "learning_rate": 8.726751732169253e-07, "loss": 0.3435, "step": 6013 }, { "epoch": 4.093941456773315, "grad_norm": 1.3822436332702637, "learning_rate": 8.714047857951153e-07, "loss": 0.3894, "step": 6014 }, { "epoch": 4.094622191967325, "grad_norm": 1.4672837257385254, "learning_rate": 8.701352354487392e-07, "loss": 0.4083, "step": 6015 }, { "epoch": 4.095302927161335, "grad_norm": 1.4400910139083862, "learning_rate": 8.688665224351989e-07, "loss": 0.4809, "step": 6016 }, { "epoch": 4.095983662355343, "grad_norm": 1.3200210332870483, "learning_rate": 8.675986470117287e-07, "loss": 0.3769, "step": 6017 }, { "epoch": 4.096664397549353, "grad_norm": 1.3628360033035278, "learning_rate": 8.663316094353901e-07, "loss": 0.4478, "step": 6018 }, { "epoch": 4.097345132743363, "grad_norm": 1.5601142644882202, "learning_rate": 8.650654099630745e-07, "loss": 0.2844, "step": 6019 }, { "epoch": 4.0980258679373724, "grad_norm": 1.32341468334198, "learning_rate": 8.638000488515075e-07, "loss": 0.3577, "step": 6020 }, { "epoch": 4.098706603131382, "grad_norm": 1.5370017290115356, "learning_rate": 8.625355263572399e-07, "loss": 0.3062, "step": 6021 }, { "epoch": 4.099387338325392, "grad_norm": 1.752862572669983, "learning_rate": 8.61271842736654e-07, "loss": 0.286, "step": 6022 }, { "epoch": 4.100068073519401, "grad_norm": 1.4273558855056763, "learning_rate": 8.600089982459636e-07, "loss": 0.3201, "step": 6023 }, { "epoch": 4.10074880871341, "grad_norm": 1.3582566976547241, "learning_rate": 8.587469931412118e-07, "loss": 0.214, "step": 6024 }, { "epoch": 4.10142954390742, "grad_norm": 1.4500147104263306, "learning_rate": 8.57485827678271e-07, "loss": 0.2742, "step": 6025 }, { "epoch": 4.10211027910143, "grad_norm": 1.373003602027893, "learning_rate": 8.562255021128407e-07, "loss": 0.2842, "step": 6026 }, { "epoch": 4.102791014295439, "grad_norm": 1.4716089963912964, "learning_rate": 8.549660167004565e-07, "loss": 0.3031, "step": 6027 }, { "epoch": 4.103471749489449, "grad_norm": 1.3871361017227173, "learning_rate": 8.537073716964777e-07, "loss": 0.3448, "step": 6028 }, { "epoch": 4.104152484683458, "grad_norm": 1.4278203248977661, "learning_rate": 8.524495673560946e-07, "loss": 0.2454, "step": 6029 }, { "epoch": 4.1048332198774675, "grad_norm": 1.2816251516342163, "learning_rate": 8.511926039343294e-07, "loss": 0.3376, "step": 6030 }, { "epoch": 4.105513955071477, "grad_norm": 1.4398853778839111, "learning_rate": 8.499364816860328e-07, "loss": 0.4125, "step": 6031 }, { "epoch": 4.106194690265487, "grad_norm": 1.565649151802063, "learning_rate": 8.48681200865884e-07, "loss": 0.229, "step": 6032 }, { "epoch": 4.106875425459497, "grad_norm": 1.3432528972625732, "learning_rate": 8.474267617283899e-07, "loss": 0.2109, "step": 6033 }, { "epoch": 4.107556160653505, "grad_norm": 1.4315576553344727, "learning_rate": 8.46173164527892e-07, "loss": 0.2887, "step": 6034 }, { "epoch": 4.108236895847515, "grad_norm": 1.3450826406478882, "learning_rate": 8.449204095185565e-07, "loss": 0.2639, "step": 6035 }, { "epoch": 4.108917631041525, "grad_norm": 1.3574696779251099, "learning_rate": 8.436684969543801e-07, "loss": 0.2963, "step": 6036 }, { "epoch": 4.109598366235534, "grad_norm": 1.3705756664276123, "learning_rate": 8.424174270891883e-07, "loss": 0.3584, "step": 6037 }, { "epoch": 4.110279101429544, "grad_norm": 1.5590729713439941, "learning_rate": 8.411672001766386e-07, "loss": 0.1937, "step": 6038 }, { "epoch": 4.110959836623554, "grad_norm": 1.4031428098678589, "learning_rate": 8.399178164702121e-07, "loss": 0.44, "step": 6039 }, { "epoch": 4.111640571817563, "grad_norm": 1.2431751489639282, "learning_rate": 8.386692762232246e-07, "loss": 0.2728, "step": 6040 }, { "epoch": 4.112321307011572, "grad_norm": 1.3123579025268555, "learning_rate": 8.374215796888186e-07, "loss": 0.3032, "step": 6041 }, { "epoch": 4.113002042205582, "grad_norm": 1.365149974822998, "learning_rate": 8.361747271199649e-07, "loss": 0.3776, "step": 6042 }, { "epoch": 4.113682777399592, "grad_norm": 1.4797275066375732, "learning_rate": 8.349287187694638e-07, "loss": 0.177, "step": 6043 }, { "epoch": 4.114363512593601, "grad_norm": 1.4489362239837646, "learning_rate": 8.336835548899425e-07, "loss": 0.3012, "step": 6044 }, { "epoch": 4.115044247787611, "grad_norm": 1.3801405429840088, "learning_rate": 8.324392357338618e-07, "loss": 0.42, "step": 6045 }, { "epoch": 4.11572498298162, "grad_norm": 1.3664166927337646, "learning_rate": 8.311957615535066e-07, "loss": 0.3683, "step": 6046 }, { "epoch": 4.1164057181756295, "grad_norm": 1.3250460624694824, "learning_rate": 8.299531326009908e-07, "loss": 0.31, "step": 6047 }, { "epoch": 4.117086453369639, "grad_norm": 1.3715722560882568, "learning_rate": 8.2871134912826e-07, "loss": 0.3188, "step": 6048 }, { "epoch": 4.117767188563649, "grad_norm": 1.321077823638916, "learning_rate": 8.274704113870874e-07, "loss": 0.3022, "step": 6049 }, { "epoch": 4.118447923757659, "grad_norm": 1.4618215560913086, "learning_rate": 8.262303196290733e-07, "loss": 0.2273, "step": 6050 }, { "epoch": 4.119128658951668, "grad_norm": 1.3735265731811523, "learning_rate": 8.24991074105645e-07, "loss": 0.2487, "step": 6051 }, { "epoch": 4.119809394145677, "grad_norm": 1.4928910732269287, "learning_rate": 8.237526750680636e-07, "loss": 0.2548, "step": 6052 }, { "epoch": 4.120490129339687, "grad_norm": 1.3640966415405273, "learning_rate": 8.225151227674138e-07, "loss": 0.2453, "step": 6053 }, { "epoch": 4.121170864533696, "grad_norm": 1.4242072105407715, "learning_rate": 8.21278417454609e-07, "loss": 0.4329, "step": 6054 }, { "epoch": 4.121851599727706, "grad_norm": 1.4690701961517334, "learning_rate": 8.200425593803946e-07, "loss": 0.4855, "step": 6055 }, { "epoch": 4.122532334921716, "grad_norm": 1.388529658317566, "learning_rate": 8.18807548795339e-07, "loss": 0.3285, "step": 6056 }, { "epoch": 4.123213070115725, "grad_norm": 1.2920175790786743, "learning_rate": 8.175733859498436e-07, "loss": 0.4822, "step": 6057 }, { "epoch": 4.123893805309734, "grad_norm": 1.484311819076538, "learning_rate": 8.163400710941339e-07, "loss": 0.4079, "step": 6058 }, { "epoch": 4.124574540503744, "grad_norm": 1.4061174392700195, "learning_rate": 8.151076044782669e-07, "loss": 0.272, "step": 6059 }, { "epoch": 4.125255275697754, "grad_norm": 1.5090818405151367, "learning_rate": 8.138759863521262e-07, "loss": 0.2441, "step": 6060 }, { "epoch": 4.125936010891763, "grad_norm": 1.2872337102890015, "learning_rate": 8.126452169654203e-07, "loss": 0.3511, "step": 6061 }, { "epoch": 4.126616746085773, "grad_norm": 1.4301090240478516, "learning_rate": 8.114152965676925e-07, "loss": 0.4077, "step": 6062 }, { "epoch": 4.127297481279782, "grad_norm": 1.3759729862213135, "learning_rate": 8.101862254083076e-07, "loss": 0.4466, "step": 6063 }, { "epoch": 4.1279782164737915, "grad_norm": 1.4230180978775024, "learning_rate": 8.089580037364602e-07, "loss": 0.2588, "step": 6064 }, { "epoch": 4.128658951667801, "grad_norm": 1.3652111291885376, "learning_rate": 8.077306318011735e-07, "loss": 0.2824, "step": 6065 }, { "epoch": 4.129339686861811, "grad_norm": 1.3873450756072998, "learning_rate": 8.065041098512999e-07, "loss": 0.2914, "step": 6066 }, { "epoch": 4.1300204220558205, "grad_norm": 1.3979105949401855, "learning_rate": 8.052784381355161e-07, "loss": 0.2112, "step": 6067 }, { "epoch": 4.13070115724983, "grad_norm": 1.4098422527313232, "learning_rate": 8.04053616902326e-07, "loss": 0.2574, "step": 6068 }, { "epoch": 4.131381892443839, "grad_norm": 1.3990874290466309, "learning_rate": 8.028296464000668e-07, "loss": 0.3548, "step": 6069 }, { "epoch": 4.132062627637849, "grad_norm": 1.376906394958496, "learning_rate": 8.016065268768969e-07, "loss": 0.3812, "step": 6070 }, { "epoch": 4.132743362831858, "grad_norm": 1.3412591218948364, "learning_rate": 8.003842585808036e-07, "loss": 0.3514, "step": 6071 }, { "epoch": 4.133424098025868, "grad_norm": 1.389847755432129, "learning_rate": 7.991628417596047e-07, "loss": 0.3585, "step": 6072 }, { "epoch": 4.134104833219878, "grad_norm": 1.347591519355774, "learning_rate": 7.979422766609424e-07, "loss": 0.2226, "step": 6073 }, { "epoch": 4.1347855684138874, "grad_norm": 1.3894007205963135, "learning_rate": 7.967225635322878e-07, "loss": 0.3129, "step": 6074 }, { "epoch": 4.135466303607896, "grad_norm": 1.3895834684371948, "learning_rate": 7.955037026209367e-07, "loss": 0.3122, "step": 6075 }, { "epoch": 4.136147038801906, "grad_norm": 1.3524845838546753, "learning_rate": 7.942856941740168e-07, "loss": 0.3433, "step": 6076 }, { "epoch": 4.136827773995916, "grad_norm": 1.568385362625122, "learning_rate": 7.930685384384784e-07, "loss": 0.3581, "step": 6077 }, { "epoch": 4.137508509189925, "grad_norm": 1.3755236864089966, "learning_rate": 7.918522356610997e-07, "loss": 0.4218, "step": 6078 }, { "epoch": 4.138189244383935, "grad_norm": 1.537978172302246, "learning_rate": 7.906367860884889e-07, "loss": 0.3512, "step": 6079 }, { "epoch": 4.138869979577944, "grad_norm": 1.2973065376281738, "learning_rate": 7.894221899670784e-07, "loss": 0.432, "step": 6080 }, { "epoch": 4.1395507147719535, "grad_norm": 1.4602463245391846, "learning_rate": 7.882084475431273e-07, "loss": 0.375, "step": 6081 }, { "epoch": 4.140231449965963, "grad_norm": 1.3862336874008179, "learning_rate": 7.869955590627238e-07, "loss": 0.4246, "step": 6082 }, { "epoch": 4.140912185159973, "grad_norm": 1.4128601551055908, "learning_rate": 7.857835247717827e-07, "loss": 0.2752, "step": 6083 }, { "epoch": 4.1415929203539825, "grad_norm": 1.3645243644714355, "learning_rate": 7.845723449160436e-07, "loss": 0.3478, "step": 6084 }, { "epoch": 4.142273655547992, "grad_norm": 1.4410146474838257, "learning_rate": 7.833620197410735e-07, "loss": 0.2585, "step": 6085 }, { "epoch": 4.142954390742001, "grad_norm": 1.2995558977127075, "learning_rate": 7.821525494922683e-07, "loss": 0.2208, "step": 6086 }, { "epoch": 4.143635125936011, "grad_norm": 1.4044138193130493, "learning_rate": 7.809439344148479e-07, "loss": 0.3193, "step": 6087 }, { "epoch": 4.14431586113002, "grad_norm": 1.424176573753357, "learning_rate": 7.797361747538584e-07, "loss": 0.2548, "step": 6088 }, { "epoch": 4.14499659632403, "grad_norm": 1.2460455894470215, "learning_rate": 7.78529270754177e-07, "loss": 0.3829, "step": 6089 }, { "epoch": 4.14567733151804, "grad_norm": 1.444846749305725, "learning_rate": 7.773232226605027e-07, "loss": 0.2177, "step": 6090 }, { "epoch": 4.146358066712049, "grad_norm": 1.387572169303894, "learning_rate": 7.761180307173605e-07, "loss": 0.2892, "step": 6091 }, { "epoch": 4.147038801906058, "grad_norm": 1.550377607345581, "learning_rate": 7.749136951691066e-07, "loss": 0.3178, "step": 6092 }, { "epoch": 4.147719537100068, "grad_norm": 1.482873797416687, "learning_rate": 7.737102162599208e-07, "loss": 0.442, "step": 6093 }, { "epoch": 4.148400272294078, "grad_norm": 1.3159806728363037, "learning_rate": 7.72507594233809e-07, "loss": 0.1443, "step": 6094 }, { "epoch": 4.149081007488087, "grad_norm": 1.4604990482330322, "learning_rate": 7.71305829334602e-07, "loss": 0.2609, "step": 6095 }, { "epoch": 4.149761742682097, "grad_norm": 1.4955201148986816, "learning_rate": 7.701049218059603e-07, "loss": 0.3308, "step": 6096 }, { "epoch": 4.150442477876107, "grad_norm": 1.220374584197998, "learning_rate": 7.689048718913683e-07, "loss": 0.2506, "step": 6097 }, { "epoch": 4.1511232130701154, "grad_norm": 1.5167020559310913, "learning_rate": 7.677056798341353e-07, "loss": 0.4884, "step": 6098 }, { "epoch": 4.151803948264125, "grad_norm": 1.551916480064392, "learning_rate": 7.665073458774003e-07, "loss": 0.329, "step": 6099 }, { "epoch": 4.152484683458135, "grad_norm": 1.5035563707351685, "learning_rate": 7.65309870264126e-07, "loss": 0.2613, "step": 6100 }, { "epoch": 4.1531654186521445, "grad_norm": 1.623771071434021, "learning_rate": 7.64113253237101e-07, "loss": 0.543, "step": 6101 }, { "epoch": 4.153846153846154, "grad_norm": 1.383129358291626, "learning_rate": 7.629174950389396e-07, "loss": 0.3778, "step": 6102 }, { "epoch": 4.154526889040163, "grad_norm": 1.5592767000198364, "learning_rate": 7.617225959120839e-07, "loss": 0.482, "step": 6103 }, { "epoch": 4.155207624234173, "grad_norm": 1.4151365756988525, "learning_rate": 7.605285560987996e-07, "loss": 0.3051, "step": 6104 }, { "epoch": 4.155888359428182, "grad_norm": 1.4419704675674438, "learning_rate": 7.593353758411787e-07, "loss": 0.3307, "step": 6105 }, { "epoch": 4.156569094622192, "grad_norm": 1.3881862163543701, "learning_rate": 7.581430553811403e-07, "loss": 0.4411, "step": 6106 }, { "epoch": 4.157249829816202, "grad_norm": 1.2858009338378906, "learning_rate": 7.569515949604284e-07, "loss": 0.5799, "step": 6107 }, { "epoch": 4.157930565010211, "grad_norm": 1.4024368524551392, "learning_rate": 7.557609948206102e-07, "loss": 0.3988, "step": 6108 }, { "epoch": 4.15861130020422, "grad_norm": 1.4836325645446777, "learning_rate": 7.54571255203082e-07, "loss": 0.3639, "step": 6109 }, { "epoch": 4.15929203539823, "grad_norm": 1.8825159072875977, "learning_rate": 7.533823763490655e-07, "loss": 0.4166, "step": 6110 }, { "epoch": 4.15997277059224, "grad_norm": 1.4276924133300781, "learning_rate": 7.521943584996061e-07, "loss": 0.471, "step": 6111 }, { "epoch": 4.160653505786249, "grad_norm": 1.361860990524292, "learning_rate": 7.510072018955745e-07, "loss": 0.2537, "step": 6112 }, { "epoch": 4.161334240980259, "grad_norm": 1.2414350509643555, "learning_rate": 7.498209067776668e-07, "loss": 0.2827, "step": 6113 }, { "epoch": 4.162014976174269, "grad_norm": 1.6566578149795532, "learning_rate": 7.486354733864071e-07, "loss": 0.2546, "step": 6114 }, { "epoch": 4.162695711368277, "grad_norm": 1.4981130361557007, "learning_rate": 7.474509019621418e-07, "loss": 0.3025, "step": 6115 }, { "epoch": 4.163376446562287, "grad_norm": 1.3354578018188477, "learning_rate": 7.462671927450421e-07, "loss": 0.3194, "step": 6116 }, { "epoch": 4.164057181756297, "grad_norm": 1.4464325904846191, "learning_rate": 7.450843459751079e-07, "loss": 0.4988, "step": 6117 }, { "epoch": 4.1647379169503065, "grad_norm": 1.4135589599609375, "learning_rate": 7.43902361892162e-07, "loss": 0.2634, "step": 6118 }, { "epoch": 4.165418652144316, "grad_norm": 1.3721494674682617, "learning_rate": 7.427212407358525e-07, "loss": 0.3544, "step": 6119 }, { "epoch": 4.166099387338325, "grad_norm": 1.3881289958953857, "learning_rate": 7.415409827456504e-07, "loss": 0.2202, "step": 6120 }, { "epoch": 4.166780122532335, "grad_norm": 1.4191951751708984, "learning_rate": 7.403615881608572e-07, "loss": 0.2995, "step": 6121 }, { "epoch": 4.167460857726344, "grad_norm": 1.4346003532409668, "learning_rate": 7.391830572205939e-07, "loss": 0.4211, "step": 6122 }, { "epoch": 4.168141592920354, "grad_norm": 1.3198045492172241, "learning_rate": 7.380053901638073e-07, "loss": 0.2554, "step": 6123 }, { "epoch": 4.168822328114364, "grad_norm": 1.4729223251342773, "learning_rate": 7.36828587229273e-07, "loss": 0.299, "step": 6124 }, { "epoch": 4.169503063308373, "grad_norm": 1.4662081003189087, "learning_rate": 7.356526486555854e-07, "loss": 0.4992, "step": 6125 }, { "epoch": 4.170183798502382, "grad_norm": 1.419761300086975, "learning_rate": 7.344775746811705e-07, "loss": 0.2921, "step": 6126 }, { "epoch": 4.170864533696392, "grad_norm": 1.3278006315231323, "learning_rate": 7.333033655442718e-07, "loss": 0.3846, "step": 6127 }, { "epoch": 4.171545268890402, "grad_norm": 1.2364903688430786, "learning_rate": 7.321300214829641e-07, "loss": 0.1413, "step": 6128 }, { "epoch": 4.172226004084411, "grad_norm": 1.5282949209213257, "learning_rate": 7.309575427351417e-07, "loss": 0.5394, "step": 6129 }, { "epoch": 4.172906739278421, "grad_norm": 1.4696367979049683, "learning_rate": 7.297859295385251e-07, "loss": 0.2877, "step": 6130 }, { "epoch": 4.173587474472431, "grad_norm": 1.469099998474121, "learning_rate": 7.286151821306614e-07, "loss": 0.3375, "step": 6131 }, { "epoch": 4.174268209666439, "grad_norm": 1.341359257698059, "learning_rate": 7.274453007489197e-07, "loss": 0.3498, "step": 6132 }, { "epoch": 4.174948944860449, "grad_norm": 1.4202717542648315, "learning_rate": 7.262762856304928e-07, "loss": 0.3915, "step": 6133 }, { "epoch": 4.175629680054459, "grad_norm": 1.4103962182998657, "learning_rate": 7.251081370124003e-07, "loss": 0.2587, "step": 6134 }, { "epoch": 4.1763104152484685, "grad_norm": 1.3442398309707642, "learning_rate": 7.23940855131487e-07, "loss": 0.2698, "step": 6135 }, { "epoch": 4.176991150442478, "grad_norm": 1.4160369634628296, "learning_rate": 7.22774440224418e-07, "loss": 0.2674, "step": 6136 }, { "epoch": 4.177671885636488, "grad_norm": 1.418613314628601, "learning_rate": 7.216088925276843e-07, "loss": 0.3809, "step": 6137 }, { "epoch": 4.178352620830497, "grad_norm": 1.4351340532302856, "learning_rate": 7.204442122776029e-07, "loss": 0.2255, "step": 6138 }, { "epoch": 4.179033356024506, "grad_norm": 1.3373188972473145, "learning_rate": 7.192803997103138e-07, "loss": 0.3898, "step": 6139 }, { "epoch": 4.179714091218516, "grad_norm": 1.6873681545257568, "learning_rate": 7.181174550617781e-07, "loss": 0.3206, "step": 6140 }, { "epoch": 4.180394826412526, "grad_norm": 1.3695518970489502, "learning_rate": 7.169553785677863e-07, "loss": 0.4517, "step": 6141 }, { "epoch": 4.181075561606535, "grad_norm": 1.4354485273361206, "learning_rate": 7.157941704639481e-07, "loss": 0.3546, "step": 6142 }, { "epoch": 4.181756296800544, "grad_norm": 1.4962944984436035, "learning_rate": 7.146338309857021e-07, "loss": 0.348, "step": 6143 }, { "epoch": 4.182437031994554, "grad_norm": 1.4083396196365356, "learning_rate": 7.134743603683047e-07, "loss": 0.3059, "step": 6144 }, { "epoch": 4.1831177671885635, "grad_norm": 1.3732942342758179, "learning_rate": 7.123157588468421e-07, "loss": 0.5335, "step": 6145 }, { "epoch": 4.183798502382573, "grad_norm": 1.3282780647277832, "learning_rate": 7.111580266562201e-07, "loss": 0.2364, "step": 6146 }, { "epoch": 4.184479237576583, "grad_norm": 1.3704298734664917, "learning_rate": 7.100011640311688e-07, "loss": 0.3297, "step": 6147 }, { "epoch": 4.185159972770593, "grad_norm": 1.3211437463760376, "learning_rate": 7.08845171206245e-07, "loss": 0.353, "step": 6148 }, { "epoch": 4.185840707964601, "grad_norm": 1.4383920431137085, "learning_rate": 7.076900484158261e-07, "loss": 0.3343, "step": 6149 }, { "epoch": 4.186521443158611, "grad_norm": 1.4283665418624878, "learning_rate": 7.065357958941122e-07, "loss": 0.3073, "step": 6150 }, { "epoch": 4.187202178352621, "grad_norm": 1.4387941360473633, "learning_rate": 7.053824138751308e-07, "loss": 0.3955, "step": 6151 }, { "epoch": 4.1878829135466304, "grad_norm": 1.4498258829116821, "learning_rate": 7.042299025927312e-07, "loss": 0.3545, "step": 6152 }, { "epoch": 4.18856364874064, "grad_norm": 1.4364374876022339, "learning_rate": 7.030782622805854e-07, "loss": 0.3444, "step": 6153 }, { "epoch": 4.18924438393465, "grad_norm": 1.4760137796401978, "learning_rate": 7.019274931721881e-07, "loss": 0.3006, "step": 6154 }, { "epoch": 4.189925119128659, "grad_norm": 1.3231022357940674, "learning_rate": 7.0077759550086e-07, "loss": 0.2403, "step": 6155 }, { "epoch": 4.190605854322668, "grad_norm": 1.4904533624649048, "learning_rate": 6.996285694997429e-07, "loss": 0.5201, "step": 6156 }, { "epoch": 4.191286589516678, "grad_norm": 1.5382471084594727, "learning_rate": 6.984804154018021e-07, "loss": 0.3187, "step": 6157 }, { "epoch": 4.191967324710688, "grad_norm": 1.4577845335006714, "learning_rate": 6.973331334398276e-07, "loss": 0.5298, "step": 6158 }, { "epoch": 4.192648059904697, "grad_norm": 1.422808289527893, "learning_rate": 6.961867238464315e-07, "loss": 0.3188, "step": 6159 }, { "epoch": 4.193328795098706, "grad_norm": 1.3725736141204834, "learning_rate": 6.950411868540475e-07, "loss": 0.4181, "step": 6160 }, { "epoch": 4.194009530292716, "grad_norm": 1.291570782661438, "learning_rate": 6.938965226949357e-07, "loss": 0.3346, "step": 6161 }, { "epoch": 4.1946902654867255, "grad_norm": 1.5566346645355225, "learning_rate": 6.927527316011778e-07, "loss": 0.4214, "step": 6162 }, { "epoch": 4.195371000680735, "grad_norm": 1.3746238946914673, "learning_rate": 6.916098138046773e-07, "loss": 0.317, "step": 6163 }, { "epoch": 4.196051735874745, "grad_norm": 1.2711868286132812, "learning_rate": 6.904677695371608e-07, "loss": 0.3526, "step": 6164 }, { "epoch": 4.196732471068755, "grad_norm": 1.498693823814392, "learning_rate": 6.893265990301806e-07, "loss": 0.3006, "step": 6165 }, { "epoch": 4.197413206262763, "grad_norm": 1.4937704801559448, "learning_rate": 6.881863025151087e-07, "loss": 0.2805, "step": 6166 }, { "epoch": 4.198093941456773, "grad_norm": 1.3984625339508057, "learning_rate": 6.8704688022314e-07, "loss": 0.3235, "step": 6167 }, { "epoch": 4.198774676650783, "grad_norm": 1.5644110441207886, "learning_rate": 6.859083323852933e-07, "loss": 0.383, "step": 6168 }, { "epoch": 4.199455411844792, "grad_norm": 1.5025023221969604, "learning_rate": 6.847706592324127e-07, "loss": 0.3558, "step": 6169 }, { "epoch": 4.200136147038802, "grad_norm": 1.5074756145477295, "learning_rate": 6.836338609951599e-07, "loss": 0.2737, "step": 6170 }, { "epoch": 4.200816882232812, "grad_norm": 1.4901105165481567, "learning_rate": 6.824979379040203e-07, "loss": 0.3562, "step": 6171 }, { "epoch": 4.201497617426821, "grad_norm": 1.596846580505371, "learning_rate": 6.81362890189306e-07, "loss": 0.3801, "step": 6172 }, { "epoch": 4.20217835262083, "grad_norm": 1.7856007814407349, "learning_rate": 6.802287180811468e-07, "loss": 0.3024, "step": 6173 }, { "epoch": 4.20285908781484, "grad_norm": 1.3636702299118042, "learning_rate": 6.790954218094964e-07, "loss": 0.2827, "step": 6174 }, { "epoch": 4.20353982300885, "grad_norm": 1.3459007740020752, "learning_rate": 6.779630016041339e-07, "loss": 0.3212, "step": 6175 }, { "epoch": 4.204220558202859, "grad_norm": 1.479971170425415, "learning_rate": 6.768314576946561e-07, "loss": 0.2938, "step": 6176 }, { "epoch": 4.204901293396869, "grad_norm": 1.1903096437454224, "learning_rate": 6.757007903104834e-07, "loss": 0.3807, "step": 6177 }, { "epoch": 4.205582028590878, "grad_norm": 1.3919483423233032, "learning_rate": 6.745709996808609e-07, "loss": 0.4108, "step": 6178 }, { "epoch": 4.2062627637848875, "grad_norm": 1.503211498260498, "learning_rate": 6.734420860348556e-07, "loss": 0.3416, "step": 6179 }, { "epoch": 4.206943498978897, "grad_norm": 1.3795312643051147, "learning_rate": 6.723140496013542e-07, "loss": 0.3395, "step": 6180 }, { "epoch": 4.207624234172907, "grad_norm": 1.4382884502410889, "learning_rate": 6.711868906090657e-07, "loss": 0.5592, "step": 6181 }, { "epoch": 4.208304969366917, "grad_norm": 1.4386488199234009, "learning_rate": 6.700606092865241e-07, "loss": 0.3813, "step": 6182 }, { "epoch": 4.208985704560925, "grad_norm": 1.5913441181182861, "learning_rate": 6.689352058620835e-07, "loss": 0.3268, "step": 6183 }, { "epoch": 4.209666439754935, "grad_norm": 1.4453672170639038, "learning_rate": 6.678106805639195e-07, "loss": 0.432, "step": 6184 }, { "epoch": 4.210347174948945, "grad_norm": 1.517795205116272, "learning_rate": 6.666870336200293e-07, "loss": 0.2957, "step": 6185 }, { "epoch": 4.211027910142954, "grad_norm": 1.338965654373169, "learning_rate": 6.655642652582345e-07, "loss": 0.4042, "step": 6186 }, { "epoch": 4.211708645336964, "grad_norm": 1.3406779766082764, "learning_rate": 6.64442375706178e-07, "loss": 0.4176, "step": 6187 }, { "epoch": 4.212389380530974, "grad_norm": 1.472088098526001, "learning_rate": 6.633213651913229e-07, "loss": 0.3266, "step": 6188 }, { "epoch": 4.213070115724983, "grad_norm": 1.4236383438110352, "learning_rate": 6.622012339409528e-07, "loss": 0.3571, "step": 6189 }, { "epoch": 4.213750850918992, "grad_norm": 1.4112731218338013, "learning_rate": 6.610819821821784e-07, "loss": 0.36, "step": 6190 }, { "epoch": 4.214431586113002, "grad_norm": 1.4292412996292114, "learning_rate": 6.599636101419271e-07, "loss": 0.3207, "step": 6191 }, { "epoch": 4.215112321307012, "grad_norm": 1.4001095294952393, "learning_rate": 6.588461180469485e-07, "loss": 0.3369, "step": 6192 }, { "epoch": 4.215793056501021, "grad_norm": 1.3650749921798706, "learning_rate": 6.57729506123817e-07, "loss": 0.4102, "step": 6193 }, { "epoch": 4.216473791695031, "grad_norm": 1.4479727745056152, "learning_rate": 6.566137745989248e-07, "loss": 0.3786, "step": 6194 }, { "epoch": 4.21715452688904, "grad_norm": 1.3721774816513062, "learning_rate": 6.554989236984883e-07, "loss": 0.3272, "step": 6195 }, { "epoch": 4.2178352620830495, "grad_norm": 1.3676143884658813, "learning_rate": 6.54384953648543e-07, "loss": 0.3145, "step": 6196 }, { "epoch": 4.218515997277059, "grad_norm": 1.2787739038467407, "learning_rate": 6.532718646749492e-07, "loss": 0.3842, "step": 6197 }, { "epoch": 4.219196732471069, "grad_norm": 1.4509369134902954, "learning_rate": 6.521596570033845e-07, "loss": 0.4039, "step": 6198 }, { "epoch": 4.2198774676650785, "grad_norm": 1.397102952003479, "learning_rate": 6.510483308593496e-07, "loss": 0.1906, "step": 6199 }, { "epoch": 4.220558202859088, "grad_norm": 1.3719233274459839, "learning_rate": 6.499378864681683e-07, "loss": 0.3065, "step": 6200 }, { "epoch": 4.221238938053097, "grad_norm": 1.322167158126831, "learning_rate": 6.488283240549831e-07, "loss": 0.3628, "step": 6201 }, { "epoch": 4.221919673247107, "grad_norm": 1.5016381740570068, "learning_rate": 6.477196438447575e-07, "loss": 0.4607, "step": 6202 }, { "epoch": 4.222600408441116, "grad_norm": 1.4281517267227173, "learning_rate": 6.466118460622777e-07, "loss": 0.3164, "step": 6203 }, { "epoch": 4.223281143635126, "grad_norm": 1.4160946607589722, "learning_rate": 6.45504930932152e-07, "loss": 0.3071, "step": 6204 }, { "epoch": 4.223961878829136, "grad_norm": 1.3973915576934814, "learning_rate": 6.443988986788069e-07, "loss": 0.2503, "step": 6205 }, { "epoch": 4.224642614023145, "grad_norm": 1.4349701404571533, "learning_rate": 6.432937495264902e-07, "loss": 0.3765, "step": 6206 }, { "epoch": 4.225323349217154, "grad_norm": 1.563292384147644, "learning_rate": 6.421894836992742e-07, "loss": 0.3297, "step": 6207 }, { "epoch": 4.226004084411164, "grad_norm": 1.44408118724823, "learning_rate": 6.410861014210473e-07, "loss": 0.4132, "step": 6208 }, { "epoch": 4.226684819605174, "grad_norm": 1.285435676574707, "learning_rate": 6.399836029155215e-07, "loss": 0.3998, "step": 6209 }, { "epoch": 4.227365554799183, "grad_norm": 1.3779727220535278, "learning_rate": 6.388819884062297e-07, "loss": 0.3295, "step": 6210 }, { "epoch": 4.228046289993193, "grad_norm": 1.3796415328979492, "learning_rate": 6.377812581165238e-07, "loss": 0.4246, "step": 6211 }, { "epoch": 4.228727025187202, "grad_norm": 1.1725565195083618, "learning_rate": 6.366814122695797e-07, "loss": 0.4071, "step": 6212 }, { "epoch": 4.2294077603812115, "grad_norm": 1.4929914474487305, "learning_rate": 6.35582451088389e-07, "loss": 0.1722, "step": 6213 }, { "epoch": 4.230088495575221, "grad_norm": 1.4165496826171875, "learning_rate": 6.344843747957697e-07, "loss": 0.5063, "step": 6214 }, { "epoch": 4.230769230769231, "grad_norm": 1.4793379306793213, "learning_rate": 6.333871836143562e-07, "loss": 0.3689, "step": 6215 }, { "epoch": 4.2314499659632405, "grad_norm": 1.4308403730392456, "learning_rate": 6.322908777666037e-07, "loss": 0.3059, "step": 6216 }, { "epoch": 4.23213070115725, "grad_norm": 1.451959490776062, "learning_rate": 6.311954574747908e-07, "loss": 0.2872, "step": 6217 }, { "epoch": 4.232811436351259, "grad_norm": 1.6004457473754883, "learning_rate": 6.301009229610144e-07, "loss": 0.2548, "step": 6218 }, { "epoch": 4.233492171545269, "grad_norm": 1.3511486053466797, "learning_rate": 6.290072744471904e-07, "loss": 0.3255, "step": 6219 }, { "epoch": 4.234172906739278, "grad_norm": 1.352521300315857, "learning_rate": 6.279145121550578e-07, "loss": 0.3181, "step": 6220 }, { "epoch": 4.234853641933288, "grad_norm": 1.2947677373886108, "learning_rate": 6.268226363061758e-07, "loss": 0.28, "step": 6221 }, { "epoch": 4.235534377127298, "grad_norm": 1.5115647315979004, "learning_rate": 6.257316471219227e-07, "loss": 0.3712, "step": 6222 }, { "epoch": 4.236215112321307, "grad_norm": 1.529760718345642, "learning_rate": 6.246415448234954e-07, "loss": 0.3587, "step": 6223 }, { "epoch": 4.236895847515316, "grad_norm": 1.4476852416992188, "learning_rate": 6.235523296319152e-07, "loss": 0.3649, "step": 6224 }, { "epoch": 4.237576582709326, "grad_norm": 1.3395915031433105, "learning_rate": 6.224640017680206e-07, "loss": 0.2006, "step": 6225 }, { "epoch": 4.238257317903336, "grad_norm": 1.431328535079956, "learning_rate": 6.213765614524692e-07, "loss": 0.3463, "step": 6226 }, { "epoch": 4.238938053097345, "grad_norm": 1.3872973918914795, "learning_rate": 6.202900089057423e-07, "loss": 0.2363, "step": 6227 }, { "epoch": 4.239618788291355, "grad_norm": 1.5610053539276123, "learning_rate": 6.192043443481388e-07, "loss": 0.4531, "step": 6228 }, { "epoch": 4.240299523485364, "grad_norm": 1.3900984525680542, "learning_rate": 6.181195679997759e-07, "loss": 0.3204, "step": 6229 }, { "epoch": 4.2409802586793734, "grad_norm": 1.5102077722549438, "learning_rate": 6.170356800805949e-07, "loss": 0.3456, "step": 6230 }, { "epoch": 4.241660993873383, "grad_norm": 1.4394787549972534, "learning_rate": 6.159526808103549e-07, "loss": 0.3934, "step": 6231 }, { "epoch": 4.242341729067393, "grad_norm": 1.3889517784118652, "learning_rate": 6.148705704086339e-07, "loss": 0.3775, "step": 6232 }, { "epoch": 4.2430224642614025, "grad_norm": 1.550143837928772, "learning_rate": 6.137893490948299e-07, "loss": 0.3702, "step": 6233 }, { "epoch": 4.243703199455412, "grad_norm": 1.3353923559188843, "learning_rate": 6.127090170881628e-07, "loss": 0.2221, "step": 6234 }, { "epoch": 4.244383934649421, "grad_norm": 1.393274188041687, "learning_rate": 6.1162957460767e-07, "loss": 0.2839, "step": 6235 }, { "epoch": 4.245064669843431, "grad_norm": 1.67582106590271, "learning_rate": 6.105510218722077e-07, "loss": 0.3147, "step": 6236 }, { "epoch": 4.24574540503744, "grad_norm": 1.3530305624008179, "learning_rate": 6.094733591004548e-07, "loss": 0.4431, "step": 6237 }, { "epoch": 4.24642614023145, "grad_norm": 1.3230721950531006, "learning_rate": 6.083965865109088e-07, "loss": 0.3093, "step": 6238 }, { "epoch": 4.24710687542546, "grad_norm": 1.4345171451568604, "learning_rate": 6.073207043218848e-07, "loss": 0.2732, "step": 6239 }, { "epoch": 4.247787610619469, "grad_norm": 1.4428095817565918, "learning_rate": 6.062457127515175e-07, "loss": 0.4666, "step": 6240 }, { "epoch": 4.248468345813478, "grad_norm": 1.520824670791626, "learning_rate": 6.051716120177653e-07, "loss": 0.4038, "step": 6241 }, { "epoch": 4.249149081007488, "grad_norm": 1.3172121047973633, "learning_rate": 6.040984023384011e-07, "loss": 0.351, "step": 6242 }, { "epoch": 4.249829816201498, "grad_norm": 1.5437655448913574, "learning_rate": 6.030260839310176e-07, "loss": 0.2006, "step": 6243 }, { "epoch": 4.250510551395507, "grad_norm": 1.5002228021621704, "learning_rate": 6.019546570130302e-07, "loss": 0.4563, "step": 6244 }, { "epoch": 4.251191286589517, "grad_norm": 1.549641728401184, "learning_rate": 6.008841218016709e-07, "loss": 0.4076, "step": 6245 }, { "epoch": 4.251872021783527, "grad_norm": 1.5675668716430664, "learning_rate": 5.998144785139898e-07, "loss": 0.3168, "step": 6246 }, { "epoch": 4.252552756977535, "grad_norm": 1.3769466876983643, "learning_rate": 5.987457273668595e-07, "loss": 0.3655, "step": 6247 }, { "epoch": 4.253233492171545, "grad_norm": 1.313310146331787, "learning_rate": 5.976778685769702e-07, "loss": 0.233, "step": 6248 }, { "epoch": 4.253914227365555, "grad_norm": 1.3816452026367188, "learning_rate": 5.966109023608307e-07, "loss": 0.3072, "step": 6249 }, { "epoch": 4.2545949625595645, "grad_norm": 1.3705730438232422, "learning_rate": 5.955448289347676e-07, "loss": 0.3579, "step": 6250 }, { "epoch": 4.255275697753574, "grad_norm": 1.2655473947525024, "learning_rate": 5.944796485149307e-07, "loss": 0.3518, "step": 6251 }, { "epoch": 4.255956432947583, "grad_norm": 1.4123690128326416, "learning_rate": 5.934153613172839e-07, "loss": 0.2974, "step": 6252 }, { "epoch": 4.256637168141593, "grad_norm": 1.3083324432373047, "learning_rate": 5.923519675576134e-07, "loss": 0.5062, "step": 6253 }, { "epoch": 4.257317903335602, "grad_norm": 1.4661052227020264, "learning_rate": 5.912894674515207e-07, "loss": 0.4153, "step": 6254 }, { "epoch": 4.257998638529612, "grad_norm": 1.3305150270462036, "learning_rate": 5.902278612144302e-07, "loss": 0.374, "step": 6255 }, { "epoch": 4.258679373723622, "grad_norm": 1.219893217086792, "learning_rate": 5.891671490615847e-07, "loss": 0.3239, "step": 6256 }, { "epoch": 4.259360108917631, "grad_norm": 1.4300822019577026, "learning_rate": 5.88107331208042e-07, "loss": 0.3689, "step": 6257 }, { "epoch": 4.26004084411164, "grad_norm": 1.5110979080200195, "learning_rate": 5.870484078686811e-07, "loss": 0.44, "step": 6258 }, { "epoch": 4.26072157930565, "grad_norm": 1.56537663936615, "learning_rate": 5.859903792582011e-07, "loss": 0.3948, "step": 6259 }, { "epoch": 4.26140231449966, "grad_norm": 1.4063044786453247, "learning_rate": 5.849332455911167e-07, "loss": 0.2095, "step": 6260 }, { "epoch": 4.262083049693669, "grad_norm": 1.4839937686920166, "learning_rate": 5.838770070817612e-07, "loss": 0.477, "step": 6261 }, { "epoch": 4.262763784887679, "grad_norm": 1.3363244533538818, "learning_rate": 5.828216639442907e-07, "loss": 0.4674, "step": 6262 }, { "epoch": 4.263444520081689, "grad_norm": 1.3833626508712769, "learning_rate": 5.817672163926735e-07, "loss": 0.3859, "step": 6263 }, { "epoch": 4.264125255275697, "grad_norm": 1.3066154718399048, "learning_rate": 5.807136646407024e-07, "loss": 0.3069, "step": 6264 }, { "epoch": 4.264805990469707, "grad_norm": 1.4007893800735474, "learning_rate": 5.796610089019833e-07, "loss": 0.2911, "step": 6265 }, { "epoch": 4.265486725663717, "grad_norm": 1.4338207244873047, "learning_rate": 5.786092493899453e-07, "loss": 0.293, "step": 6266 }, { "epoch": 4.2661674608577265, "grad_norm": 1.425244927406311, "learning_rate": 5.775583863178319e-07, "loss": 0.386, "step": 6267 }, { "epoch": 4.266848196051736, "grad_norm": 1.313028335571289, "learning_rate": 5.765084198987053e-07, "loss": 0.2401, "step": 6268 }, { "epoch": 4.267528931245746, "grad_norm": 1.3160024881362915, "learning_rate": 5.754593503454498e-07, "loss": 0.212, "step": 6269 }, { "epoch": 4.268209666439755, "grad_norm": 1.467084527015686, "learning_rate": 5.744111778707623e-07, "loss": 0.2648, "step": 6270 }, { "epoch": 4.268890401633764, "grad_norm": 1.279520869255066, "learning_rate": 5.733639026871607e-07, "loss": 0.333, "step": 6271 }, { "epoch": 4.269571136827774, "grad_norm": 1.298491358757019, "learning_rate": 5.723175250069812e-07, "loss": 0.3709, "step": 6272 }, { "epoch": 4.270251872021784, "grad_norm": 1.3396629095077515, "learning_rate": 5.712720450423792e-07, "loss": 0.3679, "step": 6273 }, { "epoch": 4.270932607215793, "grad_norm": 1.297755241394043, "learning_rate": 5.702274630053245e-07, "loss": 0.365, "step": 6274 }, { "epoch": 4.271613342409802, "grad_norm": 1.2414474487304688, "learning_rate": 5.691837791076066e-07, "loss": 0.3779, "step": 6275 }, { "epoch": 4.272294077603812, "grad_norm": 1.2744364738464355, "learning_rate": 5.681409935608351e-07, "loss": 0.4747, "step": 6276 }, { "epoch": 4.2729748127978215, "grad_norm": 1.3807332515716553, "learning_rate": 5.670991065764336e-07, "loss": 0.3559, "step": 6277 }, { "epoch": 4.273655547991831, "grad_norm": 1.4034003019332886, "learning_rate": 5.660581183656455e-07, "loss": 0.3439, "step": 6278 }, { "epoch": 4.274336283185841, "grad_norm": 1.4059098958969116, "learning_rate": 5.65018029139533e-07, "loss": 0.4372, "step": 6279 }, { "epoch": 4.275017018379851, "grad_norm": 1.338964819908142, "learning_rate": 5.639788391089735e-07, "loss": 0.2878, "step": 6280 }, { "epoch": 4.275697753573859, "grad_norm": 1.4242198467254639, "learning_rate": 5.62940548484665e-07, "loss": 0.4885, "step": 6281 }, { "epoch": 4.276378488767869, "grad_norm": 1.3981735706329346, "learning_rate": 5.6190315747712e-07, "loss": 0.1473, "step": 6282 }, { "epoch": 4.277059223961879, "grad_norm": 1.5507488250732422, "learning_rate": 5.608666662966716e-07, "loss": 0.4364, "step": 6283 }, { "epoch": 4.2777399591558884, "grad_norm": 1.3472975492477417, "learning_rate": 5.598310751534691e-07, "loss": 0.3348, "step": 6284 }, { "epoch": 4.278420694349898, "grad_norm": 1.661658525466919, "learning_rate": 5.587963842574773e-07, "loss": 0.3612, "step": 6285 }, { "epoch": 4.279101429543908, "grad_norm": 1.4608820676803589, "learning_rate": 5.577625938184833e-07, "loss": 0.4149, "step": 6286 }, { "epoch": 4.279782164737917, "grad_norm": 1.2477976083755493, "learning_rate": 5.567297040460867e-07, "loss": 0.2918, "step": 6287 }, { "epoch": 4.280462899931926, "grad_norm": 1.3718128204345703, "learning_rate": 5.556977151497067e-07, "loss": 0.3367, "step": 6288 }, { "epoch": 4.281143635125936, "grad_norm": 1.4774423837661743, "learning_rate": 5.546666273385804e-07, "loss": 0.3383, "step": 6289 }, { "epoch": 4.281824370319946, "grad_norm": 1.457282304763794, "learning_rate": 5.536364408217626e-07, "loss": 0.3743, "step": 6290 }, { "epoch": 4.282505105513955, "grad_norm": 1.3827744722366333, "learning_rate": 5.52607155808123e-07, "loss": 0.3632, "step": 6291 }, { "epoch": 4.283185840707965, "grad_norm": 1.343902826309204, "learning_rate": 5.515787725063487e-07, "loss": 0.4261, "step": 6292 }, { "epoch": 4.283866575901974, "grad_norm": 1.2666957378387451, "learning_rate": 5.505512911249477e-07, "loss": 0.3949, "step": 6293 }, { "epoch": 4.2845473110959835, "grad_norm": 1.2478364706039429, "learning_rate": 5.495247118722408e-07, "loss": 0.2315, "step": 6294 }, { "epoch": 4.285228046289993, "grad_norm": 1.253318428993225, "learning_rate": 5.484990349563673e-07, "loss": 0.4147, "step": 6295 }, { "epoch": 4.285908781484003, "grad_norm": 1.537019968032837, "learning_rate": 5.474742605852856e-07, "loss": 0.3328, "step": 6296 }, { "epoch": 4.286589516678013, "grad_norm": 1.4424874782562256, "learning_rate": 5.464503889667688e-07, "loss": 0.4005, "step": 6297 }, { "epoch": 4.287270251872021, "grad_norm": 1.497382640838623, "learning_rate": 5.454274203084054e-07, "loss": 0.4369, "step": 6298 }, { "epoch": 4.287950987066031, "grad_norm": 1.4377264976501465, "learning_rate": 5.444053548176054e-07, "loss": 0.3133, "step": 6299 }, { "epoch": 4.288631722260041, "grad_norm": 1.4911880493164062, "learning_rate": 5.433841927015932e-07, "loss": 0.2063, "step": 6300 }, { "epoch": 4.28931245745405, "grad_norm": 1.2687073945999146, "learning_rate": 5.423639341674097e-07, "loss": 0.1936, "step": 6301 }, { "epoch": 4.28999319264806, "grad_norm": 1.3188323974609375, "learning_rate": 5.413445794219119e-07, "loss": 0.3414, "step": 6302 }, { "epoch": 4.29067392784207, "grad_norm": 1.311339020729065, "learning_rate": 5.403261286717765e-07, "loss": 0.3821, "step": 6303 }, { "epoch": 4.291354663036079, "grad_norm": 1.4502052068710327, "learning_rate": 5.393085821234939e-07, "loss": 0.328, "step": 6304 }, { "epoch": 4.292035398230088, "grad_norm": 1.3114283084869385, "learning_rate": 5.382919399833713e-07, "loss": 0.5128, "step": 6305 }, { "epoch": 4.292716133424098, "grad_norm": 1.2763057947158813, "learning_rate": 5.372762024575356e-07, "loss": 0.3138, "step": 6306 }, { "epoch": 4.293396868618108, "grad_norm": 1.5031715631484985, "learning_rate": 5.362613697519281e-07, "loss": 0.3629, "step": 6307 }, { "epoch": 4.294077603812117, "grad_norm": 1.3515416383743286, "learning_rate": 5.352474420723058e-07, "loss": 0.4017, "step": 6308 }, { "epoch": 4.294758339006127, "grad_norm": 1.4912174940109253, "learning_rate": 5.342344196242433e-07, "loss": 0.495, "step": 6309 }, { "epoch": 4.295439074200136, "grad_norm": 1.3899332284927368, "learning_rate": 5.332223026131328e-07, "loss": 0.3599, "step": 6310 }, { "epoch": 4.2961198093941455, "grad_norm": 1.399405837059021, "learning_rate": 5.322110912441803e-07, "loss": 0.3494, "step": 6311 }, { "epoch": 4.296800544588155, "grad_norm": 1.4265844821929932, "learning_rate": 5.312007857224094e-07, "loss": 0.5021, "step": 6312 }, { "epoch": 4.297481279782165, "grad_norm": 1.420106291770935, "learning_rate": 5.30191386252662e-07, "loss": 0.2887, "step": 6313 }, { "epoch": 4.298162014976175, "grad_norm": 1.4121934175491333, "learning_rate": 5.291828930395931e-07, "loss": 0.3726, "step": 6314 }, { "epoch": 4.298842750170184, "grad_norm": 1.5334727764129639, "learning_rate": 5.281753062876749e-07, "loss": 0.3102, "step": 6315 }, { "epoch": 4.299523485364193, "grad_norm": 1.469309687614441, "learning_rate": 5.271686262011971e-07, "loss": 0.2977, "step": 6316 }, { "epoch": 4.300204220558203, "grad_norm": 1.4331367015838623, "learning_rate": 5.26162852984266e-07, "loss": 0.3354, "step": 6317 }, { "epoch": 4.300884955752212, "grad_norm": 1.3613076210021973, "learning_rate": 5.251579868408008e-07, "loss": 0.3704, "step": 6318 }, { "epoch": 4.301565690946222, "grad_norm": 1.4497137069702148, "learning_rate": 5.241540279745388e-07, "loss": 0.3529, "step": 6319 }, { "epoch": 4.302246426140232, "grad_norm": 1.4944647550582886, "learning_rate": 5.231509765890353e-07, "loss": 0.3824, "step": 6320 }, { "epoch": 4.302927161334241, "grad_norm": 1.4595142602920532, "learning_rate": 5.221488328876584e-07, "loss": 0.3336, "step": 6321 }, { "epoch": 4.30360789652825, "grad_norm": 1.4194873571395874, "learning_rate": 5.211475970735929e-07, "loss": 0.312, "step": 6322 }, { "epoch": 4.30428863172226, "grad_norm": 1.4655424356460571, "learning_rate": 5.201472693498399e-07, "loss": 0.4131, "step": 6323 }, { "epoch": 4.30496936691627, "grad_norm": 1.371171474456787, "learning_rate": 5.191478499192171e-07, "loss": 0.4262, "step": 6324 }, { "epoch": 4.305650102110279, "grad_norm": 1.4412356615066528, "learning_rate": 5.181493389843584e-07, "loss": 0.3526, "step": 6325 }, { "epoch": 4.306330837304289, "grad_norm": 1.4311466217041016, "learning_rate": 5.171517367477102e-07, "loss": 0.3579, "step": 6326 }, { "epoch": 4.307011572498298, "grad_norm": 1.3634891510009766, "learning_rate": 5.161550434115403e-07, "loss": 0.2455, "step": 6327 }, { "epoch": 4.3076923076923075, "grad_norm": 1.4527649879455566, "learning_rate": 5.151592591779264e-07, "loss": 0.2429, "step": 6328 }, { "epoch": 4.308373042886317, "grad_norm": 1.452722191810608, "learning_rate": 5.141643842487654e-07, "loss": 0.3917, "step": 6329 }, { "epoch": 4.309053778080327, "grad_norm": 1.3620102405548096, "learning_rate": 5.131704188257674e-07, "loss": 0.3046, "step": 6330 }, { "epoch": 4.3097345132743365, "grad_norm": 1.3453923463821411, "learning_rate": 5.121773631104621e-07, "loss": 0.2085, "step": 6331 }, { "epoch": 4.310415248468346, "grad_norm": 1.2855950593948364, "learning_rate": 5.111852173041893e-07, "loss": 0.4565, "step": 6332 }, { "epoch": 4.311095983662355, "grad_norm": 1.4096083641052246, "learning_rate": 5.101939816081097e-07, "loss": 0.3326, "step": 6333 }, { "epoch": 4.311776718856365, "grad_norm": 1.5923030376434326, "learning_rate": 5.092036562231956e-07, "loss": 0.317, "step": 6334 }, { "epoch": 4.312457454050374, "grad_norm": 1.2674896717071533, "learning_rate": 5.082142413502373e-07, "loss": 0.289, "step": 6335 }, { "epoch": 4.313138189244384, "grad_norm": 1.4050027132034302, "learning_rate": 5.072257371898387e-07, "loss": 0.415, "step": 6336 }, { "epoch": 4.313818924438394, "grad_norm": 1.5022433996200562, "learning_rate": 5.06238143942418e-07, "loss": 0.4116, "step": 6337 }, { "epoch": 4.3144996596324034, "grad_norm": 1.560514211654663, "learning_rate": 5.052514618082138e-07, "loss": 0.285, "step": 6338 }, { "epoch": 4.315180394826412, "grad_norm": 1.4504255056381226, "learning_rate": 5.04265690987274e-07, "loss": 0.2833, "step": 6339 }, { "epoch": 4.315861130020422, "grad_norm": 1.4805755615234375, "learning_rate": 5.032808316794647e-07, "loss": 0.2668, "step": 6340 }, { "epoch": 4.316541865214432, "grad_norm": 1.477431058883667, "learning_rate": 5.022968840844666e-07, "loss": 0.1959, "step": 6341 }, { "epoch": 4.317222600408441, "grad_norm": 1.3668901920318604, "learning_rate": 5.013138484017777e-07, "loss": 0.4008, "step": 6342 }, { "epoch": 4.317903335602451, "grad_norm": 1.3846529722213745, "learning_rate": 5.003317248307071e-07, "loss": 0.4044, "step": 6343 }, { "epoch": 4.31858407079646, "grad_norm": 1.357289433479309, "learning_rate": 4.993505135703808e-07, "loss": 0.4705, "step": 6344 }, { "epoch": 4.3192648059904695, "grad_norm": 1.4717321395874023, "learning_rate": 4.983702148197422e-07, "loss": 0.2849, "step": 6345 }, { "epoch": 4.319945541184479, "grad_norm": 1.406441569328308, "learning_rate": 4.973908287775458e-07, "loss": 0.3174, "step": 6346 }, { "epoch": 4.320626276378489, "grad_norm": 1.3581382036209106, "learning_rate": 4.964123556423622e-07, "loss": 0.4327, "step": 6347 }, { "epoch": 4.3213070115724985, "grad_norm": 1.402588963508606, "learning_rate": 4.954347956125794e-07, "loss": 0.4278, "step": 6348 }, { "epoch": 4.321987746766508, "grad_norm": 1.5232775211334229, "learning_rate": 4.944581488863965e-07, "loss": 0.2374, "step": 6349 }, { "epoch": 4.322668481960517, "grad_norm": 1.3651227951049805, "learning_rate": 4.934824156618306e-07, "loss": 0.2902, "step": 6350 }, { "epoch": 4.323349217154527, "grad_norm": 1.267111897468567, "learning_rate": 4.925075961367115e-07, "loss": 0.2879, "step": 6351 }, { "epoch": 4.324029952348536, "grad_norm": 1.530092716217041, "learning_rate": 4.915336905086854e-07, "loss": 0.2589, "step": 6352 }, { "epoch": 4.324710687542546, "grad_norm": 1.433341383934021, "learning_rate": 4.905606989752115e-07, "loss": 0.348, "step": 6353 }, { "epoch": 4.325391422736556, "grad_norm": 1.442530870437622, "learning_rate": 4.895886217335644e-07, "loss": 0.3646, "step": 6354 }, { "epoch": 4.3260721579305645, "grad_norm": 1.3548569679260254, "learning_rate": 4.886174589808341e-07, "loss": 0.267, "step": 6355 }, { "epoch": 4.326752893124574, "grad_norm": 1.4249285459518433, "learning_rate": 4.87647210913924e-07, "loss": 0.2919, "step": 6356 }, { "epoch": 4.327433628318584, "grad_norm": 1.4245033264160156, "learning_rate": 4.866778777295522e-07, "loss": 0.2708, "step": 6357 }, { "epoch": 4.328114363512594, "grad_norm": 1.489927053451538, "learning_rate": 4.857094596242517e-07, "loss": 0.2921, "step": 6358 }, { "epoch": 4.328795098706603, "grad_norm": 1.5766657590866089, "learning_rate": 4.847419567943718e-07, "loss": 0.3195, "step": 6359 }, { "epoch": 4.329475833900613, "grad_norm": 1.3925671577453613, "learning_rate": 4.837753694360725e-07, "loss": 0.2888, "step": 6360 }, { "epoch": 4.330156569094623, "grad_norm": 1.5219199657440186, "learning_rate": 4.828096977453295e-07, "loss": 0.2824, "step": 6361 }, { "epoch": 4.3308373042886315, "grad_norm": 1.3694689273834229, "learning_rate": 4.81844941917935e-07, "loss": 0.3191, "step": 6362 }, { "epoch": 4.331518039482641, "grad_norm": 1.3804900646209717, "learning_rate": 4.80881102149493e-07, "loss": 0.3117, "step": 6363 }, { "epoch": 4.332198774676651, "grad_norm": 1.477704405784607, "learning_rate": 4.799181786354223e-07, "loss": 0.3335, "step": 6364 }, { "epoch": 4.3328795098706605, "grad_norm": 1.3939799070358276, "learning_rate": 4.789561715709578e-07, "loss": 0.2654, "step": 6365 }, { "epoch": 4.33356024506467, "grad_norm": 1.3699712753295898, "learning_rate": 4.779950811511458e-07, "loss": 0.3118, "step": 6366 }, { "epoch": 4.334240980258679, "grad_norm": 1.5451374053955078, "learning_rate": 4.770349075708475e-07, "loss": 0.3164, "step": 6367 }, { "epoch": 4.334921715452689, "grad_norm": 1.7729699611663818, "learning_rate": 4.760756510247394e-07, "loss": 0.4265, "step": 6368 }, { "epoch": 4.335602450646698, "grad_norm": 1.3450767993927002, "learning_rate": 4.7511731170731266e-07, "loss": 0.4452, "step": 6369 }, { "epoch": 4.336283185840708, "grad_norm": 1.4762358665466309, "learning_rate": 4.7415988981286965e-07, "loss": 0.2543, "step": 6370 }, { "epoch": 4.336963921034718, "grad_norm": 1.4434937238693237, "learning_rate": 4.7320338553552835e-07, "loss": 0.2809, "step": 6371 }, { "epoch": 4.337644656228727, "grad_norm": 1.2662270069122314, "learning_rate": 4.7224779906922146e-07, "loss": 0.3834, "step": 6372 }, { "epoch": 4.338325391422736, "grad_norm": 1.4010175466537476, "learning_rate": 4.712931306076951e-07, "loss": 0.2566, "step": 6373 }, { "epoch": 4.339006126616746, "grad_norm": 1.7809200286865234, "learning_rate": 4.7033938034450666e-07, "loss": 0.3091, "step": 6374 }, { "epoch": 4.339686861810756, "grad_norm": 1.4893279075622559, "learning_rate": 4.6938654847303086e-07, "loss": 0.2814, "step": 6375 }, { "epoch": 4.340367597004765, "grad_norm": 1.3339636325836182, "learning_rate": 4.6843463518645715e-07, "loss": 0.259, "step": 6376 }, { "epoch": 4.341048332198775, "grad_norm": 1.4470345973968506, "learning_rate": 4.674836406777838e-07, "loss": 0.2337, "step": 6377 }, { "epoch": 4.341729067392784, "grad_norm": 1.44688880443573, "learning_rate": 4.665335651398262e-07, "loss": 0.4644, "step": 6378 }, { "epoch": 4.342409802586793, "grad_norm": 1.5837594270706177, "learning_rate": 4.655844087652139e-07, "loss": 0.2921, "step": 6379 }, { "epoch": 4.343090537780803, "grad_norm": 1.6379806995391846, "learning_rate": 4.646361717463882e-07, "loss": 0.3829, "step": 6380 }, { "epoch": 4.343771272974813, "grad_norm": 1.4685336351394653, "learning_rate": 4.6368885427560405e-07, "loss": 0.4187, "step": 6381 }, { "epoch": 4.3444520081688225, "grad_norm": 1.5211775302886963, "learning_rate": 4.6274245654493175e-07, "loss": 0.3507, "step": 6382 }, { "epoch": 4.345132743362832, "grad_norm": 1.456924557685852, "learning_rate": 4.617969787462545e-07, "loss": 0.3324, "step": 6383 }, { "epoch": 4.345813478556842, "grad_norm": 1.3604556322097778, "learning_rate": 4.6085242107126617e-07, "loss": 0.3993, "step": 6384 }, { "epoch": 4.346494213750851, "grad_norm": 1.323919653892517, "learning_rate": 4.5990878371147853e-07, "loss": 0.4349, "step": 6385 }, { "epoch": 4.34717494894486, "grad_norm": 1.378671646118164, "learning_rate": 4.589660668582152e-07, "loss": 0.3418, "step": 6386 }, { "epoch": 4.34785568413887, "grad_norm": 1.2743421792984009, "learning_rate": 4.5802427070261166e-07, "loss": 0.3117, "step": 6387 }, { "epoch": 4.34853641933288, "grad_norm": 1.4346685409545898, "learning_rate": 4.5708339543561686e-07, "loss": 0.3793, "step": 6388 }, { "epoch": 4.349217154526889, "grad_norm": 1.4631582498550415, "learning_rate": 4.5614344124799594e-07, "loss": 0.3844, "step": 6389 }, { "epoch": 4.349897889720898, "grad_norm": 1.3919731378555298, "learning_rate": 4.5520440833032363e-07, "loss": 0.2973, "step": 6390 }, { "epoch": 4.350578624914908, "grad_norm": 1.5590689182281494, "learning_rate": 4.5426629687298997e-07, "loss": 0.451, "step": 6391 }, { "epoch": 4.351259360108918, "grad_norm": 1.437607765197754, "learning_rate": 4.5332910706619715e-07, "loss": 0.2609, "step": 6392 }, { "epoch": 4.351940095302927, "grad_norm": 1.5211617946624756, "learning_rate": 4.52392839099961e-07, "loss": 0.3063, "step": 6393 }, { "epoch": 4.352620830496937, "grad_norm": 1.3379309177398682, "learning_rate": 4.514574931641125e-07, "loss": 0.3077, "step": 6394 }, { "epoch": 4.353301565690947, "grad_norm": 1.5033307075500488, "learning_rate": 4.505230694482909e-07, "loss": 0.285, "step": 6395 }, { "epoch": 4.353982300884955, "grad_norm": 1.500706672668457, "learning_rate": 4.495895681419532e-07, "loss": 0.2925, "step": 6396 }, { "epoch": 4.354663036078965, "grad_norm": 1.2935124635696411, "learning_rate": 4.486569894343673e-07, "loss": 0.426, "step": 6397 }, { "epoch": 4.355343771272975, "grad_norm": 1.4250167608261108, "learning_rate": 4.4772533351461313e-07, "loss": 0.335, "step": 6398 }, { "epoch": 4.3560245064669845, "grad_norm": 1.4169496297836304, "learning_rate": 4.46794600571584e-07, "loss": 0.3375, "step": 6399 }, { "epoch": 4.356705241660994, "grad_norm": 1.2861855030059814, "learning_rate": 4.4586479079398895e-07, "loss": 0.3351, "step": 6400 }, { "epoch": 4.357385976855003, "grad_norm": 1.3089051246643066, "learning_rate": 4.4493590437034496e-07, "loss": 0.389, "step": 6401 }, { "epoch": 4.358066712049013, "grad_norm": 1.285383939743042, "learning_rate": 4.4400794148898583e-07, "loss": 0.2835, "step": 6402 }, { "epoch": 4.358747447243022, "grad_norm": 1.508908748626709, "learning_rate": 4.430809023380572e-07, "loss": 0.3118, "step": 6403 }, { "epoch": 4.359428182437032, "grad_norm": 1.3563894033432007, "learning_rate": 4.42154787105516e-07, "loss": 0.2433, "step": 6404 }, { "epoch": 4.360108917631042, "grad_norm": 1.7136121988296509, "learning_rate": 4.41229595979133e-07, "loss": 0.3071, "step": 6405 }, { "epoch": 4.360789652825051, "grad_norm": 1.5770843029022217, "learning_rate": 4.403053291464904e-07, "loss": 0.3727, "step": 6406 }, { "epoch": 4.36147038801906, "grad_norm": 1.5469096899032593, "learning_rate": 4.39381986794985e-07, "loss": 0.4382, "step": 6407 }, { "epoch": 4.36215112321307, "grad_norm": 1.434314489364624, "learning_rate": 4.384595691118254e-07, "loss": 0.403, "step": 6408 }, { "epoch": 4.3628318584070795, "grad_norm": 1.3151428699493408, "learning_rate": 4.375380762840303e-07, "loss": 0.245, "step": 6409 }, { "epoch": 4.363512593601089, "grad_norm": 1.5489628314971924, "learning_rate": 4.3661750849843475e-07, "loss": 0.2826, "step": 6410 }, { "epoch": 4.364193328795099, "grad_norm": 1.435028076171875, "learning_rate": 4.3569786594168496e-07, "loss": 0.362, "step": 6411 }, { "epoch": 4.364874063989109, "grad_norm": 1.4069596529006958, "learning_rate": 4.347791488002384e-07, "loss": 0.3222, "step": 6412 }, { "epoch": 4.365554799183117, "grad_norm": 1.3501088619232178, "learning_rate": 4.338613572603645e-07, "loss": 0.3604, "step": 6413 }, { "epoch": 4.366235534377127, "grad_norm": 1.255056381225586, "learning_rate": 4.329444915081482e-07, "loss": 0.4082, "step": 6414 }, { "epoch": 4.366916269571137, "grad_norm": 1.4467090368270874, "learning_rate": 4.320285517294831e-07, "loss": 0.288, "step": 6415 }, { "epoch": 4.3675970047651465, "grad_norm": 1.427892804145813, "learning_rate": 4.311135381100762e-07, "loss": 0.3365, "step": 6416 }, { "epoch": 4.368277739959156, "grad_norm": 1.4608440399169922, "learning_rate": 4.30199450835449e-07, "loss": 0.3248, "step": 6417 }, { "epoch": 4.368958475153166, "grad_norm": 1.536655306816101, "learning_rate": 4.2928629009093115e-07, "loss": 0.2915, "step": 6418 }, { "epoch": 4.369639210347175, "grad_norm": 1.2290849685668945, "learning_rate": 4.2837405606166835e-07, "loss": 0.2246, "step": 6419 }, { "epoch": 4.370319945541184, "grad_norm": 1.4180569648742676, "learning_rate": 4.274627489326155e-07, "loss": 0.247, "step": 6420 }, { "epoch": 4.371000680735194, "grad_norm": 1.4098039865493774, "learning_rate": 4.26552368888542e-07, "loss": 0.4297, "step": 6421 }, { "epoch": 4.371681415929204, "grad_norm": 1.4631034135818481, "learning_rate": 4.256429161140274e-07, "loss": 0.3811, "step": 6422 }, { "epoch": 4.372362151123213, "grad_norm": 1.439684271812439, "learning_rate": 4.2473439079346257e-07, "loss": 0.3535, "step": 6423 }, { "epoch": 4.373042886317222, "grad_norm": 1.4525024890899658, "learning_rate": 4.2382679311105345e-07, "loss": 0.4447, "step": 6424 }, { "epoch": 4.373723621511232, "grad_norm": 1.3723702430725098, "learning_rate": 4.2292012325081567e-07, "loss": 0.497, "step": 6425 }, { "epoch": 4.3744043567052415, "grad_norm": 1.3638404607772827, "learning_rate": 4.22014381396576e-07, "loss": 0.5625, "step": 6426 }, { "epoch": 4.375085091899251, "grad_norm": 1.5105632543563843, "learning_rate": 4.2110956773197486e-07, "loss": 0.4376, "step": 6427 }, { "epoch": 4.375765827093261, "grad_norm": 1.4180541038513184, "learning_rate": 4.202056824404649e-07, "loss": 0.2685, "step": 6428 }, { "epoch": 4.376446562287271, "grad_norm": 1.336207628250122, "learning_rate": 4.193027257053089e-07, "loss": 0.3314, "step": 6429 }, { "epoch": 4.377127297481279, "grad_norm": 1.3766381740570068, "learning_rate": 4.184006977095806e-07, "loss": 0.2995, "step": 6430 }, { "epoch": 4.377808032675289, "grad_norm": 1.3468519449234009, "learning_rate": 4.174995986361685e-07, "loss": 0.3352, "step": 6431 }, { "epoch": 4.378488767869299, "grad_norm": 1.5095064640045166, "learning_rate": 4.165994286677705e-07, "loss": 0.3782, "step": 6432 }, { "epoch": 4.379169503063308, "grad_norm": 1.3927096128463745, "learning_rate": 4.157001879868955e-07, "loss": 0.3796, "step": 6433 }, { "epoch": 4.379850238257318, "grad_norm": 1.3153247833251953, "learning_rate": 4.1480187677586716e-07, "loss": 0.283, "step": 6434 }, { "epoch": 4.380530973451328, "grad_norm": 1.4945104122161865, "learning_rate": 4.1390449521681753e-07, "loss": 0.4697, "step": 6435 }, { "epoch": 4.381211708645337, "grad_norm": 1.3397051095962524, "learning_rate": 4.1300804349169057e-07, "loss": 0.3263, "step": 6436 }, { "epoch": 4.381892443839346, "grad_norm": 1.3909900188446045, "learning_rate": 4.121125217822436e-07, "loss": 0.3023, "step": 6437 }, { "epoch": 4.382573179033356, "grad_norm": 1.4983779191970825, "learning_rate": 4.1121793027004475e-07, "loss": 0.3293, "step": 6438 }, { "epoch": 4.383253914227366, "grad_norm": 1.32270085811615, "learning_rate": 4.1032426913647174e-07, "loss": 0.344, "step": 6439 }, { "epoch": 4.383934649421375, "grad_norm": 1.4250962734222412, "learning_rate": 4.0943153856271565e-07, "loss": 0.3342, "step": 6440 }, { "epoch": 4.384615384615385, "grad_norm": 1.324157476425171, "learning_rate": 4.0853973872977794e-07, "loss": 0.302, "step": 6441 }, { "epoch": 4.385296119809394, "grad_norm": 1.326274037361145, "learning_rate": 4.0764886981847274e-07, "loss": 0.4094, "step": 6442 }, { "epoch": 4.3859768550034035, "grad_norm": 1.4104554653167725, "learning_rate": 4.067589320094217e-07, "loss": 0.1769, "step": 6443 }, { "epoch": 4.386657590197413, "grad_norm": 1.362802267074585, "learning_rate": 4.0586992548306227e-07, "loss": 0.4097, "step": 6444 }, { "epoch": 4.387338325391423, "grad_norm": 1.3695645332336426, "learning_rate": 4.049818504196412e-07, "loss": 0.3497, "step": 6445 }, { "epoch": 4.388019060585433, "grad_norm": 1.3666294813156128, "learning_rate": 4.0409470699921627e-07, "loss": 0.4293, "step": 6446 }, { "epoch": 4.388699795779441, "grad_norm": 1.4451857805252075, "learning_rate": 4.032084954016552e-07, "loss": 0.4049, "step": 6447 }, { "epoch": 4.389380530973451, "grad_norm": 1.364488959312439, "learning_rate": 4.023232158066398e-07, "loss": 0.3114, "step": 6448 }, { "epoch": 4.390061266167461, "grad_norm": 1.3279838562011719, "learning_rate": 4.014388683936599e-07, "loss": 0.1987, "step": 6449 }, { "epoch": 4.39074200136147, "grad_norm": 1.4127994775772095, "learning_rate": 4.005554533420164e-07, "loss": 0.3439, "step": 6450 }, { "epoch": 4.39142273655548, "grad_norm": 1.3680330514907837, "learning_rate": 3.9967297083082503e-07, "loss": 0.3851, "step": 6451 }, { "epoch": 4.39210347174949, "grad_norm": 1.5518378019332886, "learning_rate": 3.9879142103900815e-07, "loss": 0.4807, "step": 6452 }, { "epoch": 4.392784206943499, "grad_norm": 1.369226098060608, "learning_rate": 3.979108041452995e-07, "loss": 0.405, "step": 6453 }, { "epoch": 4.393464942137508, "grad_norm": 1.4996001720428467, "learning_rate": 3.970311203282462e-07, "loss": 0.2987, "step": 6454 }, { "epoch": 4.394145677331518, "grad_norm": 1.554195523262024, "learning_rate": 3.9615236976620565e-07, "loss": 0.3781, "step": 6455 }, { "epoch": 4.394826412525528, "grad_norm": 1.4992115497589111, "learning_rate": 3.952745526373436e-07, "loss": 0.3021, "step": 6456 }, { "epoch": 4.395507147719537, "grad_norm": 1.6177223920822144, "learning_rate": 3.9439766911963716e-07, "loss": 0.2597, "step": 6457 }, { "epoch": 4.396187882913547, "grad_norm": 1.5676316022872925, "learning_rate": 3.93521719390878e-07, "loss": 0.3974, "step": 6458 }, { "epoch": 4.396868618107556, "grad_norm": 1.4294756650924683, "learning_rate": 3.9264670362866295e-07, "loss": 0.3882, "step": 6459 }, { "epoch": 4.3975493533015655, "grad_norm": 1.98049795627594, "learning_rate": 3.9177262201040347e-07, "loss": 0.3073, "step": 6460 }, { "epoch": 4.398230088495575, "grad_norm": 1.424442172050476, "learning_rate": 3.908994747133188e-07, "loss": 0.4034, "step": 6461 }, { "epoch": 4.398910823689585, "grad_norm": 1.640405535697937, "learning_rate": 3.9002726191444075e-07, "loss": 0.4144, "step": 6462 }, { "epoch": 4.3995915588835945, "grad_norm": 1.3447059392929077, "learning_rate": 3.891559837906117e-07, "loss": 0.3699, "step": 6463 }, { "epoch": 4.400272294077604, "grad_norm": 1.359695553779602, "learning_rate": 3.8828564051848317e-07, "loss": 0.4384, "step": 6464 }, { "epoch": 4.400953029271613, "grad_norm": 1.5301132202148438, "learning_rate": 3.874162322745184e-07, "loss": 0.2993, "step": 6465 }, { "epoch": 4.401633764465623, "grad_norm": 1.4108917713165283, "learning_rate": 3.8654775923499087e-07, "loss": 0.3661, "step": 6466 }, { "epoch": 4.402314499659632, "grad_norm": 1.39776611328125, "learning_rate": 3.856802215759825e-07, "loss": 0.3486, "step": 6467 }, { "epoch": 4.402995234853642, "grad_norm": 1.398091435432434, "learning_rate": 3.8481361947338756e-07, "loss": 0.2766, "step": 6468 }, { "epoch": 4.403675970047652, "grad_norm": 1.4033139944076538, "learning_rate": 3.839479531029117e-07, "loss": 0.4152, "step": 6469 }, { "epoch": 4.404356705241661, "grad_norm": 1.4038506746292114, "learning_rate": 3.830832226400671e-07, "loss": 0.2412, "step": 6470 }, { "epoch": 4.40503744043567, "grad_norm": 1.4218695163726807, "learning_rate": 3.8221942826017986e-07, "loss": 0.3719, "step": 6471 }, { "epoch": 4.40571817562968, "grad_norm": 1.3335264921188354, "learning_rate": 3.813565701383859e-07, "loss": 0.3307, "step": 6472 }, { "epoch": 4.40639891082369, "grad_norm": 1.4025214910507202, "learning_rate": 3.8049464844962855e-07, "loss": 0.3697, "step": 6473 }, { "epoch": 4.407079646017699, "grad_norm": 1.3607853651046753, "learning_rate": 3.7963366336866317e-07, "loss": 0.3257, "step": 6474 }, { "epoch": 4.407760381211709, "grad_norm": 1.365372896194458, "learning_rate": 3.7877361507005495e-07, "loss": 0.2084, "step": 6475 }, { "epoch": 4.408441116405718, "grad_norm": 1.2723397016525269, "learning_rate": 3.779145037281806e-07, "loss": 0.3073, "step": 6476 }, { "epoch": 4.4091218515997275, "grad_norm": 1.506666660308838, "learning_rate": 3.7705632951722414e-07, "loss": 0.2, "step": 6477 }, { "epoch": 4.409802586793737, "grad_norm": 1.4877150058746338, "learning_rate": 3.7619909261118137e-07, "loss": 0.3524, "step": 6478 }, { "epoch": 4.410483321987747, "grad_norm": 1.5010920763015747, "learning_rate": 3.753427931838571e-07, "loss": 0.3854, "step": 6479 }, { "epoch": 4.4111640571817565, "grad_norm": 1.4543157815933228, "learning_rate": 3.744874314088681e-07, "loss": 0.4167, "step": 6480 }, { "epoch": 4.411844792375766, "grad_norm": 1.41606605052948, "learning_rate": 3.736330074596384e-07, "loss": 0.4342, "step": 6481 }, { "epoch": 4.412525527569775, "grad_norm": 1.4087313413619995, "learning_rate": 3.7277952150940264e-07, "loss": 0.4703, "step": 6482 }, { "epoch": 4.413206262763785, "grad_norm": 1.301367998123169, "learning_rate": 3.7192697373120754e-07, "loss": 0.2203, "step": 6483 }, { "epoch": 4.413886997957794, "grad_norm": 1.377872109413147, "learning_rate": 3.710753642979059e-07, "loss": 0.6314, "step": 6484 }, { "epoch": 4.414567733151804, "grad_norm": 1.3511584997177124, "learning_rate": 3.702246933821624e-07, "loss": 0.3472, "step": 6485 }, { "epoch": 4.415248468345814, "grad_norm": 1.5115716457366943, "learning_rate": 3.693749611564518e-07, "loss": 0.2535, "step": 6486 }, { "epoch": 4.415929203539823, "grad_norm": 1.4709078073501587, "learning_rate": 3.6852616779305683e-07, "loss": 0.4268, "step": 6487 }, { "epoch": 4.416609938733832, "grad_norm": 1.5033174753189087, "learning_rate": 3.6767831346407267e-07, "loss": 0.2069, "step": 6488 }, { "epoch": 4.417290673927842, "grad_norm": 1.47335684299469, "learning_rate": 3.668313983414001e-07, "loss": 0.3065, "step": 6489 }, { "epoch": 4.417971409121852, "grad_norm": 1.421565055847168, "learning_rate": 3.6598542259675353e-07, "loss": 0.3983, "step": 6490 }, { "epoch": 4.418652144315861, "grad_norm": 1.5896602869033813, "learning_rate": 3.6514038640165505e-07, "loss": 0.454, "step": 6491 }, { "epoch": 4.419332879509871, "grad_norm": 1.3753739595413208, "learning_rate": 3.6429628992743436e-07, "loss": 0.4713, "step": 6492 }, { "epoch": 4.42001361470388, "grad_norm": 1.3013098239898682, "learning_rate": 3.6345313334523514e-07, "loss": 0.2721, "step": 6493 }, { "epoch": 4.4206943498978895, "grad_norm": 1.3329209089279175, "learning_rate": 3.626109168260067e-07, "loss": 0.3736, "step": 6494 }, { "epoch": 4.421375085091899, "grad_norm": 1.3823161125183105, "learning_rate": 3.61769640540508e-07, "loss": 0.3613, "step": 6495 }, { "epoch": 4.422055820285909, "grad_norm": 1.5291210412979126, "learning_rate": 3.6092930465930933e-07, "loss": 0.2263, "step": 6496 }, { "epoch": 4.4227365554799185, "grad_norm": 1.4665966033935547, "learning_rate": 3.600899093527904e-07, "loss": 0.2666, "step": 6497 }, { "epoch": 4.423417290673928, "grad_norm": 1.394779086112976, "learning_rate": 3.5925145479113854e-07, "loss": 0.2275, "step": 6498 }, { "epoch": 4.424098025867937, "grad_norm": 1.4779421091079712, "learning_rate": 3.584139411443499e-07, "loss": 0.4211, "step": 6499 }, { "epoch": 4.424778761061947, "grad_norm": 1.3081921339035034, "learning_rate": 3.57577368582232e-07, "loss": 0.2904, "step": 6500 }, { "epoch": 4.425459496255956, "grad_norm": 1.527774691581726, "learning_rate": 3.567417372744003e-07, "loss": 0.2734, "step": 6501 }, { "epoch": 4.426140231449966, "grad_norm": 1.4886577129364014, "learning_rate": 3.559070473902787e-07, "loss": 0.2402, "step": 6502 }, { "epoch": 4.426820966643976, "grad_norm": 1.3994312286376953, "learning_rate": 3.550732990991029e-07, "loss": 0.3594, "step": 6503 }, { "epoch": 4.427501701837985, "grad_norm": 1.5472996234893799, "learning_rate": 3.54240492569915e-07, "loss": 0.4748, "step": 6504 }, { "epoch": 4.428182437031994, "grad_norm": 1.3657898902893066, "learning_rate": 3.5340862797156594e-07, "loss": 0.2169, "step": 6505 }, { "epoch": 4.428863172226004, "grad_norm": 1.4317179918289185, "learning_rate": 3.5257770547271753e-07, "loss": 0.308, "step": 6506 }, { "epoch": 4.429543907420014, "grad_norm": 1.3295787572860718, "learning_rate": 3.5174772524184163e-07, "loss": 0.3323, "step": 6507 }, { "epoch": 4.430224642614023, "grad_norm": 1.3226852416992188, "learning_rate": 3.509186874472159e-07, "loss": 0.2242, "step": 6508 }, { "epoch": 4.430905377808033, "grad_norm": 1.471312165260315, "learning_rate": 3.5009059225692697e-07, "loss": 0.33, "step": 6509 }, { "epoch": 4.431586113002043, "grad_norm": 1.6085073947906494, "learning_rate": 3.492634398388739e-07, "loss": 0.2106, "step": 6510 }, { "epoch": 4.432266848196051, "grad_norm": 1.486930012702942, "learning_rate": 3.4843723036076195e-07, "loss": 0.2733, "step": 6511 }, { "epoch": 4.432947583390061, "grad_norm": 1.3180932998657227, "learning_rate": 3.4761196399010375e-07, "loss": 0.3845, "step": 6512 }, { "epoch": 4.433628318584071, "grad_norm": 1.5304646492004395, "learning_rate": 3.4678764089422445e-07, "loss": 0.2854, "step": 6513 }, { "epoch": 4.4343090537780805, "grad_norm": 1.3574869632720947, "learning_rate": 3.459642612402564e-07, "loss": 0.2538, "step": 6514 }, { "epoch": 4.43498978897209, "grad_norm": 1.704120397567749, "learning_rate": 3.4514182519513996e-07, "loss": 0.3469, "step": 6515 }, { "epoch": 4.435670524166099, "grad_norm": 1.451505184173584, "learning_rate": 3.443203329256234e-07, "loss": 0.3058, "step": 6516 }, { "epoch": 4.436351259360109, "grad_norm": 1.3884385824203491, "learning_rate": 3.434997845982663e-07, "loss": 0.3297, "step": 6517 }, { "epoch": 4.437031994554118, "grad_norm": 1.4184470176696777, "learning_rate": 3.4268018037943493e-07, "loss": 0.3302, "step": 6518 }, { "epoch": 4.437712729748128, "grad_norm": 1.460841178894043, "learning_rate": 3.4186152043530376e-07, "loss": 0.2486, "step": 6519 }, { "epoch": 4.438393464942138, "grad_norm": 1.3215316534042358, "learning_rate": 3.410438049318576e-07, "loss": 0.2081, "step": 6520 }, { "epoch": 4.439074200136147, "grad_norm": 1.4548898935317993, "learning_rate": 3.4022703403488956e-07, "loss": 0.3425, "step": 6521 }, { "epoch": 4.439754935330156, "grad_norm": 1.4220094680786133, "learning_rate": 3.394112079099976e-07, "loss": 0.438, "step": 6522 }, { "epoch": 4.440435670524166, "grad_norm": 1.5144401788711548, "learning_rate": 3.385963267225939e-07, "loss": 0.4285, "step": 6523 }, { "epoch": 4.441116405718176, "grad_norm": 1.3978796005249023, "learning_rate": 3.377823906378952e-07, "loss": 0.347, "step": 6524 }, { "epoch": 4.441797140912185, "grad_norm": 1.494951844215393, "learning_rate": 3.369693998209278e-07, "loss": 0.3564, "step": 6525 }, { "epoch": 4.442477876106195, "grad_norm": 1.431878924369812, "learning_rate": 3.361573544365254e-07, "loss": 0.368, "step": 6526 }, { "epoch": 4.443158611300205, "grad_norm": 1.3470185995101929, "learning_rate": 3.353462546493319e-07, "loss": 0.2718, "step": 6527 }, { "epoch": 4.443839346494213, "grad_norm": 1.5118879079818726, "learning_rate": 3.345361006237974e-07, "loss": 0.3093, "step": 6528 }, { "epoch": 4.444520081688223, "grad_norm": 1.4966130256652832, "learning_rate": 3.3372689252418153e-07, "loss": 0.4195, "step": 6529 }, { "epoch": 4.445200816882233, "grad_norm": 1.5873732566833496, "learning_rate": 3.3291863051455097e-07, "loss": 0.3328, "step": 6530 }, { "epoch": 4.4458815520762425, "grad_norm": 1.3917396068572998, "learning_rate": 3.321113147587818e-07, "loss": 0.3505, "step": 6531 }, { "epoch": 4.446562287270252, "grad_norm": 1.4351730346679688, "learning_rate": 3.313049454205591e-07, "loss": 0.4547, "step": 6532 }, { "epoch": 4.447243022464262, "grad_norm": 1.4299622774124146, "learning_rate": 3.304995226633728e-07, "loss": 0.3721, "step": 6533 }, { "epoch": 4.447923757658271, "grad_norm": 1.542360782623291, "learning_rate": 3.29695046650525e-07, "loss": 0.3111, "step": 6534 }, { "epoch": 4.44860449285228, "grad_norm": 1.3467929363250732, "learning_rate": 3.2889151754512237e-07, "loss": 0.4014, "step": 6535 }, { "epoch": 4.44928522804629, "grad_norm": 1.5549930334091187, "learning_rate": 3.2808893551008136e-07, "loss": 0.3107, "step": 6536 }, { "epoch": 4.4499659632403, "grad_norm": 1.3867629766464233, "learning_rate": 3.2728730070812506e-07, "loss": 0.4089, "step": 6537 }, { "epoch": 4.450646698434309, "grad_norm": 1.397860050201416, "learning_rate": 3.264866133017874e-07, "loss": 0.3666, "step": 6538 }, { "epoch": 4.451327433628318, "grad_norm": 1.2728780508041382, "learning_rate": 3.2568687345340576e-07, "loss": 0.4592, "step": 6539 }, { "epoch": 4.452008168822328, "grad_norm": 1.4173506498336792, "learning_rate": 3.2488808132512986e-07, "loss": 0.3115, "step": 6540 }, { "epoch": 4.4526889040163375, "grad_norm": 1.3767014741897583, "learning_rate": 3.240902370789156e-07, "loss": 0.2672, "step": 6541 }, { "epoch": 4.453369639210347, "grad_norm": 1.4159458875656128, "learning_rate": 3.2329334087652544e-07, "loss": 0.3219, "step": 6542 }, { "epoch": 4.454050374404357, "grad_norm": 1.4592970609664917, "learning_rate": 3.2249739287953106e-07, "loss": 0.3993, "step": 6543 }, { "epoch": 4.454731109598367, "grad_norm": 1.382317066192627, "learning_rate": 3.217023932493102e-07, "loss": 0.4104, "step": 6544 }, { "epoch": 4.455411844792375, "grad_norm": 1.5449377298355103, "learning_rate": 3.209083421470516e-07, "loss": 0.3989, "step": 6545 }, { "epoch": 4.456092579986385, "grad_norm": 1.2382197380065918, "learning_rate": 3.201152397337487e-07, "loss": 0.1691, "step": 6546 }, { "epoch": 4.456773315180395, "grad_norm": 1.4211310148239136, "learning_rate": 3.193230861702024e-07, "loss": 0.2604, "step": 6547 }, { "epoch": 4.4574540503744045, "grad_norm": 1.3383290767669678, "learning_rate": 3.185318816170235e-07, "loss": 0.4003, "step": 6548 }, { "epoch": 4.458134785568414, "grad_norm": 1.4149580001831055, "learning_rate": 3.1774162623463e-07, "loss": 0.2079, "step": 6549 }, { "epoch": 4.458815520762424, "grad_norm": 1.3814997673034668, "learning_rate": 3.169523201832458e-07, "loss": 0.3711, "step": 6550 }, { "epoch": 4.459496255956433, "grad_norm": 1.547478199005127, "learning_rate": 3.16163963622903e-07, "loss": 0.2748, "step": 6551 }, { "epoch": 4.460176991150442, "grad_norm": 1.2970366477966309, "learning_rate": 3.1537655671344204e-07, "loss": 0.2162, "step": 6552 }, { "epoch": 4.460857726344452, "grad_norm": 1.3926352262496948, "learning_rate": 3.145900996145101e-07, "loss": 0.413, "step": 6553 }, { "epoch": 4.461538461538462, "grad_norm": 1.4757084846496582, "learning_rate": 3.1380459248556095e-07, "loss": 0.3152, "step": 6554 }, { "epoch": 4.462219196732471, "grad_norm": 1.4809075593948364, "learning_rate": 3.130200354858587e-07, "loss": 0.1993, "step": 6555 }, { "epoch": 4.462899931926481, "grad_norm": 1.4414496421813965, "learning_rate": 3.1223642877447e-07, "loss": 0.3812, "step": 6556 }, { "epoch": 4.46358066712049, "grad_norm": 1.4668030738830566, "learning_rate": 3.1145377251027395e-07, "loss": 0.4092, "step": 6557 }, { "epoch": 4.4642614023144995, "grad_norm": 1.3980094194412231, "learning_rate": 3.106720668519536e-07, "loss": 0.2647, "step": 6558 }, { "epoch": 4.464942137508509, "grad_norm": 1.5444711446762085, "learning_rate": 3.0989131195800106e-07, "loss": 0.3474, "step": 6559 }, { "epoch": 4.465622872702519, "grad_norm": 1.490803837776184, "learning_rate": 3.091115079867141e-07, "loss": 0.4035, "step": 6560 }, { "epoch": 4.466303607896529, "grad_norm": 1.4499202966690063, "learning_rate": 3.083326550961985e-07, "loss": 0.4338, "step": 6561 }, { "epoch": 4.466984343090537, "grad_norm": 1.4452444314956665, "learning_rate": 3.075547534443679e-07, "loss": 0.2335, "step": 6562 }, { "epoch": 4.467665078284547, "grad_norm": 1.4085780382156372, "learning_rate": 3.067778031889418e-07, "loss": 0.4336, "step": 6563 }, { "epoch": 4.468345813478557, "grad_norm": 1.421976923942566, "learning_rate": 3.0600180448744744e-07, "loss": 0.3828, "step": 6564 }, { "epoch": 4.469026548672566, "grad_norm": 1.2467145919799805, "learning_rate": 3.0522675749721843e-07, "loss": 0.2637, "step": 6565 }, { "epoch": 4.469707283866576, "grad_norm": 1.3818525075912476, "learning_rate": 3.0445266237539793e-07, "loss": 0.3738, "step": 6566 }, { "epoch": 4.470388019060586, "grad_norm": 1.423318862915039, "learning_rate": 3.036795192789332e-07, "loss": 0.3407, "step": 6567 }, { "epoch": 4.471068754254595, "grad_norm": 1.4845590591430664, "learning_rate": 3.029073283645789e-07, "loss": 0.2512, "step": 6568 }, { "epoch": 4.471749489448604, "grad_norm": 1.407881259918213, "learning_rate": 3.021360897888981e-07, "loss": 0.5311, "step": 6569 }, { "epoch": 4.472430224642614, "grad_norm": 1.3916032314300537, "learning_rate": 3.0136580370825954e-07, "loss": 0.3103, "step": 6570 }, { "epoch": 4.473110959836624, "grad_norm": 1.385979413986206, "learning_rate": 3.005964702788389e-07, "loss": 0.3343, "step": 6571 }, { "epoch": 4.473791695030633, "grad_norm": 1.3545258045196533, "learning_rate": 2.9982808965662027e-07, "loss": 0.296, "step": 6572 }, { "epoch": 4.474472430224643, "grad_norm": 1.370484709739685, "learning_rate": 2.9906066199739235e-07, "loss": 0.5097, "step": 6573 }, { "epoch": 4.475153165418652, "grad_norm": 1.4455845355987549, "learning_rate": 2.982941874567513e-07, "loss": 0.2385, "step": 6574 }, { "epoch": 4.4758339006126615, "grad_norm": 1.4531161785125732, "learning_rate": 2.975286661901006e-07, "loss": 0.4032, "step": 6575 }, { "epoch": 4.476514635806671, "grad_norm": 1.3787785768508911, "learning_rate": 2.9676409835265095e-07, "loss": 0.3174, "step": 6576 }, { "epoch": 4.477195371000681, "grad_norm": 1.446433663368225, "learning_rate": 2.960004840994191e-07, "loss": 0.3118, "step": 6577 }, { "epoch": 4.477876106194691, "grad_norm": 1.4315346479415894, "learning_rate": 2.952378235852266e-07, "loss": 0.3463, "step": 6578 }, { "epoch": 4.4785568413887, "grad_norm": 1.6052981615066528, "learning_rate": 2.944761169647053e-07, "loss": 0.4547, "step": 6579 }, { "epoch": 4.479237576582709, "grad_norm": 1.4853371381759644, "learning_rate": 2.9371536439229074e-07, "loss": 0.3798, "step": 6580 }, { "epoch": 4.479918311776719, "grad_norm": 1.3851795196533203, "learning_rate": 2.929555660222261e-07, "loss": 0.3627, "step": 6581 }, { "epoch": 4.480599046970728, "grad_norm": 1.4136466979980469, "learning_rate": 2.9219672200856033e-07, "loss": 0.4664, "step": 6582 }, { "epoch": 4.481279782164738, "grad_norm": 1.5304591655731201, "learning_rate": 2.9143883250515146e-07, "loss": 0.3154, "step": 6583 }, { "epoch": 4.481960517358748, "grad_norm": 1.4491758346557617, "learning_rate": 2.906818976656611e-07, "loss": 0.2772, "step": 6584 }, { "epoch": 4.482641252552757, "grad_norm": 1.3747888803482056, "learning_rate": 2.8992591764355707e-07, "loss": 0.3979, "step": 6585 }, { "epoch": 4.483321987746766, "grad_norm": 1.345608115196228, "learning_rate": 2.8917089259211716e-07, "loss": 0.496, "step": 6586 }, { "epoch": 4.484002722940776, "grad_norm": 1.491726040840149, "learning_rate": 2.8841682266442127e-07, "loss": 0.2961, "step": 6587 }, { "epoch": 4.484683458134786, "grad_norm": 1.3303136825561523, "learning_rate": 2.876637080133582e-07, "loss": 0.2925, "step": 6588 }, { "epoch": 4.485364193328795, "grad_norm": 1.4093295335769653, "learning_rate": 2.86911548791623e-07, "loss": 0.4297, "step": 6589 }, { "epoch": 4.486044928522805, "grad_norm": 1.4305306673049927, "learning_rate": 2.861603451517159e-07, "loss": 0.244, "step": 6590 }, { "epoch": 4.486725663716814, "grad_norm": 1.424920678138733, "learning_rate": 2.8541009724594337e-07, "loss": 0.3161, "step": 6591 }, { "epoch": 4.4874063989108235, "grad_norm": 1.300352692604065, "learning_rate": 2.846608052264194e-07, "loss": 0.2979, "step": 6592 }, { "epoch": 4.488087134104833, "grad_norm": 1.4808769226074219, "learning_rate": 2.83912469245064e-07, "loss": 0.3919, "step": 6593 }, { "epoch": 4.488767869298843, "grad_norm": 1.402859091758728, "learning_rate": 2.83165089453602e-07, "loss": 0.4074, "step": 6594 }, { "epoch": 4.4894486044928525, "grad_norm": 1.4156864881515503, "learning_rate": 2.8241866600356494e-07, "loss": 0.3378, "step": 6595 }, { "epoch": 4.490129339686861, "grad_norm": 1.396122932434082, "learning_rate": 2.8167319904629174e-07, "loss": 0.5231, "step": 6596 }, { "epoch": 4.490810074880871, "grad_norm": 1.3599320650100708, "learning_rate": 2.8092868873292545e-07, "loss": 0.2312, "step": 6597 }, { "epoch": 4.491490810074881, "grad_norm": 1.3880088329315186, "learning_rate": 2.801851352144164e-07, "loss": 0.4559, "step": 6598 }, { "epoch": 4.49217154526889, "grad_norm": 1.3380016088485718, "learning_rate": 2.7944253864151895e-07, "loss": 0.394, "step": 6599 }, { "epoch": 4.4928522804629, "grad_norm": 1.4633594751358032, "learning_rate": 2.7870089916479715e-07, "loss": 0.4927, "step": 6600 }, { "epoch": 4.49353301565691, "grad_norm": 1.4785857200622559, "learning_rate": 2.7796021693461904e-07, "loss": 0.3076, "step": 6601 }, { "epoch": 4.4942137508509195, "grad_norm": 1.311046838760376, "learning_rate": 2.772204921011562e-07, "loss": 0.3432, "step": 6602 }, { "epoch": 4.494894486044928, "grad_norm": 1.3105096817016602, "learning_rate": 2.7648172481439137e-07, "loss": 0.5547, "step": 6603 }, { "epoch": 4.495575221238938, "grad_norm": 1.4745948314666748, "learning_rate": 2.757439152241076e-07, "loss": 0.3969, "step": 6604 }, { "epoch": 4.496255956432948, "grad_norm": 1.6034168004989624, "learning_rate": 2.750070634798979e-07, "loss": 0.3999, "step": 6605 }, { "epoch": 4.496936691626957, "grad_norm": 1.3661822080612183, "learning_rate": 2.742711697311573e-07, "loss": 0.2665, "step": 6606 }, { "epoch": 4.497617426820967, "grad_norm": 1.2912670373916626, "learning_rate": 2.735362341270914e-07, "loss": 0.2781, "step": 6607 }, { "epoch": 4.498298162014976, "grad_norm": 1.476188063621521, "learning_rate": 2.7280225681670714e-07, "loss": 0.3748, "step": 6608 }, { "epoch": 4.4989788972089855, "grad_norm": 1.3459099531173706, "learning_rate": 2.720692379488188e-07, "loss": 0.297, "step": 6609 }, { "epoch": 4.499659632402995, "grad_norm": 1.4698991775512695, "learning_rate": 2.7133717767204803e-07, "loss": 0.31, "step": 6610 }, { "epoch": 4.500340367597005, "grad_norm": 1.520550012588501, "learning_rate": 2.706060761348195e-07, "loss": 0.3378, "step": 6611 }, { "epoch": 4.5010211027910145, "grad_norm": 1.3815745115280151, "learning_rate": 2.698759334853651e-07, "loss": 0.3586, "step": 6612 }, { "epoch": 4.501701837985024, "grad_norm": 1.3611983060836792, "learning_rate": 2.6914674987171984e-07, "loss": 0.3707, "step": 6613 }, { "epoch": 4.502382573179033, "grad_norm": 1.369702696800232, "learning_rate": 2.6841852544172875e-07, "loss": 0.2212, "step": 6614 }, { "epoch": 4.503063308373043, "grad_norm": 1.3996046781539917, "learning_rate": 2.676912603430387e-07, "loss": 0.3134, "step": 6615 }, { "epoch": 4.503744043567052, "grad_norm": 1.4584743976593018, "learning_rate": 2.669649547231018e-07, "loss": 0.3073, "step": 6616 }, { "epoch": 4.504424778761062, "grad_norm": 1.3935303688049316, "learning_rate": 2.662396087291791e-07, "loss": 0.4802, "step": 6617 }, { "epoch": 4.505105513955072, "grad_norm": 1.3885473012924194, "learning_rate": 2.6551522250833464e-07, "loss": 0.4141, "step": 6618 }, { "epoch": 4.5057862491490805, "grad_norm": 1.3506088256835938, "learning_rate": 2.6479179620743756e-07, "loss": 0.2411, "step": 6619 }, { "epoch": 4.50646698434309, "grad_norm": 1.3225781917572021, "learning_rate": 2.6406932997316283e-07, "loss": 0.3538, "step": 6620 }, { "epoch": 4.5071477195371, "grad_norm": 1.2990585565567017, "learning_rate": 2.633478239519921e-07, "loss": 0.4983, "step": 6621 }, { "epoch": 4.50782845473111, "grad_norm": 1.3000988960266113, "learning_rate": 2.6262727829021005e-07, "loss": 0.3196, "step": 6622 }, { "epoch": 4.508509189925119, "grad_norm": 1.4728130102157593, "learning_rate": 2.6190769313390817e-07, "loss": 0.3616, "step": 6623 }, { "epoch": 4.509189925119129, "grad_norm": 1.2624160051345825, "learning_rate": 2.611890686289831e-07, "loss": 0.3653, "step": 6624 }, { "epoch": 4.509870660313139, "grad_norm": 1.3746284246444702, "learning_rate": 2.60471404921136e-07, "loss": 0.3511, "step": 6625 }, { "epoch": 4.5105513955071475, "grad_norm": 1.277571678161621, "learning_rate": 2.597547021558744e-07, "loss": 0.2907, "step": 6626 }, { "epoch": 4.511232130701157, "grad_norm": 1.1889593601226807, "learning_rate": 2.590389604785087e-07, "loss": 0.249, "step": 6627 }, { "epoch": 4.511912865895167, "grad_norm": 1.3277431726455688, "learning_rate": 2.5832418003415795e-07, "loss": 0.3392, "step": 6628 }, { "epoch": 4.5125936010891765, "grad_norm": 1.4806286096572876, "learning_rate": 2.5761036096774327e-07, "loss": 0.4191, "step": 6629 }, { "epoch": 4.513274336283186, "grad_norm": 1.4736829996109009, "learning_rate": 2.568975034239918e-07, "loss": 0.3148, "step": 6630 }, { "epoch": 4.513955071477195, "grad_norm": 1.49626886844635, "learning_rate": 2.5618560754743725e-07, "loss": 0.2873, "step": 6631 }, { "epoch": 4.514635806671205, "grad_norm": 1.5086562633514404, "learning_rate": 2.554746734824154e-07, "loss": 0.3348, "step": 6632 }, { "epoch": 4.515316541865214, "grad_norm": 1.481186032295227, "learning_rate": 2.547647013730686e-07, "loss": 0.4154, "step": 6633 }, { "epoch": 4.515997277059224, "grad_norm": 1.5234344005584717, "learning_rate": 2.540556913633446e-07, "loss": 0.4887, "step": 6634 }, { "epoch": 4.516678012253234, "grad_norm": 1.4124926328659058, "learning_rate": 2.533476435969967e-07, "loss": 0.3536, "step": 6635 }, { "epoch": 4.517358747447243, "grad_norm": 1.4363425970077515, "learning_rate": 2.5264055821758114e-07, "loss": 0.2026, "step": 6636 }, { "epoch": 4.518039482641252, "grad_norm": 1.42067551612854, "learning_rate": 2.519344353684594e-07, "loss": 0.5229, "step": 6637 }, { "epoch": 4.518720217835262, "grad_norm": 1.4234673976898193, "learning_rate": 2.512292751927992e-07, "loss": 0.3054, "step": 6638 }, { "epoch": 4.519400953029272, "grad_norm": 1.283867597579956, "learning_rate": 2.5052507783357215e-07, "loss": 0.1657, "step": 6639 }, { "epoch": 4.520081688223281, "grad_norm": 1.3350341320037842, "learning_rate": 2.498218434335542e-07, "loss": 0.4808, "step": 6640 }, { "epoch": 4.520762423417291, "grad_norm": 1.4314687252044678, "learning_rate": 2.4911957213532736e-07, "loss": 0.3123, "step": 6641 }, { "epoch": 4.5214431586113, "grad_norm": 1.3795719146728516, "learning_rate": 2.484182640812771e-07, "loss": 0.2767, "step": 6642 }, { "epoch": 4.522123893805309, "grad_norm": 1.3532519340515137, "learning_rate": 2.4771791941359313e-07, "loss": 0.2314, "step": 6643 }, { "epoch": 4.522804628999319, "grad_norm": 1.4914889335632324, "learning_rate": 2.470185382742724e-07, "loss": 0.4025, "step": 6644 }, { "epoch": 4.523485364193329, "grad_norm": 1.289562463760376, "learning_rate": 2.4632012080511526e-07, "loss": 0.3395, "step": 6645 }, { "epoch": 4.5241660993873385, "grad_norm": 1.3195997476577759, "learning_rate": 2.4562266714772465e-07, "loss": 0.3557, "step": 6646 }, { "epoch": 4.524846834581348, "grad_norm": 1.3652799129486084, "learning_rate": 2.449261774435102e-07, "loss": 0.3081, "step": 6647 }, { "epoch": 4.525527569775358, "grad_norm": 1.4607237577438354, "learning_rate": 2.4423065183368677e-07, "loss": 0.4387, "step": 6648 }, { "epoch": 4.526208304969367, "grad_norm": 1.5264078378677368, "learning_rate": 2.435360904592721e-07, "loss": 0.4579, "step": 6649 }, { "epoch": 4.526889040163376, "grad_norm": 1.476772427558899, "learning_rate": 2.428424934610873e-07, "loss": 0.2686, "step": 6650 }, { "epoch": 4.527569775357386, "grad_norm": 1.3873460292816162, "learning_rate": 2.421498609797618e-07, "loss": 0.3452, "step": 6651 }, { "epoch": 4.528250510551396, "grad_norm": 1.3921321630477905, "learning_rate": 2.4145819315572696e-07, "loss": 0.3947, "step": 6652 }, { "epoch": 4.528931245745405, "grad_norm": 1.4122904539108276, "learning_rate": 2.4076749012921843e-07, "loss": 0.3734, "step": 6653 }, { "epoch": 4.529611980939414, "grad_norm": 1.5389187335968018, "learning_rate": 2.4007775204027585e-07, "loss": 0.2965, "step": 6654 }, { "epoch": 4.530292716133424, "grad_norm": 1.3967159986495972, "learning_rate": 2.393889790287457e-07, "loss": 0.3043, "step": 6655 }, { "epoch": 4.530973451327434, "grad_norm": 1.4097449779510498, "learning_rate": 2.387011712342768e-07, "loss": 0.3978, "step": 6656 }, { "epoch": 4.531654186521443, "grad_norm": 1.4055670499801636, "learning_rate": 2.380143287963216e-07, "loss": 0.3367, "step": 6657 }, { "epoch": 4.532334921715453, "grad_norm": 1.2921844720840454, "learning_rate": 2.3732845185413912e-07, "loss": 0.2841, "step": 6658 }, { "epoch": 4.533015656909463, "grad_norm": 1.4332022666931152, "learning_rate": 2.366435405467904e-07, "loss": 0.1638, "step": 6659 }, { "epoch": 4.533696392103471, "grad_norm": 1.4310036897659302, "learning_rate": 2.3595959501314213e-07, "loss": 0.299, "step": 6660 }, { "epoch": 4.534377127297481, "grad_norm": 1.4059827327728271, "learning_rate": 2.3527661539186397e-07, "loss": 0.3179, "step": 6661 }, { "epoch": 4.535057862491491, "grad_norm": 1.3452789783477783, "learning_rate": 2.3459460182143235e-07, "loss": 0.3486, "step": 6662 }, { "epoch": 4.5357385976855005, "grad_norm": 1.4173624515533447, "learning_rate": 2.3391355444012442e-07, "loss": 0.4129, "step": 6663 }, { "epoch": 4.53641933287951, "grad_norm": 1.4511970281600952, "learning_rate": 2.332334733860231e-07, "loss": 0.313, "step": 6664 }, { "epoch": 4.537100068073519, "grad_norm": 1.3402701616287231, "learning_rate": 2.3255435879701637e-07, "loss": 0.2644, "step": 6665 }, { "epoch": 4.537780803267529, "grad_norm": 1.3427766561508179, "learning_rate": 2.3187621081079415e-07, "loss": 0.3115, "step": 6666 }, { "epoch": 4.538461538461538, "grad_norm": 1.5858246088027954, "learning_rate": 2.3119902956485142e-07, "loss": 0.3317, "step": 6667 }, { "epoch": 4.539142273655548, "grad_norm": 1.3369531631469727, "learning_rate": 2.3052281519648612e-07, "loss": 0.4154, "step": 6668 }, { "epoch": 4.539823008849558, "grad_norm": 1.2641944885253906, "learning_rate": 2.298475678428036e-07, "loss": 0.2601, "step": 6669 }, { "epoch": 4.540503744043567, "grad_norm": 1.407787561416626, "learning_rate": 2.2917328764070935e-07, "loss": 0.5418, "step": 6670 }, { "epoch": 4.541184479237577, "grad_norm": 1.431257963180542, "learning_rate": 2.28499974726914e-07, "loss": 0.352, "step": 6671 }, { "epoch": 4.541865214431586, "grad_norm": 1.4665731191635132, "learning_rate": 2.2782762923793277e-07, "loss": 0.4136, "step": 6672 }, { "epoch": 4.5425459496255955, "grad_norm": 1.461045265197754, "learning_rate": 2.271562513100839e-07, "loss": 0.2628, "step": 6673 }, { "epoch": 4.543226684819605, "grad_norm": 1.3035178184509277, "learning_rate": 2.2648584107948956e-07, "loss": 0.392, "step": 6674 }, { "epoch": 4.543907420013615, "grad_norm": 1.7983649969100952, "learning_rate": 2.258163986820755e-07, "loss": 0.3597, "step": 6675 }, { "epoch": 4.544588155207625, "grad_norm": 1.3000272512435913, "learning_rate": 2.2514792425357256e-07, "loss": 0.164, "step": 6676 }, { "epoch": 4.545268890401633, "grad_norm": 1.3196606636047363, "learning_rate": 2.2448041792951348e-07, "loss": 0.3948, "step": 6677 }, { "epoch": 4.545949625595643, "grad_norm": 1.3426570892333984, "learning_rate": 2.238138798452355e-07, "loss": 0.3701, "step": 6678 }, { "epoch": 4.546630360789653, "grad_norm": 1.345543622970581, "learning_rate": 2.2314831013588112e-07, "loss": 0.4697, "step": 6679 }, { "epoch": 4.5473110959836625, "grad_norm": 1.28163480758667, "learning_rate": 2.224837089363946e-07, "loss": 0.3995, "step": 6680 }, { "epoch": 4.547991831177672, "grad_norm": 1.3687189817428589, "learning_rate": 2.2182007638152314e-07, "loss": 0.3904, "step": 6681 }, { "epoch": 4.548672566371682, "grad_norm": 1.3692559003829956, "learning_rate": 2.2115741260581857e-07, "loss": 0.3148, "step": 6682 }, { "epoch": 4.549353301565691, "grad_norm": 1.2997568845748901, "learning_rate": 2.2049571774363844e-07, "loss": 0.5867, "step": 6683 }, { "epoch": 4.5500340367597, "grad_norm": 1.524479866027832, "learning_rate": 2.1983499192913982e-07, "loss": 0.4105, "step": 6684 }, { "epoch": 4.55071477195371, "grad_norm": 1.225845456123352, "learning_rate": 2.1917523529628615e-07, "loss": 0.2963, "step": 6685 }, { "epoch": 4.55139550714772, "grad_norm": 1.5343525409698486, "learning_rate": 2.1851644797884263e-07, "loss": 0.3362, "step": 6686 }, { "epoch": 4.552076242341729, "grad_norm": 1.305172085762024, "learning_rate": 2.1785863011038078e-07, "loss": 0.2395, "step": 6687 }, { "epoch": 4.552756977535738, "grad_norm": 1.373897910118103, "learning_rate": 2.1720178182427276e-07, "loss": 0.389, "step": 6688 }, { "epoch": 4.553437712729748, "grad_norm": 1.4080291986465454, "learning_rate": 2.1654590325369317e-07, "loss": 0.3588, "step": 6689 }, { "epoch": 4.5541184479237575, "grad_norm": 1.387131690979004, "learning_rate": 2.1589099453162455e-07, "loss": 0.3978, "step": 6690 }, { "epoch": 4.554799183117767, "grad_norm": 1.400722622871399, "learning_rate": 2.1523705579084896e-07, "loss": 0.3582, "step": 6691 }, { "epoch": 4.555479918311777, "grad_norm": 1.349153995513916, "learning_rate": 2.1458408716395152e-07, "loss": 0.3826, "step": 6692 }, { "epoch": 4.556160653505787, "grad_norm": 1.4856494665145874, "learning_rate": 2.1393208878332462e-07, "loss": 0.4699, "step": 6693 }, { "epoch": 4.556841388699796, "grad_norm": 1.3965084552764893, "learning_rate": 2.132810607811592e-07, "loss": 0.4008, "step": 6694 }, { "epoch": 4.557522123893805, "grad_norm": 1.4609249830245972, "learning_rate": 2.1263100328945353e-07, "loss": 0.33, "step": 6695 }, { "epoch": 4.558202859087815, "grad_norm": 1.535517930984497, "learning_rate": 2.11981916440005e-07, "loss": 0.3837, "step": 6696 }, { "epoch": 4.558883594281824, "grad_norm": 1.5107700824737549, "learning_rate": 2.1133380036441887e-07, "loss": 0.4893, "step": 6697 }, { "epoch": 4.559564329475834, "grad_norm": 1.3754926919937134, "learning_rate": 2.1068665519409948e-07, "loss": 0.3162, "step": 6698 }, { "epoch": 4.560245064669844, "grad_norm": 1.4707281589508057, "learning_rate": 2.1004048106025577e-07, "loss": 0.4034, "step": 6699 }, { "epoch": 4.560925799863853, "grad_norm": 1.4693379402160645, "learning_rate": 2.093952780939007e-07, "loss": 0.5916, "step": 6700 }, { "epoch": 4.561606535057862, "grad_norm": 1.3609708547592163, "learning_rate": 2.087510464258502e-07, "loss": 0.3767, "step": 6701 }, { "epoch": 4.562287270251872, "grad_norm": 1.3503382205963135, "learning_rate": 2.081077861867209e-07, "loss": 0.3088, "step": 6702 }, { "epoch": 4.562968005445882, "grad_norm": 1.3949121236801147, "learning_rate": 2.0746549750693513e-07, "loss": 0.3141, "step": 6703 }, { "epoch": 4.563648740639891, "grad_norm": 1.3258697986602783, "learning_rate": 2.0682418051671815e-07, "loss": 0.2468, "step": 6704 }, { "epoch": 4.564329475833901, "grad_norm": 1.5439839363098145, "learning_rate": 2.0618383534609655e-07, "loss": 0.2771, "step": 6705 }, { "epoch": 4.56501021102791, "grad_norm": 1.4818806648254395, "learning_rate": 2.0554446212490031e-07, "loss": 0.3316, "step": 6706 }, { "epoch": 4.5656909462219195, "grad_norm": 1.3449923992156982, "learning_rate": 2.049060609827641e-07, "loss": 0.5087, "step": 6707 }, { "epoch": 4.566371681415929, "grad_norm": 1.3523262739181519, "learning_rate": 2.0426863204912328e-07, "loss": 0.4309, "step": 6708 }, { "epoch": 4.567052416609939, "grad_norm": 1.363450050354004, "learning_rate": 2.0363217545321668e-07, "loss": 0.2662, "step": 6709 }, { "epoch": 4.567733151803949, "grad_norm": 1.5252492427825928, "learning_rate": 2.0299669132408716e-07, "loss": 0.2997, "step": 6710 }, { "epoch": 4.568413886997957, "grad_norm": 1.4409583806991577, "learning_rate": 2.0236217979057948e-07, "loss": 0.3589, "step": 6711 }, { "epoch": 4.569094622191967, "grad_norm": 1.3599485158920288, "learning_rate": 2.0172864098134014e-07, "loss": 0.5047, "step": 6712 }, { "epoch": 4.569775357385977, "grad_norm": 1.3140610456466675, "learning_rate": 2.010960750248203e-07, "loss": 0.3409, "step": 6713 }, { "epoch": 4.570456092579986, "grad_norm": 1.3725759983062744, "learning_rate": 2.0046448204927404e-07, "loss": 0.3047, "step": 6714 }, { "epoch": 4.571136827773996, "grad_norm": 1.409206748008728, "learning_rate": 1.9983386218275614e-07, "loss": 0.3091, "step": 6715 }, { "epoch": 4.571817562968006, "grad_norm": 1.4809879064559937, "learning_rate": 1.992042155531254e-07, "loss": 0.3134, "step": 6716 }, { "epoch": 4.5724982981620155, "grad_norm": 1.4042046070098877, "learning_rate": 1.985755422880431e-07, "loss": 0.3772, "step": 6717 }, { "epoch": 4.573179033356024, "grad_norm": 1.5006027221679688, "learning_rate": 1.979478425149739e-07, "loss": 0.3098, "step": 6718 }, { "epoch": 4.573859768550034, "grad_norm": 1.3167957067489624, "learning_rate": 1.9732111636118268e-07, "loss": 0.2195, "step": 6719 }, { "epoch": 4.574540503744044, "grad_norm": 1.5392229557037354, "learning_rate": 1.9669536395373945e-07, "loss": 0.2812, "step": 6720 }, { "epoch": 4.575221238938053, "grad_norm": 1.4512280225753784, "learning_rate": 1.9607058541951717e-07, "loss": 0.4468, "step": 6721 }, { "epoch": 4.575901974132063, "grad_norm": 1.281857967376709, "learning_rate": 1.9544678088518843e-07, "loss": 0.4836, "step": 6722 }, { "epoch": 4.576582709326072, "grad_norm": 1.4266000986099243, "learning_rate": 1.9482395047723036e-07, "loss": 0.3248, "step": 6723 }, { "epoch": 4.5772634445200815, "grad_norm": 1.4032485485076904, "learning_rate": 1.9420209432192306e-07, "loss": 0.2163, "step": 6724 }, { "epoch": 4.577944179714091, "grad_norm": 1.3606055974960327, "learning_rate": 1.935812125453479e-07, "loss": 0.2806, "step": 6725 }, { "epoch": 4.578624914908101, "grad_norm": 1.333430528640747, "learning_rate": 1.9296130527338753e-07, "loss": 0.4351, "step": 6726 }, { "epoch": 4.5793056501021105, "grad_norm": 1.6888277530670166, "learning_rate": 1.923423726317314e-07, "loss": 0.178, "step": 6727 }, { "epoch": 4.57998638529612, "grad_norm": 1.5064641237258911, "learning_rate": 1.9172441474586635e-07, "loss": 0.2651, "step": 6728 }, { "epoch": 4.580667120490129, "grad_norm": 1.4906924962997437, "learning_rate": 1.911074317410838e-07, "loss": 0.3605, "step": 6729 }, { "epoch": 4.581347855684139, "grad_norm": 1.4688239097595215, "learning_rate": 1.9049142374247875e-07, "loss": 0.303, "step": 6730 }, { "epoch": 4.582028590878148, "grad_norm": 1.409915804862976, "learning_rate": 1.898763908749468e-07, "loss": 0.3617, "step": 6731 }, { "epoch": 4.582709326072158, "grad_norm": 1.2548470497131348, "learning_rate": 1.8926233326318598e-07, "loss": 0.2552, "step": 6732 }, { "epoch": 4.583390061266168, "grad_norm": 1.420409917831421, "learning_rate": 1.8864925103169618e-07, "loss": 0.3083, "step": 6733 }, { "epoch": 4.584070796460177, "grad_norm": 1.465388536453247, "learning_rate": 1.8803714430478183e-07, "loss": 0.2589, "step": 6734 }, { "epoch": 4.584751531654186, "grad_norm": 1.5044078826904297, "learning_rate": 1.8742601320654697e-07, "loss": 0.2572, "step": 6735 }, { "epoch": 4.585432266848196, "grad_norm": 1.47450852394104, "learning_rate": 1.8681585786089862e-07, "loss": 0.3755, "step": 6736 }, { "epoch": 4.586113002042206, "grad_norm": 1.542891502380371, "learning_rate": 1.8620667839154616e-07, "loss": 0.3367, "step": 6737 }, { "epoch": 4.586793737236215, "grad_norm": 1.4472516775131226, "learning_rate": 1.8559847492200245e-07, "loss": 0.3017, "step": 6738 }, { "epoch": 4.587474472430225, "grad_norm": 1.3649840354919434, "learning_rate": 1.8499124757557995e-07, "loss": 0.2778, "step": 6739 }, { "epoch": 4.588155207624234, "grad_norm": 1.1977516412734985, "learning_rate": 1.8438499647539465e-07, "loss": 0.3074, "step": 6740 }, { "epoch": 4.5888359428182435, "grad_norm": 1.4668000936508179, "learning_rate": 1.8377972174436487e-07, "loss": 0.4296, "step": 6741 }, { "epoch": 4.589516678012253, "grad_norm": 1.3835293054580688, "learning_rate": 1.8317542350520967e-07, "loss": 0.3246, "step": 6742 }, { "epoch": 4.590197413206263, "grad_norm": 1.474536657333374, "learning_rate": 1.825721018804516e-07, "loss": 0.4834, "step": 6743 }, { "epoch": 4.5908781484002725, "grad_norm": 1.5656821727752686, "learning_rate": 1.8196975699241336e-07, "loss": 0.2898, "step": 6744 }, { "epoch": 4.591558883594281, "grad_norm": 1.630494236946106, "learning_rate": 1.8136838896322227e-07, "loss": 0.2558, "step": 6745 }, { "epoch": 4.592239618788291, "grad_norm": 1.4942326545715332, "learning_rate": 1.807679979148047e-07, "loss": 0.2563, "step": 6746 }, { "epoch": 4.592920353982301, "grad_norm": 1.620913028717041, "learning_rate": 1.8016858396889158e-07, "loss": 0.2885, "step": 6747 }, { "epoch": 4.59360108917631, "grad_norm": 1.5674116611480713, "learning_rate": 1.7957014724701404e-07, "loss": 0.3579, "step": 6748 }, { "epoch": 4.59428182437032, "grad_norm": 1.3838303089141846, "learning_rate": 1.7897268787050558e-07, "loss": 0.3457, "step": 6749 }, { "epoch": 4.59496255956433, "grad_norm": 1.4741004705429077, "learning_rate": 1.7837620596050154e-07, "loss": 0.3935, "step": 6750 }, { "epoch": 4.595643294758339, "grad_norm": 1.570205807685852, "learning_rate": 1.777807016379385e-07, "loss": 0.3562, "step": 6751 }, { "epoch": 4.596324029952348, "grad_norm": 1.3094570636749268, "learning_rate": 1.7718617502355594e-07, "loss": 0.3576, "step": 6752 }, { "epoch": 4.597004765146358, "grad_norm": 1.4152246713638306, "learning_rate": 1.765926262378942e-07, "loss": 0.2146, "step": 6753 }, { "epoch": 4.597685500340368, "grad_norm": 1.5331687927246094, "learning_rate": 1.7600005540129528e-07, "loss": 0.2908, "step": 6754 }, { "epoch": 4.598366235534377, "grad_norm": 1.2983876466751099, "learning_rate": 1.754084626339042e-07, "loss": 0.3262, "step": 6755 }, { "epoch": 4.599046970728387, "grad_norm": 1.5233757495880127, "learning_rate": 1.7481784805566672e-07, "loss": 0.3581, "step": 6756 }, { "epoch": 4.599727705922396, "grad_norm": 1.5941402912139893, "learning_rate": 1.7422821178632976e-07, "loss": 0.3836, "step": 6757 }, { "epoch": 4.6004084411164055, "grad_norm": 1.4382238388061523, "learning_rate": 1.736395539454422e-07, "loss": 0.3483, "step": 6758 }, { "epoch": 4.601089176310415, "grad_norm": 1.498661756515503, "learning_rate": 1.730518746523563e-07, "loss": 0.3937, "step": 6759 }, { "epoch": 4.601769911504425, "grad_norm": 1.3340195417404175, "learning_rate": 1.7246517402622342e-07, "loss": 0.4134, "step": 6760 }, { "epoch": 4.6024506466984345, "grad_norm": 1.3479267358779907, "learning_rate": 1.718794521859968e-07, "loss": 0.3496, "step": 6761 }, { "epoch": 4.603131381892444, "grad_norm": 1.4438246488571167, "learning_rate": 1.7129470925043358e-07, "loss": 0.5204, "step": 6762 }, { "epoch": 4.603812117086453, "grad_norm": 1.3717820644378662, "learning_rate": 1.7071094533808895e-07, "loss": 0.2911, "step": 6763 }, { "epoch": 4.604492852280463, "grad_norm": 1.6572171449661255, "learning_rate": 1.7012816056732273e-07, "loss": 0.3561, "step": 6764 }, { "epoch": 4.605173587474472, "grad_norm": 1.4959278106689453, "learning_rate": 1.695463550562948e-07, "loss": 0.3426, "step": 6765 }, { "epoch": 4.605854322668482, "grad_norm": 1.4045183658599854, "learning_rate": 1.6896552892296637e-07, "loss": 0.3822, "step": 6766 }, { "epoch": 4.606535057862492, "grad_norm": 1.5500839948654175, "learning_rate": 1.6838568228510043e-07, "loss": 0.3896, "step": 6767 }, { "epoch": 4.6072157930565005, "grad_norm": 1.3112417459487915, "learning_rate": 1.6780681526026078e-07, "loss": 0.3093, "step": 6768 }, { "epoch": 4.60789652825051, "grad_norm": 1.446159839630127, "learning_rate": 1.6722892796581401e-07, "loss": 0.2753, "step": 6769 }, { "epoch": 4.60857726344452, "grad_norm": 1.3924973011016846, "learning_rate": 1.6665202051892647e-07, "loss": 0.2709, "step": 6770 }, { "epoch": 4.60925799863853, "grad_norm": 1.4713705778121948, "learning_rate": 1.6607609303656623e-07, "loss": 0.2893, "step": 6771 }, { "epoch": 4.609938733832539, "grad_norm": 1.396796464920044, "learning_rate": 1.655011456355038e-07, "loss": 0.1915, "step": 6772 }, { "epoch": 4.610619469026549, "grad_norm": 1.4888967275619507, "learning_rate": 1.6492717843230978e-07, "loss": 0.361, "step": 6773 }, { "epoch": 4.611300204220559, "grad_norm": 1.3218308687210083, "learning_rate": 1.6435419154335662e-07, "loss": 0.3108, "step": 6774 }, { "epoch": 4.611980939414567, "grad_norm": 1.3691922426223755, "learning_rate": 1.6378218508481702e-07, "loss": 0.372, "step": 6775 }, { "epoch": 4.612661674608577, "grad_norm": 1.4461928606033325, "learning_rate": 1.6321115917266704e-07, "loss": 0.2542, "step": 6776 }, { "epoch": 4.613342409802587, "grad_norm": 1.316003680229187, "learning_rate": 1.626411139226819e-07, "loss": 0.5403, "step": 6777 }, { "epoch": 4.6140231449965965, "grad_norm": 1.5743622779846191, "learning_rate": 1.6207204945043742e-07, "loss": 0.4022, "step": 6778 }, { "epoch": 4.614703880190606, "grad_norm": 1.5250139236450195, "learning_rate": 1.6150396587131412e-07, "loss": 0.3266, "step": 6779 }, { "epoch": 4.615384615384615, "grad_norm": 1.3331842422485352, "learning_rate": 1.609368633004893e-07, "loss": 0.4078, "step": 6780 }, { "epoch": 4.616065350578625, "grad_norm": 1.474276065826416, "learning_rate": 1.6037074185294432e-07, "loss": 0.2445, "step": 6781 }, { "epoch": 4.616746085772634, "grad_norm": 1.4470880031585693, "learning_rate": 1.5980560164346015e-07, "loss": 0.2815, "step": 6782 }, { "epoch": 4.617426820966644, "grad_norm": 1.4268548488616943, "learning_rate": 1.5924144278662068e-07, "loss": 0.3556, "step": 6783 }, { "epoch": 4.618107556160654, "grad_norm": 1.4208053350448608, "learning_rate": 1.586782653968083e-07, "loss": 0.2861, "step": 6784 }, { "epoch": 4.618788291354663, "grad_norm": 1.4589688777923584, "learning_rate": 1.5811606958820726e-07, "loss": 0.3791, "step": 6785 }, { "epoch": 4.619469026548672, "grad_norm": 1.5366785526275635, "learning_rate": 1.575548554748041e-07, "loss": 0.1797, "step": 6786 }, { "epoch": 4.620149761742682, "grad_norm": 1.5124571323394775, "learning_rate": 1.5699462317038562e-07, "loss": 0.4478, "step": 6787 }, { "epoch": 4.620830496936692, "grad_norm": 1.4200196266174316, "learning_rate": 1.564353727885376e-07, "loss": 0.3245, "step": 6788 }, { "epoch": 4.621511232130701, "grad_norm": 1.4040868282318115, "learning_rate": 1.558771044426499e-07, "loss": 0.3275, "step": 6789 }, { "epoch": 4.622191967324711, "grad_norm": 1.4245805740356445, "learning_rate": 1.5531981824591147e-07, "loss": 0.2746, "step": 6790 }, { "epoch": 4.62287270251872, "grad_norm": 1.3233281373977661, "learning_rate": 1.54763514311313e-07, "loss": 0.3034, "step": 6791 }, { "epoch": 4.623553437712729, "grad_norm": 1.4264230728149414, "learning_rate": 1.5420819275164368e-07, "loss": 0.4366, "step": 6792 }, { "epoch": 4.624234172906739, "grad_norm": 1.280102252960205, "learning_rate": 1.5365385367949738e-07, "loss": 0.3685, "step": 6793 }, { "epoch": 4.624914908100749, "grad_norm": 1.4739892482757568, "learning_rate": 1.5310049720726583e-07, "loss": 0.3311, "step": 6794 }, { "epoch": 4.6255956432947585, "grad_norm": 1.284006953239441, "learning_rate": 1.525481234471421e-07, "loss": 0.4605, "step": 6795 }, { "epoch": 4.626276378488768, "grad_norm": 1.2761470079421997, "learning_rate": 1.51996732511121e-07, "loss": 0.2709, "step": 6796 }, { "epoch": 4.626957113682778, "grad_norm": 1.3411771059036255, "learning_rate": 1.5144632451099706e-07, "loss": 0.3316, "step": 6797 }, { "epoch": 4.627637848876787, "grad_norm": 1.306026816368103, "learning_rate": 1.508968995583654e-07, "loss": 0.3512, "step": 6798 }, { "epoch": 4.628318584070796, "grad_norm": 1.3247047662734985, "learning_rate": 1.5034845776462303e-07, "loss": 0.2978, "step": 6799 }, { "epoch": 4.628999319264806, "grad_norm": 1.4996858835220337, "learning_rate": 1.4980099924096714e-07, "loss": 0.3258, "step": 6800 }, { "epoch": 4.629680054458816, "grad_norm": 1.1869614124298096, "learning_rate": 1.4925452409839446e-07, "loss": 0.2091, "step": 6801 }, { "epoch": 4.630360789652825, "grad_norm": 1.3682141304016113, "learning_rate": 1.4870903244770307e-07, "loss": 0.4246, "step": 6802 }, { "epoch": 4.631041524846834, "grad_norm": 1.6510226726531982, "learning_rate": 1.4816452439949335e-07, "loss": 0.3893, "step": 6803 }, { "epoch": 4.631722260040844, "grad_norm": 1.4665735960006714, "learning_rate": 1.4762100006416314e-07, "loss": 0.3217, "step": 6804 }, { "epoch": 4.6324029952348535, "grad_norm": 1.3338710069656372, "learning_rate": 1.4707845955191202e-07, "loss": 0.3116, "step": 6805 }, { "epoch": 4.633083730428863, "grad_norm": 1.4425556659698486, "learning_rate": 1.4653690297274203e-07, "loss": 0.5152, "step": 6806 }, { "epoch": 4.633764465622873, "grad_norm": 1.4312095642089844, "learning_rate": 1.4599633043645311e-07, "loss": 0.4775, "step": 6807 }, { "epoch": 4.634445200816883, "grad_norm": 1.5603617429733276, "learning_rate": 1.4545674205264704e-07, "loss": 0.3711, "step": 6808 }, { "epoch": 4.635125936010891, "grad_norm": 1.3340877294540405, "learning_rate": 1.4491813793072517e-07, "loss": 0.2897, "step": 6809 }, { "epoch": 4.635806671204901, "grad_norm": 1.5569971799850464, "learning_rate": 1.443805181798913e-07, "loss": 0.2274, "step": 6810 }, { "epoch": 4.636487406398911, "grad_norm": 1.4512063264846802, "learning_rate": 1.4384388290914653e-07, "loss": 0.1678, "step": 6811 }, { "epoch": 4.6371681415929205, "grad_norm": 1.394593358039856, "learning_rate": 1.4330823222729496e-07, "loss": 0.3473, "step": 6812 }, { "epoch": 4.63784887678693, "grad_norm": 1.582526445388794, "learning_rate": 1.4277356624293915e-07, "loss": 0.4384, "step": 6813 }, { "epoch": 4.638529611980939, "grad_norm": 1.4441603422164917, "learning_rate": 1.4223988506448405e-07, "loss": 0.3207, "step": 6814 }, { "epoch": 4.639210347174949, "grad_norm": 1.4320623874664307, "learning_rate": 1.417071888001331e-07, "loss": 0.2962, "step": 6815 }, { "epoch": 4.639891082368958, "grad_norm": 1.4235122203826904, "learning_rate": 1.4117547755789108e-07, "loss": 0.3776, "step": 6816 }, { "epoch": 4.640571817562968, "grad_norm": 1.349183201789856, "learning_rate": 1.406447514455639e-07, "loss": 0.3288, "step": 6817 }, { "epoch": 4.641252552756978, "grad_norm": 1.4445422887802124, "learning_rate": 1.4011501057075493e-07, "loss": 0.4082, "step": 6818 }, { "epoch": 4.641933287950987, "grad_norm": 1.5067843198776245, "learning_rate": 1.3958625504087055e-07, "loss": 0.5481, "step": 6819 }, { "epoch": 4.642614023144997, "grad_norm": 1.4271434545516968, "learning_rate": 1.3905848496311546e-07, "loss": 0.2703, "step": 6820 }, { "epoch": 4.643294758339006, "grad_norm": 1.4546890258789062, "learning_rate": 1.3853170044449638e-07, "loss": 0.2824, "step": 6821 }, { "epoch": 4.6439754935330155, "grad_norm": 1.4108293056488037, "learning_rate": 1.380059015918189e-07, "loss": 0.4517, "step": 6822 }, { "epoch": 4.644656228727025, "grad_norm": 1.3761087656021118, "learning_rate": 1.374810885116884e-07, "loss": 0.4009, "step": 6823 }, { "epoch": 4.645336963921035, "grad_norm": 1.5393059253692627, "learning_rate": 1.3695726131051134e-07, "loss": 0.2789, "step": 6824 }, { "epoch": 4.646017699115045, "grad_norm": 1.4611862897872925, "learning_rate": 1.3643442009449503e-07, "loss": 0.3616, "step": 6825 }, { "epoch": 4.646698434309053, "grad_norm": 1.2917977571487427, "learning_rate": 1.3591256496964522e-07, "loss": 0.2638, "step": 6826 }, { "epoch": 4.647379169503063, "grad_norm": 1.4774707555770874, "learning_rate": 1.3539169604176728e-07, "loss": 0.2164, "step": 6827 }, { "epoch": 4.648059904697073, "grad_norm": 1.451346755027771, "learning_rate": 1.3487181341647003e-07, "loss": 0.2267, "step": 6828 }, { "epoch": 4.648740639891082, "grad_norm": 1.4681237936019897, "learning_rate": 1.343529171991581e-07, "loss": 0.4217, "step": 6829 }, { "epoch": 4.649421375085092, "grad_norm": 1.5723241567611694, "learning_rate": 1.338350074950384e-07, "loss": 0.2152, "step": 6830 }, { "epoch": 4.650102110279102, "grad_norm": 1.4676990509033203, "learning_rate": 1.3331808440911865e-07, "loss": 0.3961, "step": 6831 }, { "epoch": 4.650782845473111, "grad_norm": 1.297115683555603, "learning_rate": 1.3280214804620327e-07, "loss": 0.4593, "step": 6832 }, { "epoch": 4.65146358066712, "grad_norm": 1.5316169261932373, "learning_rate": 1.322871985109009e-07, "loss": 0.3025, "step": 6833 }, { "epoch": 4.65214431586113, "grad_norm": 1.4173469543457031, "learning_rate": 1.3177323590761627e-07, "loss": 0.3068, "step": 6834 }, { "epoch": 4.65282505105514, "grad_norm": 1.3563010692596436, "learning_rate": 1.312602603405566e-07, "loss": 0.2301, "step": 6835 }, { "epoch": 4.653505786249149, "grad_norm": 1.1932569742202759, "learning_rate": 1.307482719137282e-07, "loss": 0.2991, "step": 6836 }, { "epoch": 4.654186521443158, "grad_norm": 1.3596282005310059, "learning_rate": 1.3023727073093573e-07, "loss": 0.2864, "step": 6837 }, { "epoch": 4.654867256637168, "grad_norm": 1.4907382726669312, "learning_rate": 1.297272568957858e-07, "loss": 0.3159, "step": 6838 }, { "epoch": 4.6555479918311775, "grad_norm": 1.362707257270813, "learning_rate": 1.2921823051168514e-07, "loss": 0.4443, "step": 6839 }, { "epoch": 4.656228727025187, "grad_norm": 1.5249358415603638, "learning_rate": 1.2871019168183674e-07, "loss": 0.347, "step": 6840 }, { "epoch": 4.656909462219197, "grad_norm": 1.4653434753417969, "learning_rate": 1.2820314050924765e-07, "loss": 0.3768, "step": 6841 }, { "epoch": 4.657590197413207, "grad_norm": 1.4012839794158936, "learning_rate": 1.2769707709672285e-07, "loss": 0.2807, "step": 6842 }, { "epoch": 4.658270932607216, "grad_norm": 1.2620670795440674, "learning_rate": 1.2719200154686694e-07, "loss": 0.3247, "step": 6843 }, { "epoch": 4.658951667801225, "grad_norm": 1.4326939582824707, "learning_rate": 1.2668791396208303e-07, "loss": 0.3274, "step": 6844 }, { "epoch": 4.659632402995235, "grad_norm": 1.4215023517608643, "learning_rate": 1.2618481444457708e-07, "loss": 0.3916, "step": 6845 }, { "epoch": 4.660313138189244, "grad_norm": 1.399032711982727, "learning_rate": 1.2568270309635144e-07, "loss": 0.148, "step": 6846 }, { "epoch": 4.660993873383254, "grad_norm": 1.5086015462875366, "learning_rate": 1.2518158001920965e-07, "loss": 0.2348, "step": 6847 }, { "epoch": 4.661674608577264, "grad_norm": 1.5292056798934937, "learning_rate": 1.2468144531475602e-07, "loss": 0.2876, "step": 6848 }, { "epoch": 4.662355343771273, "grad_norm": 1.3623095750808716, "learning_rate": 1.241822990843916e-07, "loss": 0.4483, "step": 6849 }, { "epoch": 4.663036078965282, "grad_norm": 1.4190473556518555, "learning_rate": 1.2368414142931938e-07, "loss": 0.348, "step": 6850 }, { "epoch": 4.663716814159292, "grad_norm": 1.367927074432373, "learning_rate": 1.2318697245054078e-07, "loss": 0.3923, "step": 6851 }, { "epoch": 4.664397549353302, "grad_norm": 1.3736793994903564, "learning_rate": 1.226907922488585e-07, "loss": 0.5125, "step": 6852 }, { "epoch": 4.665078284547311, "grad_norm": 1.494445562362671, "learning_rate": 1.2219560092487149e-07, "loss": 0.3701, "step": 6853 }, { "epoch": 4.665759019741321, "grad_norm": 1.5948591232299805, "learning_rate": 1.217013985789811e-07, "loss": 0.3936, "step": 6854 }, { "epoch": 4.66643975493533, "grad_norm": 1.4139591455459595, "learning_rate": 1.2120818531138722e-07, "loss": 0.4052, "step": 6855 }, { "epoch": 4.6671204901293395, "grad_norm": 1.3310126066207886, "learning_rate": 1.2071596122208928e-07, "loss": 0.3332, "step": 6856 }, { "epoch": 4.667801225323349, "grad_norm": 1.532507061958313, "learning_rate": 1.2022472641088522e-07, "loss": 0.3479, "step": 6857 }, { "epoch": 4.668481960517359, "grad_norm": 1.3391801118850708, "learning_rate": 1.1973448097737427e-07, "loss": 0.3492, "step": 6858 }, { "epoch": 4.6691626957113685, "grad_norm": 1.3719911575317383, "learning_rate": 1.1924522502095359e-07, "loss": 0.3278, "step": 6859 }, { "epoch": 4.669843430905377, "grad_norm": 1.4147785902023315, "learning_rate": 1.1875695864081993e-07, "loss": 0.2193, "step": 6860 }, { "epoch": 4.670524166099387, "grad_norm": 1.4683122634887695, "learning_rate": 1.1826968193596911e-07, "loss": 0.2099, "step": 6861 }, { "epoch": 4.671204901293397, "grad_norm": 1.4009013175964355, "learning_rate": 1.1778339500519875e-07, "loss": 0.2272, "step": 6862 }, { "epoch": 4.671885636487406, "grad_norm": 1.449508547782898, "learning_rate": 1.1729809794710223e-07, "loss": 0.4678, "step": 6863 }, { "epoch": 4.672566371681416, "grad_norm": 1.5286812782287598, "learning_rate": 1.1681379086007361e-07, "loss": 0.3704, "step": 6864 }, { "epoch": 4.673247106875426, "grad_norm": 1.3229260444641113, "learning_rate": 1.1633047384230767e-07, "loss": 0.3332, "step": 6865 }, { "epoch": 4.6739278420694355, "grad_norm": 1.5569438934326172, "learning_rate": 1.1584814699179659e-07, "loss": 0.3585, "step": 6866 }, { "epoch": 4.674608577263444, "grad_norm": 1.3188050985336304, "learning_rate": 1.1536681040633214e-07, "loss": 0.2453, "step": 6867 }, { "epoch": 4.675289312457454, "grad_norm": 1.428128957748413, "learning_rate": 1.1488646418350568e-07, "loss": 0.4174, "step": 6868 }, { "epoch": 4.675970047651464, "grad_norm": 1.442459225654602, "learning_rate": 1.1440710842070935e-07, "loss": 0.2273, "step": 6869 }, { "epoch": 4.676650782845473, "grad_norm": 1.4441465139389038, "learning_rate": 1.1392874321513092e-07, "loss": 0.4179, "step": 6870 }, { "epoch": 4.677331518039483, "grad_norm": 1.4501935243606567, "learning_rate": 1.134513686637595e-07, "loss": 0.2369, "step": 6871 }, { "epoch": 4.678012253233492, "grad_norm": 1.397566795349121, "learning_rate": 1.1297498486338432e-07, "loss": 0.3951, "step": 6872 }, { "epoch": 4.6786929884275015, "grad_norm": 1.4161999225616455, "learning_rate": 1.1249959191059146e-07, "loss": 0.2674, "step": 6873 }, { "epoch": 4.679373723621511, "grad_norm": 1.4587000608444214, "learning_rate": 1.1202518990176714e-07, "loss": 0.406, "step": 6874 }, { "epoch": 4.680054458815521, "grad_norm": 1.3016104698181152, "learning_rate": 1.1155177893309665e-07, "loss": 0.3404, "step": 6875 }, { "epoch": 4.6807351940095305, "grad_norm": 1.4019769430160522, "learning_rate": 1.1107935910056545e-07, "loss": 0.3293, "step": 6876 }, { "epoch": 4.68141592920354, "grad_norm": 2.03558349609375, "learning_rate": 1.1060793049995578e-07, "loss": 0.3732, "step": 6877 }, { "epoch": 4.682096664397549, "grad_norm": 1.2533998489379883, "learning_rate": 1.1013749322685063e-07, "loss": 0.312, "step": 6878 }, { "epoch": 4.682777399591559, "grad_norm": 1.256449818611145, "learning_rate": 1.096680473766315e-07, "loss": 0.4294, "step": 6879 }, { "epoch": 4.683458134785568, "grad_norm": 1.3207120895385742, "learning_rate": 1.0919959304447891e-07, "loss": 0.3982, "step": 6880 }, { "epoch": 4.684138869979578, "grad_norm": 1.44569730758667, "learning_rate": 1.0873213032537133e-07, "loss": 0.3092, "step": 6881 }, { "epoch": 4.684819605173588, "grad_norm": 1.4267197847366333, "learning_rate": 1.082656593140885e-07, "loss": 0.4046, "step": 6882 }, { "epoch": 4.6855003403675966, "grad_norm": 1.529023289680481, "learning_rate": 1.0780018010520754e-07, "loss": 0.449, "step": 6883 }, { "epoch": 4.686181075561606, "grad_norm": 1.5658406019210815, "learning_rate": 1.0733569279310297e-07, "loss": 0.4006, "step": 6884 }, { "epoch": 4.686861810755616, "grad_norm": 1.2143677473068237, "learning_rate": 1.0687219747195165e-07, "loss": 0.2109, "step": 6885 }, { "epoch": 4.687542545949626, "grad_norm": 1.2735484838485718, "learning_rate": 1.0640969423572789e-07, "loss": 0.1756, "step": 6886 }, { "epoch": 4.688223281143635, "grad_norm": 1.4443827867507935, "learning_rate": 1.0594818317820332e-07, "loss": 0.4417, "step": 6887 }, { "epoch": 4.688904016337645, "grad_norm": 1.410610556602478, "learning_rate": 1.0548766439295033e-07, "loss": 0.4279, "step": 6888 }, { "epoch": 4.689584751531655, "grad_norm": 1.2657668590545654, "learning_rate": 1.0502813797333866e-07, "loss": 0.3188, "step": 6889 }, { "epoch": 4.6902654867256635, "grad_norm": 1.5079492330551147, "learning_rate": 1.0456960401253824e-07, "loss": 0.2911, "step": 6890 }, { "epoch": 4.690946221919673, "grad_norm": 1.6623083353042603, "learning_rate": 1.0411206260351747e-07, "loss": 0.2964, "step": 6891 }, { "epoch": 4.691626957113683, "grad_norm": 1.3583577871322632, "learning_rate": 1.036555138390416e-07, "loss": 0.3477, "step": 6892 }, { "epoch": 4.6923076923076925, "grad_norm": 1.6019138097763062, "learning_rate": 1.0319995781167769e-07, "loss": 0.3064, "step": 6893 }, { "epoch": 4.692988427501702, "grad_norm": 1.472921371459961, "learning_rate": 1.0274539461378962e-07, "loss": 0.3986, "step": 6894 }, { "epoch": 4.693669162695711, "grad_norm": 1.374017357826233, "learning_rate": 1.0229182433754092e-07, "loss": 0.4092, "step": 6895 }, { "epoch": 4.694349897889721, "grad_norm": 1.4423460960388184, "learning_rate": 1.018392470748919e-07, "loss": 0.215, "step": 6896 }, { "epoch": 4.69503063308373, "grad_norm": 1.325812578201294, "learning_rate": 1.0138766291760416e-07, "loss": 0.3512, "step": 6897 }, { "epoch": 4.69571136827774, "grad_norm": 1.4350507259368896, "learning_rate": 1.0093707195723668e-07, "loss": 0.5021, "step": 6898 }, { "epoch": 4.69639210347175, "grad_norm": 1.291142463684082, "learning_rate": 1.0048747428514582e-07, "loss": 0.342, "step": 6899 }, { "epoch": 4.697072838665759, "grad_norm": 1.4310314655303955, "learning_rate": 1.0003886999248924e-07, "loss": 0.4518, "step": 6900 }, { "epoch": 4.697753573859768, "grad_norm": 1.4380507469177246, "learning_rate": 9.959125917022028e-08, "loss": 0.2408, "step": 6901 }, { "epoch": 4.698434309053778, "grad_norm": 1.4245729446411133, "learning_rate": 9.914464190909412e-08, "loss": 0.3074, "step": 6902 }, { "epoch": 4.699115044247788, "grad_norm": 1.3635953664779663, "learning_rate": 9.869901829966111e-08, "loss": 0.3585, "step": 6903 }, { "epoch": 4.699795779441797, "grad_norm": 1.4384238719940186, "learning_rate": 9.825438843227287e-08, "loss": 0.4962, "step": 6904 }, { "epoch": 4.700476514635807, "grad_norm": 1.4778205156326294, "learning_rate": 9.781075239707838e-08, "loss": 0.3327, "step": 6905 }, { "epoch": 4.701157249829816, "grad_norm": 1.4326303005218506, "learning_rate": 9.736811028402349e-08, "loss": 0.4285, "step": 6906 }, { "epoch": 4.701837985023825, "grad_norm": 1.3903634548187256, "learning_rate": 9.69264621828564e-08, "loss": 0.2937, "step": 6907 }, { "epoch": 4.702518720217835, "grad_norm": 1.5835002660751343, "learning_rate": 9.648580818312048e-08, "loss": 0.4581, "step": 6908 }, { "epoch": 4.703199455411845, "grad_norm": 1.3642759323120117, "learning_rate": 9.604614837415815e-08, "loss": 0.2787, "step": 6909 }, { "epoch": 4.7038801906058545, "grad_norm": 1.4512265920639038, "learning_rate": 9.560748284511146e-08, "loss": 0.3833, "step": 6910 }, { "epoch": 4.704560925799864, "grad_norm": 1.5447213649749756, "learning_rate": 9.516981168492034e-08, "loss": 0.2656, "step": 6911 }, { "epoch": 4.705241660993874, "grad_norm": 1.470396876335144, "learning_rate": 9.473313498232218e-08, "loss": 0.4387, "step": 6912 }, { "epoch": 4.705922396187883, "grad_norm": 1.8735369443893433, "learning_rate": 9.429745282585389e-08, "loss": 0.2677, "step": 6913 }, { "epoch": 4.706603131381892, "grad_norm": 1.3520801067352295, "learning_rate": 9.386276530385042e-08, "loss": 0.3436, "step": 6914 }, { "epoch": 4.707283866575902, "grad_norm": 1.4302294254302979, "learning_rate": 9.34290725044451e-08, "loss": 0.3576, "step": 6915 }, { "epoch": 4.707964601769912, "grad_norm": 1.3373253345489502, "learning_rate": 9.29963745155682e-08, "loss": 0.4428, "step": 6916 }, { "epoch": 4.708645336963921, "grad_norm": 1.5263570547103882, "learning_rate": 9.256467142495118e-08, "loss": 0.5379, "step": 6917 }, { "epoch": 4.70932607215793, "grad_norm": 1.6167399883270264, "learning_rate": 9.213396332012126e-08, "loss": 0.4116, "step": 6918 }, { "epoch": 4.71000680735194, "grad_norm": 1.4400250911712646, "learning_rate": 9.170425028840468e-08, "loss": 0.4241, "step": 6919 }, { "epoch": 4.71068754254595, "grad_norm": 1.4832019805908203, "learning_rate": 9.127553241692621e-08, "loss": 0.3363, "step": 6920 }, { "epoch": 4.711368277739959, "grad_norm": 1.5409505367279053, "learning_rate": 9.08478097926091e-08, "loss": 0.464, "step": 6921 }, { "epoch": 4.712049012933969, "grad_norm": 1.5022995471954346, "learning_rate": 9.042108250217396e-08, "loss": 0.3664, "step": 6922 }, { "epoch": 4.712729748127979, "grad_norm": 1.3791635036468506, "learning_rate": 8.999535063213994e-08, "loss": 0.2033, "step": 6923 }, { "epoch": 4.713410483321987, "grad_norm": 1.4437663555145264, "learning_rate": 8.957061426882518e-08, "loss": 0.2349, "step": 6924 }, { "epoch": 4.714091218515997, "grad_norm": 1.45928955078125, "learning_rate": 8.914687349834473e-08, "loss": 0.4872, "step": 6925 }, { "epoch": 4.714771953710007, "grad_norm": 1.6532343626022339, "learning_rate": 8.872412840661149e-08, "loss": 0.2945, "step": 6926 }, { "epoch": 4.7154526889040165, "grad_norm": 1.2809991836547852, "learning_rate": 8.830237907933859e-08, "loss": 0.2597, "step": 6927 }, { "epoch": 4.716133424098026, "grad_norm": 1.4014168977737427, "learning_rate": 8.788162560203594e-08, "loss": 0.383, "step": 6928 }, { "epoch": 4.716814159292035, "grad_norm": 1.469293475151062, "learning_rate": 8.746186806001088e-08, "loss": 0.4479, "step": 6929 }, { "epoch": 4.717494894486045, "grad_norm": 1.3031764030456543, "learning_rate": 8.704310653836978e-08, "loss": 0.4207, "step": 6930 }, { "epoch": 4.718175629680054, "grad_norm": 1.4957696199417114, "learning_rate": 8.662534112201748e-08, "loss": 0.323, "step": 6931 }, { "epoch": 4.718856364874064, "grad_norm": 1.3088761568069458, "learning_rate": 8.620857189565512e-08, "loss": 0.3457, "step": 6932 }, { "epoch": 4.719537100068074, "grad_norm": 1.4506758451461792, "learning_rate": 8.579279894378289e-08, "loss": 0.3005, "step": 6933 }, { "epoch": 4.720217835262083, "grad_norm": 1.443945288658142, "learning_rate": 8.537802235070058e-08, "loss": 0.3501, "step": 6934 }, { "epoch": 4.720898570456093, "grad_norm": 1.4212597608566284, "learning_rate": 8.49642422005026e-08, "loss": 0.441, "step": 6935 }, { "epoch": 4.721579305650102, "grad_norm": 1.4729849100112915, "learning_rate": 8.455145857708402e-08, "loss": 0.4017, "step": 6936 }, { "epoch": 4.7222600408441116, "grad_norm": 1.4183868169784546, "learning_rate": 8.413967156413738e-08, "loss": 0.2225, "step": 6937 }, { "epoch": 4.722940776038121, "grad_norm": 1.4116764068603516, "learning_rate": 8.3728881245152e-08, "loss": 0.3165, "step": 6938 }, { "epoch": 4.723621511232131, "grad_norm": 1.361093521118164, "learning_rate": 8.331908770341624e-08, "loss": 0.2238, "step": 6939 }, { "epoch": 4.724302246426141, "grad_norm": 1.3601531982421875, "learning_rate": 8.291029102201587e-08, "loss": 0.4307, "step": 6940 }, { "epoch": 4.724982981620149, "grad_norm": 1.2995272874832153, "learning_rate": 8.250249128383514e-08, "loss": 0.4353, "step": 6941 }, { "epoch": 4.725663716814159, "grad_norm": 1.4018239974975586, "learning_rate": 8.209568857155569e-08, "loss": 0.2504, "step": 6942 }, { "epoch": 4.726344452008169, "grad_norm": 1.4362801313400269, "learning_rate": 8.1689882967656e-08, "loss": 0.4208, "step": 6943 }, { "epoch": 4.7270251872021785, "grad_norm": 1.4075968265533447, "learning_rate": 8.128507455441415e-08, "loss": 0.2371, "step": 6944 }, { "epoch": 4.727705922396188, "grad_norm": 1.4476169347763062, "learning_rate": 8.088126341390557e-08, "loss": 0.2519, "step": 6945 }, { "epoch": 4.728386657590198, "grad_norm": 1.4682939052581787, "learning_rate": 8.04784496280031e-08, "loss": 0.2661, "step": 6946 }, { "epoch": 4.729067392784207, "grad_norm": 1.4377107620239258, "learning_rate": 8.007663327837755e-08, "loss": 0.3038, "step": 6947 }, { "epoch": 4.729748127978216, "grad_norm": 1.3840638399124146, "learning_rate": 7.967581444649708e-08, "loss": 0.4033, "step": 6948 }, { "epoch": 4.730428863172226, "grad_norm": 1.3079520463943481, "learning_rate": 7.927599321362833e-08, "loss": 0.3698, "step": 6949 }, { "epoch": 4.731109598366236, "grad_norm": 1.4747529029846191, "learning_rate": 7.88771696608348e-08, "loss": 0.4261, "step": 6950 }, { "epoch": 4.731790333560245, "grad_norm": 1.3791955709457397, "learning_rate": 7.847934386897904e-08, "loss": 0.536, "step": 6951 }, { "epoch": 4.732471068754254, "grad_norm": 1.4300094842910767, "learning_rate": 7.80825159187204e-08, "loss": 0.4608, "step": 6952 }, { "epoch": 4.733151803948264, "grad_norm": 1.3622843027114868, "learning_rate": 7.768668589051453e-08, "loss": 0.3238, "step": 6953 }, { "epoch": 4.7338325391422735, "grad_norm": 1.5426126718521118, "learning_rate": 7.729185386461835e-08, "loss": 0.4349, "step": 6954 }, { "epoch": 4.734513274336283, "grad_norm": 1.6209362745285034, "learning_rate": 7.689801992108281e-08, "loss": 0.3212, "step": 6955 }, { "epoch": 4.735194009530293, "grad_norm": 1.3998138904571533, "learning_rate": 7.650518413975905e-08, "loss": 0.3438, "step": 6956 }, { "epoch": 4.735874744724303, "grad_norm": 1.404591679573059, "learning_rate": 7.61133466002939e-08, "loss": 0.2786, "step": 6957 }, { "epoch": 4.736555479918312, "grad_norm": 1.4816412925720215, "learning_rate": 7.572250738213327e-08, "loss": 0.2629, "step": 6958 }, { "epoch": 4.737236215112321, "grad_norm": 1.4235323667526245, "learning_rate": 7.533266656452043e-08, "loss": 0.5105, "step": 6959 }, { "epoch": 4.737916950306331, "grad_norm": 1.340583086013794, "learning_rate": 7.494382422649494e-08, "loss": 0.4024, "step": 6960 }, { "epoch": 4.73859768550034, "grad_norm": 1.3422834873199463, "learning_rate": 7.455598044689482e-08, "loss": 0.3773, "step": 6961 }, { "epoch": 4.73927842069435, "grad_norm": 1.3908766508102417, "learning_rate": 7.416913530435554e-08, "loss": 0.429, "step": 6962 }, { "epoch": 4.73995915588836, "grad_norm": 1.3339312076568604, "learning_rate": 7.378328887731156e-08, "loss": 0.4014, "step": 6963 }, { "epoch": 4.740639891082369, "grad_norm": 1.335888385772705, "learning_rate": 7.339844124399309e-08, "loss": 0.2462, "step": 6964 }, { "epoch": 4.741320626276378, "grad_norm": 1.6622406244277954, "learning_rate": 7.30145924824266e-08, "loss": 0.3839, "step": 6965 }, { "epoch": 4.742001361470388, "grad_norm": 1.4161479473114014, "learning_rate": 7.263174267043981e-08, "loss": 0.3368, "step": 6966 }, { "epoch": 4.742682096664398, "grad_norm": 1.635224461555481, "learning_rate": 7.224989188565513e-08, "loss": 0.3169, "step": 6967 }, { "epoch": 4.743362831858407, "grad_norm": 1.4806835651397705, "learning_rate": 7.186904020549168e-08, "loss": 0.2025, "step": 6968 }, { "epoch": 4.744043567052417, "grad_norm": 1.2941772937774658, "learning_rate": 7.14891877071694e-08, "loss": 0.2863, "step": 6969 }, { "epoch": 4.744724302246426, "grad_norm": 1.4006719589233398, "learning_rate": 7.111033446770221e-08, "loss": 0.2664, "step": 6970 }, { "epoch": 4.7454050374404355, "grad_norm": 1.4344711303710938, "learning_rate": 7.07324805639037e-08, "loss": 0.3514, "step": 6971 }, { "epoch": 4.746085772634445, "grad_norm": 1.4217065572738647, "learning_rate": 7.035562607238311e-08, "loss": 0.2779, "step": 6972 }, { "epoch": 4.746766507828455, "grad_norm": 1.4042739868164062, "learning_rate": 6.997977106954934e-08, "loss": 0.3475, "step": 6973 }, { "epoch": 4.747447243022465, "grad_norm": 1.3975974321365356, "learning_rate": 6.960491563160643e-08, "loss": 0.3869, "step": 6974 }, { "epoch": 4.748127978216473, "grad_norm": 1.5866798162460327, "learning_rate": 6.92310598345558e-08, "loss": 0.3689, "step": 6975 }, { "epoch": 4.748808713410483, "grad_norm": 1.5217835903167725, "learning_rate": 6.88582037541985e-08, "loss": 0.1822, "step": 6976 }, { "epoch": 4.749489448604493, "grad_norm": 1.3130606412887573, "learning_rate": 6.848634746613014e-08, "loss": 0.2382, "step": 6977 }, { "epoch": 4.750170183798502, "grad_norm": 1.3838669061660767, "learning_rate": 6.811549104574544e-08, "loss": 0.3895, "step": 6978 }, { "epoch": 4.750850918992512, "grad_norm": 1.4415898323059082, "learning_rate": 6.774563456823535e-08, "loss": 0.4029, "step": 6979 }, { "epoch": 4.751531654186522, "grad_norm": 1.4538770914077759, "learning_rate": 6.737677810858879e-08, "loss": 0.4161, "step": 6980 }, { "epoch": 4.752212389380531, "grad_norm": 1.4599136114120483, "learning_rate": 6.700892174159202e-08, "loss": 0.3805, "step": 6981 }, { "epoch": 4.75289312457454, "grad_norm": 1.4372422695159912, "learning_rate": 6.664206554182762e-08, "loss": 0.2547, "step": 6982 }, { "epoch": 4.75357385976855, "grad_norm": 1.4477295875549316, "learning_rate": 6.627620958367609e-08, "loss": 0.4747, "step": 6983 }, { "epoch": 4.75425459496256, "grad_norm": 1.460636019706726, "learning_rate": 6.591135394131477e-08, "loss": 0.4508, "step": 6984 }, { "epoch": 4.754935330156569, "grad_norm": 1.5205918550491333, "learning_rate": 6.554749868871891e-08, "loss": 0.5278, "step": 6985 }, { "epoch": 4.755616065350578, "grad_norm": 1.413299560546875, "learning_rate": 6.518464389966006e-08, "loss": 0.4645, "step": 6986 }, { "epoch": 4.756296800544588, "grad_norm": 1.2820355892181396, "learning_rate": 6.482278964770717e-08, "loss": 0.3749, "step": 6987 }, { "epoch": 4.7569775357385975, "grad_norm": 1.3946834802627563, "learning_rate": 6.446193600622652e-08, "loss": 0.4198, "step": 6988 }, { "epoch": 4.757658270932607, "grad_norm": 1.2350902557373047, "learning_rate": 6.410208304838128e-08, "loss": 0.3315, "step": 6989 }, { "epoch": 4.758339006126617, "grad_norm": 1.4813750982284546, "learning_rate": 6.374323084713252e-08, "loss": 0.3885, "step": 6990 }, { "epoch": 4.7590197413206266, "grad_norm": 1.224144458770752, "learning_rate": 6.338537947523704e-08, "loss": 0.2992, "step": 6991 }, { "epoch": 4.759700476514636, "grad_norm": 1.3414463996887207, "learning_rate": 6.302852900524958e-08, "loss": 0.4098, "step": 6992 }, { "epoch": 4.760381211708645, "grad_norm": 1.4175145626068115, "learning_rate": 6.267267950952228e-08, "loss": 0.3942, "step": 6993 }, { "epoch": 4.761061946902655, "grad_norm": 1.5480961799621582, "learning_rate": 6.23178310602035e-08, "loss": 0.4588, "step": 6994 }, { "epoch": 4.761742682096664, "grad_norm": 1.374801754951477, "learning_rate": 6.196398372923851e-08, "loss": 0.3845, "step": 6995 }, { "epoch": 4.762423417290674, "grad_norm": 1.3658840656280518, "learning_rate": 6.1611137588371e-08, "loss": 0.4049, "step": 6996 }, { "epoch": 4.763104152484684, "grad_norm": 1.3042739629745483, "learning_rate": 6.125929270914044e-08, "loss": 0.3487, "step": 6997 }, { "epoch": 4.763784887678693, "grad_norm": 1.571332573890686, "learning_rate": 6.090844916288363e-08, "loss": 0.3514, "step": 6998 }, { "epoch": 4.764465622872702, "grad_norm": 1.432688593864441, "learning_rate": 6.055860702073368e-08, "loss": 0.3345, "step": 6999 }, { "epoch": 4.765146358066712, "grad_norm": 1.351374626159668, "learning_rate": 6.020976635362219e-08, "loss": 0.3102, "step": 7000 }, { "epoch": 4.765827093260722, "grad_norm": 1.3383928537368774, "learning_rate": 5.986192723227701e-08, "loss": 0.3357, "step": 7001 }, { "epoch": 4.766507828454731, "grad_norm": 1.263969898223877, "learning_rate": 5.9515089727221195e-08, "loss": 0.421, "step": 7002 }, { "epoch": 4.767188563648741, "grad_norm": 1.4276340007781982, "learning_rate": 5.9169253908778476e-08, "loss": 0.3351, "step": 7003 }, { "epoch": 4.76786929884275, "grad_norm": 1.3336056470870972, "learning_rate": 5.882441984706555e-08, "loss": 0.3104, "step": 7004 }, { "epoch": 4.7685500340367595, "grad_norm": 1.403719425201416, "learning_rate": 5.8480587611998705e-08, "loss": 0.3951, "step": 7005 }, { "epoch": 4.769230769230769, "grad_norm": 1.4605368375778198, "learning_rate": 5.813775727328941e-08, "loss": 0.3146, "step": 7006 }, { "epoch": 4.769911504424779, "grad_norm": 1.5702440738677979, "learning_rate": 5.7795928900447606e-08, "loss": 0.3774, "step": 7007 }, { "epoch": 4.7705922396187885, "grad_norm": 1.4003171920776367, "learning_rate": 5.745510256277842e-08, "loss": 0.3885, "step": 7008 }, { "epoch": 4.771272974812797, "grad_norm": 1.2204509973526, "learning_rate": 5.71152783293849e-08, "loss": 0.2672, "step": 7009 }, { "epoch": 4.771953710006807, "grad_norm": 1.3270206451416016, "learning_rate": 5.67764562691675e-08, "loss": 0.3922, "step": 7010 }, { "epoch": 4.772634445200817, "grad_norm": 1.44560706615448, "learning_rate": 5.643863645082126e-08, "loss": 0.3519, "step": 7011 }, { "epoch": 4.773315180394826, "grad_norm": 1.2906806468963623, "learning_rate": 5.610181894283972e-08, "loss": 0.3325, "step": 7012 }, { "epoch": 4.773995915588836, "grad_norm": 1.6136971712112427, "learning_rate": 5.576600381351327e-08, "loss": 0.4977, "step": 7013 }, { "epoch": 4.774676650782846, "grad_norm": 1.3225374221801758, "learning_rate": 5.543119113092854e-08, "loss": 0.3067, "step": 7014 }, { "epoch": 4.775357385976855, "grad_norm": 1.3772042989730835, "learning_rate": 5.509738096296957e-08, "loss": 0.4082, "step": 7015 }, { "epoch": 4.776038121170864, "grad_norm": 1.8812928199768066, "learning_rate": 5.4764573377315e-08, "loss": 0.298, "step": 7016 }, { "epoch": 4.776718856364874, "grad_norm": 1.3443738222122192, "learning_rate": 5.443276844144363e-08, "loss": 0.3673, "step": 7017 }, { "epoch": 4.777399591558884, "grad_norm": 1.337367057800293, "learning_rate": 5.4101966222627753e-08, "loss": 0.4289, "step": 7018 }, { "epoch": 4.778080326752893, "grad_norm": 1.3783090114593506, "learning_rate": 5.377216678793817e-08, "loss": 0.4179, "step": 7019 }, { "epoch": 4.778761061946903, "grad_norm": 1.4553967714309692, "learning_rate": 5.3443370204241954e-08, "loss": 0.4292, "step": 7020 }, { "epoch": 4.779441797140912, "grad_norm": 1.4444832801818848, "learning_rate": 5.311557653820354e-08, "loss": 0.3654, "step": 7021 }, { "epoch": 4.7801225323349215, "grad_norm": 1.3102375268936157, "learning_rate": 5.2788785856281996e-08, "loss": 0.409, "step": 7022 }, { "epoch": 4.780803267528931, "grad_norm": 1.4224106073379517, "learning_rate": 5.2462998224734885e-08, "loss": 0.3904, "step": 7023 }, { "epoch": 4.781484002722941, "grad_norm": 1.433515191078186, "learning_rate": 5.213821370961658e-08, "loss": 0.3897, "step": 7024 }, { "epoch": 4.7821647379169505, "grad_norm": 1.4296534061431885, "learning_rate": 5.1814432376776635e-08, "loss": 0.2508, "step": 7025 }, { "epoch": 4.78284547311096, "grad_norm": 1.362226963043213, "learning_rate": 5.149165429186198e-08, "loss": 0.2301, "step": 7026 }, { "epoch": 4.783526208304969, "grad_norm": 1.4840031862258911, "learning_rate": 5.116987952031638e-08, "loss": 0.2963, "step": 7027 }, { "epoch": 4.784206943498979, "grad_norm": 1.32340407371521, "learning_rate": 5.084910812737987e-08, "loss": 0.3412, "step": 7028 }, { "epoch": 4.784887678692988, "grad_norm": 1.5299533605575562, "learning_rate": 5.052934017808875e-08, "loss": 0.3792, "step": 7029 }, { "epoch": 4.785568413886998, "grad_norm": 1.4205005168914795, "learning_rate": 5.021057573727617e-08, "loss": 0.379, "step": 7030 }, { "epoch": 4.786249149081008, "grad_norm": 1.380270004272461, "learning_rate": 4.989281486957154e-08, "loss": 0.1814, "step": 7031 }, { "epoch": 4.7869298842750165, "grad_norm": 1.365594506263733, "learning_rate": 4.957605763940221e-08, "loss": 0.4361, "step": 7032 }, { "epoch": 4.787610619469026, "grad_norm": 1.404097318649292, "learning_rate": 4.9260304110990145e-08, "loss": 0.1603, "step": 7033 }, { "epoch": 4.788291354663036, "grad_norm": 1.4574086666107178, "learning_rate": 4.8945554348354685e-08, "loss": 0.4252, "step": 7034 }, { "epoch": 4.788972089857046, "grad_norm": 1.6753138303756714, "learning_rate": 4.863180841531146e-08, "loss": 0.2984, "step": 7035 }, { "epoch": 4.789652825051055, "grad_norm": 1.3648484945297241, "learning_rate": 4.83190663754729e-08, "loss": 0.3646, "step": 7036 }, { "epoch": 4.790333560245065, "grad_norm": 1.3881218433380127, "learning_rate": 4.800732829224719e-08, "loss": 0.3207, "step": 7037 }, { "epoch": 4.791014295439075, "grad_norm": 1.3835819959640503, "learning_rate": 4.769659422883988e-08, "loss": 0.526, "step": 7038 }, { "epoch": 4.791695030633083, "grad_norm": 1.4727164506912231, "learning_rate": 4.738686424825223e-08, "loss": 0.4431, "step": 7039 }, { "epoch": 4.792375765827093, "grad_norm": 1.4505831003189087, "learning_rate": 4.7078138413282904e-08, "loss": 0.3069, "step": 7040 }, { "epoch": 4.793056501021103, "grad_norm": 1.438459038734436, "learning_rate": 4.6770416786525163e-08, "loss": 0.2876, "step": 7041 }, { "epoch": 4.7937372362151125, "grad_norm": 1.2846057415008545, "learning_rate": 4.646369943037077e-08, "loss": 0.2835, "step": 7042 }, { "epoch": 4.794417971409122, "grad_norm": 1.357107400894165, "learning_rate": 4.6157986407006085e-08, "loss": 0.3303, "step": 7043 }, { "epoch": 4.795098706603131, "grad_norm": 1.3319212198257446, "learning_rate": 4.5853277778415416e-08, "loss": 0.2108, "step": 7044 }, { "epoch": 4.795779441797141, "grad_norm": 1.4349807500839233, "learning_rate": 4.5549573606377686e-08, "loss": 0.3937, "step": 7045 }, { "epoch": 4.79646017699115, "grad_norm": 1.5183309316635132, "learning_rate": 4.5246873952470296e-08, "loss": 0.3074, "step": 7046 }, { "epoch": 4.79714091218516, "grad_norm": 1.5538212060928345, "learning_rate": 4.494517887806471e-08, "loss": 0.2985, "step": 7047 }, { "epoch": 4.79782164737917, "grad_norm": 1.608668565750122, "learning_rate": 4.464448844433034e-08, "loss": 0.3037, "step": 7048 }, { "epoch": 4.798502382573179, "grad_norm": 1.4953478574752808, "learning_rate": 4.434480271223229e-08, "loss": 0.3127, "step": 7049 }, { "epoch": 4.799183117767188, "grad_norm": 1.3799946308135986, "learning_rate": 4.4046121742531976e-08, "loss": 0.4063, "step": 7050 }, { "epoch": 4.799863852961198, "grad_norm": 1.4016343355178833, "learning_rate": 4.374844559578706e-08, "loss": 0.2401, "step": 7051 }, { "epoch": 4.800544588155208, "grad_norm": 1.7068573236465454, "learning_rate": 4.3451774332352036e-08, "loss": 0.4051, "step": 7052 }, { "epoch": 4.801225323349217, "grad_norm": 1.3355114459991455, "learning_rate": 4.315610801237713e-08, "loss": 0.3083, "step": 7053 }, { "epoch": 4.801906058543227, "grad_norm": 1.4622929096221924, "learning_rate": 4.2861446695808276e-08, "loss": 0.2902, "step": 7054 }, { "epoch": 4.802586793737236, "grad_norm": 1.4409300088882446, "learning_rate": 4.256779044238879e-08, "loss": 0.353, "step": 7055 }, { "epoch": 4.803267528931245, "grad_norm": 1.391060709953308, "learning_rate": 4.2275139311657165e-08, "loss": 0.6003, "step": 7056 }, { "epoch": 4.803948264125255, "grad_norm": 1.4366004467010498, "learning_rate": 4.1983493362949265e-08, "loss": 0.3634, "step": 7057 }, { "epoch": 4.804628999319265, "grad_norm": 1.5187124013900757, "learning_rate": 4.169285265539613e-08, "loss": 0.3255, "step": 7058 }, { "epoch": 4.8053097345132745, "grad_norm": 1.4438687562942505, "learning_rate": 4.140321724792562e-08, "loss": 0.5178, "step": 7059 }, { "epoch": 4.805990469707284, "grad_norm": 1.3435742855072021, "learning_rate": 4.1114587199261316e-08, "loss": 0.3103, "step": 7060 }, { "epoch": 4.806671204901294, "grad_norm": 1.4162836074829102, "learning_rate": 4.0826962567922515e-08, "loss": 0.3522, "step": 7061 }, { "epoch": 4.807351940095303, "grad_norm": 1.513740062713623, "learning_rate": 4.0540343412226455e-08, "loss": 0.3713, "step": 7062 }, { "epoch": 4.808032675289312, "grad_norm": 1.4723321199417114, "learning_rate": 4.0254729790285e-08, "loss": 0.4242, "step": 7063 }, { "epoch": 4.808713410483322, "grad_norm": 1.3766818046569824, "learning_rate": 3.997012176000625e-08, "loss": 0.4293, "step": 7064 }, { "epoch": 4.809394145677332, "grad_norm": 1.4147485494613647, "learning_rate": 3.9686519379094624e-08, "loss": 0.2694, "step": 7065 }, { "epoch": 4.810074880871341, "grad_norm": 1.4005392789840698, "learning_rate": 3.940392270505078e-08, "loss": 0.2768, "step": 7066 }, { "epoch": 4.81075561606535, "grad_norm": 1.3935478925704956, "learning_rate": 3.912233179517166e-08, "loss": 0.4159, "step": 7067 }, { "epoch": 4.81143635125936, "grad_norm": 1.4690488576889038, "learning_rate": 3.884174670654994e-08, "loss": 0.2716, "step": 7068 }, { "epoch": 4.8121170864533696, "grad_norm": 1.3879731893539429, "learning_rate": 3.8562167496074e-08, "loss": 0.2507, "step": 7069 }, { "epoch": 4.812797821647379, "grad_norm": 1.4496616125106812, "learning_rate": 3.8283594220429045e-08, "loss": 0.2868, "step": 7070 }, { "epoch": 4.813478556841389, "grad_norm": 1.356731653213501, "learning_rate": 3.8006026936096005e-08, "loss": 0.4149, "step": 7071 }, { "epoch": 4.814159292035399, "grad_norm": 1.4513227939605713, "learning_rate": 3.772946569935154e-08, "loss": 0.2205, "step": 7072 }, { "epoch": 4.814840027229407, "grad_norm": 1.4392417669296265, "learning_rate": 3.7453910566269123e-08, "loss": 0.3169, "step": 7073 }, { "epoch": 4.815520762423417, "grad_norm": 1.3442893028259277, "learning_rate": 3.71793615927174e-08, "loss": 0.2085, "step": 7074 }, { "epoch": 4.816201497617427, "grad_norm": 1.3851062059402466, "learning_rate": 3.690581883436128e-08, "loss": 0.219, "step": 7075 }, { "epoch": 4.8168822328114365, "grad_norm": 1.4160789251327515, "learning_rate": 3.6633282346662526e-08, "loss": 0.3961, "step": 7076 }, { "epoch": 4.817562968005446, "grad_norm": 1.3596245050430298, "learning_rate": 3.636175218487692e-08, "loss": 0.3105, "step": 7077 }, { "epoch": 4.818243703199455, "grad_norm": 1.3746516704559326, "learning_rate": 3.6091228404057654e-08, "loss": 0.3823, "step": 7078 }, { "epoch": 4.818924438393465, "grad_norm": 1.498335361480713, "learning_rate": 3.5821711059054745e-08, "loss": 0.3997, "step": 7079 }, { "epoch": 4.819605173587474, "grad_norm": 1.3883897066116333, "learning_rate": 3.5553200204511696e-08, "loss": 0.4112, "step": 7080 }, { "epoch": 4.820285908781484, "grad_norm": 1.308658480644226, "learning_rate": 3.528569589486941e-08, "loss": 0.3584, "step": 7081 }, { "epoch": 4.820966643975494, "grad_norm": 1.371856927871704, "learning_rate": 3.5019198184365053e-08, "loss": 0.5649, "step": 7082 }, { "epoch": 4.821647379169503, "grad_norm": 1.37489652633667, "learning_rate": 3.4753707127030964e-08, "loss": 0.2428, "step": 7083 }, { "epoch": 4.822328114363513, "grad_norm": 1.380425214767456, "learning_rate": 3.448922277669631e-08, "loss": 0.4453, "step": 7084 }, { "epoch": 4.823008849557522, "grad_norm": 1.310085654258728, "learning_rate": 3.4225745186984296e-08, "loss": 0.3339, "step": 7085 }, { "epoch": 4.8236895847515315, "grad_norm": 1.3046404123306274, "learning_rate": 3.3963274411315526e-08, "loss": 0.2015, "step": 7086 }, { "epoch": 4.824370319945541, "grad_norm": 1.3912736177444458, "learning_rate": 3.3701810502906885e-08, "loss": 0.3961, "step": 7087 }, { "epoch": 4.825051055139551, "grad_norm": 1.8023759126663208, "learning_rate": 3.344135351476929e-08, "loss": 0.2624, "step": 7088 }, { "epoch": 4.825731790333561, "grad_norm": 1.564799427986145, "learning_rate": 3.318190349971162e-08, "loss": 0.2684, "step": 7089 }, { "epoch": 4.826412525527569, "grad_norm": 1.4627196788787842, "learning_rate": 3.292346051033679e-08, "loss": 0.3775, "step": 7090 }, { "epoch": 4.827093260721579, "grad_norm": 1.383478045463562, "learning_rate": 3.2666024599044554e-08, "loss": 0.3498, "step": 7091 }, { "epoch": 4.827773995915589, "grad_norm": 1.339909315109253, "learning_rate": 3.240959581802983e-08, "loss": 0.2395, "step": 7092 }, { "epoch": 4.828454731109598, "grad_norm": 1.3723939657211304, "learning_rate": 3.215417421928435e-08, "loss": 0.3243, "step": 7093 }, { "epoch": 4.829135466303608, "grad_norm": 1.3391574621200562, "learning_rate": 3.189975985459503e-08, "loss": 0.265, "step": 7094 }, { "epoch": 4.829816201497618, "grad_norm": 1.4925843477249146, "learning_rate": 3.16463527755434e-08, "loss": 0.3893, "step": 7095 }, { "epoch": 4.830496936691627, "grad_norm": 1.2935129404067993, "learning_rate": 3.139395303350945e-08, "loss": 0.3146, "step": 7096 }, { "epoch": 4.831177671885636, "grad_norm": 1.5616849660873413, "learning_rate": 3.11425606796667e-08, "loss": 0.2675, "step": 7097 }, { "epoch": 4.831858407079646, "grad_norm": 1.439284324645996, "learning_rate": 3.089217576498493e-08, "loss": 0.3349, "step": 7098 }, { "epoch": 4.832539142273656, "grad_norm": 1.3894325494766235, "learning_rate": 3.0642798340230206e-08, "loss": 0.3614, "step": 7099 }, { "epoch": 4.833219877467665, "grad_norm": 1.363633394241333, "learning_rate": 3.039442845596319e-08, "loss": 0.2897, "step": 7100 }, { "epoch": 4.833900612661674, "grad_norm": 1.3432520627975464, "learning_rate": 3.014706616254248e-08, "loss": 0.3952, "step": 7101 }, { "epoch": 4.834581347855684, "grad_norm": 1.438782811164856, "learning_rate": 2.990071151011964e-08, "loss": 0.4031, "step": 7102 }, { "epoch": 4.8352620830496935, "grad_norm": 1.4514106512069702, "learning_rate": 2.9655364548643595e-08, "loss": 0.3567, "step": 7103 }, { "epoch": 4.835942818243703, "grad_norm": 1.4863039255142212, "learning_rate": 2.9411025327859e-08, "loss": 0.3583, "step": 7104 }, { "epoch": 4.836623553437713, "grad_norm": 1.774661660194397, "learning_rate": 2.916769389730567e-08, "loss": 0.2862, "step": 7105 }, { "epoch": 4.837304288631723, "grad_norm": 1.259684443473816, "learning_rate": 2.892537030631859e-08, "loss": 0.38, "step": 7106 }, { "epoch": 4.837985023825732, "grad_norm": 1.3307093381881714, "learning_rate": 2.868405460403012e-08, "loss": 0.3901, "step": 7107 }, { "epoch": 4.838665759019741, "grad_norm": 1.3542615175247192, "learning_rate": 2.8443746839366126e-08, "loss": 0.3352, "step": 7108 }, { "epoch": 4.839346494213751, "grad_norm": 1.2277801036834717, "learning_rate": 2.8204447061049856e-08, "loss": 0.1421, "step": 7109 }, { "epoch": 4.84002722940776, "grad_norm": 1.4227277040481567, "learning_rate": 2.7966155317599163e-08, "loss": 0.3449, "step": 7110 }, { "epoch": 4.84070796460177, "grad_norm": 1.4770724773406982, "learning_rate": 2.772887165732763e-08, "loss": 0.2669, "step": 7111 }, { "epoch": 4.84138869979578, "grad_norm": 1.4848079681396484, "learning_rate": 2.749259612834565e-08, "loss": 0.2953, "step": 7112 }, { "epoch": 4.842069434989789, "grad_norm": 1.4797987937927246, "learning_rate": 2.7257328778557135e-08, "loss": 0.3542, "step": 7113 }, { "epoch": 4.842750170183798, "grad_norm": 1.510695457458496, "learning_rate": 2.702306965566337e-08, "loss": 0.2828, "step": 7114 }, { "epoch": 4.843430905377808, "grad_norm": 1.4765928983688354, "learning_rate": 2.678981880716025e-08, "loss": 0.3564, "step": 7115 }, { "epoch": 4.844111640571818, "grad_norm": 1.3240593671798706, "learning_rate": 2.6557576280339393e-08, "loss": 0.2842, "step": 7116 }, { "epoch": 4.844792375765827, "grad_norm": 1.3670690059661865, "learning_rate": 2.6326342122288685e-08, "loss": 0.3239, "step": 7117 }, { "epoch": 4.845473110959837, "grad_norm": 1.3655167818069458, "learning_rate": 2.6096116379890622e-08, "loss": 0.3541, "step": 7118 }, { "epoch": 4.846153846153846, "grad_norm": 1.4197040796279907, "learning_rate": 2.586689909982343e-08, "loss": 0.4533, "step": 7119 }, { "epoch": 4.8468345813478555, "grad_norm": 1.7821630239486694, "learning_rate": 2.5638690328561588e-08, "loss": 0.2937, "step": 7120 }, { "epoch": 4.847515316541865, "grad_norm": 1.3904284238815308, "learning_rate": 2.5411490112374203e-08, "loss": 0.3584, "step": 7121 }, { "epoch": 4.848196051735875, "grad_norm": 1.2740535736083984, "learning_rate": 2.518529849732665e-08, "loss": 0.3053, "step": 7122 }, { "epoch": 4.8488767869298846, "grad_norm": 1.3645590543746948, "learning_rate": 2.4960115529278907e-08, "loss": 0.3537, "step": 7123 }, { "epoch": 4.849557522123893, "grad_norm": 1.442335605621338, "learning_rate": 2.473594125388723e-08, "loss": 0.3321, "step": 7124 }, { "epoch": 4.850238257317903, "grad_norm": 1.4068752527236938, "learning_rate": 2.4512775716603042e-08, "loss": 0.4545, "step": 7125 }, { "epoch": 4.850918992511913, "grad_norm": 1.2936530113220215, "learning_rate": 2.429061896267293e-08, "loss": 0.3771, "step": 7126 }, { "epoch": 4.851599727705922, "grad_norm": 1.4521690607070923, "learning_rate": 2.40694710371403e-08, "loss": 0.3414, "step": 7127 }, { "epoch": 4.852280462899932, "grad_norm": 1.3323384523391724, "learning_rate": 2.3849331984842627e-08, "loss": 0.2297, "step": 7128 }, { "epoch": 4.852961198093942, "grad_norm": 1.4795061349868774, "learning_rate": 2.3630201850412538e-08, "loss": 0.319, "step": 7129 }, { "epoch": 4.8536419332879515, "grad_norm": 1.21659517288208, "learning_rate": 2.3412080678280046e-08, "loss": 0.2606, "step": 7130 }, { "epoch": 4.85432266848196, "grad_norm": 1.4825592041015625, "learning_rate": 2.3194968512668114e-08, "loss": 0.4958, "step": 7131 }, { "epoch": 4.85500340367597, "grad_norm": 1.523219347000122, "learning_rate": 2.297886539759764e-08, "loss": 0.3979, "step": 7132 }, { "epoch": 4.85568413886998, "grad_norm": 1.4006258249282837, "learning_rate": 2.276377137688246e-08, "loss": 0.4243, "step": 7133 }, { "epoch": 4.856364874063989, "grad_norm": 1.4684380292892456, "learning_rate": 2.254968649413325e-08, "loss": 0.48, "step": 7134 }, { "epoch": 4.857045609257999, "grad_norm": 1.3207999467849731, "learning_rate": 2.2336610792756954e-08, "loss": 0.2263, "step": 7135 }, { "epoch": 4.857726344452008, "grad_norm": 1.5875420570373535, "learning_rate": 2.2124544315953455e-08, "loss": 0.3484, "step": 7136 }, { "epoch": 4.8584070796460175, "grad_norm": 1.3319414854049683, "learning_rate": 2.1913487106720588e-08, "loss": 0.2587, "step": 7137 }, { "epoch": 4.859087814840027, "grad_norm": 1.4845644235610962, "learning_rate": 2.170343920784912e-08, "loss": 0.2982, "step": 7138 }, { "epoch": 4.859768550034037, "grad_norm": 1.484757661819458, "learning_rate": 2.1494400661927207e-08, "loss": 0.4131, "step": 7139 }, { "epoch": 4.8604492852280465, "grad_norm": 1.3939192295074463, "learning_rate": 2.1286371511337613e-08, "loss": 0.3032, "step": 7140 }, { "epoch": 4.861130020422056, "grad_norm": 1.2955337762832642, "learning_rate": 2.107935179825771e-08, "loss": 0.4054, "step": 7141 }, { "epoch": 4.861810755616065, "grad_norm": 1.4709713459014893, "learning_rate": 2.0873341564661697e-08, "loss": 0.2819, "step": 7142 }, { "epoch": 4.862491490810075, "grad_norm": 1.4708539247512817, "learning_rate": 2.0668340852317837e-08, "loss": 0.3846, "step": 7143 }, { "epoch": 4.863172226004084, "grad_norm": 1.5931419134140015, "learning_rate": 2.0464349702789542e-08, "loss": 0.288, "step": 7144 }, { "epoch": 4.863852961198094, "grad_norm": 1.4259613752365112, "learning_rate": 2.0261368157437624e-08, "loss": 0.4128, "step": 7145 }, { "epoch": 4.864533696392104, "grad_norm": 1.3832048177719116, "learning_rate": 2.005939625741582e-08, "loss": 0.534, "step": 7146 }, { "epoch": 4.8652144315861126, "grad_norm": 1.4453190565109253, "learning_rate": 1.9858434043673602e-08, "loss": 0.4603, "step": 7147 }, { "epoch": 4.865895166780122, "grad_norm": 1.4798283576965332, "learning_rate": 1.9658481556957264e-08, "loss": 0.3781, "step": 7148 }, { "epoch": 4.866575901974132, "grad_norm": 1.5071734189987183, "learning_rate": 1.9459538837806603e-08, "loss": 0.4231, "step": 7149 }, { "epoch": 4.867256637168142, "grad_norm": 1.373756766319275, "learning_rate": 1.926160592655768e-08, "loss": 0.2093, "step": 7150 }, { "epoch": 4.867937372362151, "grad_norm": 1.4511773586273193, "learning_rate": 1.9064682863341177e-08, "loss": 0.2743, "step": 7151 }, { "epoch": 4.868618107556161, "grad_norm": 1.4748244285583496, "learning_rate": 1.8868769688084043e-08, "loss": 0.2218, "step": 7152 }, { "epoch": 4.869298842750171, "grad_norm": 1.3203951120376587, "learning_rate": 1.8673866440507282e-08, "loss": 0.3335, "step": 7153 }, { "epoch": 4.8699795779441795, "grad_norm": 1.439302682876587, "learning_rate": 1.8479973160127617e-08, "loss": 0.3263, "step": 7154 }, { "epoch": 4.870660313138189, "grad_norm": 1.3213084936141968, "learning_rate": 1.8287089886257493e-08, "loss": 0.3157, "step": 7155 }, { "epoch": 4.871341048332199, "grad_norm": 1.360066533088684, "learning_rate": 1.8095216658003956e-08, "loss": 0.3893, "step": 7156 }, { "epoch": 4.8720217835262085, "grad_norm": 1.3961602449417114, "learning_rate": 1.7904353514268668e-08, "loss": 0.2664, "step": 7157 }, { "epoch": 4.872702518720218, "grad_norm": 1.4189881086349487, "learning_rate": 1.771450049375012e-08, "loss": 0.3302, "step": 7158 }, { "epoch": 4.873383253914227, "grad_norm": 1.4324049949645996, "learning_rate": 1.7525657634941407e-08, "loss": 0.2454, "step": 7159 }, { "epoch": 4.874063989108237, "grad_norm": 1.4596971273422241, "learning_rate": 1.7337824976129126e-08, "loss": 0.356, "step": 7160 }, { "epoch": 4.874744724302246, "grad_norm": 1.3744257688522339, "learning_rate": 1.7151002555397813e-08, "loss": 0.3337, "step": 7161 }, { "epoch": 4.875425459496256, "grad_norm": 1.4267204999923706, "learning_rate": 1.6965190410625497e-08, "loss": 0.3614, "step": 7162 }, { "epoch": 4.876106194690266, "grad_norm": 1.6324304342269897, "learning_rate": 1.678038857948594e-08, "loss": 0.2943, "step": 7163 }, { "epoch": 4.876786929884275, "grad_norm": 1.2619701623916626, "learning_rate": 1.6596597099446942e-08, "loss": 0.2567, "step": 7164 }, { "epoch": 4.877467665078284, "grad_norm": 1.3687498569488525, "learning_rate": 1.6413816007773696e-08, "loss": 0.2589, "step": 7165 }, { "epoch": 4.878148400272294, "grad_norm": 1.3622735738754272, "learning_rate": 1.6232045341523784e-08, "loss": 0.4183, "step": 7166 }, { "epoch": 4.878829135466304, "grad_norm": 1.5208184719085693, "learning_rate": 1.6051285137552163e-08, "loss": 0.4045, "step": 7167 }, { "epoch": 4.879509870660313, "grad_norm": 1.4179799556732178, "learning_rate": 1.5871535432507856e-08, "loss": 0.3398, "step": 7168 }, { "epoch": 4.880190605854323, "grad_norm": 1.3870993852615356, "learning_rate": 1.5692796262835043e-08, "loss": 0.3229, "step": 7169 }, { "epoch": 4.880871341048332, "grad_norm": 1.5402636528015137, "learning_rate": 1.5515067664774176e-08, "loss": 0.3544, "step": 7170 }, { "epoch": 4.881552076242341, "grad_norm": 1.3736317157745361, "learning_rate": 1.5338349674358098e-08, "loss": 0.3828, "step": 7171 }, { "epoch": 4.882232811436351, "grad_norm": 1.291427493095398, "learning_rate": 1.5162642327418154e-08, "loss": 0.2375, "step": 7172 }, { "epoch": 4.882913546630361, "grad_norm": 1.4538984298706055, "learning_rate": 1.4987945659578617e-08, "loss": 0.47, "step": 7173 }, { "epoch": 4.8835942818243705, "grad_norm": 1.4574722051620483, "learning_rate": 1.4814259706258938e-08, "loss": 0.367, "step": 7174 }, { "epoch": 4.88427501701838, "grad_norm": 1.574761986732483, "learning_rate": 1.4641584502674832e-08, "loss": 0.4674, "step": 7175 }, { "epoch": 4.88495575221239, "grad_norm": 1.4397732019424438, "learning_rate": 1.4469920083834964e-08, "loss": 0.4095, "step": 7176 }, { "epoch": 4.885636487406399, "grad_norm": 1.3821024894714355, "learning_rate": 1.4299266484545937e-08, "loss": 0.2458, "step": 7177 }, { "epoch": 4.886317222600408, "grad_norm": 1.3290119171142578, "learning_rate": 1.4129623739407294e-08, "loss": 0.2275, "step": 7178 }, { "epoch": 4.886997957794418, "grad_norm": 1.3879631757736206, "learning_rate": 1.3960991882813745e-08, "loss": 0.1836, "step": 7179 }, { "epoch": 4.887678692988428, "grad_norm": 1.3973932266235352, "learning_rate": 1.379337094895572e-08, "loss": 0.5143, "step": 7180 }, { "epoch": 4.888359428182437, "grad_norm": 1.3611263036727905, "learning_rate": 1.3626760971819365e-08, "loss": 0.4336, "step": 7181 }, { "epoch": 4.889040163376446, "grad_norm": 1.355540156364441, "learning_rate": 1.3461161985183768e-08, "loss": 0.2178, "step": 7182 }, { "epoch": 4.889720898570456, "grad_norm": 1.4983245134353638, "learning_rate": 1.3296574022624854e-08, "loss": 0.2743, "step": 7183 }, { "epoch": 4.890401633764466, "grad_norm": 1.3811697959899902, "learning_rate": 1.3132997117512591e-08, "loss": 0.2542, "step": 7184 }, { "epoch": 4.891082368958475, "grad_norm": 1.342526912689209, "learning_rate": 1.2970431303012676e-08, "loss": 0.3121, "step": 7185 }, { "epoch": 4.891763104152485, "grad_norm": 1.4288722276687622, "learning_rate": 1.2808876612085408e-08, "loss": 0.3423, "step": 7186 }, { "epoch": 4.892443839346495, "grad_norm": 1.4758611917495728, "learning_rate": 1.2648333077486252e-08, "loss": 0.4362, "step": 7187 }, { "epoch": 4.893124574540503, "grad_norm": 1.3866634368896484, "learning_rate": 1.2488800731764728e-08, "loss": 0.4348, "step": 7188 }, { "epoch": 4.893805309734513, "grad_norm": 1.3268646001815796, "learning_rate": 1.2330279607266626e-08, "loss": 0.3985, "step": 7189 }, { "epoch": 4.894486044928523, "grad_norm": 1.4706050157546997, "learning_rate": 1.2172769736132905e-08, "loss": 0.3888, "step": 7190 }, { "epoch": 4.8951667801225325, "grad_norm": 1.5425902605056763, "learning_rate": 1.2016271150297465e-08, "loss": 0.4375, "step": 7191 }, { "epoch": 4.895847515316542, "grad_norm": 1.446274757385254, "learning_rate": 1.1860783881491589e-08, "loss": 0.3403, "step": 7192 }, { "epoch": 4.896528250510551, "grad_norm": 1.4622141122817993, "learning_rate": 1.1706307961239504e-08, "loss": 0.3528, "step": 7193 }, { "epoch": 4.897208985704561, "grad_norm": 1.4332447052001953, "learning_rate": 1.155284342086227e-08, "loss": 0.2749, "step": 7194 }, { "epoch": 4.89788972089857, "grad_norm": 1.407638669013977, "learning_rate": 1.140039029147444e-08, "loss": 0.4674, "step": 7195 }, { "epoch": 4.89857045609258, "grad_norm": 1.4300587177276611, "learning_rate": 1.1248948603985732e-08, "loss": 0.3964, "step": 7196 }, { "epoch": 4.89925119128659, "grad_norm": 1.440743327140808, "learning_rate": 1.1098518389101587e-08, "loss": 0.3433, "step": 7197 }, { "epoch": 4.899931926480599, "grad_norm": 1.2970082759857178, "learning_rate": 1.0949099677321495e-08, "loss": 0.3366, "step": 7198 }, { "epoch": 4.900612661674609, "grad_norm": 1.4988069534301758, "learning_rate": 1.0800692498940113e-08, "loss": 0.3601, "step": 7199 }, { "epoch": 4.901293396868618, "grad_norm": 1.3535337448120117, "learning_rate": 1.065329688404726e-08, "loss": 0.3134, "step": 7200 }, { "epoch": 4.9019741320626276, "grad_norm": 1.5271937847137451, "learning_rate": 1.0506912862527363e-08, "loss": 0.3597, "step": 7201 }, { "epoch": 4.902654867256637, "grad_norm": 1.3133279085159302, "learning_rate": 1.0361540464060016e-08, "loss": 0.3823, "step": 7202 }, { "epoch": 4.903335602450647, "grad_norm": 1.4522563219070435, "learning_rate": 1.0217179718119974e-08, "loss": 0.2026, "step": 7203 }, { "epoch": 4.904016337644657, "grad_norm": 1.3211925029754639, "learning_rate": 1.007383065397549e-08, "loss": 0.2312, "step": 7204 }, { "epoch": 4.904697072838665, "grad_norm": 1.3247402906417847, "learning_rate": 9.931493300691652e-09, "loss": 0.3853, "step": 7205 }, { "epoch": 4.905377808032675, "grad_norm": 1.35390305519104, "learning_rate": 9.790167687127039e-09, "loss": 0.3611, "step": 7206 }, { "epoch": 4.906058543226685, "grad_norm": 1.34708833694458, "learning_rate": 9.649853841935397e-09, "loss": 0.3975, "step": 7207 }, { "epoch": 4.9067392784206945, "grad_norm": 1.4619996547698975, "learning_rate": 9.510551793565636e-09, "loss": 0.3887, "step": 7208 }, { "epoch": 4.907420013614704, "grad_norm": 1.43156898021698, "learning_rate": 9.372261570261276e-09, "loss": 0.3885, "step": 7209 }, { "epoch": 4.908100748808714, "grad_norm": 1.4980560541152954, "learning_rate": 9.234983200060999e-09, "loss": 0.3028, "step": 7210 }, { "epoch": 4.908781484002723, "grad_norm": 1.3941092491149902, "learning_rate": 9.09871671079754e-09, "loss": 0.4586, "step": 7211 }, { "epoch": 4.909462219196732, "grad_norm": 1.4384541511535645, "learning_rate": 8.963462130099354e-09, "loss": 0.4274, "step": 7212 }, { "epoch": 4.910142954390742, "grad_norm": 1.4940721988677979, "learning_rate": 8.829219485389507e-09, "loss": 0.2916, "step": 7213 }, { "epoch": 4.910823689584752, "grad_norm": 1.165771245956421, "learning_rate": 8.695988803885668e-09, "loss": 0.2592, "step": 7214 }, { "epoch": 4.911504424778761, "grad_norm": 1.416357398033142, "learning_rate": 8.563770112600122e-09, "loss": 0.3412, "step": 7215 }, { "epoch": 4.91218515997277, "grad_norm": 1.4293084144592285, "learning_rate": 8.432563438340869e-09, "loss": 0.3095, "step": 7216 }, { "epoch": 4.91286589516678, "grad_norm": 1.3245190382003784, "learning_rate": 8.30236880770996e-09, "loss": 0.4187, "step": 7217 }, { "epoch": 4.9135466303607895, "grad_norm": 1.4263368844985962, "learning_rate": 8.17318624710406e-09, "loss": 0.326, "step": 7218 }, { "epoch": 4.914227365554799, "grad_norm": 1.329115390777588, "learning_rate": 8.045015782715549e-09, "loss": 0.3827, "step": 7219 }, { "epoch": 4.914908100748809, "grad_norm": 1.4288030862808228, "learning_rate": 7.917857440530863e-09, "loss": 0.2676, "step": 7220 }, { "epoch": 4.915588835942819, "grad_norm": 1.3010754585266113, "learning_rate": 7.791711246331046e-09, "loss": 0.244, "step": 7221 }, { "epoch": 4.916269571136827, "grad_norm": 1.4798753261566162, "learning_rate": 7.666577225693417e-09, "loss": 0.3247, "step": 7222 }, { "epoch": 4.916950306330837, "grad_norm": 1.4055064916610718, "learning_rate": 7.54245540398768e-09, "loss": 0.3666, "step": 7223 }, { "epoch": 4.917631041524847, "grad_norm": 1.3500690460205078, "learning_rate": 7.419345806380374e-09, "loss": 0.5407, "step": 7224 }, { "epoch": 4.918311776718856, "grad_norm": 1.3226724863052368, "learning_rate": 7.297248457832085e-09, "loss": 0.3012, "step": 7225 }, { "epoch": 4.918992511912866, "grad_norm": 1.4028385877609253, "learning_rate": 7.176163383097457e-09, "loss": 0.3966, "step": 7226 }, { "epoch": 4.919673247106876, "grad_norm": 1.4792696237564087, "learning_rate": 7.056090606727406e-09, "loss": 0.3035, "step": 7227 }, { "epoch": 4.920353982300885, "grad_norm": 1.3405731916427612, "learning_rate": 6.937030153066349e-09, "loss": 0.2192, "step": 7228 }, { "epoch": 4.921034717494894, "grad_norm": 1.4240148067474365, "learning_rate": 6.818982046253863e-09, "loss": 0.325, "step": 7229 }, { "epoch": 4.921715452688904, "grad_norm": 1.374204158782959, "learning_rate": 6.701946310224694e-09, "loss": 0.2798, "step": 7230 }, { "epoch": 4.922396187882914, "grad_norm": 1.4597947597503662, "learning_rate": 6.585922968707081e-09, "loss": 0.2632, "step": 7231 }, { "epoch": 4.923076923076923, "grad_norm": 1.4329804182052612, "learning_rate": 6.470912045225541e-09, "loss": 0.5219, "step": 7232 }, { "epoch": 4.923757658270933, "grad_norm": 1.636412501335144, "learning_rate": 6.356913563098643e-09, "loss": 0.3516, "step": 7233 }, { "epoch": 4.924438393464942, "grad_norm": 1.351597547531128, "learning_rate": 6.24392754543901e-09, "loss": 0.2808, "step": 7234 }, { "epoch": 4.9251191286589515, "grad_norm": 1.3918778896331787, "learning_rate": 6.131954015154984e-09, "loss": 0.3669, "step": 7235 }, { "epoch": 4.925799863852961, "grad_norm": 1.473175048828125, "learning_rate": 6.020992994949515e-09, "loss": 0.472, "step": 7236 }, { "epoch": 4.926480599046971, "grad_norm": 1.4153259992599487, "learning_rate": 5.911044507320163e-09, "loss": 0.3721, "step": 7237 }, { "epoch": 4.927161334240981, "grad_norm": 1.5686932802200317, "learning_rate": 5.802108574557985e-09, "loss": 0.4352, "step": 7238 }, { "epoch": 4.927842069434989, "grad_norm": 1.3776376247406006, "learning_rate": 5.694185218750869e-09, "loss": 0.4629, "step": 7239 }, { "epoch": 4.928522804628999, "grad_norm": 1.4553083181381226, "learning_rate": 5.587274461780201e-09, "loss": 0.4463, "step": 7240 }, { "epoch": 4.929203539823009, "grad_norm": 1.4621185064315796, "learning_rate": 5.481376325321974e-09, "loss": 0.2445, "step": 7241 }, { "epoch": 4.929884275017018, "grad_norm": 1.327610731124878, "learning_rate": 5.376490830847347e-09, "loss": 0.3714, "step": 7242 }, { "epoch": 4.930565010211028, "grad_norm": 1.380557894706726, "learning_rate": 5.272617999621532e-09, "loss": 0.4763, "step": 7243 }, { "epoch": 4.931245745405038, "grad_norm": 1.4077479839324951, "learning_rate": 5.1697578527049045e-09, "loss": 0.4151, "step": 7244 }, { "epoch": 4.931926480599047, "grad_norm": 1.3172930479049683, "learning_rate": 5.067910410953003e-09, "loss": 0.2767, "step": 7245 }, { "epoch": 4.932607215793056, "grad_norm": 1.3873717784881592, "learning_rate": 4.967075695014312e-09, "loss": 0.1623, "step": 7246 }, { "epoch": 4.933287950987066, "grad_norm": 1.4281977415084839, "learning_rate": 4.867253725334142e-09, "loss": 0.4153, "step": 7247 }, { "epoch": 4.933968686181076, "grad_norm": 1.4628478288650513, "learning_rate": 4.768444522151306e-09, "loss": 0.3668, "step": 7248 }, { "epoch": 4.934649421375085, "grad_norm": 1.3923815488815308, "learning_rate": 4.67064810549922e-09, "loss": 0.4062, "step": 7249 }, { "epoch": 4.935330156569094, "grad_norm": 1.341718316078186, "learning_rate": 4.57386449520647e-09, "loss": 0.3406, "step": 7250 }, { "epoch": 4.936010891763104, "grad_norm": 1.514147162437439, "learning_rate": 4.478093710895692e-09, "loss": 0.1899, "step": 7251 }, { "epoch": 4.9366916269571135, "grad_norm": 1.4595249891281128, "learning_rate": 4.383335771984132e-09, "loss": 0.4112, "step": 7252 }, { "epoch": 4.937372362151123, "grad_norm": 1.341776967048645, "learning_rate": 4.289590697684754e-09, "loss": 0.4173, "step": 7253 }, { "epoch": 4.938053097345133, "grad_norm": 1.419694185256958, "learning_rate": 4.19685850700402e-09, "loss": 0.3671, "step": 7254 }, { "epoch": 4.9387338325391426, "grad_norm": 1.4377061128616333, "learning_rate": 4.1051392187441144e-09, "loss": 0.3438, "step": 7255 }, { "epoch": 4.939414567733152, "grad_norm": 1.37592613697052, "learning_rate": 4.014432851500161e-09, "loss": 0.5184, "step": 7256 }, { "epoch": 4.940095302927161, "grad_norm": 1.3527828454971313, "learning_rate": 3.9247394236630045e-09, "loss": 0.3003, "step": 7257 }, { "epoch": 4.940776038121171, "grad_norm": 1.480565071105957, "learning_rate": 3.836058953419208e-09, "loss": 0.2274, "step": 7258 }, { "epoch": 4.94145677331518, "grad_norm": 1.4590873718261719, "learning_rate": 3.748391458747724e-09, "loss": 0.4015, "step": 7259 }, { "epoch": 4.94213750850919, "grad_norm": 1.54765784740448, "learning_rate": 3.661736957423223e-09, "loss": 0.4553, "step": 7260 }, { "epoch": 4.9428182437032, "grad_norm": 1.370877981185913, "learning_rate": 3.576095467015539e-09, "loss": 0.4779, "step": 7261 }, { "epoch": 4.943498978897209, "grad_norm": 1.4246457815170288, "learning_rate": 3.491467004888005e-09, "loss": 0.3491, "step": 7262 }, { "epoch": 4.944179714091218, "grad_norm": 1.3948227167129517, "learning_rate": 3.407851588199673e-09, "loss": 0.3412, "step": 7263 }, { "epoch": 4.944860449285228, "grad_norm": 1.4633419513702393, "learning_rate": 3.325249233903094e-09, "loss": 0.3807, "step": 7264 }, { "epoch": 4.945541184479238, "grad_norm": 1.4860085248947144, "learning_rate": 3.2436599587459815e-09, "loss": 0.5888, "step": 7265 }, { "epoch": 4.946221919673247, "grad_norm": 1.4619797468185425, "learning_rate": 3.1630837792712143e-09, "loss": 0.3314, "step": 7266 }, { "epoch": 4.946902654867257, "grad_norm": 1.4000015258789062, "learning_rate": 3.0835207118146136e-09, "loss": 0.2712, "step": 7267 }, { "epoch": 4.947583390061266, "grad_norm": 1.7227269411087036, "learning_rate": 3.0049707725088307e-09, "loss": 0.2785, "step": 7268 }, { "epoch": 4.9482641252552755, "grad_norm": 1.2380897998809814, "learning_rate": 2.927433977278904e-09, "loss": 0.3758, "step": 7269 }, { "epoch": 4.948944860449285, "grad_norm": 1.3525584936141968, "learning_rate": 2.850910341846147e-09, "loss": 0.3315, "step": 7270 }, { "epoch": 4.949625595643295, "grad_norm": 1.5383310317993164, "learning_rate": 2.7753998817259264e-09, "loss": 0.2612, "step": 7271 }, { "epoch": 4.9503063308373045, "grad_norm": 1.526383876800537, "learning_rate": 2.7009026122271076e-09, "loss": 0.3081, "step": 7272 }, { "epoch": 4.950987066031313, "grad_norm": 1.4841961860656738, "learning_rate": 2.62741854845483e-09, "loss": 0.2617, "step": 7273 }, { "epoch": 4.951667801225323, "grad_norm": 1.3506885766983032, "learning_rate": 2.554947705307731e-09, "loss": 0.2551, "step": 7274 }, { "epoch": 4.952348536419333, "grad_norm": 1.4514245986938477, "learning_rate": 2.4834900974796126e-09, "loss": 0.4639, "step": 7275 }, { "epoch": 4.953029271613342, "grad_norm": 1.4346450567245483, "learning_rate": 2.4130457394577754e-09, "loss": 0.4768, "step": 7276 }, { "epoch": 4.953710006807352, "grad_norm": 1.2731906175613403, "learning_rate": 2.3436146455263485e-09, "loss": 0.252, "step": 7277 }, { "epoch": 4.954390742001362, "grad_norm": 1.4647144079208374, "learning_rate": 2.2751968297607396e-09, "loss": 0.22, "step": 7278 }, { "epoch": 4.955071477195371, "grad_norm": 1.431475281715393, "learning_rate": 2.2077923060342956e-09, "loss": 0.4107, "step": 7279 }, { "epoch": 4.95575221238938, "grad_norm": 1.3439292907714844, "learning_rate": 2.1414010880121962e-09, "loss": 0.362, "step": 7280 }, { "epoch": 4.95643294758339, "grad_norm": 1.4260802268981934, "learning_rate": 2.0760231891558957e-09, "loss": 0.2556, "step": 7281 }, { "epoch": 4.9571136827774, "grad_norm": 1.4925892353057861, "learning_rate": 2.0116586227203473e-09, "loss": 0.3404, "step": 7282 }, { "epoch": 4.957794417971409, "grad_norm": 1.347633957862854, "learning_rate": 1.948307401756222e-09, "loss": 0.3001, "step": 7283 }, { "epoch": 4.958475153165419, "grad_norm": 1.3491458892822266, "learning_rate": 1.8859695391071353e-09, "loss": 0.2932, "step": 7284 }, { "epoch": 4.959155888359428, "grad_norm": 1.5007474422454834, "learning_rate": 1.8246450474129763e-09, "loss": 0.2119, "step": 7285 }, { "epoch": 4.9598366235534375, "grad_norm": 1.6048427820205688, "learning_rate": 1.7643339391065773e-09, "loss": 0.3513, "step": 7286 }, { "epoch": 4.960517358747447, "grad_norm": 1.343571424484253, "learning_rate": 1.705036226417045e-09, "loss": 0.2958, "step": 7287 }, { "epoch": 4.961198093941457, "grad_norm": 1.4435904026031494, "learning_rate": 1.6467519213658745e-09, "loss": 0.3391, "step": 7288 }, { "epoch": 4.9618788291354665, "grad_norm": 1.3017866611480713, "learning_rate": 1.5894810357708347e-09, "loss": 0.1838, "step": 7289 }, { "epoch": 4.962559564329476, "grad_norm": 1.4250668287277222, "learning_rate": 1.533223581243748e-09, "loss": 0.2223, "step": 7290 }, { "epoch": 4.963240299523485, "grad_norm": 1.3578444719314575, "learning_rate": 1.4779795691904907e-09, "loss": 0.346, "step": 7291 }, { "epoch": 4.963921034717495, "grad_norm": 1.3947213888168335, "learning_rate": 1.4237490108121033e-09, "loss": 0.3888, "step": 7292 }, { "epoch": 4.964601769911504, "grad_norm": 1.303780198097229, "learning_rate": 1.3705319171042341e-09, "loss": 0.2524, "step": 7293 }, { "epoch": 4.965282505105514, "grad_norm": 1.4619578123092651, "learning_rate": 1.3183282988554757e-09, "loss": 0.381, "step": 7294 }, { "epoch": 4.965963240299524, "grad_norm": 1.6667922735214233, "learning_rate": 1.2671381666512494e-09, "loss": 0.3621, "step": 7295 }, { "epoch": 4.9666439754935325, "grad_norm": 1.348131537437439, "learning_rate": 1.2169615308704751e-09, "loss": 0.3499, "step": 7296 }, { "epoch": 4.967324710687542, "grad_norm": 2.273983955383301, "learning_rate": 1.1677984016855715e-09, "loss": 0.2443, "step": 7297 }, { "epoch": 4.968005445881552, "grad_norm": 1.3584544658660889, "learning_rate": 1.1196487890652308e-09, "loss": 0.3844, "step": 7298 }, { "epoch": 4.968686181075562, "grad_norm": 1.4195045232772827, "learning_rate": 1.072512702771089e-09, "loss": 0.3757, "step": 7299 }, { "epoch": 4.969366916269571, "grad_norm": 1.811063289642334, "learning_rate": 1.0263901523605014e-09, "loss": 0.5018, "step": 7300 }, { "epoch": 4.970047651463581, "grad_norm": 1.4914082288742065, "learning_rate": 9.81281147184876e-10, "loss": 0.3348, "step": 7301 }, { "epoch": 4.970728386657591, "grad_norm": 1.4479435682296753, "learning_rate": 9.371856963896753e-10, "loss": 0.3413, "step": 7302 }, { "epoch": 4.9714091218515994, "grad_norm": 1.301883339881897, "learning_rate": 8.941038089160803e-10, "loss": 0.3225, "step": 7303 }, { "epoch": 4.972089857045609, "grad_norm": 1.3710325956344604, "learning_rate": 8.520354934982155e-10, "loss": 0.3787, "step": 7304 }, { "epoch": 4.972770592239619, "grad_norm": 1.4036242961883545, "learning_rate": 8.109807586653695e-10, "loss": 0.244, "step": 7305 }, { "epoch": 4.9734513274336285, "grad_norm": 1.3577402830123901, "learning_rate": 7.709396127425494e-10, "loss": 0.3572, "step": 7306 }, { "epoch": 4.974132062627638, "grad_norm": 1.3304187059402466, "learning_rate": 7.31912063846596e-10, "loss": 0.3492, "step": 7307 }, { "epoch": 4.974812797821647, "grad_norm": 1.5149956941604614, "learning_rate": 6.938981198917338e-10, "loss": 0.4435, "step": 7308 }, { "epoch": 4.975493533015657, "grad_norm": 1.415859341621399, "learning_rate": 6.568977885840211e-10, "loss": 0.2722, "step": 7309 }, { "epoch": 4.976174268209666, "grad_norm": 1.392090916633606, "learning_rate": 6.209110774263449e-10, "loss": 0.2634, "step": 7310 }, { "epoch": 4.976855003403676, "grad_norm": 1.3072715997695923, "learning_rate": 5.85937993714536e-10, "loss": 0.2467, "step": 7311 }, { "epoch": 4.977535738597686, "grad_norm": 1.4143497943878174, "learning_rate": 5.519785445401437e-10, "loss": 0.2725, "step": 7312 }, { "epoch": 4.978216473791695, "grad_norm": 1.4016669988632202, "learning_rate": 5.190327367871062e-10, "loss": 0.2907, "step": 7313 }, { "epoch": 4.978897208985704, "grad_norm": 1.4132447242736816, "learning_rate": 4.871005771361903e-10, "loss": 0.4354, "step": 7314 }, { "epoch": 4.979577944179714, "grad_norm": 1.4291924238204956, "learning_rate": 4.5618207206166164e-10, "loss": 0.2888, "step": 7315 }, { "epoch": 4.980258679373724, "grad_norm": 1.3145562410354614, "learning_rate": 4.2627722783183943e-10, "loss": 0.359, "step": 7316 }, { "epoch": 4.980939414567733, "grad_norm": 1.415999412536621, "learning_rate": 3.9738605051020675e-10, "loss": 0.2715, "step": 7317 }, { "epoch": 4.981620149761743, "grad_norm": 1.508621096611023, "learning_rate": 3.695085459543002e-10, "loss": 0.3058, "step": 7318 }, { "epoch": 4.982300884955752, "grad_norm": 1.4157618284225464, "learning_rate": 3.4264471981626525e-10, "loss": 0.2836, "step": 7319 }, { "epoch": 4.982981620149761, "grad_norm": 1.2763605117797852, "learning_rate": 3.1679457754341113e-10, "loss": 0.3373, "step": 7320 }, { "epoch": 4.983662355343771, "grad_norm": 1.4782259464263916, "learning_rate": 2.919581243759906e-10, "loss": 0.3817, "step": 7321 }, { "epoch": 4.984343090537781, "grad_norm": 1.4032889604568481, "learning_rate": 2.681353653499752e-10, "loss": 0.2685, "step": 7322 }, { "epoch": 4.9850238257317905, "grad_norm": 1.3768481016159058, "learning_rate": 2.4532630529594537e-10, "loss": 0.3087, "step": 7323 }, { "epoch": 4.9857045609258, "grad_norm": 1.3734564781188965, "learning_rate": 2.235309488374249e-10, "loss": 0.4377, "step": 7324 }, { "epoch": 4.98638529611981, "grad_norm": 1.3652901649475098, "learning_rate": 2.027493003942116e-10, "loss": 0.2688, "step": 7325 }, { "epoch": 4.987066031313819, "grad_norm": 1.4022290706634521, "learning_rate": 1.8298136417960188e-10, "loss": 0.4025, "step": 7326 }, { "epoch": 4.987746766507828, "grad_norm": 1.4316959381103516, "learning_rate": 1.6422714420150088e-10, "loss": 0.2534, "step": 7327 }, { "epoch": 4.988427501701838, "grad_norm": 1.410892128944397, "learning_rate": 1.4648664426242243e-10, "loss": 0.4566, "step": 7328 }, { "epoch": 4.989108236895848, "grad_norm": 1.385138988494873, "learning_rate": 1.2975986795893402e-10, "loss": 0.1852, "step": 7329 }, { "epoch": 4.989788972089857, "grad_norm": 1.5980631113052368, "learning_rate": 1.1404681868332212e-10, "loss": 0.2808, "step": 7330 }, { "epoch": 4.990469707283866, "grad_norm": 1.4162646532058716, "learning_rate": 9.934749962026147e-11, "loss": 0.4981, "step": 7331 }, { "epoch": 4.991150442477876, "grad_norm": 1.4853359460830688, "learning_rate": 8.566191375070088e-11, "loss": 0.2742, "step": 7332 }, { "epoch": 4.9918311776718856, "grad_norm": 1.437730073928833, "learning_rate": 7.299006384908769e-11, "loss": 0.3129, "step": 7333 }, { "epoch": 4.992511912865895, "grad_norm": 1.3525949716567993, "learning_rate": 6.133195248503309e-11, "loss": 0.3991, "step": 7334 }, { "epoch": 4.993192648059905, "grad_norm": 1.283522129058838, "learning_rate": 5.068758202164681e-11, "loss": 0.4024, "step": 7335 }, { "epoch": 4.993873383253915, "grad_norm": 1.2930777072906494, "learning_rate": 4.105695461775749e-11, "loss": 0.4308, "step": 7336 }, { "epoch": 4.994554118447923, "grad_norm": 1.3882343769073486, "learning_rate": 3.244007222569234e-11, "loss": 0.315, "step": 7337 }, { "epoch": 4.995234853641933, "grad_norm": 1.4248794317245483, "learning_rate": 2.4836936592387283e-11, "loss": 0.4902, "step": 7338 }, { "epoch": 4.995915588835943, "grad_norm": 1.4660519361495972, "learning_rate": 1.8247549259386988e-11, "loss": 0.2941, "step": 7339 }, { "epoch": 4.9965963240299525, "grad_norm": 1.503678321838379, "learning_rate": 1.2671911563399975e-11, "loss": 0.4493, "step": 7340 }, { "epoch": 4.997277059223962, "grad_norm": 1.4689490795135498, "learning_rate": 8.110024633523061e-12, "loss": 0.3508, "step": 7341 }, { "epoch": 4.997957794417971, "grad_norm": 1.2619237899780273, "learning_rate": 4.561889396237362e-12, "loss": 0.2285, "step": 7342 }, { "epoch": 4.998638529611981, "grad_norm": 1.3391642570495605, "learning_rate": 2.0275065693020622e-12, "loss": 0.2308, "step": 7343 }, { "epoch": 4.99931926480599, "grad_norm": 1.4951974153518677, "learning_rate": 5.068766678606452e-13, "loss": 0.4098, "step": 7344 }, { "epoch": 5.0, "grad_norm": 1.3021950721740723, "learning_rate": 0.0, "loss": 0.3612, "step": 7345 } ], "logging_steps": 1.0, "max_steps": 7345, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.221104209541202e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }